diff --git a/.github/prehooks/pre-commit b/.github/prehooks/pre-commit index 3d05a7e7a..7f3c615b7 100755 --- a/.github/prehooks/pre-commit +++ b/.github/prehooks/pre-commit @@ -5,13 +5,16 @@ echo "Running pre-commit hook..." # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ echo ">> Updating R documentation..." if command -v Rscript > /dev/null; then - if [ "$(Rscript -e 'cat(all(c('"'pkgload'"', '"'devtools'"', '"'dplyr'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then - Rscript -e "source('data-raw/pre-commit-hook.R')" + if [ "$(Rscript -e 'cat(all(c('"'pkgload'"', '"'devtools'"', '"'dplyr'"', '"'styler'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then + Rscript -e "source('data-raw/_pre_commit_hook.R')" currentpkg=`Rscript -e "cat(pkgload::pkg_name())"` + echo ">> Adding all files in folders 'data-raw', 'inst', 'man', and 'R' to this git commit" + git add data-raw/* + git add inst/* git add man/* - git add R/sysdata.rda + git add R/* else - echo ">> R package 'pkgload', 'devtools', or 'dplyr' not installed!" + echo ">> R package 'pkgload', 'devtools', 'dplyr', or 'styler' not installed!" currentpkg="your" fi else diff --git a/DESCRIPTION b/DESCRIPTION index e1c9ccd89..c1674e477 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.8.1.9032 -Date: 2022-08-27 +Version: 1.8.1.9033 +Date: 2022-08-28 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) data analysis and to work with microbial and antimicrobial properties by diff --git a/NEWS.md b/NEWS.md index 1a44c2bbb..182462501 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.8.1.9032 +# AMR 1.8.1.9033 ### New * EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disks diffusion interpretations. @@ -25,6 +25,7 @@ * New website to make use of the new Bootstrap 5 and pkgdown v2.0. The website now contains results for all examples and will be automatically regenerated with every change to our repository, using GitHub Actions * Added Peter Dutey-Magni and Anton Mymrikov as contributors, to thank them for their valuable input * Set up Git Large File Storage (Git LFS) for the large SAS and SPSS file formats +* All R and Rmd files in this project are now styled using the `styler` package # `AMR` 1.8.1 diff --git a/R/aa_globals.R b/R/aa_globals.R index e14910196..b5ac71f62 100755 --- a/R/aa_globals.R +++ b/R/aa_globals.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,34 +24,50 @@ # ==================================================================== # # add new version numbers here, and add the rules themselves to "data-raw/eucast_rules.tsv" and rsi_translation -# (sourcing "data-raw/pre-commit-hook.R" will process the TSV file) -EUCAST_VERSION_BREAKPOINTS <- list("11.0" = list(version_txt = "v11.0", - year = 2021, - title = "'EUCAST Clinical Breakpoint Tables'", - url = "https://www.eucast.org/clinical_breakpoints/"), - "10.0" = list(version_txt = "v10.0", - year = 2020, - title = "'EUCAST Clinical Breakpoint Tables'", - url = "https://www.eucast.org/ast_of_bacteria/previous_versions_of_documents/")) -EUCAST_VERSION_EXPERT_RULES <- list("3.1" = list(version_txt = "v3.1", - year = 2016, - title = "'EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes'", - url = "https://www.eucast.org/expert_rules_and_expected_phenotypes/"), - "3.2" = list(version_txt = "v3.2", - year = 2020, - title = "'EUCAST Expert Rules' and 'EUCAST Intrinsic Resistance and Unusual Phenotypes'", - url = "https://www.eucast.org/expert_rules_and_expected_phenotypes/"), - "3.3" = list(version_txt = "v3.3", - year = 2021, - title = "'EUCAST Expert Rules' and 'EUCAST Intrinsic Resistance and Unusual Phenotypes'", - url = "https://www.eucast.org/expert_rules_and_expected_phenotypes/")) +# (sourcing "data-raw/_pre_commit_hook.R" will process the TSV file) +EUCAST_VERSION_BREAKPOINTS <- list( + "11.0" = list( + version_txt = "v11.0", + year = 2021, + title = "'EUCAST Clinical Breakpoint Tables'", + url = "https://www.eucast.org/clinical_breakpoints/" + ), + "10.0" = list( + version_txt = "v10.0", + year = 2020, + title = "'EUCAST Clinical Breakpoint Tables'", + url = "https://www.eucast.org/ast_of_bacteria/previous_versions_of_documents/" + ) +) +EUCAST_VERSION_EXPERT_RULES <- list( + "3.1" = list( + version_txt = "v3.1", + year = 2016, + title = "'EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes'", + url = "https://www.eucast.org/expert_rules_and_expected_phenotypes/" + ), + "3.2" = list( + version_txt = "v3.2", + year = 2020, + title = "'EUCAST Expert Rules' and 'EUCAST Intrinsic Resistance and Unusual Phenotypes'", + url = "https://www.eucast.org/expert_rules_and_expected_phenotypes/" + ), + "3.3" = list( + version_txt = "v3.3", + year = 2021, + title = "'EUCAST Expert Rules' and 'EUCAST Intrinsic Resistance and Unusual Phenotypes'", + url = "https://www.eucast.org/expert_rules_and_expected_phenotypes/" + ) +) -SNOMED_VERSION <- list(title = "Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS)", - current_source = "US Edition of SNOMED CT from 1 September 2020", - current_version = 12, - current_oid = "2.16.840.1.114222.4.11.1009", - value_set_name = "Microorganism", - url = "https://phinvads.cdc.gov/vads/ViewValueSet.action?oid=2.16.840.1.114222.4.11.1009") +SNOMED_VERSION <- list( + title = "Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS)", + current_source = "US Edition of SNOMED CT from 1 September 2020", + current_version = 12, + current_oid = "2.16.840.1.114222.4.11.1009", + value_set_name = "Microorganism", + url = "https://phinvads.cdc.gov/vads/ViewValueSet.action?oid=2.16.840.1.114222.4.11.1009" +) CATALOGUE_OF_LIFE <- list( year = 2019, @@ -61,73 +77,75 @@ CATALOGUE_OF_LIFE <- list( yearmonth_LPSN = "5 October 2021" ) -globalVariables(c(".rowid", - "ab", - "ab_txt", - "affect_ab_name", - "affect_mo_name", - "angle", - "antibiotic", - "antibiotics", - "atc_group1", - "atc_group2", - "base_ab", - "code", - "cols", - "count", - "data", - "disk", - "dosage", - "dose", - "dose_times", - "fullname", - "fullname_lower", - "g_species", - "genus", - "gr", - "group", - "guideline", - "hjust", - "input", - "intrinsic_resistant", - "isolates", - "lang", - "language", - "lookup", - "method", - "mic", - "mic ", - "microorganism", - "microorganisms", - "microorganisms.codes", - "microorganisms.old", - "mo", - "name", - "new", - "observations", - "old", - "old_name", - "pattern", - "R", - "rank_index", - "reference.rule", - "reference.rule_group", - "reference.version", - "rowid", - "rsi", - "rsi_translation", - "rule_group", - "rule_name", - "se_max", - "se_min", - "species", - "species_id", - "total", - "txt", - "type", - "value", - "varname", - "xvar", - "y", - "year", - "yvar")) +globalVariables(c( + ".rowid", + "ab", + "ab_txt", + "affect_ab_name", + "affect_mo_name", + "angle", + "antibiotic", + "antibiotics", + "atc_group1", + "atc_group2", + "base_ab", + "code", + "cols", + "count", + "data", + "disk", + "dosage", + "dose", + "dose_times", + "fullname", + "fullname_lower", + "g_species", + "genus", + "gr", + "group", + "guideline", + "hjust", + "input", + "intrinsic_resistant", + "isolates", + "lang", + "language", + "lookup", + "method", + "mic", + "mic ", + "microorganism", + "microorganisms", + "microorganisms.codes", + "microorganisms.old", + "mo", + "name", + "new", + "observations", + "old", + "old_name", + "pattern", + "R", + "rank_index", + "reference.rule", + "reference.rule_group", + "reference.version", + "rowid", + "rsi", + "rsi_translation", + "rule_group", + "rule_name", + "se_max", + "se_min", + "species", + "species_id", + "total", + "txt", + "type", + "value", + "varname", + "xvar", + "y", + "year", + "yvar" +)) diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index c1572ced3..94c1fa46d 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -43,11 +43,16 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { colnames(x)[int_x] <- paste0(colnames(x)[int_x], suffix[1L]) colnames(y)[int_y] <- paste0(colnames(y)[int_y], suffix[2L]) - merged <- cbind(x, - y[match(x[, by[1], drop = TRUE], - y[, by[2], drop = TRUE]), - colnames(y)[!colnames(y) %in% colnames(x) & !colnames(y) == by[2]], - drop = FALSE]) + merged <- cbind( + x, + y[match( + x[, by[1], drop = TRUE], + y[, by[2], drop = TRUE] + ), + colnames(y)[!colnames(y) %in% colnames(x) & !colnames(y) == by[2]], + drop = FALSE + ] + ) rownames(merged) <- NULL merged @@ -73,64 +78,74 @@ where <- function(fn) { } # copied and slightly rewritten from poorman under same license (2021-10-15) -quick_case_when <- function (...) { +quick_case_when <- function(...) { fs <- list(...) - lapply(fs, function(x) if (class(x) != "formula") - stop("`case_when()` requires formula inputs.")) + lapply(fs, function(x) { + if (class(x) != "formula") { + stop("`case_when()` requires formula inputs.") + } + }) n <- length(fs) - if (n == 0L) + if (n == 0L) { stop("No cases provided.") - - validate_case_when_length <- function (query, value, fs) { + } + + validate_case_when_length <- function(query, value, fs) { lhs_lengths <- lengths(query) rhs_lengths <- lengths(value) all_lengths <- unique(c(lhs_lengths, rhs_lengths)) - if (length(all_lengths) <= 1L) + if (length(all_lengths) <= 1L) { return(all_lengths[[1L]]) + } non_atomic_lengths <- all_lengths[all_lengths != 1L] len <- non_atomic_lengths[[1L]] - if (length(non_atomic_lengths) == 1L) + if (length(non_atomic_lengths) == 1L) { return(len) + } inconsistent_lengths <- non_atomic_lengths[-1L] lhs_problems <- lhs_lengths %in% inconsistent_lengths rhs_problems <- rhs_lengths %in% inconsistent_lengths problems <- lhs_problems | rhs_problems if (any(problems)) { - stop("The following formulas must be length ", len, " or 1, not ", - paste(inconsistent_lengths, collapse = ", "), ".\n ", - paste(fs[problems], collapse = "\n "), - call. = FALSE) + stop("The following formulas must be length ", len, " or 1, not ", + paste(inconsistent_lengths, collapse = ", "), ".\n ", + paste(fs[problems], collapse = "\n "), + call. = FALSE + ) } } - - replace_with <- function (x, i, val, arg_name) { - if (is.null(val)) + + replace_with <- function(x, i, val, arg_name) { + if (is.null(val)) { return(x) + } i[is.na(i)] <- FALSE if (length(val) == 1L) { x[i] <- val - } - else { + } else { x[i] <- val[i] } x } - + query <- vector("list", n) value <- vector("list", n) default_env <- parent.frame() for (i in seq_len(n)) { query[[i]] <- eval(fs[[i]][[2]], envir = default_env) value[[i]] <- eval(fs[[i]][[3]], envir = default_env) - if (!is.logical(query[[i]])) + if (!is.logical(query[[i]])) { stop(fs[[i]][[2]], " does not return a `logical` vector.") + } } m <- validate_case_when_length(query, value, fs) out <- value[[1]][rep(NA_integer_, m)] replaced <- rep(FALSE, m) for (i in seq_len(n)) { - out <- replace_with(out, query[[i]] & !replaced, value[[i]], - NULL) + out <- replace_with( + out, query[[i]] & !replaced, value[[i]], + NULL + ) replaced <- replaced | (query[[i]] & !is.na(query[[i]])) } out @@ -144,13 +159,13 @@ addin_insert_in <- function() { # No export, no Rd addin_insert_like <- function() { # we want Shift + Ctrl/Cmd + L to iterate over %like%, %unlike%, %like_case%, and %unlike_case% - + getActiveDocumentContext <- import_fn("getActiveDocumentContext", "rstudioapi") insertText <- import_fn("insertText", "rstudioapi") modifyRange <- import_fn("modifyRange", "rstudioapi") document_range <- import_fn("document_range", "rstudioapi") document_position <- import_fn("document_position", "rstudioapi") - + context <- getActiveDocumentContext() current_row <- context$selection[[1]]$range$end[1] current_col <- context$selection[[1]]$range$end[2] @@ -159,22 +174,27 @@ addin_insert_like <- function() { insertText(" %like% ") return(invisible()) } - + pos_preceded_by <- function(txt) { if (tryCatch(substr(current_row_txt, current_col - nchar(trimws(txt, which = "right")), current_col) == trimws(txt, which = "right"), - error = function(e) FALSE)) { + error = function(e) FALSE + )) { return(TRUE) } tryCatch(substr(current_row_txt, current_col - nchar(txt), current_col) %like% paste0("^", txt), - error = function(e) FALSE) + error = function(e) FALSE + ) } replace_pos <- function(old, with) { - modifyRange(document_range(document_position(current_row, current_col - nchar(old)), - document_position(current_row, current_col)), - text = with, - id = context$id) + modifyRange(document_range( + document_position(current_row, current_col - nchar(old)), + document_position(current_row, current_col) + ), + text = with, + id = context$id + ) } - + if (pos_preceded_by(" %like% ")) { replace_pos(" %like% ", with = " %unlike% ") } else if (pos_preceded_by(" %unlike% ")) { @@ -202,40 +222,53 @@ check_dataset_integrity <- function() { plural <- c(" is", "s", "") } if (message_not_thrown_before("check_dataset_integrity", overwritten)) { - warning_("The following data set", plural[1], - " overwritten by your global environment and prevent", plural[2], - " the AMR package from working correctly: ", - vector_and(overwritten, quotes = "'"), - ".\nPlease rename your object", plural[3], ".") + warning_( + "The following data set", plural[1], + " overwritten by your global environment and prevent", plural[2], + " the AMR package from working correctly: ", + vector_and(overwritten, quotes = "'"), + ".\nPlease rename your object", plural[3], "." + ) } } # check if other packages did not overwrite our data sets valid_microorganisms <- TRUE valid_antibiotics <- TRUE - tryCatch({ - valid_microorganisms <- all(c("mo", "fullname", "kingdom", "phylum", - "class", "order", "family", "genus", - "species", "subspecies", "rank", - "species_id", "source", "ref", "prevalence") %in% colnames(microorganisms), - na.rm = TRUE) - valid_antibiotics <- all(c("ab", "atc", "cid", "name", "group", - "atc_group1", "atc_group2", "abbreviations", - "synonyms", "oral_ddd", "oral_units", - "iv_ddd", "iv_units", "loinc") %in% colnames(antibiotics), - na.rm = TRUE) - }, error = function(e) { - # package not yet loaded - require("AMR") - }) - stop_if(!valid_microorganisms | !valid_antibiotics, - "the data set `microorganisms` or `antibiotics` was overwritten in your environment because another package with the same object name(s) was loaded _after_ the AMR package, preventing the AMR package from working correctly. Please load the AMR package last.") + tryCatch( + { + valid_microorganisms <- all(c( + "mo", "fullname", "kingdom", "phylum", + "class", "order", "family", "genus", + "species", "subspecies", "rank", + "species_id", "source", "ref", "prevalence" + ) %in% colnames(microorganisms), + na.rm = TRUE + ) + valid_antibiotics <- all(c( + "ab", "atc", "cid", "name", "group", + "atc_group1", "atc_group2", "abbreviations", + "synonyms", "oral_ddd", "oral_units", + "iv_ddd", "iv_units", "loinc" + ) %in% colnames(antibiotics), + na.rm = TRUE + ) + }, + error = function(e) { + # package not yet loaded + require("AMR") + } + ) + stop_if( + !valid_microorganisms | !valid_antibiotics, + "the data set `microorganisms` or `antibiotics` was overwritten in your environment because another package with the same object name(s) was loaded _after_ the AMR package, preventing the AMR package from working correctly. Please load the AMR package last." + ) invisible(TRUE) } search_type_in_df <- function(x, type, info = TRUE) { meet_criteria(x, allow_class = "data.frame") meet_criteria(type, allow_class = "character", has_length = 1) - + # try to find columns based on type found <- NULL @@ -249,7 +282,7 @@ search_type_in_df <- function(x, type, info = TRUE) { # take first column found <- colnames(x)[vapply(FUN.VALUE = logical(1), x, is.mo)] } else if ("mo" %in% colnames_formatted & - suppressWarnings(all(x$mo %in% c(NA, microorganisms$mo)))) { + suppressWarnings(all(x$mo %in% c(NA, microorganisms$mo)))) { found <- "mo" } else if (any(colnames_formatted %like_case% "^(mo|microorganism|organism|bacteria|ba[ck]terie)s?$")) { found <- sort(colnames(x)[colnames_formatted %like_case% "^(mo|microorganism|organism|bacteria|ba[ck]terie)s?$"]) @@ -258,7 +291,6 @@ search_type_in_df <- function(x, type, info = TRUE) { } else if (any(colnames_formatted %like_case% "species")) { found <- sort(colnames(x)[colnames_formatted %like_case% "species"]) } - } # -- key antibiotics if (type %in% c("keyantibiotics", "keyantimicrobials")) { @@ -272,11 +304,13 @@ search_type_in_df <- function(x, type, info = TRUE) { # WHONET support found <- sort(colnames(x)[colnames_formatted %like_case% "^(specimen date|specimen_date|spec_date)"]) if (!any(class(pm_pull(x, found)) %in% c("Date", "POSIXct"))) { - stop(font_red(paste0("Found column '", font_bold(found), "' to be used as input for `col_", type, - "`, but this column contains no valid dates. Transform its values to valid dates first.")), - call. = FALSE) + stop(font_red(paste0( + "Found column '", font_bold(found), "' to be used as input for `col_", type, + "`, but this column contains no valid dates. Transform its values to valid dates first." + )), + call. = FALSE + ) } - } else if (any(vapply(FUN.VALUE = logical(1), x, function(x) inherits(x, c("Date", "POSIXct"))))) { # take first column found <- colnames(x)[vapply(FUN.VALUE = logical(1), x, function(x) inherits(x, c("Date", "POSIXct")))] @@ -313,15 +347,16 @@ search_type_in_df <- function(x, type, info = TRUE) { # this column should contain logicals if (!is.logical(x[, found, drop = TRUE])) { message_("Column '", font_bold(found), "' found as input for `col_", type, - "`, but this column does not contain 'logical' values (TRUE/FALSE) and was ignored.", - add_fn = font_red) + "`, but this column does not contain 'logical' values (TRUE/FALSE) and was ignored.", + add_fn = font_red + ) found <- NULL } } } - + found <- found[1] - + if (!is.null(found) & info == TRUE) { if (message_not_thrown_before("search_in_type", type)) { msg <- paste0("Using column '", font_bold(found), "' as input for `col_", type, "`.") @@ -335,36 +370,52 @@ search_type_in_df <- function(x, type, info = TRUE) { } is_valid_regex <- function(x) { - regex_at_all <- tryCatch(vapply(FUN.VALUE = logical(1), - X = strsplit(x, ""), - FUN = function(y) any(y %in% c("$", "(", ")", "*", "+", "-", - ".", "?", "[", "]", "^", "{", - "|", "}", "\\"), - na.rm = TRUE), - USE.NAMES = FALSE), - error = function(e) rep(TRUE, length(x))) - regex_valid <- vapply(FUN.VALUE = logical(1), - X = x, - FUN = function(y) !"try-error" %in% class(try(grepl(y, "", perl = TRUE), - silent = TRUE)), - USE.NAMES = FALSE) + regex_at_all <- tryCatch(vapply( + FUN.VALUE = logical(1), + X = strsplit(x, ""), + FUN = function(y) { + any(y %in% c( + "$", "(", ")", "*", "+", "-", + ".", "?", "[", "]", "^", "{", + "|", "}", "\\" + ), + na.rm = TRUE + ) + }, + USE.NAMES = FALSE + ), + error = function(e) rep(TRUE, length(x)) + ) + regex_valid <- vapply( + FUN.VALUE = logical(1), + X = x, + FUN = function(y) { + !"try-error" %in% class(try(grepl(y, "", perl = TRUE), + silent = TRUE + )) + }, + USE.NAMES = FALSE + ) regex_at_all & regex_valid } stop_ifnot_installed <- function(package) { # no "utils::installed.packages()" since it requires non-staged install since R 3.6.0 # https://developer.r-project.org/Blog/public/2019/02/14/staged-install/index.html - vapply(FUN.VALUE = character(1), package, function(pkg) + vapply(FUN.VALUE = character(1), package, function(pkg) { tryCatch(get(".packageName", envir = asNamespace(pkg)), - error = function(e) { - if (pkg == "rstudioapi") { - stop("This function only works in RStudio when using R >= 3.2.", call. = FALSE) - } else if (pkg != "base") { - stop("This requires the '", pkg, "' package.", - "\nTry to install it with: install.packages(\"", pkg, "\")", - call. = FALSE) - } - })) + error = function(e) { + if (pkg == "rstudioapi") { + stop("This function only works in RStudio when using R >= 3.2.", call. = FALSE) + } else if (pkg != "base") { + stop("This requires the '", pkg, "' package.", + "\nTry to install it with: install.packages(\"", pkg, "\")", + call. = FALSE + ) + } + } + ) + }) return(invisible()) } @@ -385,17 +436,19 @@ import_fn <- function(name, pkg, error_on_fail = TRUE) { stop_ifnot_installed(pkg) } tryCatch( - # don't use get() to avoid fetching non-API functions + # don't use get() to avoid fetching non-API functions getExportedValue(name = name, ns = asNamespace(pkg)), error = function(e) { if (isTRUE(error_on_fail)) { stop_("function ", name, "() is not an exported object from package '", pkg, - "'. Please create an issue at https://github.com/msberends/AMR/issues. Many thanks!", - call = FALSE) + "'. Please create an issue at https://github.com/msberends/AMR/issues. Many thanks!", + call = FALSE + ) } else { return(NULL) } - }) + } + ) } # this alternative wrapper to the message(), warning() and stop() functions: @@ -404,28 +457,31 @@ import_fn <- function(name, pkg, error_on_fail = TRUE) { # - adds indentation dependent on the type of message (such as NOTE) # - can add additional formatting functions like blue or bold text word_wrap <- function(..., - add_fn = list(), + add_fn = list(), as_note = FALSE, width = 0.95 * getOption("width"), extra_indent = 0) { msg <- paste0(c(...), collapse = "") - + if (isTRUE(as_note)) { msg <- paste0(pkg_env$info_icon, " ", gsub("^note:? ?", "", msg, ignore.case = TRUE)) } - + if (msg %like% "\n") { # run word_wraps() over every line here, bind them and return again - return(paste0(vapply(FUN.VALUE = character(1), - trimws(unlist(strsplit(msg, "\n")), which = "right"), - word_wrap, - add_fn = add_fn, - as_note = FALSE, - width = width, - extra_indent = extra_indent), - collapse = "\n")) + return(paste0(vapply( + FUN.VALUE = character(1), + trimws(unlist(strsplit(msg, "\n")), which = "right"), + word_wrap, + add_fn = add_fn, + as_note = FALSE, + width = width, + extra_indent = extra_indent + ), + collapse = "\n" + )) } - + # correct for operators (will add the space later on) ops <- "([,./><\\]\\[])" msg <- gsub(paste0(ops, " ", ops), "\\1\\2", msg, perl = TRUE) @@ -433,11 +489,14 @@ word_wrap <- function(..., msg_stripped <- font_stripstyle(msg) # where are the spaces now? msg_stripped_wrapped <- paste0(strwrap(msg_stripped, - simplify = TRUE, - width = width), - collapse = "\n") + simplify = TRUE, + width = width + ), + collapse = "\n" + ) msg_stripped_wrapped <- paste0(unlist(strsplit(msg_stripped_wrapped, "(\n|\\*\\|\\*)")), - collapse = "\n") + collapse = "\n" + ) msg_stripped_spaces <- which(unlist(strsplit(msg_stripped, "")) == " ") msg_stripped_wrapped_spaces <- which(unlist(strsplit(msg_stripped_wrapped, "")) != "\n") # so these are the indices of spaces that need to be replaced @@ -449,7 +508,7 @@ word_wrap <- function(..., msg <- gsub(paste0(ops, ops), "\\1 \\2", msg, perl = TRUE) msg <- paste0(msg, collapse = " ") msg <- gsub("\n ", "\n", msg, fixed = TRUE) - + if (msg_stripped %like% "\u2139 ") { indentation <- 2 + extra_indent } else if (msg_stripped %like% "^=> ") { @@ -469,13 +528,13 @@ word_wrap <- function(..., msg <- add_fn[[i]](msg) } } - + # format backticks msg <- gsub("(`.+?`)", font_grey_bg("\\1"), msg) - + # clean introduced whitespace between fullstops msg <- gsub("[.] +[.]", "..", msg) - + msg } @@ -483,21 +542,25 @@ message_ <- function(..., appendLF = TRUE, add_fn = list(font_blue), as_note = TRUE) { - message(word_wrap(..., - add_fn = add_fn, - as_note = as_note), - appendLF = appendLF) + message(word_wrap(..., + add_fn = add_fn, + as_note = as_note + ), + appendLF = appendLF + ) } warning_ <- function(..., add_fn = list(), immediate = FALSE, call = FALSE) { - warning(word_wrap(..., - add_fn = add_fn, - as_note = FALSE), - immediate. = immediate, - call. = call) + warning(word_wrap(..., + add_fn = add_fn, + as_note = FALSE + ), + immediate. = immediate, + call. = call + ) } # this alternative to the stop() function: @@ -553,8 +616,9 @@ stop_ifnot <- function(expr, ..., call = TRUE) { } } ifelse(!is.na(x), - x, - ifelse(!is.na(y), y, NA)) + x, + ifelse(!is.na(y), y, NA) + ) } return_after_integrity_check <- function(value, type, check_vector) { @@ -596,7 +660,7 @@ create_eucast_ab_documentation <- function() { ab <- character() for (val in x) { if (paste0("AB_", val) %in% ls(envir = asNamespace("AMR"))) { - # antibiotic group names, as defined in data-raw/pre-commit-hook.R, such as `CARBAPENEMS` + # antibiotic group names, as defined in data-raw/_pre_commit_hook.R, such as `CARBAPENEMS` val <- eval(parse(text = paste0("AB_", val)), envir = asNamespace("AMR")) } else if (val %in% AB_lookup$ab) { # separate drugs, such as `AMX` @@ -646,13 +710,17 @@ vector_or <- function(v, quotes = TRUE, reverse = FALSE, sort = TRUE, initial_ca v <- c("R", "S", "I") } # all commas except for last item, so will become '"val1", "val2", "val3" or "val4"' - paste0(paste0(quotes, v[seq_len(length(v) - 1)], quotes, collapse = ", "), - last_sep, paste0(quotes, v[length(v)], quotes)) + paste0( + paste0(quotes, v[seq_len(length(v) - 1)], quotes, collapse = ", "), + last_sep, paste0(quotes, v[length(v)], quotes) + ) } vector_and <- function(v, quotes = TRUE, reverse = FALSE, sort = TRUE, initial_captital = FALSE) { - vector_or(v = v, quotes = quotes, reverse = reverse, sort = sort, - initial_captital = initial_captital, last_sep = " and ") + vector_or( + v = v, quotes = quotes, reverse = reverse, sort = sort, + initial_captital = initial_captital, last_sep = " and " + ) } format_class <- function(class, plural = FALSE) { @@ -664,9 +732,11 @@ format_class <- function(class, plural = FALSE) { } class[class == "character"] <- "text string" class[class %in% c("Date", "POSIXt")] <- "date" - class[class != class.bak] <- paste0(ifelse(plural, "", "a "), - class[class != class.bak], - ifelse(plural, "s", "")) + class[class != class.bak] <- paste0( + ifelse(plural, "", "a "), + class[class != class.bak], + ifelse(plural, "s", "") + ) # exceptions class[class == "logical"] <- ifelse(plural, "a vector of `TRUE`/`FALSE`", "`TRUE` or `FALSE`") class[class == "data.frame"] <- "a data set" @@ -704,10 +774,11 @@ meet_criteria <- function(object, obj_name <- deparse(substitute(object)) call_depth <- -2 - abs(.call_depth) - + # if object is missing, or another error: tryCatch(invisible(object), - error = function(e) pkg_env$meet_criteria_error_txt <- e$message) + error = function(e) pkg_env$meet_criteria_error_txt <- e$message + ) if (!is.null(pkg_env$meet_criteria_error_txt)) { error_txt <- pkg_env$meet_criteria_error_txt pkg_env$meet_criteria_error_txt <- NULL @@ -726,30 +797,34 @@ meet_criteria <- function(object, if (!is.null(allow_class)) { stop_ifnot(inherits(object, allow_class), "argument `", obj_name, - "` must be ", format_class(allow_class, plural = isTRUE(has_length > 1)), - ", i.e. not be ", format_class(class(object), plural = isTRUE(has_length > 1)), - call = call_depth) + "` must be ", format_class(allow_class, plural = isTRUE(has_length > 1)), + ", i.e. not be ", format_class(class(object), plural = isTRUE(has_length > 1)), + call = call_depth + ) # check data.frames for data if (inherits(object, "data.frame")) { stop_if(any(dim(object) == 0), - "the data provided in argument `", obj_name, - "` must contain rows and columns (current dimensions: ", - paste(dim(object), collapse = "x"), ")", - call = call_depth) + "the data provided in argument `", obj_name, + "` must contain rows and columns (current dimensions: ", + paste(dim(object), collapse = "x"), ")", + call = call_depth + ) } } if (!is.null(has_length)) { stop_ifnot(length(object) %in% has_length, "argument `", obj_name, - "` must ", # ifelse(allow_NULL, "be NULL or must ", ""), - "be of length ", vector_or(has_length, quotes = FALSE), - ", not ", length(object), - call = call_depth) + "` must ", # ifelse(allow_NULL, "be NULL or must ", ""), + "be of length ", vector_or(has_length, quotes = FALSE), + ", not ", length(object), + call = call_depth + ) } if (!is.null(looks_like)) { stop_ifnot(object %like% looks_like, "argument `", obj_name, - "` must ", # ifelse(allow_NULL, "be NULL or must ", ""), - "resemble the regular expression \"", looks_like, "\"", - call = call_depth) + "` must ", # ifelse(allow_NULL, "be NULL or must ", ""), + "resemble the regular expression \"", looks_like, "\"", + call = call_depth + ) } if (!is.null(is_in)) { if (ignore.case == TRUE) { @@ -757,48 +832,59 @@ meet_criteria <- function(object, is_in <- tolower(is_in) } stop_ifnot(all(object %in% is_in, na.rm = TRUE), "argument `", obj_name, "` ", - ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, - "must be either ", - "must only contain values "), - vector_or(is_in, quotes = !isTRUE(any(c("double", "numeric", "integer") %in% allow_class))), - ifelse(allow_NA == TRUE, ", or NA", ""), - call = call_depth) + ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, + "must be either ", + "must only contain values " + ), + vector_or(is_in, quotes = !isTRUE(any(c("double", "numeric", "integer") %in% allow_class))), + ifelse(allow_NA == TRUE, ", or NA", ""), + call = call_depth + ) } if (isTRUE(is_positive)) { stop_if(is.numeric(object) && !all(object > 0, na.rm = TRUE), "argument `", obj_name, - "` must ", - ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, - "be a number higher than zero", - "all be numbers higher than zero"), - call = call_depth) + "` must ", + ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, + "be a number higher than zero", + "all be numbers higher than zero" + ), + call = call_depth + ) } if (isTRUE(is_positive_or_zero)) { stop_if(is.numeric(object) && !all(object >= 0, na.rm = TRUE), "argument `", obj_name, - "` must ", - ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, - "be zero or a positive number", - "all be zero or numbers higher than zero"), - call = call_depth) + "` must ", + ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, + "be zero or a positive number", + "all be zero or numbers higher than zero" + ), + call = call_depth + ) } if (isTRUE(is_finite)) { stop_if(is.numeric(object) && !all(is.finite(object[!is.na(object)]), na.rm = TRUE), "argument `", obj_name, - "` must ", - ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, - "be a finite number", - "all be finite numbers"), - " (i.e. not be infinite)", - call = call_depth) + "` must ", + ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, + "be a finite number", + "all be finite numbers" + ), + " (i.e. not be infinite)", + call = call_depth + ) } if (!is.null(contains_column_class)) { - stop_ifnot(any(vapply(FUN.VALUE = logical(1), - object, - function(col, columns_class = contains_column_class) { - inherits(col, columns_class) - }), na.rm = TRUE), - "the data provided in argument `", obj_name, - "` must contain at least one column of class <", contains_column_class, ">. ", - "See ?as.", contains_column_class, ".", - call = call_depth) + stop_ifnot(any(vapply( + FUN.VALUE = logical(1), + object, + function(col, columns_class = contains_column_class) { + inherits(col, columns_class) + } + ), na.rm = TRUE), + "the data provided in argument `", obj_name, + "` must contain at least one column of class <", contains_column_class, ">. ", + "See ?as.", contains_column_class, ".", + call = call_depth + ) } return(invisible()) } @@ -817,43 +903,44 @@ get_current_data <- function(arg_name, call) { return(out) } } - + # try a manual (base R) method, by going over all underlying environments with sys.frames() for (env in sys.frames()) { if (!is.null(env$`.Generic`)) { # don't check `".Generic" %in% names(env)`, because in R < 3.2, `names(env)` is always NULL - + if (valid_df(env$`.data`)) { # an element `.data` will be in the environment when using `dplyr::select()` # (but not when using `dplyr::filter()`, `dplyr::mutate()` or `dplyr::summarise()`) return(env$`.data`) - } else if (valid_df(env$xx)) { # an element `xx` will be in the environment for rows + cols, e.g. `example_isolates[c(1:3), carbapenems()]` return(env$xx) - } else if (valid_df(env$x)) { # an element `x` will be in the environment for only cols, e.g. `example_isolates[, carbapenems()]` return(env$x) } } } - + # no data.frame found, so an error must be returned: if (is.na(arg_name)) { if (isTRUE(is.numeric(call))) { fn <- as.character(sys.call(call + 1)[1]) - examples <- paste0(", e.g.:\n", - " your_data %>% select(", fn, "())\n", - " your_data %>% select(column_a, column_b, ", fn, "())\n", - " your_data[, ", fn, "()]\n", - ' your_data[, c("column_a", "column_b", ', fn, "())]") + examples <- paste0( + ", e.g.:\n", + " your_data %>% select(", fn, "())\n", + " your_data %>% select(column_a, column_b, ", fn, "())\n", + " your_data[, ", fn, "()]\n", + ' your_data[, c("column_a", "column_b", ', fn, "())]" + ) } else { examples <- "" } stop_("this function must be used inside a `dplyr` verb or `data.frame` call", - examples, - call = call) + examples, + call = call + ) } else { # mimic a base R error that the argument is missing stop_("argument `", arg_name, "` is missing with no default", call = call) @@ -869,7 +956,7 @@ get_current_column <- function() { return(out) } } - + # cur_column() doesn't always work (only allowed for certain conditions set by dplyr), but it's probably still possible: frms <- lapply(sys.frames(), function(env) { if (!is.null(env$i)) { @@ -889,7 +976,7 @@ get_current_column <- function() { NULL } }) - + vars <- unlist(frms) if (length(vars) > 0) { vars[length(vars)] @@ -908,22 +995,26 @@ unique_call_id <- function(entire_session = FALSE, match_fn = NULL) { if (entire_session == TRUE) { return(c(envir = "session", call = "session")) } - + # combination of environment ID (such as "0x7fed4ee8c848") # and relevant system call (where 'match_fn' is being called in) calls <- sys.calls() if (!identical(Sys.getenv("R_RUN_TINYTEST"), "true") && - !any(as.character(calls[[1]]) %like_case% "run_test_dir|run_test_file|test_all|tinytest|test_package|testthat")) { + !any(as.character(calls[[1]]) %like_case% "run_test_dir|run_test_file|test_all|tinytest|test_package|testthat")) { for (i in seq_len(length(calls))) { call_clean <- gsub("[^a-zA-Z0-9_().-]", "", as.character(calls[[i]]), perl = TRUE) if (any(call_clean %like% paste0(match_fn, "\\("), na.rm = TRUE)) { - return(c(envir = gsub("", "\\1", utils::capture.output(sys.frames()[[1]]), perl = TRUE), - call = paste0(deparse(calls[[i]]), collapse = ""))) + return(c( + envir = gsub("", "\\1", utils::capture.output(sys.frames()[[1]]), perl = TRUE), + call = paste0(deparse(calls[[i]]), collapse = "") + )) } } } - c(envir = paste0(sample(c(c(0:9), letters[1:6]), size = 32, replace = TRUE), collapse = ""), - call = paste0(sample(c(c(0:9), letters[1:6]), size = 32, replace = TRUE), collapse = "")) + c( + envir = paste0(sample(c(c(0:9), letters[1:6]), size = 32, replace = TRUE), collapse = ""), + call = paste0(sample(c(c(0:9), letters[1:6]), size = 32, replace = TRUE), collapse = "") + ) } #' @noRd @@ -935,21 +1026,27 @@ message_not_thrown_before <- function(fn, ..., entire_session = FALSE) { # e.g. this would show a msg 4 times: example_isolates %>% group_by(ward) %>% filter(mo_is_gram_negative()) salt <- gsub("[^a-zA-Z0-9|_-]", "?", paste(c(...), sep = "|", collapse = "|"), perl = TRUE) not_thrown_before <- is.null(pkg_env[[paste0("thrown_msg.", fn, ".", salt)]]) || - !identical(pkg_env[[paste0("thrown_msg.", fn, ".", salt)]], - unique_call_id(entire_session = entire_session, - match_fn = fn)) + !identical( + pkg_env[[paste0("thrown_msg.", fn, ".", salt)]], + unique_call_id( + entire_session = entire_session, + match_fn = fn + ) + ) if (isTRUE(not_thrown_before)) { # message was not thrown before - remember this so on the next run it will return FALSE: - assign(x = paste0("thrown_msg.", fn, ".", salt), - value = unique_call_id(entire_session = entire_session, match_fn = fn), - envir = pkg_env) + assign( + x = paste0("thrown_msg.", fn, ".", salt), + value = unique_call_id(entire_session = entire_session, match_fn = fn), + envir = pkg_env + ) } not_thrown_before } has_colour <- function() { # this is a base R version of crayon::has_color, but disables colours on emacs - + if (Sys.getenv("EMACS") != "" || Sys.getenv("INSIDE_EMACS") != "") { # disable on emacs, which only supports 8 colours return(FALSE) @@ -965,8 +1062,12 @@ has_colour <- function() { if ((cols <- Sys.getenv("RSTUDIO_CONSOLE_COLOR", "")) != "" && !is.na(as.double(cols))) { return(TRUE) } - tryCatch(get("isAvailable", envir = asNamespace("rstudioapi"))(), error = function(e) return(FALSE)) && - tryCatch(get("hasFun", envir = asNamespace("rstudioapi"))("getConsoleHasColor"), error = function(e) return(FALSE)) + tryCatch(get("isAvailable", envir = asNamespace("rstudioapi"))(), error = function(e) { + return(FALSE) + }) && + tryCatch(get("hasFun", envir = asNamespace("rstudioapi"))("getConsoleHasColor"), error = function(e) { + return(FALSE) + }) } if (rstudio_with_ansi_support() && sink.number() == 0) { return(TRUE) @@ -989,10 +1090,12 @@ has_colour <- function() { if (Sys.getenv("TERM") == "dumb") { return(FALSE) } - grepl(pattern = "^screen|^xterm|^vt100|color|ansi|cygwin|linux", - x = Sys.getenv("TERM"), - ignore.case = TRUE, - perl = TRUE) + grepl( + pattern = "^screen|^xterm|^vt100|color|ansi|cygwin|linux", + x = Sys.getenv("TERM"), + ignore.case = TRUE, + perl = TRUE + ) } # set colours if console has_colour() @@ -1041,7 +1144,7 @@ font_grey <- function(..., collapse = " ") { font_grey_bg <- function(..., collapse = " ") { if (tryCatch(import_fn("getThemeInfo", "rstudioapi", error_on_fail = FALSE)()$dark, error = function(e) FALSE)) { # similar to HTML #444444 - try_colour(..., before = "\033[48;5;238m", after = "\033[49m", collapse = collapse) + try_colour(..., before = "\033[48;5;238m", after = "\033[49m", collapse = collapse) } else { # similar to HTML #f0f0f0 try_colour(..., before = "\033[48;5;255m", after = "\033[49m", collapse = collapse) @@ -1051,15 +1154,15 @@ font_green_bg <- function(..., collapse = " ") { try_colour(..., before = "\033[42m", after = "\033[49m", collapse = collapse) } font_rsi_R_bg <- function(..., collapse = " ") { - #ED553B + # ED553B try_colour(..., before = "\033[48;5;203m", after = "\033[49m", collapse = collapse) } font_rsi_S_bg <- function(..., collapse = " ") { - #3CAEA3 + # 3CAEA3 try_colour(..., before = "\033[48;5;79m", after = "\033[49m", collapse = collapse) } font_rsi_I_bg <- function(..., collapse = " ") { - #F6D55C + # F6D55C try_colour(..., before = "\033[48;5;222m", after = "\033[49m", collapse = collapse) } font_red_bg <- function(..., collapse = " ") { @@ -1101,8 +1204,10 @@ progress_ticker <- function(n = 1, n_min = 0, print = TRUE, ...) { if (!is.null(progress_bar)) { # so we use progress::progress_bar # a close() method was also added, see below this function - pb <- progress_bar$new(format = "[:bar] :percent (:current/:total)", - total = n) + pb <- progress_bar$new( + format = "[:bar] :percent (:current/:total)", + total = n + ) } else { pb <- utils::txtProgressBar(max = n, style = 3) pb$tick <- function() { @@ -1180,16 +1285,14 @@ s3_register <- function(generic, class, method = NULL) { top <- topenv(caller) if (isNamespace(top)) { asNamespace(environmentName(top)) - } - else { + } else { caller } } get_method <- function(method, env) { if (is.null(method)) { get(paste0(generic, ".", class), envir = get_method_env()) - } - else { + } else { method } } @@ -1215,19 +1318,30 @@ s3_register <- function(generic, class, method = NULL) { round2 <- function(x, digits = 1, force_zero = TRUE) { x <- as.double(x) # https://stackoverflow.com/a/12688836/4575331 - val <- (trunc((abs(x) * 10 ^ digits) + 0.5) / 10 ^ digits) * sign(x) + val <- (trunc((abs(x) * 10^digits) + 0.5) / 10^digits) * sign(x) if (digits > 0 & force_zero == TRUE) { values_trans <- val[val != as.integer(val) & !is.na(val)] - val[val != as.integer(val) & !is.na(val)] <- paste0(values_trans, - strrep("0", - max(0, - digits - nchar( - format( - as.double( - gsub(".*[.](.*)$", - "\\1", - values_trans)), - scientific = FALSE))))) + val[val != as.integer(val) & !is.na(val)] <- paste0( + values_trans, + strrep( + "0", + max( + 0, + digits - nchar( + format( + as.double( + gsub( + ".*[.](.*)$", + "\\1", + values_trans + ) + ), + scientific = FALSE + ) + ) + ) + ) + ) } as.double(val) } @@ -1244,12 +1358,20 @@ percentage <- function(x, digits = NULL, ...) { if (minimum > maximum) { minimum <- maximum } - max_places <- max(unlist(lapply(strsplit(sub("0+$", "", - as.character(x * 100)), ".", fixed = TRUE), - function(y) ifelse(length(y) == 2, nchar(y[2]), 0))), na.rm = TRUE) + max_places <- max(unlist(lapply( + strsplit(sub( + "0+$", "", + as.character(x * 100) + ), ".", fixed = TRUE), + function(y) ifelse(length(y) == 2, nchar(y[2]), 0) + )), na.rm = TRUE) max(min(max_places, - maximum, na.rm = TRUE), - minimum, na.rm = TRUE) + maximum, + na.rm = TRUE + ), + minimum, + na.rm = TRUE + ) } # format_percentage() function @@ -1263,10 +1385,11 @@ percentage <- function(x, digits = NULL, ...) { # round right: percentage(0.4455) and format(as.percentage(0.4455), 1) should return "44.6%", not "44.5%" x_formatted <- format(round2(as.double(x), digits = digits + 2) * 100, - scientific = FALSE, - digits = max(1, digits), - nsmall = digits, - ...) + scientific = FALSE, + digits = max(1, digits), + nsmall = digits, + ... + ) x_formatted <- paste0(x_formatted, "%") x_formatted[!grepl(pattern = "^[0-9.,e-]+$", x = x)] <- NA_character_ x_formatted @@ -1278,9 +1401,12 @@ percentage <- function(x, digits = NULL, ...) { # max one digit if undefined digits <- getdecimalplaces(x, minimum = 0, maximum = 1) } - format_percentage(structure(.Data = as.double(x), - class = c("percentage", "numeric")), - digits = digits, ...) + format_percentage(structure( + .Data = as.double(x), + class = c("percentage", "numeric") + ), + digits = digits, ... + ) } time_start_tracking <- function() { @@ -1296,13 +1422,16 @@ time_track <- function(name = NULL) { # see here for the full list: https://github.com/r-lib/backports strrep <- function(x, times) { x <- as.character(x) - if (length(x) == 0L) + if (length(x) == 0L) { return(x) + } unlist(.mapply(function(x, times) { - if (is.na(x) || is.na(times)) + if (is.na(x) || is.na(times)) { return(NA_character_) - if (times <= 0L) + } + if (times <= 0L) { return("") + } paste0(replicate(times, x), collapse = "") }, list(x = x, times = times), MoreArgs = list()), use.names = FALSE) } @@ -1310,9 +1439,10 @@ trimws <- function(x, which = c("both", "left", "right"), whitespace = "[ \t\r\n which <- match.arg(which) mysub <- function(re, x) sub(re, "", x, perl = TRUE) switch(which, - left = mysub(paste0("^", whitespace, "+"), x), - right = mysub(paste0(whitespace, "+$"), x), - both = mysub(paste0(whitespace, "+$"), mysub(paste0("^", whitespace, "+"), x))) + left = mysub(paste0("^", whitespace, "+"), x), + right = mysub(paste0(whitespace, "+$"), x), + both = mysub(paste0(whitespace, "+$"), mysub(paste0("^", whitespace, "+"), x)) + ) } isFALSE <- function(x) { is.logical(x) && length(x) == 1L && !is.na(x) && !x @@ -1346,7 +1476,7 @@ if (getRversion() < "3.1") { sinpi <- function(...) 1 tanpi <- function(...) 1 } -dir.exists <- function (paths) { - x = base::file.info(paths)$isdir +dir.exists <- function(paths) { + x <- base::file.info(paths)$isdir !is.na(x) & x } diff --git a/R/aa_helper_pm_functions.R b/R/aa_helper_pm_functions.R index 5a11d120f..53e033de0 100644 --- a/R/aa_helper_pm_functions.R +++ b/R/aa_helper_pm_functions.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -34,10 +34,10 @@ # # All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes. # -# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a +# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a # copy of the software and associated documentation files (the "Software"), to deal in the Software # without restriction, including without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software # is furnished to do so', given that a copyright notice is given in the software. # # Copyright notice on 19 September 2020, the day this code was downloaded, as found on @@ -206,7 +206,9 @@ pm_distinct <- function(.data, ...) { } pm_distinct.default <- function(.data, ..., .keep_all = FALSE) { - if (ncol(.data) == 0L) return(.data[1, ]) + if (ncol(.data) == 0L) { + return(.data[1, ]) + } cols <- pm_deparse_dots(...) col_names <- names(cols) col_len <- length(cols) @@ -336,7 +338,9 @@ pm_print.grouped_data <- function(x, ..., digits = NULL, quote = FALSE, right = } pm_group_data <- function(.data) { - if (!pm_has_groups(.data)) return(data.frame(.rows = I(list(seq_len(nrow(.data)))))) + if (!pm_has_groups(.data)) { + return(data.frame(.rows = I(list(seq_len(nrow(.data)))))) + } pm_groups <- pm_get_groups(.data) pm_group_data_worker(.data, pm_groups) } @@ -360,7 +364,9 @@ pm_group_rows <- function(.data) { } pm_group_indices <- function(.data) { - if (!pm_has_groups(.data)) return(rep(1L, nrow(.data))) + if (!pm_has_groups(.data)) { + return(rep(1L, nrow(.data))) + } pm_groups <- pm_get_groups(.data) res <- unique(.data[, pm_groups, drop = FALSE]) res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE] @@ -417,7 +423,9 @@ pm_group_keys <- function(.data) { pm_context$setup(.data) res <- pm_context$.data[, pm_context$get_colnames() %in% pm_groups, drop = FALSE] res <- res[!duplicated(res), , drop = FALSE] - if (nrow(res) == 0L) return(res) + if (nrow(res) == 0L) { + return(res) + } class(res) <- "data.frame" res <- res[do.call(order, lapply(pm_groups, function(x) res[, x])), , drop = FALSE] rownames(res) <- NULL @@ -509,7 +517,9 @@ pm_join_message <- function(by) { pm_lag <- function(x, pm_n = 1L, default = NA) { if (inherits(x, "ts")) stop("`x` must be a vector, not a `ts` object, do you want `stats::pm_lag()`?") if (length(pm_n) != 1L || !is.numeric(pm_n) || pm_n < 0L) stop("`pm_n` must be a nonnegative integer scalar") - if (pm_n == 0L) return(x) + if (pm_n == 0L) { + return(x) + } tryCatch( storage.mode(default) <- typeof(x), warning = function(w) { @@ -525,7 +535,9 @@ pm_lag <- function(x, pm_n = 1L, default = NA) { pm_lead <- function(x, pm_n = 1L, default = NA) { if (length(pm_n) != 1L || !is.numeric(pm_n) || pm_n < 0L) stop("pm_n must be a nonnegative integer scalar") - if (pm_n == 0L) return(x) + if (pm_n == 0L) { + return(x) + } tryCatch( storage.mode(default) <- typeof(x), warning = function(w) { @@ -565,7 +577,9 @@ pm_mutate.grouped_data <- function(.data, ...) { } pm_n_distinct <- function(..., na.rm = FALSE) { res <- c(...) - if (is.list(res)) return(nrow(unique(as.data.frame(res, stringsAsFactors = FALSE)))) + if (is.list(res)) { + return(nrow(unique(as.data.frame(res, stringsAsFactors = FALSE)))) + } if (isTRUE(na.rm)) res <- res[!is.na(res)] length(unique(res)) } @@ -593,7 +607,7 @@ pm_pull <- function(.data, var = -1) { } else if (var_deparse %in% col_names) { var <- var_deparse } - .data[, var] + .data[, var, drop = TRUE] } pm_set_names <- function(object = nm, nm) { names(object) <- nm @@ -669,15 +683,16 @@ pm_rename_with <- function(.data, .fn, .cols = pm_everything(), ...) { .data } pm_replace_with <- function(x, i, val, arg_name) { - if (is.null(val)) return(x) + if (is.null(val)) { + return(x) + } pm_check_length(val, x, arg_name) pm_check_type(val, x, arg_name) pm_check_class(val, x, arg_name) i[is.na(i)] <- FALSE if (length(val) == 1L) { x[i] <- val - } - else { + } else { x[i] <- val[i] } x @@ -686,7 +701,9 @@ pm_replace_with <- function(x, i, val, arg_name) { pm_check_length <- function(x, y, arg_name) { length_x <- length(x) length_y <- length(y) - if (all(length_x %in% c(1L, length_y))) return() + if (all(length_x %in% c(1L, length_y))) { + return() + } if (length_y == 1) { stop(arg_name, " must be length 1, not ", paste(length_x, sep = ", ")) } else { @@ -697,15 +714,21 @@ pm_check_length <- function(x, y, arg_name) { pm_check_type <- function(x, y, arg_name) { x_type <- typeof(x) y_type <- typeof(y) - if (identical(x_type, y_type)) return() + if (identical(x_type, y_type)) { + return() + } stop(arg_name, " must be `", y_type, "`, not `", x_type, "`") } pm_check_class <- function(x, y, arg_name) { - if (!is.object(x)) return() + if (!is.object(x)) { + return() + } exp_classes <- class(y) out_classes <- class(x) - if (identical(out_classes, exp_classes)) return() + if (identical(out_classes, exp_classes)) { + return() + } stop(arg_name, " must have class `", exp_classes, "`, not class `", out_classes, "`") } pm_rownames_to_column <- function(.data, var = "rowname") { @@ -827,8 +850,7 @@ pm_select_positions <- function(.data, ..., .group_pos = FALSE) { pm_eval_expr <- function(x) { type <- typeof(x) - switch( - type, + switch(type, "integer" = x, "double" = as.integer(x), "character" = pm_select_char(x), @@ -864,8 +886,7 @@ pm_select_symbol <- function(expr) { pm_eval_call <- function(x) { type <- as.character(x[[1]]) - switch( - type, + switch(type, `:` = pm_select_seq(x), `!` = pm_select_negate(x), `-` = pm_select_minus(x), @@ -1029,7 +1050,7 @@ pm_is_wholenumber <- function(x) { x %% 1L == 0L } -pm_seq2 <- function (from, to) { +pm_seq2 <- function(from, to) { if (length(from) != 1) stop("`from` must be length one") if (length(to) != 1) stop("`to` must be length one") if (from > to) integer() else seq.int(from, to) @@ -1041,19 +1062,25 @@ pm_is_function <- function(x, frame) { warning = function(w) FALSE, error = function(e) FALSE ) - if (isTRUE(res)) return(res) + if (isTRUE(res)) { + return(res) + } res <- tryCatch( is.function(eval(x)), warning = function(w) FALSE, error = function(e) FALSE ) - if (isTRUE(res)) return(res) + if (isTRUE(res)) { + return(res) + } res <- tryCatch( is.function(eval(as.symbol(deparse(substitute(x))))), warning = function(w) FALSE, error = function(e) FALSE ) - if (isTRUE(res)) return(res) + if (isTRUE(res)) { + return(res) + } FALSE } diff --git a/R/ab.R b/R/ab.R index 9d7affd05..e10db2dd5 100755 --- a/R/ab.R +++ b/R/ab.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -33,16 +33,16 @@ #' @rdname as.ab #' @inheritSection WHOCC WHOCC #' @details All entries in the [antibiotics] data set have three different identifiers: a human readable EARS-Net code (column `ab`, used by ECDC and WHONET), an ATC code (column `atc`, used by WHO), and a CID code (column `cid`, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem. Not that some drugs contain multiple ATC codes. -#' +#' #' All these properties will be searched for the user input. The [as.ab()] can correct for different forms of misspelling: -#' +#' #' * Wrong spelling of drug names (such as "tobramicin" or "gentamycin"), which corrects for most audible similarities such as f/ph, x/ks, c/z/s, t/th, etc. #' * Too few or too many vowels or consonants #' * Switching two characters (such as "mreopenem", often the case in clinical data, when doctors typed too fast) #' * Digitalised paper records, leaving artefacts like 0/o/O (zero and O's), B/8, n/r, etc. #' #' Use the [`ab_*`][ab_property()] functions to get properties based on the returned antibiotic ID, see *Examples*. -#' +#' #' Note: the [as.ab()] and [`ab_*`][ab_property()] functions may use very long regular expression to match brand names of antimicrobial agents. This may fail on some systems. #' @section Source: #' World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology: \url{https://www.whocc.no/atc_ddd_index/} @@ -50,7 +50,7 @@ #' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{https://ec.europa.eu/health/documents/community-register/html/reg_hum_atc.htm} #' @aliases ab #' @return A [character] [vector] with additional class [`ab`] -#' @seealso +#' @seealso #' * [antibiotics] for the [data.frame] that is being used to determine ATCs #' * [ab_from_text()] for a function to retrieve antimicrobial drugs from clinical text (from health care records) #' @inheritSection AMR Reference Data Publicly Available @@ -65,52 +65,52 @@ #' as.ab("ERYT") #' as.ab("ERY") #' as.ab("eritromicine") # spelled wrong, yet works -#' as.ab("Erythrocin") # trade name -#' as.ab("Romycin") # trade name -#' +#' as.ab("Erythrocin") # trade name +#' as.ab("Romycin") # trade name +#' #' # spelling from different languages and dyslexia are no problem #' ab_atc("ceftriaxon") -#' ab_atc("cephtriaxone") # small spelling error -#' ab_atc("cephthriaxone") # or a bit more severe +#' ab_atc("cephtriaxone") # small spelling error +#' ab_atc("cephthriaxone") # or a bit more severe #' ab_atc("seephthriaaksone") # and even this works #' #' # use ab_* functions to get a specific properties (see ?ab_property); #' # they use as.ab() internally: -#' ab_name("J01FA01") # "Erythromycin" -#' ab_name("eryt") # "Erythromycin" +#' ab_name("J01FA01") # "Erythromycin" +#' ab_name("eryt") # "Erythromycin" #' \donttest{ #' if (require("dplyr")) { -#' +#' #' # you can quickly rename columns using dplyr >= 1.0.0: #' example_isolates %>% #' rename_with(as.ab, where(is.rsi)) -#' #' } #' } as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { meet_criteria(x, allow_class = c("character", "numeric", "integer", "factor"), allow_NA = TRUE) meet_criteria(flag_multiple_results, allow_class = "logical", has_length = 1) meet_criteria(info, allow_class = "logical", has_length = 1) - + check_dataset_integrity() - + if (is.ab(x)) { return(x) } if (all(x %in% c(AB_lookup$ab, NA))) { # all valid AB codes, but not yet right class return(set_clean_class(x, - new_class = c("ab", "character"))) + new_class = c("ab", "character") + )) } - + initial_search <- is.null(list(...)$initial_search) already_regex <- isTRUE(list(...)$already_regex) fast_mode <- isTRUE(list(...)$fast_mode) - + x_bak <- x x <- toupper(x) x_nonNA <- x[!is.na(x)] - + # remove diacritics x <- iconv(x, from = "UTF-8", to = "ASCII//TRANSLIT") x <- gsub('"', "", x, fixed = TRUE) @@ -121,11 +121,11 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { if (already_regex == FALSE) { x_bak_clean <- generalise_antibiotic_name(x_bak_clean) } - + x <- unique(x_bak_clean) # this means that every x is in fact generalise_antibiotic_name(x) x_new <- rep(NA_character_, length(x)) x_unknown <- character(0) - + note_if_more_than_one_found <- function(found, index, from_text) { if (initial_search == TRUE & isTRUE(length(from_text) > 1)) { abnames <- ab_name(from_text, tolower = TRUE, initial_search = FALSE) @@ -133,13 +133,15 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { abnames <- abnames[!abnames %in% c("clavulanic acid", "avibactam")] } if (length(abnames) > 1) { - message_("More than one result was found for item ", index, ": ", - vector_and(abnames, quotes = FALSE)) + message_( + "More than one result was found for item ", index, ": ", + vector_and(abnames, quotes = FALSE) + ) } } found[1L] } - + # Fill in names, AB codes, CID codes and ATC codes directly (`x` is already clean and uppercase) known_names <- x %in% AB_lookup$generalised_name x_new[known_names] <- AB_lookup$ab[match(x[known_names], AB_lookup$generalised_name)] @@ -147,77 +149,89 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { known_codes_atc <- vapply(FUN.VALUE = logical(1), x, function(x_) x_ %in% unlist(AB_lookup$atc), USE.NAMES = FALSE) known_codes_cid <- x %in% AB_lookup$cid x_new[known_codes_ab] <- AB_lookup$ab[match(x[known_codes_ab], AB_lookup$ab)] - x_new[known_codes_atc] <- AB_lookup$ab[vapply(FUN.VALUE = integer(1), - x[known_codes_atc], - function(x_) which(vapply(FUN.VALUE = logical(1), - AB_lookup$atc, - function(atc) x_ %in% atc))[1L], - USE.NAMES = FALSE)] + x_new[known_codes_atc] <- AB_lookup$ab[vapply( + FUN.VALUE = integer(1), + x[known_codes_atc], + function(x_) { + which(vapply( + FUN.VALUE = logical(1), + AB_lookup$atc, + function(atc) x_ %in% atc + ))[1L] + }, + USE.NAMES = FALSE + )] x_new[known_codes_cid] <- AB_lookup$ab[match(x[known_codes_cid], AB_lookup$cid)] already_known <- known_names | known_codes_ab | known_codes_atc | known_codes_cid - + if (initial_search == TRUE & sum(already_known) < length(x)) { progress <- progress_ticker(n = sum(!already_known), n_min = 25, print = info) # start if n >= 25 on.exit(close(progress)) } - + for (i in which(!already_known)) { - if (initial_search == TRUE) { progress$tick() } - + if (is.na(x[i]) | is.null(x[i])) { next } if (identical(x[i], "") | - # prevent "bacteria" from coercing to TMP, since Bacterial is a brand name of it: - identical(tolower(x[i]), "bacteria")) { + # prevent "bacteria" from coercing to TMP, since Bacterial is a brand name of it: + identical(tolower(x[i]), "bacteria")) { x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1]) next } - + if (fast_mode == FALSE && flag_multiple_results == TRUE && x[i] %like% "[ ]") { from_text <- tryCatch(suppressWarnings(ab_from_text(x[i], initial_search = FALSE, translate_ab = FALSE)[[1]]), - error = function(e) character(0)) + error = function(e) character(0) + ) } else { from_text <- character(0) } - + # old code for phenoxymethylpenicillin (Peni V) if (x[i] == "PNV") { x_new[i] <- "PHN" next } - + # exact LOINC code - loinc_found <- unlist(lapply(AB_lookup$generalised_loinc, - function(s) x[i] %in% s)) + loinc_found <- unlist(lapply( + AB_lookup$generalised_loinc, + function(s) x[i] %in% s + )) found <- antibiotics$ab[loinc_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - + # exact synonym - synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms, - function(s) x[i] %in% s)) + synonym_found <- unlist(lapply( + AB_lookup$generalised_synonyms, + function(s) x[i] %in% s + )) found <- antibiotics$ab[synonym_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - + # exact abbreviation - abbr_found <- unlist(lapply(AB_lookup$generalised_abbreviations, - # require at least 2 characters for abbreviations - function(s) x[i] %in% s & nchar(x[i]) >= 2)) + abbr_found <- unlist(lapply( + AB_lookup$generalised_abbreviations, + # require at least 2 characters for abbreviations + function(s) x[i] %in% s & nchar(x[i]) >= 2 + )) found <- antibiotics$ab[abbr_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - + # length of input is quite long, and Levenshtein distance is only max 2 if (nchar(x[i]) >= 10) { levenshtein <- as.double(utils::adist(x[i], AB_lookup$generalised_name)) @@ -227,7 +241,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { next } } - + # allow characters that resemble others, but only continue when having more than 3 characters if (nchar(x[i]) <= 3) { x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1]) @@ -235,7 +249,6 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { } x_spelling <- x[i] if (already_regex == FALSE) { - x_spelling <- gsub("[IY]+", "[IY]+", x_spelling, perl = TRUE) x_spelling <- gsub("(C|K|Q|QU|S|Z|X|KS)+", "(C|K|Q|QU|S|Z|X|KS)+", x_spelling, perl = TRUE) x_spelling <- gsub("(PH|F|V)+", "(PH|F|V)+", x_spelling, perl = TRUE) @@ -258,7 +271,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { x_spelling <- gsub("(O|0)", "(O|0)+", x_spelling, perl = TRUE) x_spelling <- gsub("++", "+", x_spelling, fixed = TRUE) } - + # try if name starts with it found <- antibiotics[which(AB_lookup$generalised_name %like% paste0("^", x_spelling)), "ab", drop = TRUE] if (length(found) > 0) { @@ -271,21 +284,23 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - + # and try if any synonym starts with it - synonym_found <- unlist(lapply(AB_lookup$generalised_synonyms, - function(s) any(s %like% paste0("^", x_spelling)))) + synonym_found <- unlist(lapply( + AB_lookup$generalised_synonyms, + function(s) any(s %like% paste0("^", x_spelling)) + )) found <- antibiotics$ab[synonym_found == TRUE] if (length(found) > 0) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - + # INITIAL SEARCH - More uncertain results ---- - + if (initial_search == TRUE && fast_mode == FALSE) { # only run on first try - + # try by removing all spaces if (x[i] %like% " ") { found <- suppressWarnings(as.ab(gsub(" +", "", x[i], perl = TRUE), initial_search = FALSE)) @@ -294,7 +309,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { next } } - + # try by removing all spaces and numbers if (x[i] %like% " " | x[i] %like% "[0-9]") { found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i], perl = TRUE), initial_search = FALSE)) @@ -303,45 +318,53 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { next } } - + # transform back from other languages and try again - x_translated <- paste(lapply(strsplit(x[i], "[^A-Z0-9]"), - function(y) { - for (i in seq_len(length(y))) { - for (lang in LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED != "en"]) { - y[i] <- ifelse(tolower(y[i]) %in% tolower(TRANSLATIONS[, lang, drop = TRUE]), - TRANSLATIONS[which(tolower(TRANSLATIONS[, lang, drop = TRUE]) == tolower(y[i]) & - !isFALSE(TRANSLATIONS$fixed)), "pattern"], - y[i]) - } - } - generalise_antibiotic_name(y) - })[[1]], - collapse = "/") + x_translated <- paste(lapply( + strsplit(x[i], "[^A-Z0-9]"), + function(y) { + for (i in seq_len(length(y))) { + for (lang in LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED != "en"]) { + y[i] <- ifelse(tolower(y[i]) %in% tolower(TRANSLATIONS[, lang, drop = TRUE]), + TRANSLATIONS[which(tolower(TRANSLATIONS[, lang, drop = TRUE]) == tolower(y[i]) & + !isFALSE(TRANSLATIONS$fixed)), "pattern"], + y[i] + ) + } + } + generalise_antibiotic_name(y) + } + )[[1]], + collapse = "/" + ) x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE)) if (!is.na(x_translated_guess)) { x_new[i] <- x_translated_guess next } - + # now also try to coerce brandname combinations like "Amoxy/clavulanic acid" - x_translated <- paste(lapply(strsplit(x_translated, "[^A-Z0-9 ]"), - function(y) { - for (i in seq_len(length(y))) { - y_name <- suppressWarnings(ab_name(y[i], language = NULL, initial_search = FALSE)) - y[i] <- ifelse(!is.na(y_name), - y_name, - y[i]) - } - generalise_antibiotic_name(y) - })[[1]], - collapse = "/") + x_translated <- paste(lapply( + strsplit(x_translated, "[^A-Z0-9 ]"), + function(y) { + for (i in seq_len(length(y))) { + y_name <- suppressWarnings(ab_name(y[i], language = NULL, initial_search = FALSE)) + y[i] <- ifelse(!is.na(y_name), + y_name, + y[i] + ) + } + generalise_antibiotic_name(y) + } + )[[1]], + collapse = "/" + ) x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE)) if (!is.na(x_translated_guess)) { x_new[i] <- x_translated_guess next } - + # try by removing all trailing capitals if (x[i] %like_case% "[a-z]+[A-Z]+$") { found <- suppressWarnings(as.ab(gsub("[A-Z]+$", "", x[i], perl = TRUE), initial_search = FALSE)) @@ -350,27 +373,28 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { next } } - + # keep only letters found <- suppressWarnings(as.ab(gsub("[^A-Z]", "", x[i], perl = TRUE), initial_search = FALSE)) if (!is.na(found)) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - + # try from a bigger text, like from a health care record, see ?ab_from_text # already calculated above if flag_multiple_results = TRUE if (flag_multiple_results == TRUE) { found <- from_text[1L] } else { found <- tryCatch(suppressWarnings(ab_from_text(x[i], initial_search = FALSE, translate_ab = FALSE)[[1]][1L]), - error = function(e) NA_character_) + error = function(e) NA_character_ + ) } if (!is.na(found)) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - + # first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!) found <- suppressWarnings(as.ab(substr(x[i], 1, 5), initial_search = FALSE)) if (!is.na(found) && ab_group(found, initial_search = FALSE) %unlike% "cephalosporins") { @@ -382,7 +406,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - + # make all consonants facultative search_str <- gsub("([BCDFGHJKLMNPQRSTVWXZ])", "\\1*", x[i], perl = TRUE) found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE)) @@ -394,7 +418,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - + # make all vowels facultative search_str <- gsub("([AEIOUY])", "\\1*", x[i], perl = TRUE) found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE)) @@ -406,7 +430,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - + # allow misspelling of vowels x_spelling <- gsub("A+", "[AEIOU]+", x_spelling, fixed = TRUE) x_spelling <- gsub("E+", "[AEIOU]+", x_spelling, fixed = TRUE) @@ -418,17 +442,18 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { x_new[i] <- note_if_more_than_one_found(found, i, from_text) next } - + # try with switched character, like "mreopenem" for (j in seq_len(nchar(x[i]))) { x_switched <- paste0( # beginning part: substr(x[i], 1, j - 1), # here is the switching of 2 characters: - substr(x[i], j + 1, j + 1), - substr(x[i], j, j), + substr(x[i], j + 1, j + 1), + substr(x[i], j, j), # ending part: - substr(x[i], j + 2, nchar(x[i]))) + substr(x[i], j + 2, nchar(x[i])) + ) found <- suppressWarnings(as.ab(x_switched, initial_search = FALSE)) if (!is.na(found)) { break @@ -438,37 +463,41 @@ as.ab <- function(x, flag_multiple_results = TRUE, info = interactive(), ...) { x_new[i] <- found[1L] next } - } # end of initial_search = TRUE - + # not found x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1]) } - + if (initial_search == TRUE & sum(already_known) < length(x)) { close(progress) } - + # take failed ATC codes apart from rest x_unknown_ATCs <- x_unknown[x_unknown %like% "[A-Z][0-9][0-9][A-Z][A-Z][0-9][0-9]"] x_unknown <- x_unknown[!x_unknown %in% x_unknown_ATCs] if (length(x_unknown_ATCs) > 0 & fast_mode == FALSE) { - warning_("in `as.ab()`: these ATC codes are not (yet) in the antibiotics data set: ", - vector_and(x_unknown_ATCs), ".") + warning_( + "in `as.ab()`: these ATC codes are not (yet) in the antibiotics data set: ", + vector_and(x_unknown_ATCs), "." + ) } - + if (length(x_unknown) > 0 & fast_mode == FALSE) { - warning_("in `as.ab()`: these values could not be coerced to a valid antimicrobial ID: ", - vector_and(x_unknown), ".") + warning_( + "in `as.ab()`: these values could not be coerced to a valid antimicrobial ID: ", + vector_and(x_unknown), "." + ) } - + x_result <- x_new[match(x_bak_clean, x)] if (length(x_result) == 0) { x_result <- NA_character_ } - + set_clean_class(x_result, - new_class = c("ab", "character")) + new_class = c("ab", "character") + ) } #' @rdname as.ab diff --git a/R/ab_from_text.R b/R/ab_from_text.R index 626c7c829..bc241b403 100644 --- a/R/ab_from_text.R +++ b/R/ab_from_text.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,7 +24,7 @@ # ==================================================================== # #' Retrieve Antimicrobial Drug Names and Doses from Clinical Text -#' +#' #' Use this function on e.g. clinical texts from health care records. It returns a [list] with all antimicrobial drugs, doses and forms of administration found in the texts. #' @param text text to analyse #' @param type type of property to search for, either `"drug"`, `"dose"` or `"administration"`, see *Examples* @@ -34,56 +34,62 @@ #' @param info a [logical] to indicate whether a progress bar should be printed, defaults to `TRUE` only in interactive mode #' @param ... arguments passed on to [as.ab()] #' @details This function is also internally used by [as.ab()], although it then only searches for the first drug name and will throw a note if more drug names could have been returned. Note: the [as.ab()] function may use very long regular expression to match brand names of antimicrobial agents. This may fail on some systems. -#' +#' #' ## Argument `type` #' At default, the function will search for antimicrobial drug names. All text elements will be searched for official names, ATC codes and brand names. As it uses [as.ab()] internally, it will correct for misspelling. -#' +#' #' With `type = "dose"` (or similar, like "dosing", "doses"), all text elements will be searched for [numeric] values that are higher than 100 and do not resemble years. The output will be [numeric]. It supports any unit (g, mg, IE, etc.) and multiple values in one clinical text, see *Examples*. -#' +#' #' With `type = "administration"` (or abbreviations, like "admin", "adm"), all text elements will be searched for a form of drug administration. It supports the following forms (including common abbreviations): buccal, implant, inhalation, instillation, intravenous, nasal, oral, parenteral, rectal, sublingual, transdermal and vaginal. Abbreviations for oral (such as 'po', 'per os') will become "oral", all values for intravenous (such as 'iv', 'intraven') will become "iv". It supports multiple values in one clinical text, see *Examples*. -#' +#' #' ## Argument `collapse` #' Without using `collapse`, this function will return a [list]. This can be convenient to use e.g. inside a `mutate()`):\cr -#' `df %>% mutate(abx = ab_from_text(clinical_text))` -#' +#' `df %>% mutate(abx = ab_from_text(clinical_text))` +#' #' The returned AB codes can be transformed to official names, groups, etc. with all [`ab_*`][ab_property()] functions such as [ab_name()] and [ab_group()], or by using the `translate_ab` argument. -#' +#' #' With using `collapse`, this function will return a [character]:\cr -#' `df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))` +#' `df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))` #' @export #' @return A [list], or a [character] if `collapse` is not `NULL` -#' @examples -#' # mind the bad spelling of amoxicillin in this line, +#' @examples +#' # mind the bad spelling of amoxicillin in this line, #' # straight from a true health care record: #' ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds") -#' +#' #' ab_from_text("500 mg amoxi po and 400mg cipro iv") #' ab_from_text("500 mg amoxi po and 400mg cipro iv", type = "dose") #' ab_from_text("500 mg amoxi po and 400mg cipro iv", type = "admin") -#' +#' #' ab_from_text("500 mg amoxi po and 400mg cipro iv", collapse = ", ") #' \donttest{ #' # if you want to know which antibiotic groups were administered, do e.g.: #' abx <- ab_from_text("500 mg amoxi po and 400mg cipro iv") #' ab_group(abx[[1]]) -#' +#' #' if (require("dplyr")) { -#' tibble(clinical_text = c("given 400mg cipro and 500 mg amox", -#' "started on doxy iv today")) %>% -#' mutate(abx_codes = ab_from_text(clinical_text), -#' abx_doses = ab_from_text(clinical_text, type = "doses"), -#' abx_admin = ab_from_text(clinical_text, type = "admin"), -#' abx_coll = ab_from_text(clinical_text, collapse = "|"), -#' abx_coll_names = ab_from_text(clinical_text, -#' collapse = "|", -#' translate_ab = "name"), -#' abx_coll_doses = ab_from_text(clinical_text, -#' type = "doses", -#' collapse = "|"), -#' abx_coll_admin = ab_from_text(clinical_text, -#' type = "admin", -#' collapse = "|")) -#' +#' tibble(clinical_text = c( +#' "given 400mg cipro and 500 mg amox", +#' "started on doxy iv today" +#' )) %>% +#' mutate( +#' abx_codes = ab_from_text(clinical_text), +#' abx_doses = ab_from_text(clinical_text, type = "doses"), +#' abx_admin = ab_from_text(clinical_text, type = "admin"), +#' abx_coll = ab_from_text(clinical_text, collapse = "|"), +#' abx_coll_names = ab_from_text(clinical_text, +#' collapse = "|", +#' translate_ab = "name" +#' ), +#' abx_coll_doses = ab_from_text(clinical_text, +#' type = "doses", +#' collapse = "|" +#' ), +#' abx_coll_admin = ab_from_text(clinical_text, +#' type = "admin", +#' collapse = "|" +#' ) +#' ) #' } #' } ab_from_text <- function(text, @@ -96,7 +102,7 @@ ab_from_text <- function(text, if (missing(type)) { type <- type[1L] } - + meet_criteria(text) meet_criteria(type, allow_class = "character", has_length = 1) meet_criteria(collapse, has_length = 1, allow_NULL = TRUE) @@ -105,18 +111,17 @@ ab_from_text <- function(text, meet_criteria(info, allow_class = "logical", has_length = 1) type <- tolower(trimws(type)) - + text <- tolower(as.character(text)) text_split_all <- strsplit(text, "[ ;.,:\\|]") progress <- progress_ticker(n = length(text_split_all), n_min = 5, print = info) on.exit(close(progress)) - + if (type %like% "(drug|ab|anti)") { - translate_ab <- get_translate_ab(translate_ab) - - if (isTRUE(thorough_search) | - (isTRUE(is.null(thorough_search)) & max(vapply(FUN.VALUE = double(1), text_split_all, length), na.rm = TRUE) <= 3)) { + + if (isTRUE(thorough_search) | + (isTRUE(is.null(thorough_search)) & max(vapply(FUN.VALUE = double(1), text_split_all, length), na.rm = TRUE) <= 3)) { text_split_all <- text_split_all[nchar(text_split_all) >= 4 & grepl("[a-z]+", text_split_all)] result <- lapply(text_split_all, function(text_split) { progress$tick() @@ -124,7 +129,6 @@ ab_from_text <- function(text, out <- as.ab(text_split, ...) ) }) - } else { # no thorough search abbr <- unlist(antibiotics$abbreviations) @@ -136,25 +140,30 @@ ab_from_text <- function(text, synonyms_part1 <- synonyms[seq_len(0.5 * length(synonyms))] synonyms_part2 <- synonyms[!synonyms %in% synonyms_part1] to_regex <- function(x) { - paste0("^(", - paste0(unique(gsub("[^a-z0-9]+", "", sort(tolower(x)))), collapse = "|"), - ").*") + paste0( + "^(", + paste0(unique(gsub("[^a-z0-9]+", "", sort(tolower(x)))), collapse = "|"), + ").*" + ) } result <- lapply(text_split_all, function(text_split) { progress$tick() suppressWarnings( - out <- as.ab(unique(c(text_split[text_split %like_case% to_regex(abbr)], - text_split[text_split %like_case% to_regex(names_atc)], - text_split[text_split %like_case% to_regex(synonyms_part1)], - text_split[text_split %like_case% to_regex(synonyms_part2)]) - ), - ...) + out <- as.ab( + unique(c( + text_split[text_split %like_case% to_regex(abbr)], + text_split[text_split %like_case% to_regex(names_atc)], + text_split[text_split %like_case% to_regex(synonyms_part1)], + text_split[text_split %like_case% to_regex(synonyms_part2)] + )), + ... + ) ) }) } - + close(progress) - + result <- lapply(result, function(out) { out <- out[!is.na(out)] if (length(out) == 0) { @@ -165,27 +174,24 @@ ab_from_text <- function(text, } out } - }) - } else if (type %like% "dos") { text_split_all <- strsplit(text, " ") result <- lapply(text_split_all, function(text_split) { text_split <- text_split[text_split %like% "^[0-9]{2,}(/[0-9]+)?[a-z]*$"] # only left part of "/", like 500 in "500/125" - text_split <- gsub("/.*", "", text_split) + text_split <- gsub("/.*", "", text_split) text_split <- gsub(",", ".", text_split, fixed = TRUE) # foreign system using comma as decimal sep text_split <- as.double(gsub("[^0-9.]", "", text_split)) # minimal 100 units/mg and no years that unlikely doses text_split <- text_split[text_split >= 100 & !text_split %in% c(1951:1999, 2001:2049)] - + if (length(text_split) > 0) { text_split } else { NA_real_ } }) - } else if (type %like% "adm") { result <- lapply(text_split_all, function(text_split) { text_split <- text_split[text_split %like% "(^iv$|intraven|^po$|per os|oral|implant|inhal|instill|nasal|paren|rectal|sublingual|buccal|trans.*dermal|vaginal)"] @@ -197,11 +203,10 @@ ab_from_text <- function(text, NA_character_ } }) - } else { stop_("`type` must be either 'drug', 'dose' or 'administration'") } - + # collapse text if needed if (!is.null(collapse)) { result <- vapply(FUN.VALUE = character(1), result, function(x) { @@ -212,7 +217,6 @@ ab_from_text <- function(text, } }) } - + result - } diff --git a/R/ab_property.R b/R/ab_property.R index 2fe1dd938..c9f81856b 100644 --- a/R/ab_property.R +++ b/R/ab_property.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -37,14 +37,14 @@ #' @param snake_case a [logical] to indicate whether the names should be in so-called [snake case](https://en.wikipedia.org/wiki/Snake_case): in lower case and all spaces/slashes replaced with an underscore (`_`) #' @param only_first a [logical] to indicate whether only the first ATC code must be returned, with giving preference to J0-codes (i.e., the antimicrobial drug group) #' @details All output [will be translated][translate] where possible. -#' +#' #' The function [ab_url()] will return the direct URL to the official WHO website. A warning will be returned if the required ATC code is not available. -#' +#' #' The function [set_ab_names()] is a special column renaming function for [data.frame]s. It renames columns names that resemble antimicrobial drugs. It always makes sure that the new column names are unique. If `property = "atc"` is set, preference is given to ATC codes from the J-group. #' @inheritSection as.ab Source #' @rdname ab_property #' @name ab_property -#' @return +#' @return #' - An [integer] in case of [ab_cid()] #' - A named [list] in case of [ab_info()] and multiple [ab_atc()]/[ab_synonyms()]/[ab_tradenames()] #' - A [double] in case of [ab_ddd()] @@ -55,43 +55,45 @@ #' @inheritSection AMR Reference Data Publicly Available #' @examples #' # all properties: -#' ab_name("AMX") # "Amoxicillin" -#' ab_atc("AMX") # "J01CA04" (ATC code from the WHO) -#' ab_cid("AMX") # 33613 (Compound ID from PubChem) -#' ab_synonyms("AMX") # a list with brand names of amoxicillin +#' ab_name("AMX") # "Amoxicillin" +#' ab_atc("AMX") # "J01CA04" (ATC code from the WHO) +#' ab_cid("AMX") # 33613 (Compound ID from PubChem) +#' ab_synonyms("AMX") # a list with brand names of amoxicillin #' ab_tradenames("AMX") # same -#' ab_group("AMX") # "Beta-lactams/penicillins" +#' ab_group("AMX") # "Beta-lactams/penicillins" #' ab_atc_group1("AMX") # "Beta-lactam antibacterials, penicillins" #' ab_atc_group2("AMX") # "Penicillins with extended spectrum" -#' ab_url("AMX") # link to the official WHO page +#' ab_url("AMX") # link to the official WHO page #' #' # smart lowercase tranformation -#' ab_name(x = c("AMC", "PLB")) # "Amoxicillin/clavulanic acid" "Polymyxin B" -#' ab_name(x = c("AMC", "PLB"), -#' tolower = TRUE) # "amoxicillin/clavulanic acid" "polymyxin B" +#' ab_name(x = c("AMC", "PLB")) # "Amoxicillin/clavulanic acid" "Polymyxin B" +#' ab_name( +#' x = c("AMC", "PLB"), +#' tolower = TRUE +#' ) # "amoxicillin/clavulanic acid" "polymyxin B" #' #' # defined daily doses (DDD) -#' ab_ddd("AMX", "oral") # 1.5 +#' ab_ddd("AMX", "oral") # 1.5 #' ab_ddd_units("AMX", "oral") # "g" -#' ab_ddd("AMX", "iv") # 3 -#' ab_ddd_units("AMX", "iv") # "g" +#' ab_ddd("AMX", "iv") # 3 +#' ab_ddd_units("AMX", "iv") # "g" #' -#' ab_info("AMX") # all properties as a list +#' ab_info("AMX") # all properties as a list #' #' # all ab_* functions use as.ab() internally, so you can go from 'any' to 'any': -#' ab_atc("AMP") # ATC code of AMP (ampicillin) -#' ab_group("J01CA01") # Drug group of ampicillins ATC code -#' ab_loinc("ampicillin") # LOINC codes of ampicillin -#' ab_name("21066-6") # "Ampicillin" (using LOINC) -#' ab_name(6249) # "Ampicillin" (using CID) -#' ab_name("J01CA01") # "Ampicillin" (using ATC) -#' +#' ab_atc("AMP") # ATC code of AMP (ampicillin) +#' ab_group("J01CA01") # Drug group of ampicillins ATC code +#' ab_loinc("ampicillin") # LOINC codes of ampicillin +#' ab_name("21066-6") # "Ampicillin" (using LOINC) +#' ab_name(6249) # "Ampicillin" (using CID) +#' ab_name("J01CA01") # "Ampicillin" (using ATC) +#' #' # spelling from different languages and dyslexia are no problem #' ab_atc("ceftriaxon") #' ab_atc("cephtriaxone") #' ab_atc("cephthriaxone") #' ab_atc("seephthriaaksone") -#' +#' #' # use set_ab_names() for renaming columns #' colnames(example_isolates) #' colnames(set_ab_names(example_isolates)) @@ -101,31 +103,31 @@ #' example_isolates %>% #' set_ab_names() %>% #' head() -#' +#' #' # this does the same: #' example_isolates %>% -#' rename_with(set_ab_names)%>% +#' rename_with(set_ab_names) %>% #' head() -#' +#' #' # set_ab_names() works with any AB property: #' example_isolates %>% -#' set_ab_names(property = "atc")%>% +#' set_ab_names(property = "atc") %>% #' head() -#' -#' example_isolates %>% -#' set_ab_names(where(is.rsi)) %>% -#' colnames() -#' -#' example_isolates %>% -#' set_ab_names(NIT:VAN) %>% -#' colnames() +#' +#' example_isolates %>% +#' set_ab_names(where(is.rsi)) %>% +#' colnames() +#' +#' example_isolates %>% +#' set_ab_names(NIT:VAN) %>% +#' colnames() #' } #' } ab_name <- function(x, language = get_AMR_locale(), tolower = FALSE, ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(tolower, allow_class = "logical", has_length = 1) - + x <- translate_into_language(ab_validate(x = x, property = "name", ...), language = language, only_affect_ab_names = TRUE) if (tolower == TRUE) { # use perl to only transform the first character @@ -176,27 +178,29 @@ ab_group <- function(x, language = get_AMR_locale(), ...) { ab_atc <- function(x, only_first = FALSE, ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(only_first, allow_class = "logical", has_length = 1) - + atcs <- ab_validate(x = x, property = "atc", ...) - + if (only_first == TRUE) { - atcs <- vapply(FUN.VALUE = character(1), - # get only the first ATC code - atcs, - function(x) { - # try to get the J-group - if (any(x %like% "^J")) { - x[x %like% "^J"][1L] - } else { - as.character(x[1L]) - } - }) + atcs <- vapply( + FUN.VALUE = character(1), + # get only the first ATC code + atcs, + function(x) { + # try to get the J-group + if (any(x %like% "^J")) { + x[x %like% "^J"][1L] + } else { + as.character(x[1L]) + } + } + ) } else if (length(atcs) == 1) { atcs <- unname(unlist(atcs)) } else { names(atcs) <- x } - + atcs } @@ -234,26 +238,30 @@ ab_loinc <- function(x, ...) { ab_ddd <- function(x, administration = "oral", ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(administration, is_in = c("oral", "iv"), has_length = 1) - + x <- as.ab(x, ...) ddd_prop <- administration # old behaviour units <- list(...)$units if (!is.null(units) && isTRUE(units)) { if (message_not_thrown_before("ab_ddd", entire_session = TRUE)) { - warning_("in `ab_ddd()`: using `ab_ddd(..., units = TRUE)` is deprecated, use `ab_ddd_units()` to retrieve units instead.", - "This warning will be shown once per session.") + warning_( + "in `ab_ddd()`: using `ab_ddd(..., units = TRUE)` is deprecated, use `ab_ddd_units()` to retrieve units instead.", + "This warning will be shown once per session." + ) } ddd_prop <- paste0(ddd_prop, "_units") } else { ddd_prop <- paste0(ddd_prop, "_ddd") } out <- ab_validate(x = x, property = ddd_prop) - + if (any(ab_name(x, language = NULL) %like% "/" & is.na(out))) { - warning_("in `ab_ddd()`: DDDs of some combined products are available for different dose combinations and not (yet) part of the AMR package.", - "Please refer to the WHOCC website:\n", - "www.whocc.no/ddd/list_of_ddds_combined_products/") + warning_( + "in `ab_ddd()`: DDDs of some combined products are available for different dose combinations and not (yet) part of the AMR package.", + "Please refer to the WHOCC website:\n", + "www.whocc.no/ddd/list_of_ddds_combined_products/" + ) } out } @@ -263,14 +271,16 @@ ab_ddd <- function(x, administration = "oral", ...) { ab_ddd_units <- function(x, administration = "oral", ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(administration, is_in = c("oral", "iv"), has_length = 1) - + x <- as.ab(x, ...) if (any(ab_name(x, language = NULL) %like% "/")) { - warning_("in `ab_ddd_units()`: DDDs of combined products are available for different dose combinations and not (yet) part of the AMR package.", - "Please refer to the WHOCC website:\n", - "www.whocc.no/ddd/list_of_ddds_combined_products/") + warning_( + "in `ab_ddd_units()`: DDDs of combined products are available for different dose combinations and not (yet) part of the AMR package.", + "Please refer to the WHOCC website:\n", + "www.whocc.no/ddd/list_of_ddds_combined_products/" + ) } - + ddd_prop <- paste0(administration, "_units") ab_validate(x = x, property = ddd_prop) } @@ -280,21 +290,29 @@ ab_ddd_units <- function(x, administration = "oral", ...) { ab_info <- function(x, language = get_AMR_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- as.ab(x, ...) - list(ab = as.character(x), - cid = ab_cid(x), - name = ab_name(x, language = language), - group = ab_group(x, language = language), - atc = ab_atc(x), - atc_group1 = ab_atc_group1(x, language = language), - atc_group2 = ab_atc_group2(x, language = language), - tradenames = ab_tradenames(x), - loinc = ab_loinc(x), - ddd = list(oral = list(amount = ab_ddd(x, administration = "oral"), - units = ab_ddd_units(x, administration = "oral")), - iv = list(amount = ab_ddd(x, administration = "iv"), - units = ab_ddd_units(x, administration = "iv")))) + list( + ab = as.character(x), + cid = ab_cid(x), + name = ab_name(x, language = language), + group = ab_group(x, language = language), + atc = ab_atc(x), + atc_group1 = ab_atc_group1(x, language = language), + atc_group2 = ab_atc_group2(x, language = language), + tradenames = ab_tradenames(x), + loinc = ab_loinc(x), + ddd = list( + oral = list( + amount = ab_ddd(x, administration = "oral"), + units = ab_ddd_units(x, administration = "oral") + ), + iv = list( + amount = ab_ddd(x, administration = "iv"), + units = ab_ddd_units(x, administration = "iv") + ) + ) + ) } @@ -303,18 +321,18 @@ ab_info <- function(x, language = get_AMR_locale(), ...) { ab_url <- function(x, open = FALSE, ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(open, allow_class = "logical", has_length = 1) - + ab <- as.ab(x = x, ...) atcs <- ab_atc(ab, only_first = TRUE) u <- paste0("https://www.whocc.no/atc_ddd_index/?code=", atcs, "&showdescription=no") u[is.na(atcs)] <- NA_character_ names(u) <- ab_name(ab) - + NAs <- ab_name(ab, tolower = TRUE, language = NULL)[!is.na(ab) & is.na(atcs)] if (length(NAs) > 0) { warning_("in `ab_url()`: no ATC code available for ", vector_and(NAs, quotes = FALSE), ".") } - + if (open == TRUE) { if (length(u) > 1 & !is.na(u[1L])) { warning_("in `ab_url()`: only the first URL will be opened, as `browseURL()` only suports one string.") @@ -343,17 +361,17 @@ set_ab_names <- function(data, ..., property = "name", language = get_AMR_locale meet_criteria(property, is_in = colnames(antibiotics), has_length = 1, ignore.case = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(snake_case, allow_class = "logical", has_length = 1, allow_NULL = TRUE) - + x_deparsed <- deparse(substitute(data)) if (length(x_deparsed) > 1 || any(x_deparsed %unlike% "[a-z]+")) { x_deparsed <- "your_data" } - + property <- tolower(property) if (is.null(snake_case)) { snake_case <- property == "name" } - + if (is.data.frame(data)) { if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) { df <- pm_select(data, ...) @@ -370,44 +388,50 @@ set_ab_names <- function(data, ..., property = "name", language = get_AMR_locale vars_ab <- as.ab(data, fast_mode = TRUE) vars <- data[!is.na(vars_ab)] } - x <- vapply(FUN.VALUE = character(1), - ab_property(vars, property = property, language = language), - function(x) { - if (property == "atc") { - # try to get the J-group - if (any(x %like% "^J")) { - x[x %like% "^J"][1L] - } else { - as.character(x[1L]) - } - } else { - as.character(x[1L]) - } - }, - USE.NAMES = FALSE) + x <- vapply( + FUN.VALUE = character(1), + ab_property(vars, property = property, language = language), + function(x) { + if (property == "atc") { + # try to get the J-group + if (any(x %like% "^J")) { + x[x %like% "^J"][1L] + } else { + as.character(x[1L]) + } + } else { + as.character(x[1L]) + } + }, + USE.NAMES = FALSE + ) if (any(x %in% c("", NA))) { - warning_("in `set_ab_names()`: no ", property, " found for column(s): ", - vector_and(vars[x %in% c("", NA)], sort = FALSE)) + warning_( + "in `set_ab_names()`: no ", property, " found for column(s): ", + vector_and(vars[x %in% c("", NA)], sort = FALSE) + ) x[x %in% c("", NA)] <- vars[x %in% c("", NA)] } - + if (snake_case == TRUE) { x <- tolower(gsub("[^a-zA-Z0-9]+", "_", x)) } - + if (any(duplicated(x))) { # very hacky way of adding the index to each duplicate # so "Amoxicillin", "Amoxicillin", "Amoxicillin" # will be "Amoxicillin", "Amoxicillin_2", "Amoxicillin_3" - invisible(lapply(unique(x), - function(u) { - dups <- which(x == u) - if (length(dups) > 1) { - # there are duplicates - dup_add_int <- dups[2:length(dups)] - x[dup_add_int] <<- paste0(x[dup_add_int], "_", c(2:length(dups))) - } - })) + invisible(lapply( + unique(x), + function(u) { + dups <- which(x == u) + if (length(dups) > 1) { + # there are duplicates + dup_add_int <- dups[2:length(dups)] + x[dup_add_int] <<- paste0(x[dup_add_int], "_", c(2:length(dups))) + } + } + )) } if (is.data.frame(data)) { colnames(data)[colnames(data) %in% vars] <- x @@ -419,25 +443,24 @@ set_ab_names <- function(data, ..., property = "name", language = get_AMR_locale } ab_validate <- function(x, property, ...) { - check_dataset_integrity() - + if (tryCatch(all(x[!is.na(x)] %in% AB_lookup$ab), error = function(e) FALSE)) { # special case for ab_* functions where class is already x <- AB_lookup[match(x, AB_lookup$ab), property, drop = TRUE] - } else { # try to catch an error when inputting an invalid argument # so the 'call.' can be set to FALSE tryCatch(x[1L] %in% antibiotics[1, property, drop = TRUE], - error = function(e) stop(e$message, call. = FALSE)) - + error = function(e) stop(e$message, call. = FALSE) + ) + if (!all(x %in% AB_lookup[, property, drop = TRUE])) { x <- as.ab(x, ...) x <- AB_lookup[match(x, AB_lookup$ab), property, drop = TRUE] } } - + if (property == "ab") { return(set_clean_class(x, new_class = c("ab", "character"))) } else if (property == "cid") { diff --git a/R/ab_selectors.R b/R/ab_selectors.R index ac56d66ea..17f14b36b 100644 --- a/R/ab_selectors.R +++ b/R/ab_selectors.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,7 +24,7 @@ # ==================================================================== # #' Antibiotic Selectors -#' +#' #' These functions allow for filtering rows and selecting columns based on antibiotic test results that are of a specific antibiotic class or group, without the need to define the columns or antibiotic abbreviations. In short, if you have a column name that resembles an antimicrobial agent, it will be picked up by any of these functions that matches its pharmaceutical class: "cefazolin", "CZO" and "J01DB04" will all be picked up by [cephalosporins()]. #' @param ab_class an antimicrobial class or a part of it, such as `"carba"` and `"carbapenems"`. The columns `group`, `atc_group1` and `atc_group2` of the [antibiotics] data set will be searched (case-insensitive) for this value. #' @param filter an [expression] to be evaluated in the [antibiotics] data set, such as `name %like% "trim"` @@ -33,12 +33,12 @@ #' @param ... ignored, only in place to allow future extensions #' @details #' These functions can be used in data set calls for selecting columns and filtering rows. They are heavily inspired by the [Tidyverse selection helpers][tidyselect::language] such as [`everything()`][tidyselect::everything()], but also work in base \R and not only in `dplyr` verbs. Nonetheless, they are very convenient to use with `dplyr` functions such as [`select()`][dplyr::select()], [`filter()`][dplyr::filter()] and [`summarise()`][dplyr::summarise()], see *Examples*. -#' -#' All columns in the data in which these functions are called will be searched for known antibiotic names, abbreviations, brand names, and codes (ATC, EARS-Net, WHO, etc.) according to the [antibiotics] data set. This means that a selector such as [aminoglycosides()] will pick up column names like 'gen', 'genta', 'J01GB03', 'tobra', 'Tobracin', etc. -#' +#' +#' All columns in the data in which these functions are called will be searched for known antibiotic names, abbreviations, brand names, and codes (ATC, EARS-Net, WHO, etc.) according to the [antibiotics] data set. This means that a selector such as [aminoglycosides()] will pick up column names like 'gen', 'genta', 'J01GB03', 'tobra', 'Tobracin', etc. +#' #' The [ab_class()] function can be used to filter/select on a manually defined antibiotic class. It searches for results in the [antibiotics] data set within the columns `group`, `atc_group1` and `atc_group2`. #' @section Full list of supported (antibiotic) classes: -#' +#' #' `r paste0(" * ", na.omit(sapply(DEFINED_AB_GROUPS, function(ab) ifelse(tolower(gsub("^AB_", "", ab)) %in% ls(envir = asNamespace("AMR")), paste0("[", tolower(gsub("^AB_", "", ab)), "()] can select: \\cr ", vector_and(paste0(ab_name(eval(parse(text = ab), envir = asNamespace("AMR")), language = NULL, tolower = TRUE), " (", eval(parse(text = ab), envir = asNamespace("AMR")), ")"), quotes = FALSE, sort = TRUE)), character(0)), USE.NAMES = FALSE)), "\n", collapse = "")` #' @rdname antibiotic_class_selectors #' @name antibiotic_class_selectors @@ -46,153 +46,144 @@ #' @export #' @inheritSection AMR Reference Data Publicly Available -#' @examples +#' @examples #' # `example_isolates` is a data set available in the AMR package. #' # See ?example_isolates. #' example_isolates -#' +#' #' # base R ------------------------------------------------------------------ -#' +#' #' # select columns 'IPM' (imipenem) and 'MEM' (meropenem) #' example_isolates[, carbapenems()] -#' +#' #' # select columns 'mo', 'AMK', 'GEN', 'KAN' and 'TOB' #' example_isolates[, c("mo", aminoglycosides())] -#' +#' #' # select only antibiotic columns with DDDs for oral treatment #' example_isolates[, administrable_per_os()] -#' +#' #' # filter using any() or all() #' example_isolates[any(carbapenems() == "R"), ] #' subset(example_isolates, any(carbapenems() == "R")) -#' +#' #' # filter on any or all results in the carbapenem columns (i.e., IPM, MEM): #' example_isolates[any(carbapenems()), ] #' example_isolates[all(carbapenems()), ] -#' +#' #' # filter with multiple antibiotic selectors using c() #' example_isolates[all(c(carbapenems(), aminoglycosides()) == "R"), ] -#' +#' #' # filter + select in one go: get penicillins in carbapenems-resistant strains #' example_isolates[any(carbapenems() == "R"), penicillins()] -#' +#' #' # You can combine selectors with '&' to be more specific. For example, #' # penicillins() would select benzylpenicillin ('peni G') and #' # administrable_per_os() would select erythromycin. Yet, when combined these #' # drugs are both omitted since benzylpenicillin is not administrable per os #' # and erythromycin is not a penicillin: #' example_isolates[, penicillins() & administrable_per_os()] -#' +#' #' # ab_selector() applies a filter in the `antibiotics` data set and is thus very #' # flexible. For instance, to select antibiotic columns with an oral DDD of at #' # least 1 gram: #' example_isolates[, ab_selector(oral_ddd > 1 & oral_units == "g")] -#' +#' #' # dplyr ------------------------------------------------------------------- #' \donttest{ #' if (require("dplyr")) { -#' +#' #' # get AMR for all aminoglycosides e.g., per ward: #' example_isolates %>% -#' group_by(ward) %>% +#' group_by(ward) %>% #' summarise(across(aminoglycosides(), resistance)) -#' #' } #' if (require("dplyr")) { -#' +#' #' # You can combine selectors with '&' to be more specific: #' example_isolates %>% #' select(penicillins() & administrable_per_os()) -#' #' } #' if (require("dplyr")) { -#' +#' #' # get AMR for only drugs that matter - no intrinsic resistance: #' example_isolates %>% -#' filter(mo_genus() %in% c("Escherichia", "Klebsiella")) %>% -#' group_by(ward) %>% +#' filter(mo_genus() %in% c("Escherichia", "Klebsiella")) %>% +#' group_by(ward) %>% #' summarise(across(not_intrinsic_resistant(), resistance)) -#' #' } #' if (require("dplyr")) { -#' +#' #' # get susceptibility for antibiotics whose name contains "trim": #' example_isolates %>% -#' filter(first_isolate()) %>% -#' group_by(ward) %>% +#' filter(first_isolate()) %>% +#' group_by(ward) %>% #' summarise(across(ab_selector(name %like% "trim"), susceptibility)) -#' #' } #' if (require("dplyr")) { -#' +#' #' # this will select columns 'IPM' (imipenem) and 'MEM' (meropenem): -#' example_isolates %>% +#' example_isolates %>% #' select(carbapenems()) -#' #' } #' if (require("dplyr")) { -#' +#' #' # this will select columns 'mo', 'AMK', 'GEN', 'KAN' and 'TOB': -#' example_isolates %>% +#' example_isolates %>% #' select(mo, aminoglycosides()) -#' #' } #' if (require("dplyr")) { -#' -#' # any() and all() work in dplyr's filter() too: -#' example_isolates %>% -#' filter(any(aminoglycosides() == "R"), -#' all(cephalosporins_2nd() == "R")) -#' +#' +#' # any() and all() work in dplyr's filter() too: +#' example_isolates %>% +#' filter( +#' any(aminoglycosides() == "R"), +#' all(cephalosporins_2nd() == "R") +#' ) #' } #' if (require("dplyr")) { -#' -#' # also works with c(): -#' example_isolates %>% +#' +#' # also works with c(): +#' example_isolates %>% #' filter(any(c(carbapenems(), aminoglycosides()) == "R")) -#' #' } #' if (require("dplyr")) { -#' -#' # not setting any/all will automatically apply all(): -#' example_isolates %>% +#' +#' # not setting any/all will automatically apply all(): +#' example_isolates %>% #' filter(aminoglycosides() == "R") -#' #' } #' if (require("dplyr")) { -#' +#' #' # this will select columns 'mo' and all antimycobacterial drugs ('RIF'): -#' example_isolates %>% +#' example_isolates %>% #' select(mo, ab_class("mycobact")) -#' #' } #' if (require("dplyr")) { -#' +#' #' # get bug/drug combinations for only glycopeptides in Gram-positives: -#' example_isolates %>% -#' filter(mo_is_gram_positive()) %>% -#' select(mo, glycopeptides()) %>% +#' example_isolates %>% +#' filter(mo_is_gram_positive()) %>% +#' select(mo, glycopeptides()) %>% #' bug_drug_combinations() %>% #' format() -#' #' } #' if (require("dplyr")) { -#' -#' data.frame(some_column = "some_value", -#' J01CA01 = "S") %>% # ATC code of ampicillin -#' select(penicillins()) # only the 'J01CA01' column will be selected -#' +#' data.frame( +#' some_column = "some_value", +#' J01CA01 = "S" +#' ) %>% # ATC code of ampicillin +#' select(penicillins()) # only the 'J01CA01' column will be selected #' } #' if (require("dplyr")) { -#' +#' #' # with recent versions of dplyr this is all equal: #' x <- example_isolates[carbapenems() == "R", ] #' y <- example_isolates %>% filter(carbapenems() == "R") -#' z <- example_isolates %>% filter(if_all(carbapenems(), ~.x == "R")) +#' z <- example_isolates %>% filter(if_all(carbapenems(), ~ .x == "R")) #' identical(x, y) && identical(y, z) #' } #' } -ab_class <- function(ab_class, +ab_class <- function(ab_class, only_rsi_columns = FALSE, only_treatable = TRUE, ...) { @@ -205,30 +196,36 @@ ab_class <- function(ab_class, #' @rdname antibiotic_class_selectors #' @details The [ab_selector()] function can be used to internally filter the [antibiotics] data set on any results, see *Examples*. It allows for filtering on a (part of) a certain name, and/or a group name or even a minimum of DDDs for oral treatment. This function yields the highest flexibility, but is also the least user-friendly, since it requires a hard-coded filter to set. #' @export -ab_selector <- function(filter, +ab_selector <- function(filter, only_rsi_columns = FALSE, only_treatable = TRUE, ...) { meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) meet_criteria(only_treatable, allow_class = "logical", has_length = 1) - + # get_current_data() has to run each time, for cases where e.g., filter() and select() are used in same call # but it only takes a couple of milliseconds vars_df <- get_current_data(arg_name = NA, call = -2) # to improve speed, get_column_abx() will only run once when e.g. in a select or group call - ab_in_data <- get_column_abx(vars_df, info = FALSE, only_rsi_columns = only_rsi_columns, - sort = FALSE, fn = "ab_selector") + ab_in_data <- get_column_abx(vars_df, + info = FALSE, only_rsi_columns = only_rsi_columns, + sort = FALSE, fn = "ab_selector" + ) call <- substitute(filter) agents <- tryCatch(AMR::antibiotics[which(eval(call, envir = AMR::antibiotics)), "ab", drop = TRUE], - error = function(e) stop_(e$message, call = -5)) + error = function(e) stop_(e$message, call = -5) + ) agents <- ab_in_data[ab_in_data %in% agents] - message_agent_names(function_name = "ab_selector", - agents = agents, - ab_group = NULL, - examples = "", - call = call) + message_agent_names( + function_name = "ab_selector", + agents = agents, + ab_group = NULL, + examples = "", + call = call + ) structure(unname(agents), - class = c("ab_selector", "character")) + class = c("ab_selector", "character") + ) } #' @rdname antibiotic_class_selectors @@ -419,24 +416,34 @@ administrable_per_os <- function(only_rsi_columns = FALSE, ...) { # but it only takes a couple of milliseconds vars_df <- get_current_data(arg_name = NA, call = -2) # to improve speed, get_column_abx() will only run once when e.g. in a select or group call - ab_in_data <- get_column_abx(vars_df, info = FALSE, only_rsi_columns = only_rsi_columns, - sort = FALSE, fn = "administrable_per_os") + ab_in_data <- get_column_abx(vars_df, + info = FALSE, only_rsi_columns = only_rsi_columns, + sort = FALSE, fn = "administrable_per_os" + ) agents_all <- antibiotics[which(!is.na(antibiotics$oral_ddd)), "ab", drop = TRUE] agents <- antibiotics[which(antibiotics$ab %in% ab_in_data & !is.na(antibiotics$oral_ddd)), "ab", drop = TRUE] agents <- ab_in_data[ab_in_data %in% agents] - message_agent_names(function_name = "administrable_per_os", - agents = agents, - ab_group = "administrable_per_os", - examples = paste0(" (such as ", - vector_or(ab_name(sample(agents_all, - size = min(5, length(agents_all)), - replace = FALSE), - tolower = TRUE, - language = NULL), - quotes = FALSE), - ")")) + message_agent_names( + function_name = "administrable_per_os", + agents = agents, + ab_group = "administrable_per_os", + examples = paste0( + " (such as ", + vector_or(ab_name(sample(agents_all, + size = min(5, length(agents_all)), + replace = FALSE + ), + tolower = TRUE, + language = NULL + ), + quotes = FALSE + ), + ")" + ) + ) structure(unname(agents), - class = c("ab_selector", "character")) + class = c("ab_selector", "character") + ) } #' @rdname antibiotic_class_selectors @@ -447,17 +454,22 @@ administrable_iv <- function(only_rsi_columns = FALSE, ...) { # but it only takes a couple of milliseconds vars_df <- get_current_data(arg_name = NA, call = -2) # to improve speed, get_column_abx() will only run once when e.g. in a select or group call - ab_in_data <- get_column_abx(vars_df, info = FALSE, only_rsi_columns = only_rsi_columns, - sort = FALSE, fn = "administrable_iv") + ab_in_data <- get_column_abx(vars_df, + info = FALSE, only_rsi_columns = only_rsi_columns, + sort = FALSE, fn = "administrable_iv" + ) agents_all <- antibiotics[which(!is.na(antibiotics$iv_ddd)), "ab", drop = TRUE] agents <- antibiotics[which(antibiotics$ab %in% ab_in_data & !is.na(antibiotics$iv_ddd)), "ab", drop = TRUE] agents <- ab_in_data[ab_in_data %in% agents] - message_agent_names(function_name = "administrable_iv", - agents = agents, - ab_group = "administrable_iv", - examples = "") + message_agent_names( + function_name = "administrable_iv", + agents = agents, + ab_group = "administrable_iv", + examples = "" + ) structure(unname(agents), - class = c("ab_selector", "character")) + class = c("ab_selector", "character") + ) } #' @rdname antibiotic_class_selectors @@ -470,35 +482,47 @@ not_intrinsic_resistant <- function(only_rsi_columns = FALSE, col_mo = NULL, ver # but it only takes a couple of milliseconds vars_df <- get_current_data(arg_name = NA, call = -2) # to improve speed, get_column_abx() will only run once when e.g. in a select or group call - ab_in_data <- get_column_abx(vars_df, info = FALSE, only_rsi_columns = only_rsi_columns, - sort = FALSE, fn = "not_intrinsic_resistant") + ab_in_data <- get_column_abx(vars_df, + info = FALSE, only_rsi_columns = only_rsi_columns, + sort = FALSE, fn = "not_intrinsic_resistant" + ) # intrinsic vars - vars_df_R <- tryCatch(sapply(eucast_rules(vars_df, - col_mo = col_mo, - version_expertrules = version_expertrules, - rules = "expert", - info = FALSE), - function(col) tryCatch(!any(is.na(col)) && all(col == "R"), - error = function(e) FALSE)), - error = function(e) stop_("in not_intrinsic_resistant(): ", e$message, call = FALSE)) - + vars_df_R <- tryCatch(sapply( + eucast_rules(vars_df, + col_mo = col_mo, + version_expertrules = version_expertrules, + rules = "expert", + info = FALSE + ), + function(col) { + tryCatch(!any(is.na(col)) && all(col == "R"), + error = function(e) FALSE + ) + } + ), + error = function(e) stop_("in not_intrinsic_resistant(): ", e$message, call = FALSE) + ) + agents <- ab_in_data[ab_in_data %in% names(vars_df_R[which(vars_df_R)])] if (length(agents) > 0 && - message_not_thrown_before("not_intrinsic_resistant", sort(agents))) { + message_not_thrown_before("not_intrinsic_resistant", sort(agents))) { agents_formatted <- paste0("'", font_bold(agents, collapse = NULL), "'") agents_names <- ab_name(names(agents), tolower = TRUE, language = NULL) need_name <- generalise_antibiotic_name(agents) != generalise_antibiotic_name(agents_names) agents_formatted[need_name] <- paste0(agents_formatted[need_name], " (", agents_names[need_name], ")") - message_("For `not_intrinsic_resistant()` removing ", - ifelse(length(agents) == 1, "column ", "columns "), - vector_and(agents_formatted, quotes = FALSE, sort = FALSE)) + message_( + "For `not_intrinsic_resistant()` removing ", + ifelse(length(agents) == 1, "column ", "columns "), + vector_and(agents_formatted, quotes = FALSE, sort = FALSE) + ) } - + vars_df_R <- names(vars_df_R)[which(!vars_df_R)] # find columns that are abx, but also intrinsic R out <- unname(intersect(ab_in_data, vars_df_R)) structure(out, - class = c("ab_selector", "character")) + class = c("ab_selector", "character") + ) } ab_select_exec <- function(function_name, @@ -509,61 +533,74 @@ ab_select_exec <- function(function_name, # but it only takes a couple of milliseconds vars_df <- get_current_data(arg_name = NA, call = -3) # to improve speed, get_column_abx() will only run once when e.g. in a select or group call - ab_in_data <- get_column_abx(vars_df, info = FALSE, only_rsi_columns = only_rsi_columns, - sort = FALSE, fn = function_name) + ab_in_data <- get_column_abx(vars_df, + info = FALSE, only_rsi_columns = only_rsi_columns, + sort = FALSE, fn = function_name + ) # untreatable drugs if (only_treatable == TRUE) { untreatable <- antibiotics[which(antibiotics$name %like% "-high|EDTA|polysorbate|macromethod|screening|/nacubactam"), "ab", drop = TRUE] if (any(untreatable %in% names(ab_in_data))) { if (message_not_thrown_before(function_name, "ab_class", "untreatable", entire_session = TRUE)) { - warning_("in `", function_name, "()`: some agents were ignored since they cannot be used for treating patients: ", - vector_and(ab_name(names(ab_in_data)[names(ab_in_data) %in% untreatable], - language = NULL, - tolower = TRUE), - quotes = FALSE, - sort = TRUE), ". They can be included using `", function_name, "(only_treatable = FALSE)`. ", - "This warning will be shown once per session.") + warning_( + "in `", function_name, "()`: some agents were ignored since they cannot be used for treating patients: ", + vector_and(ab_name(names(ab_in_data)[names(ab_in_data) %in% untreatable], + language = NULL, + tolower = TRUE + ), + quotes = FALSE, + sort = TRUE + ), ". They can be included using `", function_name, "(only_treatable = FALSE)`. ", + "This warning will be shown once per session." + ) } ab_in_data <- ab_in_data[!names(ab_in_data) %in% untreatable] } } - + if (length(ab_in_data) == 0) { message_("No antimicrobial agents found in the data.") return(NULL) } - + if (is.null(ab_class_args)) { - # their upper case equivalent are vectors with class , created in data-raw/pre-commit-hook.R + # their upper case equivalent are vectors with class , created in data-raw/_pre_commit_hook.R # carbapenems() gets its codes from AMR:::AB_CARBAPENEMS - abx <- get(paste0("AB_", toupper(function_name)), envir = asNamespace("AMR")) + abx <- get(paste0("AB_", toupper(function_name)), envir = asNamespace("AMR")) ab_group <- function_name examples <- paste0(" (such as ", vector_or(ab_name(sample(abx, size = min(2, length(abx)), replace = FALSE), - tolower = TRUE, - language = NULL), - quotes = FALSE), ")") + tolower = TRUE, + language = NULL + ), + quotes = FALSE + ), ")") } else { # this for the 'manual' ab_class() function - abx <- subset(AB_lookup, - group %like% ab_class_args | - atc_group1 %like% ab_class_args | - atc_group2 %like% ab_class_args)$ab + abx <- subset( + AB_lookup, + group %like% ab_class_args | + atc_group1 %like% ab_class_args | + atc_group2 %like% ab_class_args + )$ab ab_group <- find_ab_group(ab_class_args) function_name <- "ab_class" examples <- paste0(" (such as ", find_ab_names(ab_class_args, 2), ")") } - + # get the columns with a group names in the chosen ab class agents <- ab_in_data[names(ab_in_data) %in% abx] - - message_agent_names(function_name = function_name, - agents = agents, - ab_group = ab_group, - examples = examples, - ab_class_args = ab_class_args) - + + message_agent_names( + function_name = function_name, + agents = agents, + ab_group = ab_group, + examples = examples, + ab_class_args = ab_class_args + ) + structure(unname(agents), - class = c("ab_selector", "character")) + class = c("ab_selector", "character") + ) } #' @method c ab_selector @@ -571,7 +608,8 @@ ab_select_exec <- function(function_name, #' @noRd c.ab_selector <- function(...) { structure(unlist(lapply(list(...), as.character)), - class = c("ab_selector", "character")) + class = c("ab_selector", "character") + ) } all_any_ab_selector <- function(type, ..., na.rm = TRUE) { @@ -583,18 +621,20 @@ all_any_ab_selector <- function(type, ..., na.rm = TRUE) { } cols_ab <- cols_ab[!cols_ab %in% result] df <- get_current_data(arg_name = NA, call = -3) - + if (type == "all") { scope_fn <- all } else { scope_fn <- any } - + x_transposed <- as.list(as.data.frame(t(df[, cols_ab, drop = FALSE]), stringsAsFactors = FALSE)) - vapply(FUN.VALUE = logical(1), - X = x_transposed, - FUN = function(y) scope_fn(y %in% result, na.rm = na.rm), - USE.NAMES = FALSE) + vapply( + FUN.VALUE = logical(1), + X = x_transposed, + FUN = function(y) scope_fn(y %in% result, na.rm = na.rm), + USE.NAMES = FALSE + ) } #' @method all ab_selector @@ -654,12 +694,15 @@ any.ab_selector_any_all <- function(..., na.rm = FALSE) { } else { type <- "all" if (length(e1) > 1) { - message_("Assuming a filter on ", type, " ", length(e1), " ", gsub("[\\(\\)]", "", fn_name), - ". Wrap around `all()` or `any()` to prevent this note.") + message_( + "Assuming a filter on ", type, " ", length(e1), " ", gsub("[\\(\\)]", "", fn_name), + ". Wrap around `all()` or `any()` to prevent this note." + ) } } structure(all_any_ab_selector(type = type, e1, e2), - class = c("ab_selector_any_all", "logical")) + class = c("ab_selector_any_all", "logical") + ) } #' @method != ab_selector @@ -676,15 +719,18 @@ any.ab_selector_any_all <- function(..., na.rm = FALSE) { } else { type <- "all" if (length(e1) > 1) { - message_("Assuming a filter on ", type, " ", length(e1), " ", gsub("[\\(\\)]", "", fn_name), - ". Wrap around `all()` or `any()` to prevent this note.") + message_( + "Assuming a filter on ", type, " ", length(e1), " ", gsub("[\\(\\)]", "", fn_name), + ". Wrap around `all()` or `any()` to prevent this note." + ) } } # this is `!=`, so turn around the values rsi <- c("R", "S", "I") e2 <- rsi[rsi != e2] structure(all_any_ab_selector(type = type, e1, e2), - class = c("ab_selector_any_all", "logical")) + class = c("ab_selector_any_all", "logical") + ) } #' @method & ab_selector @@ -694,7 +740,8 @@ any.ab_selector_any_all <- function(..., na.rm = FALSE) { # this is only required for base R, since tidyselect has already implemented this # e.g., for: example_isolates[, penicillins() & administrable_per_os()] structure(intersect(unclass(e1), unclass(e2)), - class = c("ab_selector", "character")) + class = c("ab_selector", "character") + ) } #' @method | ab_selector #' @export @@ -703,7 +750,8 @@ any.ab_selector_any_all <- function(..., na.rm = FALSE) { # this is only required for base R, since tidyselect has already implemented this # e.g., for: example_isolates[, penicillins() | administrable_per_os()] structure(union(unclass(e1), unclass(e2)), - class = c("ab_selector", "character")) + class = c("ab_selector", "character") + ) } is_any <- function(el1) { @@ -720,38 +768,40 @@ is_all <- function(el1) { find_ab_group <- function(ab_class_args) { ab_class_args <- gsub("[^a-zA-Z0-9]", ".*", ab_class_args) AB_lookup %pm>% - subset(group %like% ab_class_args | - atc_group1 %like% ab_class_args | - atc_group2 %like% ab_class_args) %pm>% + subset(group %like% ab_class_args | + atc_group1 %like% ab_class_args | + atc_group2 %like% ab_class_args) %pm>% pm_pull(group) %pm>% unique() %pm>% tolower() %pm>% - sort() %pm>% + sort() %pm>% paste(collapse = "/") } find_ab_names <- function(ab_group, n = 3) { ab_group <- gsub("[^a-zA-Z|0-9]", ".*", ab_group) - + # try popular first, they have DDDs drugs <- antibiotics[which((!is.na(antibiotics$iv_ddd) | !is.na(antibiotics$oral_ddd)) & - antibiotics$name %unlike% " " & - antibiotics$group %like% ab_group & - antibiotics$ab %unlike% "[0-9]$"), ]$name + antibiotics$name %unlike% " " & + antibiotics$group %like% ab_group & + antibiotics$ab %unlike% "[0-9]$"), ]$name if (length(drugs) < n) { # now try it all drugs <- antibiotics[which((antibiotics$group %like% ab_group | - antibiotics$atc_group1 %like% ab_group | - antibiotics$atc_group2 %like% ab_group) & - antibiotics$ab %unlike% "[0-9]$"), ]$name + antibiotics$atc_group1 %like% ab_group | + antibiotics$atc_group2 %like% ab_group) & + antibiotics$ab %unlike% "[0-9]$"), ]$name } if (length(drugs) == 0) { return("??") } vector_or(ab_name(sample(drugs, size = min(n, length(drugs)), replace = FALSE), - tolower = TRUE, - language = NULL), - quotes = FALSE) + tolower = TRUE, + language = NULL + ), + quotes = FALSE + ) } message_agent_names <- function(function_name, agents, ab_group = NULL, examples = "", ab_class_args = NULL, call = NULL) { @@ -771,15 +821,19 @@ message_agent_names <- function(function_name, agents, ab_group = NULL, examples agents_names <- ab_name(names(agents), tolower = TRUE, language = NULL) need_name <- generalise_antibiotic_name(agents) != generalise_antibiotic_name(agents_names) agents_formatted[need_name] <- paste0(agents_formatted[need_name], " (", agents_names[need_name], ")") - message_("For `", function_name, "(", - ifelse(function_name == "ab_class", - paste0("\"", ab_class_args, "\""), - ifelse(!is.null(call), - paste0(deparse(call), collapse = " "), - "")), - ")` using ", - ifelse(length(agents) == 1, "column ", "columns "), - vector_and(agents_formatted, quotes = FALSE, sort = FALSE)) + message_( + "For `", function_name, "(", + ifelse(function_name == "ab_class", + paste0("\"", ab_class_args, "\""), + ifelse(!is.null(call), + paste0(deparse(call), collapse = " "), + "" + ) + ), + ")` using ", + ifelse(length(agents) == 1, "column ", "columns "), + vector_and(agents_formatted, quotes = FALSE, sort = FALSE) + ) } } } diff --git a/R/age.R b/R/age.R index 3eb91d91b..e1512ae59 100755 --- a/R/age.R +++ b/R/age.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -32,7 +32,7 @@ #' @param na.rm a [logical] to indicate whether missing values should be removed #' @param ... arguments passed on to [as.POSIXlt()], such as `origin` #' @details Ages below 0 will be returned as `NA` with a warning. Ages above 120 will only give a warning. -#' +#' #' This function vectorises over both `x` and `reference`, meaning that either can have a length of 1 while the other argument has a larger length. #' @return An [integer] (no decimals) if `exact = FALSE`, a [double] (with decimals) otherwise #' @seealso To split ages into groups, use the [age_groups()] function. @@ -40,13 +40,13 @@ #' @examples #' # 10 random pre-Y2K birth dates #' df <- data.frame(birth_date = as.Date("2000-01-01") - runif(10) * 25000) -#' +#' #' # add ages #' df$age <- age(df$birth_date) -#' +#' #' # add exact ages #' df$age_exact <- age(df$birth_date, exact = TRUE) -#' +#' #' # add age at millenium switch #' df$age_at_y2k <- age(df$birth_date, "2000-01-01") #' @@ -56,7 +56,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { meet_criteria(reference, allow_class = c("character", "Date", "POSIXt")) meet_criteria(exact, allow_class = "logical", has_length = 1) meet_criteria(na.rm, allow_class = "logical", has_length = 1) - + if (length(x) != length(reference)) { if (length(x) == 1) { x <- rep(x, length(reference)) @@ -68,26 +68,32 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { } x <- as.POSIXlt(x, ...) reference <- as.POSIXlt(reference, ...) - + # from https://stackoverflow.com/a/25450756/4575331 years_gap <- reference$year - x$year ages <- ifelse(reference$mon < x$mon | (reference$mon == x$mon & reference$mday < x$mday), - as.integer(years_gap - 1), - as.integer(years_gap)) - + as.integer(years_gap - 1), + as.integer(years_gap) + ) + # add decimals if (exact == TRUE) { # get dates of `x` when `x` would have the year of `reference` - x_in_reference_year <- as.POSIXlt(paste0(format(as.Date(reference), "%Y"), - format(as.Date(x), "-%m-%d")), - format = "%Y-%m-%d") + x_in_reference_year <- as.POSIXlt(paste0( + format(as.Date(reference), "%Y"), + format(as.Date(x), "-%m-%d") + ), + format = "%Y-%m-%d" + ) # get differences in days - n_days_x_rest <- as.double(difftime(as.Date(reference), - as.Date(x_in_reference_year), - units = "days")) + n_days_x_rest <- as.double(difftime(as.Date(reference), + as.Date(x_in_reference_year), + units = "days" + )) # get numbers of days the years of `reference` has for a reliable denominator n_days_reference_year <- as.POSIXlt(paste0(format(as.Date(reference), "%Y"), "-12-31"), - format = "%Y-%m-%d")$yday + 1 + format = "%Y-%m-%d" + )$yday + 1 # add decimal parts of year mod <- n_days_x_rest / n_days_reference_year # negative mods are cases where `x_in_reference_year` > `reference` - so 'add' a year @@ -95,7 +101,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { # and finally add to ages ages <- ages + mod } - + if (any(ages < 0, na.rm = TRUE)) { ages[!is.na(ages) & ages < 0] <- NA warning_("in `age()`: NAs introduced for ages below 0.") @@ -103,11 +109,11 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { if (any(ages > 120, na.rm = TRUE)) { warning_("in `age()`: some ages are above 120.") } - + if (isTRUE(na.rm)) { ages <- ages[!is.na(ages)] } - + if (exact == TRUE) { as.double(ages) } else { @@ -122,7 +128,7 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { #' @param split_at values to split `x` at, defaults to age groups 0-11, 12-24, 25-54, 55-74 and 75+. See *Details*. #' @param na.rm a [logical] to indicate whether missing values should be removed #' @details To split ages, the input for the `split_at` argument can be: -#' +#' #' * A [numeric] vector. A value of e.g. `c(10, 20)` will split `x` on 0-9, 10-19 and 20+. A value of only `50` will split `x` on 0-49 and 50+. #' The default is to split on young children (0-11), youth (12-24), young adults (25-54), middle-aged adults (55-74) and elderly (75+). #' * A character: @@ -163,17 +169,19 @@ age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) { #' filter(mo == as.mo("Escherichia coli")) %>% #' group_by(age_group = age_groups(age)) %>% #' select(age_group, CIP) %>% -#' ggplot_rsi(x = "age_group", -#' minimum = 0, -#' x.title = "Age Group", -#' title = "Ciprofloxacin resistance per age group") +#' ggplot_rsi( +#' x = "age_group", +#' minimum = 0, +#' x.title = "Age Group", +#' title = "Ciprofloxacin resistance per age group" +#' ) #' } #' } age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { meet_criteria(x, allow_class = c("numeric", "integer"), is_positive_or_zero = TRUE, is_finite = TRUE) meet_criteria(split_at, allow_class = c("numeric", "integer", "character"), is_positive_or_zero = TRUE, is_finite = TRUE) - meet_criteria(na.rm, allow_class = "logical", has_length = 1) - + meet_criteria(na.rm, allow_class = "logical", has_length = 1) + if (any(x < 0, na.rm = TRUE)) { x[x < 0] <- NA warning_("in `age_groups()`: NAs introduced for ages below 0.") @@ -183,7 +191,7 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { if (split_at %like% "^(child|kid|junior)") { split_at <- c(0, 1, 2, 4, 6, 13, 18) } else if (split_at %like% "^(elder|senior)") { - split_at <- c(65, 75, 85) + split_at <- c(65, 75, 85) } else if (split_at %like% "^five") { split_at <- 1:20 * 5 } else if (split_at %like% "^ten") { @@ -197,7 +205,7 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { } split_at <- split_at[!is.na(split_at)] stop_if(length(split_at) == 1, "invalid value for `split_at`") # only 0 is available - + # turn input values to 'split_at' indices y <- x lbls <- split_at @@ -206,15 +214,15 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) { # create labels lbls[i - 1] <- paste0(unique(c(split_at[i - 1], split_at[i] - 1)), collapse = "-") } - + # last category lbls[length(lbls)] <- paste0(split_at[length(split_at)], "+") - + agegroups <- factor(lbls[y], levels = lbls, ordered = TRUE) - + if (isTRUE(na.rm)) { agegroups <- agegroups[!is.na(agegroups)] } - + agegroups } diff --git a/R/amr.R b/R/amr.R index 822ad18eb..b846c75cc 100644 --- a/R/amr.R +++ b/R/amr.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -28,9 +28,9 @@ #' Welcome to the `AMR` package. #' @details #' `AMR` is a free, open-source and independent \R package to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. Our aim is to provide a standard for clean and reproducible antimicrobial resistance data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. -#' +#' #' After installing this package, \R knows `r format_included_data_number(microorganisms)` distinct microbial species and all `r format_included_data_number(rbind(antibiotics[, "atc", drop = FALSE], antivirals[, "atc", drop = FALSE]))` antibiotic, antimycotic and antiviral drugs by name and code (including ATC, EARS-NET, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. It supports any data format, including WHONET/EARS-Net data. -#' +#' #' This package is fully independent of any other \R package and works on Windows, macOS and Linux with all versions of \R since R-3.0.0 (April 2013). It was designed to work in any setting, including those with very limited resources. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the University of Groningen, in collaboration with non-profit organisations Certe Medical Diagnostics and Advice and University Medical Center Groningen. This \R package is actively maintained and free software; you can freely use and distribute it for both personal and commercial (but not patent) purposes under the terms of the GNU General Public License version 2.0 (GPL-2), as published by the Free Software Foundation. #' #' This package can be used for: @@ -50,7 +50,7 @@ #' - Getting LOINC codes of an antibiotic, or getting properties of an antibiotic based on a LOINC code #' - Machine reading the EUCAST and CLSI guidelines from 2011-2020 to translate MIC values and disk diffusion diameters to R/SI #' - Principal component analysis for AMR -#' +#' #' @section Reference Data Publicly Available: #' All data sets in this `AMR` package (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) are publicly and freely available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. We also provide tab-separated plain text files that are machine-readable and suitable for input in any software program, such as laboratory information systems. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). #' @section Contact Us: diff --git a/R/atc_online.R b/R/atc_online.R index 914e9774f..63dfb089d 100644 --- a/R/atc_online.R +++ b/R/atc_online.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -34,7 +34,7 @@ #' @param ... arguments to pass on to `atc_property` #' @details #' Options for argument `administration`: -#' +#' #' - `"Implant"` = Implant #' - `"Inhal"` = Inhalation #' - `"Instill"` = Instillation @@ -47,7 +47,7 @@ #' - `"V"` = vaginal #' #' Abbreviations of return values when using `property = "U"` (unit): -#' +#' #' - `"g"` = gram #' - `"mg"` = milligram #' - `"mcg"` = microgram @@ -56,18 +56,18 @@ #' - `"MU"` = million units #' - `"mmol"` = millimole #' - `"ml"` = millilitre (e.g. eyedrops) -#' +#' #' **N.B. This function requires an internet connection and only works if the following packages are installed: `curl`, `rvest`, `xml2`.** #' @export #' @rdname atc_online #' @source #' @examples #' \donttest{ -#' if (requireNamespace("curl") && requireNamespace("rvest") && requireNamespace("xml2")) { +#' if (requireNamespace("curl") && requireNamespace("rvest") && requireNamespace("xml2")) { #' # oral DDD (Defined Daily Dose) of amoxicillin #' atc_online_property("J01CA04", "DDD", "O") #' atc_online_ddd(ab_atc("amox")) -#' +#' #' # parenteral DDD (Defined Daily Dose) of amoxicillin #' atc_online_property("J01CA04", "DDD", "P") #' @@ -84,7 +84,7 @@ atc_online_property <- function(atc_code, meet_criteria(administration, allow_class = "character", has_length = 1) meet_criteria(url, allow_class = "character", has_length = 1, looks_like = "https?://") meet_criteria(url_vet, allow_class = "character", has_length = 1, looks_like = "https?://") - + has_internet <- import_fn("has_internet", "curl") html_attr <- import_fn("html_attr", "rvest") html_children <- import_fn("html_children", "rvest") @@ -93,20 +93,21 @@ atc_online_property <- function(atc_code, html_table <- import_fn("html_table", "rvest") html_text <- import_fn("html_text", "rvest") read_html <- import_fn("read_html", "xml2") - + check_dataset_integrity() - + if (!all(atc_code %in% unlist(antibiotics$atc))) { atc_code <- as.character(ab_atc(atc_code, only_first = TRUE)) } - + if (!has_internet()) { message_("There appears to be no internet connection, returning NA.", - add_fn = font_red, - as_note = FALSE) + add_fn = font_red, + as_note = FALSE + ) return(rep(NA, length(atc_code))) } - + property <- tolower(property) # also allow unit as property if (property == "unit") { @@ -119,12 +120,11 @@ atc_online_property <- function(atc_code, } else { returnvalue <- rep(NA_character_, length(atc_code)) } - + progress <- progress_ticker(n = length(atc_code), 3) on.exit(close(progress)) - + for (i in seq_len(length(atc_code))) { - progress$tick() if (atc_code[i] %like% "^Q") { @@ -134,19 +134,20 @@ atc_online_property <- function(atc_code, atc_url <- url } atc_url <- sub("%s", atc_code[i], atc_url, fixed = TRUE) - + if (property == "groups") { out <- tryCatch( read_html(atc_url) %pm>% html_node("#content") %pm>% html_children() %pm>% - html_node("a"), - error = function(e) NULL) + html_node("a"), + error = function(e) NULL + ) if (is.null(out)) { message_("Connection to ", atc_url, " failed.") return(rep(NA, length(atc_code))) } - + # get URLS of items hrefs <- out %pm>% html_attr("href") # get text of items @@ -156,28 +157,28 @@ atc_online_property <- function(atc_code, # last one is antibiotics, skip it texts <- texts[seq_len(length(texts)) - 1] returnvalue <- c(list(texts), returnvalue) - } else { out <- tryCatch( read_html(atc_url) %pm>% html_nodes("table") %pm>% html_table(header = TRUE) %pm>% - as.data.frame(stringsAsFactors = FALSE), - error = function(e) NULL) + as.data.frame(stringsAsFactors = FALSE), + error = function(e) NULL + ) if (is.null(out)) { message_("Connection to ", atc_url, " failed.") return(rep(NA, length(atc_code))) } - + # case insensitive column names colnames(out) <- gsub("^atc.*", "atc", tolower(colnames(out))) - + if (length(out) == 0) { warning_("in `atc_online_property()`: ATC not found: ", atc_code[i], ". Please check ", atc_url, ".") returnvalue[i] <- NA next } - + if (property %in% c("atc", "name")) { # ATC and name are only in first row returnvalue[i] <- out[1, property, drop = TRUE] @@ -195,11 +196,11 @@ atc_online_property <- function(atc_code, } } } - + if (property == "groups" & length(returnvalue) == 1) { returnvalue <- returnvalue[[1]] } - + returnvalue } diff --git a/R/availability.R b/R/availability.R index 3ea9a8ed9..aeb7701ae 100644 --- a/R/availability.R +++ b/R/availability.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -44,50 +44,52 @@ availability <- function(tbl, width = NULL) { meet_criteria(tbl, allow_class = "data.frame") meet_criteria(width, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE) - + tbl <- as.data.frame(tbl, stringsAsFactors = FALSE) - + x <- vapply(FUN.VALUE = double(1), tbl, function(x) { - 1 - sum(is.na(x)) / length(x) + 1 - sum(is.na(x)) / length(x) }) n <- vapply(FUN.VALUE = double(1), tbl, function(x) length(x[!is.na(x)])) R <- vapply(FUN.VALUE = double(1), tbl, function(x) ifelse(is.rsi(x), resistance(x, minimum = 0), NA_real_)) R_print <- character(length(R)) R_print[!is.na(R)] <- percentage(R[!is.na(R)]) R_print[is.na(R)] <- "" - + if (is.null(width)) { width <- options()$width - (max(nchar(colnames(tbl))) + - # count col - 8 + - # available % column - 10 + - # resistant % column - 10 + - # extra margin - 5) + # count col + 8 + + # available % column + 10 + + # resistant % column + 10 + + # extra margin + 5) width <- width / 2 } - + if (length(R[is.na(R)]) == ncol(tbl)) { width <- width * 2 + 10 } - + x_chars_R <- strrep("#", round(width * R, digits = 2)) x_chars_SI <- strrep("-", width - nchar(x_chars_R)) vis_resistance <- paste0("|", x_chars_R, x_chars_SI, "|") vis_resistance[is.na(R)] <- "" - + x_chars <- strrep("#", round(x, digits = 2) / (1 / width)) x_chars_empty <- strrep("-", width - nchar(x_chars)) - - df <- data.frame(count = n, - available = percentage(x), - visual_availabilty = paste0("|", x_chars, x_chars_empty, "|"), - resistant = R_print, - visual_resistance = vis_resistance, - stringsAsFactors = FALSE) + + df <- data.frame( + count = n, + available = percentage(x), + visual_availabilty = paste0("|", x_chars, x_chars_empty, "|"), + resistant = R_print, + visual_resistance = vis_resistance, + stringsAsFactors = FALSE + ) if (length(R[is.na(R)]) == ncol(tbl)) { df[, 1:3, drop = FALSE] } else { diff --git a/R/bug_drug_combinations.R b/R/bug_drug_combinations.R index 35f598b0c..b4a432525 100644 --- a/R/bug_drug_combinations.R +++ b/R/bug_drug_combinations.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,39 +24,44 @@ # ==================================================================== # #' Determine Bug-Drug Combinations -#' +#' #' Determine antimicrobial resistance (AMR) of all bug-drug combinations in your data set where at least 30 (default) isolates are available per species. Use [format()] on the result to prettify it to a publishable/printable format, see *Examples*. #' @inheritParams eucast_rules #' @param combine_IR a [logical] to indicate whether values R and I should be summed #' @param add_ab_group a [logical] to indicate where the group of the antimicrobials must be included as a first column #' @param remove_intrinsic_resistant [logical] to indicate that rows and columns with 100% resistance for all tested antimicrobials must be removed from the table -#' @param FUN the function to call on the `mo` column to transform the microorganism codes, defaults to [mo_shortname()] +#' @param FUN the function to call on the `mo` column to transform the microorganism codes, defaults to [mo_shortname()] #' @param translate_ab a [character] of length 1 containing column names of the [antibiotics] data set #' @param ... arguments passed on to `FUN` #' @inheritParams rsi_df #' @inheritParams base::formatC -#' @details The function [format()] calculates the resistance per bug-drug combination. Use `combine_IR = FALSE` (default) to test R vs. S+I and `combine_IR = TRUE` to test R+I vs. S. +#' @details The function [format()] calculates the resistance per bug-drug combination. Use `combine_IR = FALSE` (default) to test R vs. S+I and `combine_IR = TRUE` to test R+I vs. S. #' @export #' @rdname bug_drug_combinations #' @return The function [bug_drug_combinations()] returns a [data.frame] with columns "mo", "ab", "S", "I", "R" and "total". #' @source \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, *Clinical and Laboratory Standards Institute (CLSI)*. . -#' @examples +#' @examples #' \donttest{ #' x <- bug_drug_combinations(example_isolates) #' head(x) #' format(x, translate_ab = "name (atc)") -#' +#' #' # Use FUN to change to transformation of microorganism codes -#' bug_drug_combinations(example_isolates, -#' FUN = mo_gramstain) -#' #' bug_drug_combinations(example_isolates, -#' FUN = function(x) ifelse(x == as.mo("Escherichia coli"), -#' "E. coli", -#' "Others")) +#' FUN = mo_gramstain +#' ) +#' +#' bug_drug_combinations(example_isolates, +#' FUN = function(x) { +#' ifelse(x == as.mo("Escherichia coli"), +#' "E. coli", +#' "Others" +#' ) +#' } +#' ) #' } -bug_drug_combinations <- function(x, - col_mo = NULL, +bug_drug_combinations <- function(x, + col_mo = NULL, FUN = mo_shortname, ...) { meet_criteria(x, allow_class = "data.frame", contains_column_class = "rsi") @@ -71,13 +76,13 @@ bug_drug_combinations <- function(x, } else { stop_ifnot(col_mo %in% colnames(x), "column '", col_mo, "' (`col_mo`) not found") } - + x.bak <- x x <- as.data.frame(x, stringsAsFactors = FALSE) x[, col_mo] <- FUN(x[, col_mo, drop = TRUE], ...) - + unique_mo <- sort(unique(x[, col_mo, drop = TRUE])) - + # select only groups and antibiotics if (is_null_or_grouped_tbl(x.bak)) { data_has_groups <- TRUE @@ -87,21 +92,23 @@ bug_drug_combinations <- function(x, data_has_groups <- FALSE x <- x[, c(col_mo, names(which(vapply(FUN.VALUE = logical(1), x, is.rsi)))), drop = FALSE] } - + run_it <- function(x) { - out <- data.frame(mo = character(0), - ab = character(0), - S = integer(0), - I = integer(0), - R = integer(0), - total = integer(0), - stringsAsFactors = FALSE) + out <- data.frame( + mo = character(0), + ab = character(0), + S = integer(0), + I = integer(0), + R = integer(0), + total = integer(0), + stringsAsFactors = FALSE + ) if (data_has_groups) { group_values <- unique(x[, which(colnames(x) %in% groups), drop = FALSE]) rownames(group_values) <- NULL x <- x[, which(!colnames(x) %in% groups), drop = FALSE] } - + for (i in seq_len(length(unique_mo))) { # filter on MO group and only select R/SI columns x_mo_filter <- x[which(x[, col_mo, drop = TRUE] == unique_mo[i]), names(which(vapply(FUN.VALUE = logical(1), x, is.rsi))), drop = FALSE] @@ -111,18 +118,21 @@ bug_drug_combinations <- function(x, data.frame(S = m["S", ], I = m["I", ], R = m["R", ], stringsAsFactors = FALSE) }) merged <- do.call(rbind, pivot) - out_group <- data.frame(mo = rep(unique_mo[i], NROW(merged)), - ab = rownames(merged), - S = merged$S, - I = merged$I, - R = merged$R, - total = merged$S + merged$I + merged$R, - stringsAsFactors = FALSE) + out_group <- data.frame( + mo = rep(unique_mo[i], NROW(merged)), + ab = rownames(merged), + S = merged$S, + I = merged$I, + R = merged$R, + total = merged$S + merged$I + merged$R, + stringsAsFactors = FALSE + ) if (data_has_groups) { if (nrow(group_values) < nrow(out_group)) { # repeat group_values for the number of rows in out_group repeated <- rep(seq_len(nrow(group_values)), - each = nrow(out_group) / nrow(group_values)) + each = nrow(out_group) / nrow(group_values) + ) group_values <- group_values[repeated, , drop = FALSE] } out_group <- cbind(group_values, out_group) @@ -141,7 +151,7 @@ bug_drug_combinations <- function(x, } res } - + if (data_has_groups) { out <- apply_group(x, "run_it", groups) } else { @@ -176,27 +186,31 @@ format.bug_drug_combinations <- function(x, meet_criteria(remove_intrinsic_resistant, allow_class = "logical", has_length = 1) meet_criteria(decimal.mark, allow_class = "character", has_length = 1) meet_criteria(big.mark, allow_class = "character", has_length = 1) - + x.bak <- x if (inherits(x, "grouped")) { # bug_drug_combinations() has been run on groups, so de-group here warning_("in `format()`: formatting the output of `bug_drug_combinations()` does not support grouped variables, they were ignored") x <- as.data.frame(x, stringsAsFactors = FALSE) idx <- split(seq_len(nrow(x)), paste0(x$mo, "%%", x$ab)) - x <- data.frame(mo = gsub("(.*)%%(.*)", "\\1", names(idx)), - ab = gsub("(.*)%%(.*)", "\\2", names(idx)), - S = sapply(idx, function(i) sum(x$S[i], na.rm = TRUE)), - I = sapply(idx, function(i) sum(x$I[i], na.rm = TRUE)), - R = sapply(idx, function(i) sum(x$R[i], na.rm = TRUE)), - total = sapply(idx, function(i) sum(x$S[i], na.rm = TRUE) + - sum(x$I[i], na.rm = TRUE) + - sum(x$R[i], na.rm = TRUE)), - stringsAsFactors = FALSE) + x <- data.frame( + mo = gsub("(.*)%%(.*)", "\\1", names(idx)), + ab = gsub("(.*)%%(.*)", "\\2", names(idx)), + S = sapply(idx, function(i) sum(x$S[i], na.rm = TRUE)), + I = sapply(idx, function(i) sum(x$I[i], na.rm = TRUE)), + R = sapply(idx, function(i) sum(x$R[i], na.rm = TRUE)), + total = sapply(idx, function(i) { + sum(x$S[i], na.rm = TRUE) + + sum(x$I[i], na.rm = TRUE) + + sum(x$R[i], na.rm = TRUE) + }), + stringsAsFactors = FALSE + ) } - + x <- as.data.frame(x, stringsAsFactors = FALSE) x <- subset(x, total >= minimum) - + if (remove_intrinsic_resistant == TRUE) { x <- subset(x, R != total) } @@ -205,7 +219,7 @@ format.bug_drug_combinations <- function(x, } else { x$isolates <- x$R + x$I } - + give_ab_name <- function(ab, format, language) { format <- tolower(format) ab_txt <- rep(format, length(ab)) @@ -221,15 +235,16 @@ format.bug_drug_combinations <- function(x, } ab_txt } - + remove_NAs <- function(.data) { cols <- colnames(.data) .data <- as.data.frame(lapply(.data, function(x) ifelse(is.na(x), "", x)), - stringsAsFactors = FALSE) + stringsAsFactors = FALSE + ) colnames(.data) <- cols .data } - + create_var <- function(.data, ...) { dots <- list(...) for (i in seq_len(length(dots))) { @@ -237,66 +252,74 @@ format.bug_drug_combinations <- function(x, } .data } - + y <- x %pm>% - create_var(ab = as.ab(x$ab), - ab_txt = give_ab_name(ab = x$ab, format = translate_ab, language = language)) %pm>% - pm_group_by(ab, ab_txt, mo) %pm>% - pm_summarise(isolates = sum(isolates, na.rm = TRUE), - total = sum(total, na.rm = TRUE)) %pm>% + create_var( + ab = as.ab(x$ab), + ab_txt = give_ab_name(ab = x$ab, format = translate_ab, language = language) + ) %pm>% + pm_group_by(ab, ab_txt, mo) %pm>% + pm_summarise( + isolates = sum(isolates, na.rm = TRUE), + total = sum(total, na.rm = TRUE) + ) %pm>% pm_ungroup() - - y <- y %pm>% - create_var(txt = paste0(percentage(y$isolates / y$total, decimal.mark = decimal.mark, big.mark = big.mark), - " (", trimws(format(y$isolates, big.mark = big.mark)), "/", - trimws(format(y$total, big.mark = big.mark)), ")")) %pm>% + + y <- y %pm>% + create_var(txt = paste0( + percentage(y$isolates / y$total, decimal.mark = decimal.mark, big.mark = big.mark), + " (", trimws(format(y$isolates, big.mark = big.mark)), "/", + trimws(format(y$total, big.mark = big.mark)), ")" + )) %pm>% pm_select(ab, ab_txt, mo, txt) %pm>% pm_arrange(mo) - + # replace tidyr::pivot_wider() from here for (i in unique(y$mo)) { mo_group <- y[which(y$mo == i), c("ab", "txt"), drop = FALSE] colnames(mo_group) <- c("ab", i) rownames(mo_group) <- NULL - y <- y %pm>% + y <- y %pm>% pm_left_join(mo_group, by = "ab") } - y <- y %pm>% - pm_distinct(ab, .keep_all = TRUE) %pm>% - pm_select(-mo, -txt) %pm>% + y <- y %pm>% + pm_distinct(ab, .keep_all = TRUE) %pm>% + pm_select(-mo, -txt) %pm>% # replace tidyr::pivot_wider() until here remove_NAs() select_ab_vars <- function(.data) { .data[, c("ab_group", "ab_txt", colnames(.data)[!colnames(.data) %in% c("ab_group", "ab_txt", "ab")]), drop = FALSE] } - - y <- y %pm>% - create_var(ab_group = ab_group(y$ab, language = language)) %pm>% - select_ab_vars() %pm>% + + y <- y %pm>% + create_var(ab_group = ab_group(y$ab, language = language)) %pm>% + select_ab_vars() %pm>% pm_arrange(ab_group, ab_txt) - y <- y %pm>% + y <- y %pm>% create_var(ab_group = ifelse(y$ab_group != pm_lag(y$ab_group) | is.na(pm_lag(y$ab_group)), y$ab_group, "")) - + if (add_ab_group == FALSE) { - y <- y %pm>% + y <- y %pm>% pm_select(-ab_group) %pm>% pm_rename("Drug" = ab_txt) colnames(y)[1] <- translate_into_language(colnames(y)[1], language, only_unknown = FALSE) } else { - y <- y %pm>% - pm_rename("Group" = ab_group, - "Drug" = ab_txt) + y <- y %pm>% + pm_rename( + "Group" = ab_group, + "Drug" = ab_txt + ) } - + if (!is.null(language)) { colnames(y) <- translate_into_language(colnames(y), language, only_unknown = FALSE) } - + if (remove_intrinsic_resistant == TRUE) { y <- y[, !vapply(FUN.VALUE = logical(1), y, function(col) all(col %like% "100", na.rm = TRUE) & !any(is.na(col))), drop = FALSE] } - + rownames(y) <- NULL as_original_data_class(y, class(x.bak)) } @@ -305,10 +328,14 @@ format.bug_drug_combinations <- function(x, #' @export print.bug_drug_combinations <- function(x, ...) { x_class <- class(x) - print(set_clean_class(x, - new_class = x_class[!x_class %in% c("bug_drug_combinations", "grouped")]), - ...) + print( + set_clean_class(x, + new_class = x_class[!x_class %in% c("bug_drug_combinations", "grouped")] + ), + ... + ) message_("Use 'format()' on this result to get a publishable/printable format.", - ifelse(inherits(x, "grouped"), " Note: The grouping variable(s) will be ignored.", ""), - as_note = FALSE) + ifelse(inherits(x, "grouped"), " Note: The grouping variable(s) will be ignored.", ""), + as_note = FALSE + ) } diff --git a/R/catalogue_of_life.R b/R/catalogue_of_life.R index c47da3a21..cf7f29955 100755 --- a/R/catalogue_of_life.R +++ b/R/catalogue_of_life.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -94,41 +94,52 @@ NULL #' @inheritSection catalogue_of_life Catalogue of Life #' @export catalogue_of_life_version <- function() { - check_dataset_integrity() - + # see the `CATALOGUE_OF_LIFE` list in R/globals.R - lst <- list(CoL = - list(version = gsub("{year}", CATALOGUE_OF_LIFE$year, CATALOGUE_OF_LIFE$version, fixed = TRUE), - url = gsub("{year}", CATALOGUE_OF_LIFE$year, CATALOGUE_OF_LIFE$url_CoL, fixed = TRUE), - n = nrow(pm_filter(microorganisms, source == "CoL"))), - LPSN = - list(version = "List of Prokaryotic names with Standing in Nomenclature", - url = CATALOGUE_OF_LIFE$url_LPSN, - yearmonth = CATALOGUE_OF_LIFE$yearmonth_LPSN, - n = nrow(pm_filter(microorganisms, source == "LPSN"))), - total_included = - list( - n_total_species = nrow(microorganisms), - n_total_synonyms = nrow(microorganisms.old))) - + lst <- list( + CoL = + list( + version = gsub("{year}", CATALOGUE_OF_LIFE$year, CATALOGUE_OF_LIFE$version, fixed = TRUE), + url = gsub("{year}", CATALOGUE_OF_LIFE$year, CATALOGUE_OF_LIFE$url_CoL, fixed = TRUE), + n = nrow(pm_filter(microorganisms, source == "CoL")) + ), + LPSN = + list( + version = "List of Prokaryotic names with Standing in Nomenclature", + url = CATALOGUE_OF_LIFE$url_LPSN, + yearmonth = CATALOGUE_OF_LIFE$yearmonth_LPSN, + n = nrow(pm_filter(microorganisms, source == "LPSN")) + ), + total_included = + list( + n_total_species = nrow(microorganisms), + n_total_synonyms = nrow(microorganisms.old) + ) + ) + set_clean_class(lst, - new_class = c("catalogue_of_life_version", "list")) + new_class = c("catalogue_of_life_version", "list") + ) } #' @method print catalogue_of_life_version #' @export #' @noRd print.catalogue_of_life_version <- function(x, ...) { - cat(paste0(font_bold("Included in this AMR package (v", utils::packageDescription("AMR")$Version, ") are:\n\n", collapse = ""), - font_underline(x$CoL$version), "\n", - " Available at: ", font_blue(x$CoL$url), "\n", - " Number of included microbial species: ", format(x$CoL$n, big.mark = ","), "\n", - font_underline(paste0(x$LPSN$version, " (", - x$LPSN$yearmonth, ")")), "\n", - " Available at: ", font_blue(x$LPSN$url), "\n", - " Number of included bacterial species: ", format(x$LPSN$n, big.mark = ","), "\n\n", - "=> Total number of species included: ", format(x$total_included$n_total_species, big.mark = ","), "\n", - "=> Total number of synonyms included: ", format(x$total_included$n_total_synonyms, big.mark = ","), "\n\n", - "See for more info ", font_grey_bg("`?microorganisms`"), " and ", font_grey_bg("`?catalogue_of_life`"), ".\n")) + cat(paste0( + font_bold("Included in this AMR package (v", utils::packageDescription("AMR")$Version, ") are:\n\n", collapse = ""), + font_underline(x$CoL$version), "\n", + " Available at: ", font_blue(x$CoL$url), "\n", + " Number of included microbial species: ", format(x$CoL$n, big.mark = ","), "\n", + font_underline(paste0( + x$LPSN$version, " (", + x$LPSN$yearmonth, ")" + )), "\n", + " Available at: ", font_blue(x$LPSN$url), "\n", + " Number of included bacterial species: ", format(x$LPSN$n, big.mark = ","), "\n\n", + "=> Total number of species included: ", format(x$total_included$n_total_species, big.mark = ","), "\n", + "=> Total number of synonyms included: ", format(x$total_included$n_total_synonyms, big.mark = ","), "\n\n", + "See for more info ", font_grey_bg("`?microorganisms`"), " and ", font_grey_bg("`?catalogue_of_life`"), ".\n" + )) } diff --git a/R/count.R b/R/count.R index 7fb3865fb..715a18295 100755 --- a/R/count.R +++ b/R/count.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -32,7 +32,7 @@ #' @inheritParams proportion #' @inheritSection as.rsi Interpretation of R and S/I #' @details These functions are meant to count isolates. Use the [resistance()]/[susceptibility()] functions to calculate microbial resistance/susceptibility. -#' +#' #' The function [count_resistant()] is equal to the function [count_R()]. The function [count_susceptible()] is equal to the function [count_SI()]. #' #' The function [n_rsi()] is an alias of [count_all()]. They can be used to count all available isolates, i.e. where all input antibiotics have an available result (S, I or R). Their use is equal to `n_distinct()`. Their function is equal to `count_susceptible(...) + count_resistant(...)`. @@ -47,11 +47,11 @@ #' @examples #' # example_isolates is a data set available in the AMR package. #' # run ?example_isolates for more info. -#' +#' #' # base R ------------------------------------------------------------ -#' count_resistant(example_isolates$AMX) # counts "R" +#' count_resistant(example_isolates$AMX) # counts "R" #' count_susceptible(example_isolates$AMX) # counts "S" and "I" -#' count_all(example_isolates$AMX) # counts "S", "I" and "R" +#' count_all(example_isolates$AMX) # counts "S", "I" and "R" #' #' # be more specific #' count_S(example_isolates$AMX) @@ -76,36 +76,38 @@ #' if (require("dplyr")) { #' example_isolates %>% #' group_by(ward) %>% -#' summarise(R = count_R(CIP), -#' I = count_I(CIP), -#' S = count_S(CIP), -#' n1 = count_all(CIP), # the actual total; sum of all three -#' n2 = n_rsi(CIP), # same - analogous to n_distinct -#' total = n()) # NOT the number of tested isolates! -#' +#' summarise( +#' R = count_R(CIP), +#' I = count_I(CIP), +#' S = count_S(CIP), +#' n1 = count_all(CIP), # the actual total; sum of all three +#' n2 = n_rsi(CIP), # same - analogous to n_distinct +#' total = n() +#' ) # NOT the number of tested isolates! +#' #' # Number of available isolates for a whole antibiotic class #' # (i.e., in this data set columns GEN, TOB, AMK, KAN) #' example_isolates %>% #' group_by(ward) %>% #' summarise(across(aminoglycosides(), n_rsi)) -#' +#' #' # Count co-resistance between amoxicillin/clav acid and gentamicin, #' # so we can see that combination therapy does a lot more than mono therapy. #' # Please mind that `susceptibility()` calculates percentages right away instead. #' example_isolates %>% count_susceptible(AMC) # 1433 -#' example_isolates %>% count_all(AMC) # 1879 -#' +#' example_isolates %>% count_all(AMC) # 1879 +#' #' example_isolates %>% count_susceptible(GEN) # 1399 -#' example_isolates %>% count_all(GEN) # 1855 -#' +#' example_isolates %>% count_all(GEN) # 1855 +#' #' example_isolates %>% count_susceptible(AMC, GEN) # 1764 -#' example_isolates %>% count_all(AMC, GEN) # 1936 -#' +#' example_isolates %>% count_all(AMC, GEN) # 1936 +#' #' # Get number of S+I vs. R immediately of selected columns #' example_isolates %>% #' select(AMX, CIP) %>% #' count_df(translate = FALSE) -#' +#' #' # It also supports grouping variables #' example_isolates %>% #' select(ward, AMX, CIP) %>% @@ -116,10 +118,12 @@ count_resistant <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = "R", - only_all_tested = only_all_tested, - only_count = TRUE), - error = function(e) stop_(e$message, call = -5)) + ab_result = "R", + only_all_tested = only_all_tested, + only_count = TRUE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname count @@ -127,10 +131,12 @@ count_resistant <- function(..., only_all_tested = FALSE) { count_susceptible <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = c("S", "I"), - only_all_tested = only_all_tested, - only_count = TRUE), - error = function(e) stop_(e$message, call = -5)) + ab_result = c("S", "I"), + only_all_tested = only_all_tested, + only_count = TRUE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname count @@ -138,10 +144,12 @@ count_susceptible <- function(..., only_all_tested = FALSE) { count_R <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = "R", - only_all_tested = only_all_tested, - only_count = TRUE), - error = function(e) stop_(e$message, call = -5)) + ab_result = "R", + only_all_tested = only_all_tested, + only_count = TRUE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname count @@ -152,10 +160,12 @@ count_IR <- function(..., only_all_tested = FALSE) { } tryCatch( rsi_calc(..., - ab_result = c("I", "R"), - only_all_tested = only_all_tested, - only_count = TRUE), - error = function(e) stop_(e$message, call = -5)) + ab_result = c("I", "R"), + only_all_tested = only_all_tested, + only_count = TRUE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname count @@ -163,10 +173,12 @@ count_IR <- function(..., only_all_tested = FALSE) { count_I <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = "I", - only_all_tested = only_all_tested, - only_count = TRUE), - error = function(e) stop_(e$message, call = -5)) + ab_result = "I", + only_all_tested = only_all_tested, + only_count = TRUE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname count @@ -174,10 +186,12 @@ count_I <- function(..., only_all_tested = FALSE) { count_SI <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = c("S", "I"), - only_all_tested = only_all_tested, - only_count = TRUE), - error = function(e) stop_(e$message, call = -5)) + ab_result = c("S", "I"), + only_all_tested = only_all_tested, + only_count = TRUE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname count @@ -188,10 +202,12 @@ count_S <- function(..., only_all_tested = FALSE) { } tryCatch( rsi_calc(..., - ab_result = "S", - only_all_tested = only_all_tested, - only_count = TRUE), - error = function(e) stop_(e$message, call = -5)) + ab_result = "S", + only_all_tested = only_all_tested, + only_count = TRUE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname count @@ -199,10 +215,12 @@ count_S <- function(..., only_all_tested = FALSE) { count_all <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = c("S", "I", "R"), - only_all_tested = only_all_tested, - only_count = TRUE), - error = function(e) stop_(e$message, call = -5)) + ab_result = c("S", "I", "R"), + only_all_tested = only_all_tested, + only_count = TRUE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname count @@ -217,12 +235,15 @@ count_df <- function(data, combine_SI = TRUE, combine_IR = FALSE) { tryCatch( - rsi_calc_df(type = "count", - data = data, - translate_ab = translate_ab, - language = language, - combine_SI = combine_SI, - combine_IR = combine_IR, - combine_SI_missing = missing(combine_SI)), - error = function(e) stop_(e$message, call = -5)) + rsi_calc_df( + type = "count", + data = data, + translate_ab = translate_ab, + language = language, + combine_SI = combine_SI, + combine_IR = combine_IR, + combine_SI_missing = missing(combine_SI) + ), + error = function(e) stop_(e$message, call = -5) + ) } diff --git a/R/custom_eucast_rules.R b/R/custom_eucast_rules.R index b237094ec..3449be847 100644 --- a/R/custom_eucast_rules.R +++ b/R/custom_eucast_rules.R @@ -24,87 +24,96 @@ # ==================================================================== # #' Define Custom EUCAST Rules -#' +#' #' Define custom EUCAST rules for your organisation or specific analysis and use the output of this function in [eucast_rules()]. #' @param ... rules in [formula][base::tilde] notation, see *Examples* #' @details #' Some organisations have their own adoption of EUCAST rules. This function can be used to define custom EUCAST rules to be used in the [eucast_rules()] function. #' @section How it works: -#' +#' #' ### Basics -#' +#' #' If you are familiar with the [`case_when()`][dplyr::case_when()] function of the `dplyr` package, you will recognise the input method to set your own rules. Rules must be set using what \R considers to be the 'formula notation'. The rule itself is written *before* the tilde (`~`) and the consequence of the rule is written *after* the tilde: -#' +#' #' ```{r} #' x <- custom_eucast_rules(TZP == "S" ~ aminopenicillins == "S", #' TZP == "R" ~ aminopenicillins == "R") #' ``` -#' +#' #' These are two custom EUCAST rules: if TZP (piperacillin/tazobactam) is "S", all aminopenicillins (ampicillin and amoxicillin) must be made "S", and if TZP is "R", aminopenicillins must be made "R". These rules can also be printed to the console, so it is immediately clear how they work: -#' +#' #' ```{r} #' x #' ``` -#' +#' #' The rules (the part *before* the tilde, in above example `TZP == "S"` and `TZP == "R"`) must be evaluable in your data set: it should be able to run as a filter in your data set without errors. This means for the above example that the column `TZP` must exist. We will create a sample data set and test the rules set: -#' +#' #' ```{r} #' df <- data.frame(mo = c("Escherichia coli", "Klebsiella pneumoniae"), #' TZP = as.rsi("R"), #' ampi = as.rsi("S"), #' cipro = as.rsi("S")) #' df -#' +#' #' eucast_rules(df, rules = "custom", custom_rules = x, info = FALSE) #' ``` -#' +#' #' ### Using taxonomic properties in rules -#' +#' #' There is one exception in variables used for the rules: all column names of the [microorganisms] data set can also be used, but do not have to exist in the data set. These column names are: `r vector_and(colnames(microorganisms), sort = FALSE)`. Thus, this next example will work as well, despite the fact that the `df` data set does not contain a column `genus`: -#' +#' #' ```{r} #' y <- custom_eucast_rules(TZP == "S" & genus == "Klebsiella" ~ aminopenicillins == "S", #' TZP == "R" & genus == "Klebsiella" ~ aminopenicillins == "R") #' #' eucast_rules(df, rules = "custom", custom_rules = y, info = FALSE) #' ``` -#' +#' #' ### Usage of antibiotic group names -#' +#' #' It is possible to define antibiotic groups instead of single antibiotics for the rule consequence, the part *after* the tilde. In above examples, the antibiotic group `aminopenicillins` is used to include ampicillin and amoxicillin. The following groups are allowed (case-insensitive). Within parentheses are the agents that will be matched when running the rule. -#' +#' #' `r paste0(" * ", sapply(DEFINED_AB_GROUPS, function(x) paste0("\"", tolower(gsub("^AB_", "", x)), "\"\\cr(", vector_and(ab_name(eval(parse(text = x), envir = asNamespace("AMR")), language = NULL, tolower = TRUE), quotes = FALSE), ")"), USE.NAMES = FALSE), "\n", collapse = "")` #' @returns A [list] containing the custom rules #' @export #' @examples -#' x <- custom_eucast_rules(AMC == "R" & genus == "Klebsiella" ~ aminopenicillins == "R", -#' AMC == "I" & genus == "Klebsiella" ~ aminopenicillins == "I") +#' x <- custom_eucast_rules( +#' AMC == "R" & genus == "Klebsiella" ~ aminopenicillins == "R", +#' AMC == "I" & genus == "Klebsiella" ~ aminopenicillins == "I" +#' ) #' x -#' +#' #' # run the custom rule set (verbose = TRUE will return a logbook instead of the data set): #' eucast_rules(example_isolates, -#' rules = "custom", -#' custom_rules = x, -#' info = FALSE, -#' verbose = TRUE) -#' +#' rules = "custom", +#' custom_rules = x, +#' info = FALSE, +#' verbose = TRUE +#' ) +#' #' # combine rule sets -#' x2 <- c(x, -#' custom_eucast_rules(TZP == "R" ~ carbapenems == "R")) +#' x2 <- c( +#' x, +#' custom_eucast_rules(TZP == "R" ~ carbapenems == "R") +#' ) #' x2 custom_eucast_rules <- function(...) { - dots <- tryCatch(list(...), - error = function(e) "error") - stop_if(identical(dots, "error"), - "rules must be a valid formula inputs (e.g., using '~'), see `?custom_eucast_rules`") + error = function(e) "error" + ) + stop_if( + identical(dots, "error"), + "rules must be a valid formula inputs (e.g., using '~'), see `?custom_eucast_rules`" + ) n_dots <- length(dots) stop_if(n_dots == 0, "no custom rules were set. Please read the documentation using `?custom_eucast_rules`.") out <- vector("list", n_dots) for (i in seq_len(n_dots)) { - stop_ifnot(inherits(dots[[i]], "formula"), - "rule ", i, " must be a valid formula input (e.g., using '~'), see `?custom_eucast_rules`") - + stop_ifnot( + inherits(dots[[i]], "formula"), + "rule ", i, " must be a valid formula input (e.g., using '~'), see `?custom_eucast_rules`" + ) + # Query qry <- dots[[i]][[2]] if (inherits(qry, "call")) { @@ -119,11 +128,13 @@ custom_eucast_rules <- function(...) { qry <- gsub(" ?, ?", ", ", qry) qry <- gsub("'", "\"", qry, fixed = TRUE) out[[i]]$query <- as.expression(qry) - + # Resulting rule result <- dots[[i]][[3]] - stop_ifnot(deparse(result) %like% "==", - "the result of rule ", i, " (the part after the `~`) must contain `==`, such as in `... ~ ampicillin == \"R\"`, see `?custom_eucast_rules`") + stop_ifnot( + deparse(result) %like% "==", + "the result of rule ", i, " (the part after the `~`) must contain `==`, such as in `... ~ ampicillin == \"R\"`, see `?custom_eucast_rules`" + ) result_group <- as.character(result)[[2]] if (paste0("AB_", toupper(result_group), "S") %in% DEFINED_AB_GROUPS) { # support for e.g. 'aminopenicillin' if user meant 'aminopenicillins' @@ -134,25 +145,31 @@ custom_eucast_rules <- function(...) { } else { result_group <- tryCatch( suppressWarnings(as.ab(result_group, - fast_mode = TRUE, - flag_multiple_results = FALSE)), - error = function(e) NA_character_) + fast_mode = TRUE, + flag_multiple_results = FALSE + )), + error = function(e) NA_character_ + ) } - - stop_if(any(is.na(result_group)), - "this result of rule ", i, " could not be translated to a single antimicrobial agent/group: \"", - as.character(result)[[2]], "\".\n\nThe input can be a name or code of an antimicrobial agent, or be one of: ", - vector_or(tolower(gsub("AB_", "", DEFINED_AB_GROUPS)), quotes = FALSE), ".") + + stop_if( + any(is.na(result_group)), + "this result of rule ", i, " could not be translated to a single antimicrobial agent/group: \"", + as.character(result)[[2]], "\".\n\nThe input can be a name or code of an antimicrobial agent, or be one of: ", + vector_or(tolower(gsub("AB_", "", DEFINED_AB_GROUPS)), quotes = FALSE), "." + ) result_value <- as.character(result)[[3]] result_value[result_value == "NA"] <- NA - stop_ifnot(result_value %in% c("R", "S", "I", NA), - "the resulting value of rule ", i, " must be either \"R\", \"S\", \"I\" or NA") + stop_ifnot( + result_value %in% c("R", "S", "I", NA), + "the resulting value of rule ", i, " must be either \"R\", \"S\", \"I\" or NA" + ) result_value <- as.rsi(result_value) - + out[[i]]$result_group <- result_group out[[i]]$result_value <- result_value } - + names(out) <- paste0("rule", seq_len(n_dots)) set_clean_class(out, new_class = c("custom_eucast_rules", "list")) } @@ -196,13 +213,19 @@ print.custom_eucast_rules <- function(x, ...) { } else { val <- font_rsi_I_bg(font_black(" I ")) } - agents <- paste0(font_blue(ab_name(rule$result_group, language = NULL, tolower = TRUE), - collapse = NULL), - " (", rule$result_group, ")") + agents <- paste0( + font_blue(ab_name(rule$result_group, language = NULL, tolower = TRUE), + collapse = NULL + ), + " (", rule$result_group, ")" + ) agents <- sort(agents) - rule_if <- word_wrap(paste0(i, ". ", font_bold("If "), font_blue(rule$query), font_bold(" then "), - "set to {result}:"), - extra_indent = 5) + rule_if <- word_wrap(paste0( + i, ". ", font_bold("If "), font_blue(rule$query), font_bold(" then "), + "set to {result}:" + ), + extra_indent = 5 + ) rule_if <- gsub("{result}", val, rule_if, fixed = TRUE) rule_then <- paste0(" ", word_wrap(paste0(agents, collapse = ", "), extra_indent = 5)) cat("\n ", rule_if, "\n", rule_then, "\n", sep = "") diff --git a/R/data.R b/R/data.R index ae6dd4bc1..eb3543bfb 100755 --- a/R/data.R +++ b/R/data.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -42,7 +42,7 @@ #' - `iv_ddd`\cr Defined Daily Dose (DDD), parenteral (intravenous) treatment, currently available for `r sum(!is.na(antibiotics$iv_ddd))` drugs #' - `iv_units`\cr Units of `iv_ddd` #' - `loinc`\cr All LOINC codes (Logical Observation Identifiers Names and Codes) associated with the name of the antimicrobial agent. Use [ab_loinc()] to retrieve them quickly, see [ab_property()]. -#' +#' #' ## For the [antivirals] data set: a [tibble[tibble::tibble] with `r nrow(antivirals)` observations and `r ncol(antivirals)` variables: #' - `atc`\cr ATC codes (Anatomical Therapeutic Chemical) as defined by the WHOCC #' - `cid`\cr Compound ID as found in PubChem @@ -56,7 +56,7 @@ #' @details Properties that are based on an ATC code are only available when an ATC is available. These properties are: `atc_group1`, `atc_group2`, `oral_ddd`, `oral_units`, `iv_ddd` and `iv_units`. #' #' Synonyms (i.e. trade names) were derived from the Compound ID (`cid`) and consequently only available where a CID is available. -#' +#' #' ## Direct download #' Like all data sets in this package, these data sets are publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). #' @source World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology (WHOCC): @@ -86,14 +86,14 @@ #' - `source`\cr Either `r vector_or(microorganisms$source)` (see *Source*) #' - `prevalence`\cr Prevalence of the microorganism, see [as.mo()] #' - `snomed`\cr Systematized Nomenclature of Medicine (SNOMED) code of the microorganism, according to the `r SNOMED_VERSION$current_source` (see *Source*). Use [mo_snomed()] to retrieve it quickly, see [mo_property()]. -#' @details +#' @details #' Please note that entries are only based on the Catalogue of Life and the LPSN (see below). Since these sources incorporate entries based on (recent) publications in the International Journal of Systematic and Evolutionary Microbiology (IJSEM), it can happen that the year of publication is sometimes later than one might expect. -#' +#' #' For example, *Staphylococcus pettenkoferi* was described for the first time in Diagnostic Microbiology and Infectious Disease in 2002 (\doi{10.1016/s0732-8893(02)00399-1}), but it was not before 2007 that a publication in IJSEM followed (\doi{10.1099/ijs.0.64381-0}). Consequently, the `AMR` package returns 2007 for `mo_year("S. pettenkoferi")`. -#' +#' #' ## Manual additions #' For convenience, some entries were added manually: -#' +#' #' - 11 entries of *Streptococcus* (beta-haemolytic: groups A, B, C, D, F, G, H, K and unspecified; other: viridans, milleri) #' - 2 entries of *Staphylococcus* (coagulase-negative (CoNS) and coagulase-positive (CoPS)) #' - 3 entries of *Trichomonas* (*T. vaginalis*, and its family and genus) @@ -103,27 +103,27 @@ #' - 1 entry of *Moraxella* (*M. catarrhalis*), which was formally named *Branhamella catarrhalis* (Catlin, 1970) though this change was never accepted within the field of clinical microbiology #' - 5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus) #' - 6 families under the Enterobacterales order, according to Adeolu *et al.* (2016, PMID 27620848), that are not (yet) in the Catalogue of Life -#' +#' #' ## Direct download #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). #' @section About the Records from LPSN (see *Source*): #' The List of Prokaryotic names with Standing in Nomenclature (LPSN) provides comprehensive information on the nomenclature of prokaryotes. LPSN is a free to use service founded by Jean P. Euzeby in 1997 and later on maintained by Aidan C. Parte. -#' +#' #' As of February 2020, the regularly augmented LPSN database at DSMZ is the basis of the new LPSN service. The new database was implemented for the Type-Strain Genome Server and augmented in 2018 to store all kinds of nomenclatural information. Data from the previous version of LPSN and from the Prokaryotic Nomenclature Up-to-date (PNU) service were imported into the new system. PNU had been established in 1993 as a service of the Leibniz Institute DSMZ, and was curated by Norbert Weiss, Manfred Kracht and Dorothea Gleim. -#' @source +#' @source #' `r gsub("{year}", CATALOGUE_OF_LIFE$year, CATALOGUE_OF_LIFE$version, fixed = TRUE)` as currently implemented in this `AMR` package: -#' +#' #' * Annual Checklist (public online taxonomic database), -#' +#' #' List of Prokaryotic names with Standing in Nomenclature (`r CATALOGUE_OF_LIFE$yearmonth_LPSN`) as currently implemented in this `AMR` package: -#' +#' #' * Parte, A.C., Sarda Carbasse, J., Meier-Kolthoff, J.P., Reimer, L.C. and Goker, M. (2020). List of Prokaryotic names with Standing in Nomenclature (LPSN) moves to the DSMZ. International Journal of Systematic and Evolutionary Microbiology, 70, 5607-5612; \doi{10.1099/ijsem.0.004332} #' * Parte, A.C. (2018). LPSN - List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; \doi{10.1099/ijsem.0.002786} #' * Parte, A.C. (2014). LPSN - List of Prokaryotic names with Standing in Nomenclature. Nucleic Acids Research, 42, Issue D1, D613-D616; \doi{10.1093/nar/gkt1111} #' * Euzeby, J.P. (1997). List of Bacterial Names with Standing in Nomenclature: a Folder Available on the Internet. International Journal of Systematic Bacteriology, 47, 590-592; \doi{10.1099/00207713-47-2-590} -#' +#' #' `r SNOMED_VERSION$current_source` as currently implemented in this `AMR` package: -#' +#' #' * Retrieved from the `r SNOMED_VERSION$title`, OID `r SNOMED_VERSION$current_oid`, version `r SNOMED_VERSION$current_version`; url: <`r SNOMED_VERSION$url`> #' @seealso [as.mo()], [mo_property()], [microorganisms.codes], [intrinsic_resistant] #' @examples @@ -139,10 +139,10 @@ #' - `fullname_new`\cr New full taxonomic name of the microorganism #' - `ref`\cr Author(s) and year of concerning scientific publication #' - `prevalence`\cr Prevalence of the microorganism, see [as.mo()] -#' @details +#' @details #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). #' @source Catalogue of Life: Annual Checklist (public online taxonomic database), (check included annual version with [catalogue_of_life_version()]). -#' +#' #' Parte, A.C. (2018). LPSN - List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; \doi{10.1099/ijsem.0.002786} #' @seealso [as.mo()] [mo_property()] [microorganisms] #' @examples @@ -155,7 +155,7 @@ #' @format A [tibble[tibble::tibble] with `r format(nrow(microorganisms.codes), big.mark = ",")` observations and `r ncol(microorganisms.codes)` variables: #' - `code`\cr Commonly used code of a microorganism #' - `mo`\cr ID of the microorganism in the [microorganisms] data set -#' @details +#' @details #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). #' @inheritSection catalogue_of_life Catalogue of Life #' @seealso [as.mo()] [microorganisms] @@ -174,7 +174,7 @@ #' - `ward`\cr Ward type where the patient was admitted, either `r vector_or(example_isolates$ward)` #' - `mo`\cr ID of microorganism created with [as.mo()], see also the [microorganisms] data set #' - `PEN:RIF`\cr `r sum(vapply(FUN.VALUE = logical(1), example_isolates, is.rsi))` different antibiotics with class [`rsi`] (see [as.rsi()]); these column names occur in the [antibiotics] data set and can be translated with [set_ab_names()] or [ab_name()] -#' @details +#' @details #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). #' @examples #' example_isolates @@ -189,7 +189,7 @@ #' - `hospital`\cr ID of the hospital, from A to C #' - `bacteria`\cr info about microorganism that can be transformed with [as.mo()], see also [microorganisms] #' - `AMX:GEN`\cr 4 different antibiotics that have to be transformed with [as.rsi()] -#' @details +#' @details #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). #' @examples #' example_isolates_unclean @@ -225,7 +225,7 @@ #' - `Comment`\cr Other comments #' - `Date of data entry`\cr [Date] this data was entered in WHONET #' - `AMP_ND10:CIP_EE`\cr `r sum(vapply(FUN.VALUE = logical(1), WHONET, is.rsi))` different antibiotics. You can lookup the abbreviations in the [antibiotics] data set, or use e.g. [`ab_name("AMP")`][ab_name()] to get the official name immediately. Before analysis, you should transform this to a valid antibiotic class, using [as.rsi()]. -#' @details +#' @details #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). #' @examples #' WHONET @@ -246,9 +246,9 @@ #' - `breakpoint_S`\cr Lowest MIC value or highest number of millimetres that leads to "S" #' - `breakpoint_R`\cr Highest MIC value or lowest number of millimetres that leads to "R" #' - `uti`\cr A [logical] value (`TRUE`/`FALSE`) to indicate whether the rule applies to a urinary tract infection (UTI) -#' @details +#' @details #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). -#' +#' #' They **allow for machine reading EUCAST and CLSI guidelines**, which is almost impossible with the MS Excel and PDF files distributed by EUCAST and CLSI. #' @seealso [intrinsic_resistant] #' @examples @@ -261,12 +261,12 @@ #' @format A [tibble[tibble::tibble] with `r format(nrow(intrinsic_resistant), big.mark = ",")` observations and `r ncol(intrinsic_resistant)` variables: #' - `mo`\cr Microorganism ID #' - `ab`\cr Antibiotic ID -#' @details +#' @details #' This data set is based on `r format_eucast_version_nr(3.3)`. -#' +#' #' ## Direct download #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). -#' +#' #' They **allow for machine reading EUCAST and CLSI guidelines**, which is almost impossible with the MS Excel and PDF files distributed by EUCAST and CLSI. #' @examples #' intrinsic_resistant @@ -285,9 +285,9 @@ #' - `notes`\cr Additional dosage notes #' - `original_txt`\cr Original text in the PDF file of EUCAST #' - `eucast_version`\cr Version number of the EUCAST Clinical Breakpoints guideline to which these dosages apply -#' @details +#' @details #' This data set is based on `r format_eucast_version_nr(11.0)`. -#' +#' #' ## Direct download #' Like all data sets in this package, this data set is publicly available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. Please visit [our website for the download links](https://msberends.github.io/AMR/articles/datasets.html). The actual files are of course available on [our GitHub repository](https://github.com/msberends/AMR/tree/main/data-raw). #' @examples diff --git a/R/deprecated.R b/R/deprecated.R index fbb6dbacb..ef4f2b0ab 100755 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # diff --git a/R/disk.R b/R/disk.R index cdb359fc7..fa08d60a3 100644 --- a/R/disk.R +++ b/R/disk.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -36,33 +36,37 @@ #' @seealso [as.rsi()] #' @examples #' # transform existing disk zones to the `disk` class (using base R) -#' df <- data.frame(microorganism = "Escherichia coli", -#' AMP = 20, -#' CIP = 14, -#' GEN = 18, -#' TOB = 16) +#' df <- data.frame( +#' microorganism = "Escherichia coli", +#' AMP = 20, +#' CIP = 14, +#' GEN = 18, +#' TOB = 16 +#' ) #' df[, 2:5] <- lapply(df[, 2:5], as.disk) #' str(df) -#' +#' #' \donttest{ #' # transforming is easier with dplyr: #' if (require("dplyr")) { #' df %>% mutate(across(AMP:TOB, as.disk)) #' } #' } -#' +#' #' # interpret disk values, see ?as.rsi -#' as.rsi(x = as.disk(18), -#' mo = "Strep pneu", # `mo` will be coerced with as.mo() -#' ab = "ampicillin", # and `ab` with as.ab() -#' guideline = "EUCAST") +#' as.rsi( +#' x = as.disk(18), +#' mo = "Strep pneu", # `mo` will be coerced with as.mo() +#' ab = "ampicillin", # and `ab` with as.ab() +#' guideline = "EUCAST" +#' ) #' #' # interpret whole data set, pretend to be all from urinary tract infections: #' as.rsi(df, uti = TRUE) as.disk <- function(x, na.rm = FALSE) { meet_criteria(x, allow_class = c("disk", "character", "numeric", "integer"), allow_NA = TRUE) meet_criteria(na.rm, allow_class = "logical", has_length = 1) - + if (!is.disk(x)) { x <- unlist(x) if (na.rm == TRUE) { @@ -70,9 +74,9 @@ as.disk <- function(x, na.rm = FALSE) { } x[trimws(x) == ""] <- NA x.bak <- x - + na_before <- length(x[is.na(x)]) - + # heavily based on cleaner::clean_double(): clean_double2 <- function(x, remove = "[^0-9.,-]", fixed = FALSE) { x <- gsub(",", ".", x) @@ -80,38 +84,44 @@ as.disk <- function(x, na.rm = FALSE) { x <- gsub("[,.]$", "", x) # only keep last dot/comma reverse <- function(x) vapply(FUN.VALUE = character(1), lapply(strsplit(x, NULL), rev), paste, collapse = "") - x <- sub("{{dot}}", ".", - gsub(".", "", - reverse(sub(".", "}}tod{{", - reverse(x), - fixed = TRUE)), - fixed = TRUE), - fixed = TRUE) + x <- sub("{{dot}}", ".", + gsub(".", "", + reverse(sub(".", "}}tod{{", + reverse(x), + fixed = TRUE + )), + fixed = TRUE + ), + fixed = TRUE + ) x_clean <- gsub(remove, "", x, ignore.case = TRUE, fixed = fixed) # remove everything that is not a number or dot as.double(gsub("[^0-9.]+", "", x_clean)) } - + # round up and make it an integer x <- as.integer(ceiling(clean_double2(x))) - + # disks can never be less than 6 mm (size of smallest disk) or more than 50 mm x[x < 6 | x > 50] <- NA_integer_ na_after <- length(x[is.na(x)]) - + if (na_before != na_after) { list_missing <- x.bak[is.na(x) & !is.na(x.bak)] %pm>% unique() %pm>% sort() %pm>% vector_and(quotes = TRUE) - warning_("in `as.disk()`: ", na_after - na_before, " results truncated (", - round(((na_after - na_before) / length(x)) * 100), - "%) that were invalid disk zones: ", - list_missing) + warning_( + "in `as.disk()`: ", na_after - na_before, " results truncated (", + round(((na_after - na_before) / length(x)) * 100), + "%) that were invalid disk zones: ", + list_missing + ) } } set_clean_class(as.integer(x), - new_class = c("disk", "integer")) + new_class = c("disk", "integer") + ) } all_valid_disks <- function(x) { @@ -119,7 +129,8 @@ all_valid_disks <- function(x) { return(FALSE) } x_disk <- tryCatch(suppressWarnings(as.disk(x[!is.na(x)])), - error = function(e) NA) + error = function(e) NA + ) !any(is.na(x_disk)) && !all(is.na(x)) } @@ -127,7 +138,8 @@ all_valid_disks <- function(x) { #' @details `NA_disk_` is a missing value of the new `` class. #' @export NA_disk_ <- set_clean_class(as.integer(NA_real_), - new_class = c("disk", "integer")) + new_class = c("disk", "integer") +) #' @rdname as.disk #' @export @@ -218,10 +230,10 @@ rep.disk <- function(x, ...) { get_skimmers.disk <- function(column) { skimr::sfl( skim_type = "disk", - min = ~min(as.double(.), na.rm = TRUE), - max = ~max(as.double(.), na.rm = TRUE), - median = ~stats::median(as.double(.), na.rm = TRUE), - n_unique = ~length(unique(stats::na.omit(.))), - hist = ~skimr::inline_hist(stats::na.omit(as.double(.))) + min = ~ min(as.double(.), na.rm = TRUE), + max = ~ max(as.double(.), na.rm = TRUE), + median = ~ stats::median(as.double(.), na.rm = TRUE), + n_unique = ~ length(unique(stats::na.omit(.))), + hist = ~ skimr::inline_hist(stats::na.omit(as.double(.))) ) } diff --git a/R/episode.R b/R/episode.R index 4a49438c3..9dddeb162 100644 --- a/R/episode.R +++ b/R/episode.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,18 +24,18 @@ # ==================================================================== # #' Determine (New) Episodes for Patients -#' +#' #' These functions determine which items in a vector can be considered (the start of) a new episode, based on the argument `episode_days`. This can be used to determine clinical episodes for any epidemiological analysis. The [get_episode()] function returns the index number of the episode per group, while the [is_new_episode()] function returns values `TRUE`/`FALSE` to indicate whether an item in a vector is the start of a new episode. #' @param x vector of dates (class `Date` or `POSIXt`), will be sorted internally to determine episodes #' @param episode_days required episode length in days, can also be less than a day or `Inf`, see *Details* #' @param ... ignored, only in place to allow future extensions -#' @details +#' @details #' Dates are first sorted from old to new. The oldest date will mark the start of the first episode. After this date, the next date will be marked that is at least `episode_days` days later than the start of the first episode. From that second marked date on, the next date will be marked that is at least `episode_days` days later than the start of the second episode which will be the start of the third episode, and so on. Before the vector is being returned, the original order will be restored. -#' +#' #' The [first_isolate()] function is a wrapper around the [is_new_episode()] function, but is more efficient for data sets containing microorganism codes or names and allows for different isolate selection methods. -#' +#' #' The `dplyr` package is not required for these functions to work, but these functions do support [variable grouping][dplyr::group_by()] and work conveniently inside `dplyr` verbs such as [`filter()`][dplyr::filter()], [`mutate()`][dplyr::mutate()] and [`summarise()`][dplyr::summarise()]. -#' @return +#' @return #' * [get_episode()]: a [double] vector #' * [is_new_episode()]: a [logical] vector #' @seealso [first_isolate()] @@ -45,69 +45,73 @@ #' # `example_isolates` is a data set available in the AMR package. #' # See ?example_isolates #' df <- example_isolates[sample(seq_len(2000), size = 200), ] -#' -#' get_episode(df$date, episode_days = 60) # indices +#' +#' get_episode(df$date, episode_days = 60) # indices #' is_new_episode(df$date, episode_days = 60) # TRUE/FALSE -#' +#' #' # filter on results from the third 60-day episode only, using base R #' df[which(get_episode(df$date, 60) == 3), ] -#' +#' #' # the functions also work for less than a day, e.g. to include one per hour: -#' get_episode(c(Sys.time(), -#' Sys.time() + 60 * 60), -#' episode_days = 1/24) -#' +#' get_episode(c( +#' Sys.time(), +#' Sys.time() + 60 * 60 +#' ), +#' episode_days = 1 / 24 +#' ) +#' #' \donttest{ #' if (require("dplyr")) { #' # is_new_episode() can also be used in dplyr verbs to determine patient #' # episodes based on any (combination of) grouping variables: #' df %>% -#' mutate(condition = sample(x = c("A", "B", "C"), -#' size = 200, -#' replace = TRUE)) %>% +#' mutate(condition = sample( +#' x = c("A", "B", "C"), +#' size = 200, +#' replace = TRUE +#' )) %>% #' group_by(condition) %>% #' mutate(new_episode = is_new_episode(date, 365)) %>% #' select(patient, date, condition, new_episode) -#' #' } #' if (require("dplyr")) { -#' #' df %>% #' group_by(ward, patient) %>% -#' transmute(date, -#' patient, -#' new_index = get_episode(date, 60), -#' new_logical = is_new_episode(date, 60)) -#' +#' transmute(date, +#' patient, +#' new_index = get_episode(date, 60), +#' new_logical = is_new_episode(date, 60) +#' ) #' } #' if (require("dplyr")) { -#' #' df %>% -#' group_by(ward) %>% -#' summarise(n_patients = n_distinct(patient), -#' n_episodes_365 = sum(is_new_episode(date, episode_days = 365)), -#' n_episodes_60 = sum(is_new_episode(date, episode_days = 60)), -#' n_episodes_30 = sum(is_new_episode(date, episode_days = 30))) -#' +#' group_by(ward) %>% +#' summarise( +#' n_patients = n_distinct(patient), +#' n_episodes_365 = sum(is_new_episode(date, episode_days = 365)), +#' n_episodes_60 = sum(is_new_episode(date, episode_days = 60)), +#' n_episodes_30 = sum(is_new_episode(date, episode_days = 30)) +#' ) #' } #' if (require("dplyr")) { -#' +#' #' # grouping on patients and microorganisms leads to the same #' # results as first_isolate() when using 'episode-based': #' x <- df %>% -#' filter_first_isolate(include_unknown = TRUE, -#' method = "episode-based") -#' +#' filter_first_isolate( +#' include_unknown = TRUE, +#' method = "episode-based" +#' ) +#' #' y <- df %>% #' group_by(patient, mo) %>% #' filter(is_new_episode(date, 365)) %>% #' ungroup() #' #' identical(x, y) -#' #' } #' if (require("dplyr")) { -#' +#' #' # but is_new_episode() has a lot more flexibility than first_isolate(), #' # since you can now group on anything that seems relevant: #' df %>% @@ -119,11 +123,13 @@ get_episode <- function(x, episode_days, ...) { meet_criteria(x, allow_class = c("Date", "POSIXt"), allow_NA = TRUE) meet_criteria(episode_days, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = FALSE) - - exec_episode(x = x, - type = "sequential", - episode_days = episode_days, - ... = ...) + + exec_episode( + x = x, + type = "sequential", + episode_days = episode_days, + ... = ... + ) } #' @rdname get_episode @@ -131,18 +137,20 @@ get_episode <- function(x, episode_days, ...) { is_new_episode <- function(x, episode_days, ...) { meet_criteria(x, allow_class = c("Date", "POSIXt"), allow_NA = TRUE) meet_criteria(episode_days, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = FALSE) - - exec_episode(x = x, - type = "logical", - episode_days = episode_days, - ... = ...) + + exec_episode( + x = x, + type = "logical", + episode_days = episode_days, + ... = ... + ) } exec_episode <- function(x, type, episode_days, ...) { x <- as.double(as.POSIXct(x)) # as.POSIXct() required for Date classes # since x is now in seconds, get seconds from episode_days as well episode_seconds <- episode_days * 60 * 60 * 24 - + if (length(x) == 1) { # this will also match 1 NA, which is fine if (type == "logical") { return(TRUE) @@ -164,7 +172,7 @@ exec_episode <- function(x, type, episode_days, ...) { } } } - + # I asked on StackOverflow: # https://stackoverflow.com/questions/42122245/filter-one-row-every-year run_episodes <- function(x, episode_seconds) { @@ -192,7 +200,7 @@ exec_episode <- function(x, type, episode_days, ...) { indices } } - + ord <- order(x) out <- run_episodes(x[ord], episode_seconds)[order(ord)] out[is.na(x) & ord != 1] <- NA # every NA but the first must remain NA diff --git a/R/eucast_rules.R b/R/eucast_rules.R index 63d85fdfe..45fdf7de3 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -35,22 +35,26 @@ format_eucast_version_nr <- function(version, markdown = TRUE) { for (i in seq_len(length(version))) { v <- version[i] if (markdown == TRUE) { - txt <- c(txt, paste0("[", lst[[v]]$title, " ", lst[[v]]$version_txt, "](", lst[[v]]$url, ")", - " (", lst[[v]]$year, ")")) + txt <- c(txt, paste0( + "[", lst[[v]]$title, " ", lst[[v]]$version_txt, "](", lst[[v]]$url, ")", + " (", lst[[v]]$year, ")" + )) } else { - txt <- c(txt, paste0(lst[[version]]$title, " ", lst[[v]]$version_txt, - " (", lst[[v]]$year, ")")) + txt <- c(txt, paste0( + lst[[version]]$title, " ", lst[[v]]$version_txt, + " (", lst[[v]]$year, ")" + )) } } - + vector_and(txt, quotes = FALSE) } #' Apply EUCAST Rules -#' +#' #' @description #' Apply rules for clinical breakpoints and intrinsic resistance as defined by the European Committee on Antimicrobial Susceptibility Testing (EUCAST, ), see *Source*. Use [eucast_dosage()] to get a [data.frame] with advised dosages of a certain bug-drug combination, which is based on the [dosage] data set. -#' +#' #' To improve the interpretation of the antibiogram before EUCAST rules are applied, some non-EUCAST rules can applied at default, see *Details*. #' @param x a data set with antibiotic columns, such as `amox`, `AMX` and `AMC` #' @param info a [logical] to indicate whether progress should be printed to the console, defaults to only print while in interactive sessions @@ -70,28 +74,28 @@ format_eucast_version_nr <- function(version, markdown = TRUE) { #' **Note:** When ampicillin (AMP, J01CA01) is not available but amoxicillin (AMX, J01CA04) is, the latter will be used for all rules where there is a dependency on ampicillin. These drugs are interchangeable when it comes to expression of antimicrobial resistance. \cr #' #' The file containing all EUCAST rules is located here: . **Note:** Old taxonomic names are replaced with the current taxonomy where applicable. For example, *Ochrobactrum anthropi* was renamed to *Brucella anthropi* in 2020; the original EUCAST rules v3.1 and v3.2 did not yet contain this new taxonomic name. The file used as input for this `AMR` package contains the taxonomy updated until [`r CATALOGUE_OF_LIFE$yearmonth_LPSN`][catalogue_of_life()]. -#' +#' #' ## Custom Rules -#' +#' #' Custom rules can be created using [custom_eucast_rules()], e.g.: -#' +#' #' ```{r} #' x <- custom_eucast_rules(AMC == "R" & genus == "Klebsiella" ~ aminopenicillins == "R", #' AMC == "I" & genus == "Klebsiella" ~ aminopenicillins == "I") #' #' eucast_rules(example_isolates, rules = "custom", custom_rules = x, info = FALSE) #' ``` -#' -#' +#' +#' #' ## 'Other' Rules -#' +#' #' Before further processing, two non-EUCAST rules about drug combinations can be applied to improve the efficacy of the EUCAST rules, and the reliability of your data (analysis). These rules are: -#' +#' #' 1. A drug **with** enzyme inhibitor will be set to S if the same drug **without** enzyme inhibitor is S #' 2. A drug **without** enzyme inhibitor will be set to R if the same drug **with** enzyme inhibitor is R -#' +#' #' Important examples include amoxicillin and amoxicillin/clavulanic acid, and trimethoprim and trimethoprim/sulfamethoxazole. Needless to say, for these rules to work, both drugs must be available in the data set. -#' +#' #' Since these rules are not officially approved by EUCAST, they are not applied at default. To use these rules, include `"other"` to the `rules` argument, or use `eucast_rules(..., rules = "all")`. You can also set the option `AMR_eucastrules`, i.e. run `options(AMR_eucastrules = "all")`. #' @section Antibiotics: #' To define antibiotics column names, leave as it is to determine it automatically with [guess_ab_col()] or input a text (case-insensitive), or use `NULL` to skip a column (e.g. `TIC = NULL` to skip ticarcillin). Manually defined but non-existing columns will be skipped with a warning. @@ -117,19 +121,23 @@ format_eucast_version_nr <- function(version, markdown = TRUE) { #' @examples #' \donttest{ -#' a <- data.frame(mo = c("Staphylococcus aureus", -#' "Enterococcus faecalis", -#' "Escherichia coli", -#' "Klebsiella pneumoniae", -#' "Pseudomonas aeruginosa"), -#' VAN = "-", # Vancomycin -#' AMX = "-", # Amoxicillin -#' COL = "-", # Colistin -#' CAZ = "-", # Ceftazidime -#' CXM = "-", # Cefuroxime -#' PEN = "S", # Benzylpenicillin -#' FOX = "S", # Cefoxitin -#' stringsAsFactors = FALSE) +#' a <- data.frame( +#' mo = c( +#' "Staphylococcus aureus", +#' "Enterococcus faecalis", +#' "Escherichia coli", +#' "Klebsiella pneumoniae", +#' "Pseudomonas aeruginosa" +#' ), +#' VAN = "-", # Vancomycin +#' AMX = "-", # Amoxicillin +#' COL = "-", # Colistin +#' CAZ = "-", # Ceftazidime +#' CXM = "-", # Cefuroxime +#' PEN = "S", # Benzylpenicillin +#' FOX = "S", # Cefoxitin +#' stringsAsFactors = FALSE +#' ) #' #' head(a) #' @@ -145,11 +153,11 @@ format_eucast_version_nr <- function(version, markdown = TRUE) { #' c <- eucast_rules(a, verbose = TRUE) #' head(c) #' } -#' +#' #' # Dosage guidelines: -#' +#' #' eucast_dosage(c("tobra", "genta", "cipro"), "iv") -#' +#' #' eucast_dosage(c("tobra", "genta", "cipro"), "iv", version_breakpoints = 10) eucast_rules <- function(x, col_mo = NULL, @@ -172,10 +180,11 @@ eucast_rules <- function(x, meet_criteria(ampc_cephalosporin_resistance, allow_class = c("logical", "character", "rsi"), has_length = 1, allow_NA = TRUE, allow_NULL = TRUE) meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) meet_criteria(custom_rules, allow_class = "custom_eucast_rules", allow_NULL = TRUE) - + if ("custom" %in% rules & is.null(custom_rules)) { warning_("in `eucast_rules()`: no custom rules were set with the `custom_rules` argument", - immediate = TRUE) + immediate = TRUE + ) rules <- rules[rules != "custom"] if (length(rules) == 0) { if (info == TRUE) { @@ -184,26 +193,28 @@ eucast_rules <- function(x, return(x) } } - + x_deparsed <- deparse(substitute(x)) if (length(x_deparsed) > 1 || any(x_deparsed %unlike% "[a-z]+")) { x_deparsed <- "your_data" } - + check_dataset_integrity() - + breakpoints_info <- EUCAST_VERSION_BREAKPOINTS[[which(as.double(names(EUCAST_VERSION_BREAKPOINTS)) == version_breakpoints)]] expertrules_info <- EUCAST_VERSION_EXPERT_RULES[[which(as.double(names(EUCAST_VERSION_EXPERT_RULES)) == version_expertrules)]] - + # support old setting (until AMR v1.3.0) if (missing(rules) & !is.null(getOption("AMR.eucast_rules", default = NULL))) { rules <- getOption("AMR.eucast_rules") } - + if (interactive() & verbose == TRUE & info == TRUE) { - txt <- paste0("WARNING: In Verbose mode, the eucast_rules() function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way.", - "\n\nThis may overwrite your existing data if you use e.g.:", - "\ndata <- eucast_rules(data, verbose = TRUE)\n\nDo you want to continue?") + txt <- paste0( + "WARNING: In Verbose mode, the eucast_rules() function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way.", + "\n\nThis may overwrite your existing data if you use e.g.:", + "\ndata <- eucast_rules(data, verbose = TRUE)\n\nDo you want to continue?" + ) showQuestion <- import_fn("showQuestion", "rstudioapi", error_on_fail = FALSE) if (!is.null(showQuestion)) { q_continue <- showQuestion("Using verbose = TRUE with eucast_rules()", txt) @@ -215,20 +226,20 @@ eucast_rules <- function(x, return(x) } } - + # try to find columns based on type # -- mo if (is.null(col_mo)) { col_mo <- search_type_in_df(x = x, type = "mo", info = info) stop_if(is.null(col_mo), "`col_mo` must be set") } - + decimal.mark <- getOption("OutDec") big.mark <- ifelse(decimal.mark != ",", ",", ".") formatnr <- function(x, big = big.mark, dec = decimal.mark) { trimws(format(x, big.mark = big, decimal.mark = dec)) } - + warned <- FALSE warn_lacking_rsi_class <- character(0) txt_ok <- function(n_added, n_changed, warned = FALSE) { @@ -263,7 +274,7 @@ eucast_rules <- function(x, } else { cat(font_blue(formatnr(n_changed), "values changed")) } - } + } # closing if (n_added > 0 & n_changed == 0) { cat(font_green(")\n")) @@ -276,30 +287,34 @@ eucast_rules <- function(x, warned <<- FALSE } } - - cols_ab <- get_column_abx(x = x, - soft_dependencies = c("AMC", - "AMP", - "AMX", - "CIP", - "ERY", - "FOX1", - "GEN", - "MFX", - "NAL", - "NOR", - "PEN", - "PIP", - "TCY", - "TIC", - "TOB"), - hard_dependencies = NULL, - verbose = verbose, - info = info, - only_rsi_columns = only_rsi_columns, - fn = "eucast_rules", - ...) - + + cols_ab <- get_column_abx( + x = x, + soft_dependencies = c( + "AMC", + "AMP", + "AMX", + "CIP", + "ERY", + "FOX1", + "GEN", + "MFX", + "NAL", + "NOR", + "PEN", + "PIP", + "TCY", + "TIC", + "TOB" + ), + hard_dependencies = NULL, + verbose = verbose, + info = info, + only_rsi_columns = only_rsi_columns, + fn = "eucast_rules", + ... + ) + if (!"AMP" %in% names(cols_ab) & "AMX" %in% names(cols_ab)) { # ampicillin column is missing, but amoxicillin is available if (info == TRUE) { @@ -307,12 +322,12 @@ eucast_rules <- function(x, } cols_ab <- c(cols_ab, c(AMP = unname(cols_ab[names(cols_ab) == "AMX"]))) } - + # data preparation ---- if (info == TRUE & NROW(x) > 10000) { message_("Preparing data...", appendLF = FALSE, as_note = FALSE) } - + # Some helper functions --------------------------------------------------- get_antibiotic_names <- function(x) { x <- x %pm>% @@ -348,13 +363,17 @@ eucast_rules <- function(x, } else { if (length(ab_names) == 2) { # like PEN,FOX S,R - paste0(ab_names[1], " is '", ab_results[1], "' and ", - ab_names[2], " is '", ab_results[2], "'") + paste0( + ab_names[1], " is '", ab_results[1], "' and ", + ab_names[2], " is '", ab_results[2], "'" + ) } else { # like PEN,FOX,GEN S,R,R (although dependency on > 2 ABx does not exist at the moment) - paste0(ab_names[1], " is '", ab_results[1], "' and ", - ab_names[2], " is '", ab_results[2], "' and ", - ab_names[3], " is '", ab_results[3], "'") + paste0( + ab_names[1], " is '", ab_results[1], "' and ", + ab_names[2], " is '", ab_results[2], "' and ", + ab_names[3], " is '", ab_results[3], "'" + ) } } } @@ -364,40 +383,45 @@ eucast_rules <- function(x, } suppressWarnings(as.rsi(x)) } - + # Preparing the data ------------------------------------------------------ - - verbose_info <- data.frame(rowid = character(0), - col = character(0), - mo_fullname = character(0), - old = as.rsi(character(0)), - new = as.rsi(character(0)), - rule = character(0), - rule_group = character(0), - rule_name = character(0), - rule_source = character(0), - stringsAsFactors = FALSE) - + + verbose_info <- data.frame( + rowid = character(0), + col = character(0), + mo_fullname = character(0), + old = as.rsi(character(0)), + new = as.rsi(character(0)), + rule = character(0), + rule_group = character(0), + rule_name = character(0), + rule_source = character(0), + stringsAsFactors = FALSE + ) + old_cols <- colnames(x) old_attributes <- attributes(x) x <- as.data.frame(x, stringsAsFactors = FALSE) # no tibbles, data.tables, etc. rownames(x) <- NULL # will later be restored with old_attributes # create unique row IDs - combination of the MO and all ABx columns (so they will only run once per unique combination) - x$`.rowid` <- vapply(FUN.VALUE = character(1), - as.list(as.data.frame(t(x[, c(col_mo, cols_ab), drop = FALSE]), - stringsAsFactors = FALSE)), - function(x) { - x[is.na(x)] <- "." - paste0(x, collapse = "") - }) - + x$`.rowid` <- vapply( + FUN.VALUE = character(1), + as.list(as.data.frame(t(x[, c(col_mo, cols_ab), drop = FALSE]), + stringsAsFactors = FALSE + )), + function(x) { + x[is.na(x)] <- "." + paste0(x, collapse = "") + } + ) + # save original table, with the new .rowid column x.bak <- x # keep only unique rows for MO and ABx - x <- x %pm>% - pm_arrange(`.rowid`) %pm>% + x <- x %pm>% + pm_arrange(`.rowid`) %pm>% # big speed gain! only analyse unique rows: - pm_distinct(`.rowid`, .keep_all = TRUE) %pm>% + pm_distinct(`.rowid`, .keep_all = TRUE) %pm>% as.data.frame(stringsAsFactors = FALSE) x[, col_mo] <- as.mo(as.character(x[, col_mo, drop = TRUE])) # rename col_mo to prevent interference with joined columns @@ -410,7 +434,7 @@ eucast_rules <- function(x, if (info == TRUE & NROW(x) > 10000) { message_(" OK.", add_fn = list(font_green, font_bold), as_note = FALSE) } - + if (any(x$genus == "Staphylococcus", na.rm = TRUE)) { all_staph <- MO_lookup[which(MO_lookup$genus == "Staphylococcus"), , drop = FALSE] all_staph$CNS_CPS <- suppressWarnings(mo_name(all_staph$mo, Becker = "all", language = NULL)) @@ -419,18 +443,23 @@ eucast_rules <- function(x, all_strep <- MO_lookup[which(MO_lookup$genus == "Streptococcus"), , drop = FALSE] all_strep$Lancefield <- suppressWarnings(mo_name(all_strep$mo, Lancefield = TRUE, language = NULL)) } - + n_added <- 0 n_changed <- 0 - + # Other rules: enzyme inhibitors ------------------------------------------ if (any(c("all", "other") %in% rules)) { if (info == TRUE) { cat("\n") cat(word_wrap( - font_bold(paste0("Rules by this AMR package (", - font_red(paste0("v", utils::packageDescription("AMR")$Version, ", ", - format(as.Date(utils::packageDescription("AMR")$Date), format = "%Y"))), "), see ?eucast_rules\n")))) + font_bold(paste0( + "Rules by this AMR package (", + font_red(paste0( + "v", utils::packageDescription("AMR")$Version, ", ", + format(as.Date(utils::packageDescription("AMR")$Date), format = "%Y") + )), "), see ?eucast_rules\n" + )) + )) } ab_enzyme <- subset(antibiotics, name %like% "/")[, c("ab", "name"), drop = FALSE] colnames(ab_enzyme) <- c("enzyme_ab", "enzyme_name") @@ -447,32 +476,39 @@ eucast_rules <- function(x, # merge and sort ab_enzyme <- rbind(ab_enzyme, ampi, amox) ab_enzyme <- ab_enzyme[order(ab_enzyme$enzyme_name), , drop = FALSE] - + for (i in seq_len(nrow(ab_enzyme))) { # check if both base and base + enzyme inhibitor are part of the data set if (all(c(ab_enzyme$base_ab[i], ab_enzyme$enzyme_ab[i]) %in% names(cols_ab), na.rm = TRUE)) { col_base <- unname(cols_ab[ab_enzyme$base_ab[i]]) col_enzyme <- unname(cols_ab[ab_enzyme$enzyme_ab[i]]) - + # Set base to R where base + enzyme inhibitor is R ---- - rule_current <- paste0(ab_enzyme$base_name[i], " ('", font_bold(col_base), "') = R if ", - tolower(ab_enzyme$enzyme_name[i]), " ('", font_bold(col_enzyme), "') = R") + rule_current <- paste0( + ab_enzyme$base_name[i], " ('", font_bold(col_base), "') = R if ", + tolower(ab_enzyme$enzyme_name[i]), " ('", font_bold(col_enzyme), "') = R" + ) if (info == TRUE) { - cat(word_wrap(rule_current, - width = getOption("width") - 30, - extra_indent = 6)) + cat(word_wrap(rule_current, + width = getOption("width") - 30, + extra_indent = 6 + )) } - run_changes <- edit_rsi(x = x, - to = "R", - rule = c(rule_current, "Other rules", "", - paste0("Non-EUCAST: AMR package v", utils::packageDescription("AMR")$Version)), - rows = which(as.rsi_no_warning(x[, col_enzyme, drop = TRUE]) == "R"), - cols = col_base, - last_verbose_info = verbose_info, - original_data = x.bak, - warned = warned, - info = info, - verbose = verbose) + run_changes <- edit_rsi( + x = x, + to = "R", + rule = c( + rule_current, "Other rules", "", + paste0("Non-EUCAST: AMR package v", utils::packageDescription("AMR")$Version) + ), + rows = which(as.rsi_no_warning(x[, col_enzyme, drop = TRUE]) == "R"), + cols = col_base, + last_verbose_info = verbose_info, + original_data = x.bak, + warned = warned, + info = info, + verbose = verbose + ) n_added <- n_added + run_changes$added n_changed <- n_changed + run_changes$changed verbose_info <- run_changes$verbose_info @@ -486,27 +522,34 @@ eucast_rules <- function(x, n_added <- 0 n_changed <- 0 } - + # Set base + enzyme inhibitor to S where base is S ---- - rule_current <- paste0(ab_enzyme$enzyme_name[i], " ('", font_bold(col_enzyme), "') = S if ", - tolower(ab_enzyme$base_name[i]), " ('", font_bold(col_base), "') = S") + rule_current <- paste0( + ab_enzyme$enzyme_name[i], " ('", font_bold(col_enzyme), "') = S if ", + tolower(ab_enzyme$base_name[i]), " ('", font_bold(col_base), "') = S" + ) if (info == TRUE) { - cat(word_wrap(rule_current, - width = getOption("width") - 30, - extra_indent = 6)) + cat(word_wrap(rule_current, + width = getOption("width") - 30, + extra_indent = 6 + )) } - run_changes <- edit_rsi(x = x, - to = "S", - rule = c(rule_current, "Other rules", "", - paste0("Non-EUCAST: AMR package v", utils::packageDescription("AMR")$Version)), - rows = which(as.rsi_no_warning(x[, col_base, drop = TRUE]) == "S"), - cols = col_enzyme, - last_verbose_info = verbose_info, - original_data = x.bak, - warned = warned, - info = info, - verbose = verbose) + run_changes <- edit_rsi( + x = x, + to = "S", + rule = c( + rule_current, "Other rules", "", + paste0("Non-EUCAST: AMR package v", utils::packageDescription("AMR")$Version) + ), + rows = which(as.rsi_no_warning(x[, col_base, drop = TRUE]) == "S"), + cols = col_enzyme, + last_verbose_info = verbose_info, + original_data = x.bak, + warned = warned, + info = info, + verbose = verbose + ) n_added <- n_added + run_changes$added n_changed <- n_changed + run_changes$changed verbose_info <- run_changes$verbose_info @@ -522,58 +565,62 @@ eucast_rules <- function(x, } } } - } else { if (info == TRUE) { cat("\n") message_("Skipping inheritance rules defined by this AMR package, such as setting trimethoprim (TMP) = R where trimethoprim/sulfamethoxazole (SXT) = R. Add \"other\" or \"all\" to the `rules` argument to apply those rules.") } } - + if (!any(c("all", "custom") %in% rules) & !is.null(custom_rules)) { if (info == TRUE) { message_("Skipping custom EUCAST rules, since the `rules` argument does not contain \"custom\".") } custom_rules <- NULL } - + # Official EUCAST rules --------------------------------------------------- eucast_notification_shown <- FALSE if (!is.null(list(...)$eucast_rules_df)) { # this allows: eucast_rules(x, eucast_rules_df = AMR:::EUCAST_RULES_DF %>% filter(is.na(have_these_values))) eucast_rules_df <- list(...)$eucast_rules_df } else { - # otherwise internal data file, created in data-raw/pre-commit-hook.R + # otherwise internal data file, created in data-raw/_pre_commit_hook.R eucast_rules_df <- EUCAST_RULES_DF } - + # filter on user-set guideline versions ---- if (any(c("all", "breakpoints") %in% rules)) { - eucast_rules_df <- subset(eucast_rules_df, - reference.rule_group %unlike% "breakpoint" | - (reference.rule_group %like% "breakpoint" & reference.version == version_breakpoints)) + eucast_rules_df <- subset( + eucast_rules_df, + reference.rule_group %unlike% "breakpoint" | + (reference.rule_group %like% "breakpoint" & reference.version == version_breakpoints) + ) } if (any(c("all", "expert") %in% rules)) { - eucast_rules_df <- subset(eucast_rules_df, - reference.rule_group %unlike% "expert" | - (reference.rule_group %like% "expert" & reference.version == version_expertrules)) + eucast_rules_df <- subset( + eucast_rules_df, + reference.rule_group %unlike% "expert" | + (reference.rule_group %like% "expert" & reference.version == version_expertrules) + ) } # filter out AmpC de-repressed cephalosporin-resistant mutants ---- # no need to filter on version number here - the rules contain these version number, so are inherently filtered # cefotaxime, ceftriaxone, ceftazidime if (is.null(ampc_cephalosporin_resistance) || isFALSE(ampc_cephalosporin_resistance)) { - eucast_rules_df <- subset(eucast_rules_df, - reference.rule %unlike% "ampc") + eucast_rules_df <- subset( + eucast_rules_df, + reference.rule %unlike% "ampc" + ) } else { if (isTRUE(ampc_cephalosporin_resistance)) { ampc_cephalosporin_resistance <- "R" } eucast_rules_df[which(eucast_rules_df$reference.rule %like% "ampc"), "to_value"] <- as.character(ampc_cephalosporin_resistance) } - + # Go over all rules and apply them ---- for (i in seq_len(nrow(eucast_rules_df))) { - rule_previous <- eucast_rules_df[max(1, i - 1), "reference.rule", drop = TRUE] rule_current <- eucast_rules_df[i, "reference.rule", drop = TRUE] rule_next <- eucast_rules_df[min(nrow(eucast_rules_df), i + 1), "reference.rule", drop = TRUE] @@ -586,17 +633,21 @@ eucast_rules <- function(x, if (rule_group_current %like% "expert" & !any(c("all", "expert") %in% rules)) { next } - + if (isFALSE(info) | isFALSE(verbose)) { rule_text <- "" } else { if (is.na(eucast_rules_df[i, "and_these_antibiotics", drop = TRUE])) { rule_text <- paste0("always report as '", eucast_rules_df[i, "to_value", drop = TRUE], "': ", get_antibiotic_names(eucast_rules_df[i, "then_change_these_antibiotics", drop = TRUE])) } else { - rule_text <- paste0("report as '", eucast_rules_df[i, "to_value", drop = TRUE], "' when ", - format_antibiotic_names(ab_names = get_antibiotic_names(eucast_rules_df[i, "and_these_antibiotics", drop = TRUE]), - ab_results = eucast_rules_df[i, "have_these_values", drop = TRUE]), ": ", - get_antibiotic_names(eucast_rules_df[i, "then_change_these_antibiotics", drop = TRUE])) + rule_text <- paste0( + "report as '", eucast_rules_df[i, "to_value", drop = TRUE], "' when ", + format_antibiotic_names( + ab_names = get_antibiotic_names(eucast_rules_df[i, "and_these_antibiotics", drop = TRUE]), + ab_results = eucast_rules_df[i, "have_these_values", drop = TRUE] + ), ": ", + get_antibiotic_names(eucast_rules_df[i, "then_change_these_antibiotics", drop = TRUE]) + ) } } if (i == 1) { @@ -606,91 +657,120 @@ eucast_rules <- function(x, if (i == nrow(eucast_rules_df)) { rule_next <- "" } - + if (info == TRUE) { # Print EUCAST intro ------------------------------------------------------ if (rule_group_current %unlike% "other" & eucast_notification_shown == FALSE) { cat( - paste0("\n", font_grey(strrep("-", 0.95 * options()$width)), "\n", - word_wrap("Rules by the ", font_bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)")), "\n", - font_blue("https://eucast.org/"), "\n")) + paste0( + "\n", font_grey(strrep("-", 0.95 * options()$width)), "\n", + word_wrap("Rules by the ", font_bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)")), "\n", + font_blue("https://eucast.org/"), "\n" + ) + ) eucast_notification_shown <- TRUE } - + # Print rule (group) ------------------------------------------------------ if (rule_group_current != rule_group_previous) { # is new rule group, one of Breakpoints, Expert Rules and Other cat(font_bold( ifelse( rule_group_current %like% "breakpoint", - paste0("\n", - word_wrap( - breakpoints_info$title, " (", - font_red(paste0(breakpoints_info$version_txt, ", ", breakpoints_info$year)), ")\n")), + paste0( + "\n", + word_wrap( + breakpoints_info$title, " (", + font_red(paste0(breakpoints_info$version_txt, ", ", breakpoints_info$year)), ")\n" + ) + ), ifelse( rule_group_current %like% "expert", - paste0("\n", - word_wrap( - expertrules_info$title, " (", - font_red(paste0(expertrules_info$version_txt, ", ", expertrules_info$year)), ")\n")), - ""))), "\n") + paste0( + "\n", + word_wrap( + expertrules_info$title, " (", + font_red(paste0(expertrules_info$version_txt, ", ", expertrules_info$year)), ")\n" + ) + ), + "" + ) + ) + ), "\n") } # Print rule ------------------------------------------------------------- if (rule_current != rule_previous) { # is new rule within group, print its name - cat(italicise_taxonomy(word_wrap(rule_current, - width = getOption("width") - 30, - extra_indent = 6), - type = "ansi")) + cat(italicise_taxonomy(word_wrap(rule_current, + width = getOption("width") - 30, + extra_indent = 6 + ), + type = "ansi" + )) warned <- FALSE } } - + # Get rule from file ------------------------------------------------------ if_mo_property <- trimws(eucast_rules_df[i, "if_mo_property", drop = TRUE]) like_is_one_of <- trimws(eucast_rules_df[i, "like.is.one_of", drop = TRUE]) mo_value <- trimws(eucast_rules_df[i, "this_value", drop = TRUE]) - + # be sure to comprise all coagulase-negative/-positive staphylococci when they are mentioned if (mo_value %like% "coagulase" && any(x$genus == "Staphylococcus", na.rm = TRUE)) { if (mo_value %like% "negative") { - eucast_rules_df[i, "this_value"] <- paste0("^(", paste0(all_staph[which(all_staph$CNS_CPS %like% "negative"), - "fullname", - drop = TRUE], - collapse = "|"), - ")$") + eucast_rules_df[i, "this_value"] <- paste0( + "^(", paste0(all_staph[which(all_staph$CNS_CPS %like% "negative"), + "fullname", + drop = TRUE + ], + collapse = "|" + ), + ")$" + ) } else { - eucast_rules_df[i, "this_value"] <- paste0("^(", paste0(all_staph[which(all_staph$CNS_CPS %like% "positive"), - "fullname", - drop = TRUE], - collapse = "|"), - ")$") + eucast_rules_df[i, "this_value"] <- paste0( + "^(", paste0(all_staph[which(all_staph$CNS_CPS %like% "positive"), + "fullname", + drop = TRUE + ], + collapse = "|" + ), + ")$" + ) } like_is_one_of <- "like" } # be sure to comprise all beta-haemolytic Streptococci (Lancefield groups A, B, C and G) when they are mentioned if (mo_value %like% "group [ABCG]" && any(x$genus == "Streptococcus", na.rm = TRUE)) { - eucast_rules_df[i, "this_value"] <- paste0("^(", paste0(all_strep[which(all_strep$Lancefield %like% "group [ABCG]"), - "fullname", - drop = TRUE], - collapse = "|"), - ")$") + eucast_rules_df[i, "this_value"] <- paste0( + "^(", paste0(all_strep[which(all_strep$Lancefield %like% "group [ABCG]"), + "fullname", + drop = TRUE + ], + collapse = "|" + ), + ")$" + ) like_is_one_of <- "like" } - + if (like_is_one_of == "is") { # so e.g. 'Enterococcus' will turn into '^Enterococcus$' mo_value <- paste0("^", mo_value, "$") } else if (like_is_one_of == "one_of") { # so 'Clostridium, Actinomyces, ...' will turn into '^(Clostridium|Actinomyces|...)$' - mo_value <- paste0("^(", - paste(trimws(unlist(strsplit(mo_value, ",", fixed = TRUE))), - collapse = "|"), - ")$") + mo_value <- paste0( + "^(", + paste(trimws(unlist(strsplit(mo_value, ",", fixed = TRUE))), + collapse = "|" + ), + ")$" + ) } else if (like_is_one_of != "like") { stop("invalid value for column 'like.is.one_of'", call. = FALSE) } - + source_antibiotics <- eucast_rules_df[i, "and_these_antibiotics", drop = TRUE] source_value <- trimws(unlist(strsplit(eucast_rules_df[i, "have_these_values", drop = TRUE], ",", fixed = TRUE))) target_antibiotics <- eucast_rules_df[i, "then_change_these_antibiotics", drop = TRUE] @@ -698,7 +778,8 @@ eucast_rules <- function(x, if (is.na(source_antibiotics)) { rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like% mo_value), - error = function(e) integer(0)) + error = function(e) integer(0) + ) } else { source_antibiotics <- get_ab_from_namespace(source_antibiotics, cols_ab) if (length(source_value) == 1 & length(source_antibiotics) > 1) { @@ -707,44 +788,51 @@ eucast_rules <- function(x, if (length(source_antibiotics) == 0) { rows <- integer(0) } else if (length(source_antibiotics) == 1) { - rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like% mo_value - & as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]), - error = function(e) integer(0)) + rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like% mo_value & + as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]), + error = function(e) integer(0) + ) } else if (length(source_antibiotics) == 2) { - rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like% mo_value - & as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L] - & as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]), - error = function(e) integer(0)) + rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like% mo_value & + as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L] & + as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]), + error = function(e) integer(0) + ) # nolint start - # } else if (length(source_antibiotics) == 3) { - # rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like% mo_value - # & as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L] - # & as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L] - # & as.rsi_no_warning(x[, source_antibiotics[3L]]) == source_value[3L]), - # error = function(e) integer(0)) + # } else if (length(source_antibiotics) == 3) { + # rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like% mo_value + # & as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L] + # & as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L] + # & as.rsi_no_warning(x[, source_antibiotics[3L]]) == source_value[3L]), + # error = function(e) integer(0)) # nolint end } else { stop_("only 2 antibiotics supported for source_antibiotics") } } - + cols <- get_ab_from_namespace(target_antibiotics, cols_ab) - + # Apply rule on data ------------------------------------------------------ # this will return the unique number of changes - run_changes <- edit_rsi(x = x, - to = target_value, - rule = c(rule_text, rule_group_current, rule_current, - ifelse(rule_group_current %like% "breakpoint", - paste0(breakpoints_info$title, " ", breakpoints_info$version_txt, ", ", breakpoints_info$year), - paste0(expertrules_info$title, " ", expertrules_info$version_txt, ", ", expertrules_info$year))), - rows = rows, - cols = cols, - last_verbose_info = verbose_info, - original_data = x.bak, - warned = warned, - info = info, - verbose = verbose) + run_changes <- edit_rsi( + x = x, + to = target_value, + rule = c( + rule_text, rule_group_current, rule_current, + ifelse(rule_group_current %like% "breakpoint", + paste0(breakpoints_info$title, " ", breakpoints_info$version_txt, ", ", breakpoints_info$year), + paste0(expertrules_info$title, " ", expertrules_info$version_txt, ", ", expertrules_info$year) + ) + ), + rows = rows, + cols = cols, + last_verbose_info = verbose_info, + original_data = x.bak, + warned = warned, + info = info, + verbose = verbose + ) n_added <- n_added + run_changes$added n_changed <- n_changed + run_changes$changed verbose_info <- run_changes$verbose_info @@ -770,35 +858,47 @@ eucast_rules <- function(x, rule <- custom_rules[[i]] rows <- which(eval(parse(text = rule$query), envir = x)) cols <- as.character(rule$result_group) - cols <- c(cols[cols %in% colnames(x)], # direct column names - unname(cols_ab[names(cols_ab) %in% cols])) # based on previous cols_ab finding + cols <- c( + cols[cols %in% colnames(x)], # direct column names + unname(cols_ab[names(cols_ab) %in% cols]) + ) # based on previous cols_ab finding cols <- unique(cols) target_value <- as.character(rule$result_value) - rule_text <- paste0("report as '", target_value, "' when ", - format_custom_query_rule(rule$query, colours = FALSE), ": ", - get_antibiotic_names(cols)) + rule_text <- paste0( + "report as '", target_value, "' when ", + format_custom_query_rule(rule$query, colours = FALSE), ": ", + get_antibiotic_names(cols) + ) if (info == TRUE) { # print rule - cat(italicise_taxonomy(word_wrap(format_custom_query_rule(rule$query, colours = FALSE), - width = getOption("width") - 30, - extra_indent = 6), - type = "ansi")) + cat(italicise_taxonomy(word_wrap(format_custom_query_rule(rule$query, colours = FALSE), + width = getOption("width") - 30, + extra_indent = 6 + ), + type = "ansi" + )) warned <- FALSE } - run_changes <- edit_rsi(x = x, - to = target_value, - rule = c(rule_text, - "Custom EUCAST rules", - paste0("Custom EUCAST rule ", i), - paste0("Object '", deparse(substitute(custom_rules)), - "' consisting of ", length(custom_rules), " custom rules")), - rows = rows, - cols = cols, - last_verbose_info = verbose_info, - original_data = x.bak, - warned = warned, - info = info, - verbose = verbose) + run_changes <- edit_rsi( + x = x, + to = target_value, + rule = c( + rule_text, + "Custom EUCAST rules", + paste0("Custom EUCAST rule ", i), + paste0( + "Object '", deparse(substitute(custom_rules)), + "' consisting of ", length(custom_rules), " custom rules" + ) + ), + rows = rows, + cols = cols, + last_verbose_info = verbose_info, + original_data = x.bak, + warned = warned, + info = info, + verbose = verbose + ) n_added <- n_added + run_changes$added n_changed <- n_changed + run_changes$changed verbose_info <- run_changes$verbose_info @@ -814,60 +914,70 @@ eucast_rules <- function(x, } } } - + # Print overview ---------------------------------------------------------- if (info == TRUE | verbose == TRUE) { verbose_info <- x.bak %pm>% pm_mutate(row = pm_row_number()) %pm>% pm_select(`.rowid`, row) %pm>% pm_right_join(verbose_info, - by = c(".rowid" = "rowid")) %pm>% - pm_select(-`.rowid`) %pm>% - pm_select(row, pm_everything()) %pm>% + by = c(".rowid" = "rowid") + ) %pm>% + pm_select(-`.rowid`) %pm>% + pm_select(row, pm_everything()) %pm>% pm_filter(!is.na(new) | is.na(new) & !is.na(old)) %pm>% pm_arrange(row, rule_group, rule_name, col) rownames(verbose_info) <- NULL } - + if (info == TRUE) { - if (verbose == TRUE) { wouldve <- "would have " } else { wouldve <- "" } - + cat(paste0("\n", font_grey(strrep("-", 0.95 * options()$width)), "\n")) - cat(word_wrap(paste0("The rules ", paste0(wouldve, "affected "), - font_bold(formatnr(pm_n_distinct(verbose_info$row)), - "out of", formatnr(nrow(x.bak)), - "rows"), - ", making a total of ", - font_bold(formatnr(nrow(verbose_info)), "edits\n")))) - + cat(word_wrap(paste0( + "The rules ", paste0(wouldve, "affected "), + font_bold( + formatnr(pm_n_distinct(verbose_info$row)), + "out of", formatnr(nrow(x.bak)), + "rows" + ), + ", making a total of ", + font_bold(formatnr(nrow(verbose_info)), "edits\n") + ))) + total_n_added <- verbose_info %pm>% pm_filter(is.na(old)) %pm>% nrow() total_n_changed <- verbose_info %pm>% pm_filter(!is.na(old)) %pm>% nrow() - + # print added values if (total_n_added == 0) { colour <- cat # is function } else { colour <- font_green # is function } - cat(colour(paste0("=> ", wouldve, "added ", - font_bold(formatnr(verbose_info %pm>% - pm_filter(is.na(old)) %pm>% - nrow()), "test results"), - "\n"))) + cat(colour(paste0( + "=> ", wouldve, "added ", + font_bold(formatnr(verbose_info %pm>% + pm_filter(is.na(old)) %pm>% + nrow()), "test results"), + "\n" + ))) if (total_n_added > 0) { added_summary <- verbose_info %pm>% pm_filter(is.na(old)) %pm>% pm_count(new, name = "n") - cat(paste(" -", - paste0(formatnr(added_summary$n), " test result", ifelse(added_summary$n > 1, "s", ""), - " added as ", paste0('"', added_summary$new, '"')), collapse = "\n")) + cat(paste(" -", + paste0( + formatnr(added_summary$n), " test result", ifelse(added_summary$n > 1, "s", ""), + " added as ", paste0('"', added_summary$new, '"') + ), + collapse = "\n" + )) } - + # print changed values if (total_n_changed == 0) { colour <- cat # is function @@ -877,44 +987,53 @@ eucast_rules <- function(x, if (total_n_added + total_n_changed > 0) { cat("\n") } - cat(colour(paste0("=> ", wouldve, "changed ", - font_bold(formatnr(verbose_info %pm>% - pm_filter(!is.na(old)) %pm>% - nrow()), "test results"), - "\n"))) + cat(colour(paste0( + "=> ", wouldve, "changed ", + font_bold(formatnr(verbose_info %pm>% + pm_filter(!is.na(old)) %pm>% + nrow()), "test results"), + "\n" + ))) if (total_n_changed > 0) { changed_summary <- verbose_info %pm>% pm_filter(!is.na(old)) %pm>% pm_mutate(new = ifelse(is.na(new), "NA", new)) %pm>% pm_count(old, new, name = "n") - cat(paste(" -", - paste0(formatnr(changed_summary$n), " test result", ifelse(changed_summary$n > 1, "s", ""), " changed from ", - paste0('"', changed_summary$old, '"'), " to ", paste0('"', changed_summary$new, '"')), collapse = "\n")) + cat(paste(" -", + paste0( + formatnr(changed_summary$n), " test result", ifelse(changed_summary$n > 1, "s", ""), " changed from ", + paste0('"', changed_summary$old, '"'), " to ", paste0('"', changed_summary$new, '"') + ), + collapse = "\n" + )) cat("\n") } - + cat(paste0(font_grey(strrep("-", 0.95 * options()$width)), "\n")) - + if (verbose == FALSE & total_n_added + total_n_changed > 0) { cat("\n", word_wrap("Use ", font_bold("eucast_rules(..., verbose = TRUE)"), " (on your original data) to get a data.frame with all specified edits instead."), "\n\n", sep = "") } else if (verbose == TRUE) { cat("\n", word_wrap("Used 'Verbose mode' (", font_bold("verbose = TRUE"), "), which returns a data.frame with all specified edits.\nUse ", font_bold("verbose = FALSE"), " to apply the rules on your data."), "\n\n", sep = "") } } - + if (length(warn_lacking_rsi_class) > 0) { warn_lacking_rsi_class <- unique(warn_lacking_rsi_class) # take order from original data set warn_lacking_rsi_class <- warn_lacking_rsi_class[order(colnames(x.bak))] warn_lacking_rsi_class <- warn_lacking_rsi_class[!is.na(warn_lacking_rsi_class)] - warning_("in `eucast_rules()`: not all columns with antimicrobial results are of class . Transform them on beforehand, with e.g.:\n", - " - ", x_deparsed, " %>% as.rsi(", ifelse(length(warn_lacking_rsi_class) == 1, - warn_lacking_rsi_class, - paste0(warn_lacking_rsi_class[1], ":", warn_lacking_rsi_class[length(warn_lacking_rsi_class)])), ")\n", - " - ", x_deparsed, " %>% mutate_if(is.rsi.eligible, as.rsi)\n", - " - ", x_deparsed, " %>% mutate(across(where(is.rsi.eligible), as.rsi))") + warning_( + "in `eucast_rules()`: not all columns with antimicrobial results are of class . Transform them on beforehand, with e.g.:\n", + " - ", x_deparsed, " %>% as.rsi(", ifelse(length(warn_lacking_rsi_class) == 1, + warn_lacking_rsi_class, + paste0(warn_lacking_rsi_class[1], ":", warn_lacking_rsi_class[length(warn_lacking_rsi_class)]) + ), ")\n", + " - ", x_deparsed, " %>% mutate_if(is.rsi.eligible, as.rsi)\n", + " - ", x_deparsed, " %>% mutate(across(where(is.rsi.eligible), as.rsi))" + ) } - + # Return data set --------------------------------------------------------- if (verbose == TRUE) { as_original_data_class(verbose_info, old_attributes$class) @@ -922,7 +1041,7 @@ eucast_rules <- function(x, # x was analysed with only unique rows, so join everything together again x <- x[, c(cols_ab, ".rowid"), drop = FALSE] x.bak <- x.bak[, setdiff(colnames(x.bak), cols_ab), drop = FALSE] - x.bak <- x.bak %pm>% + x.bak <- x.bak %pm>% pm_left_join(x, by = ".rowid") x.bak <- x.bak[, old_cols, drop = FALSE] # reset original attributes, no need for as_original_data_class() here @@ -943,24 +1062,26 @@ edit_rsi <- function(x, info, verbose) { cols <- unique(cols[!is.na(cols) & !is.null(cols)]) - + # for Verbose Mode, keep track of all changes and return them - track_changes <- list(added = 0, - changed = 0, - output = x, - verbose_info = last_verbose_info, - rsi_warn = character(0)) - + track_changes <- list( + added = 0, + changed = 0, + output = x, + verbose_info = last_verbose_info, + rsi_warn = character(0) + ) + txt_error <- function() { - if (info == TRUE) cat("", font_red_bg(font_white(" ERROR ")), "\n\n") + if (info == TRUE) cat("", font_red_bg(font_white(" ERROR ")), "\n\n") } txt_warning <- function() { if (warned == FALSE) { if (info == TRUE) cat(" ", font_rsi_I_bg(" WARNING "), sep = "") } - warned <<- TRUE + warned <<- TRUE } - + if (length(rows) > 0 & length(cols) > 0) { new_edits <- x if (any(!vapply(FUN.VALUE = logical(1), x[, cols, drop = FALSE], is.rsi), na.rm = TRUE)) { @@ -972,15 +1093,19 @@ edit_rsi <- function(x, warning = function(w) { if (w$message %like% "invalid factor level") { xyz <- vapply(FUN.VALUE = logical(1), cols, function(col) { - new_edits[, col] <<- factor(x = as.character(pm_pull(new_edits, col)), - levels = unique(c(to, levels(pm_pull(new_edits, col))))) + new_edits[, col] <<- factor( + x = as.character(pm_pull(new_edits, col)), + levels = unique(c(to, levels(pm_pull(new_edits, col)))) + ) TRUE }) suppressWarnings(new_edits[rows, cols] <<- to) - warning_("in `eucast_rules()`: value \"", to, "\" added to the factor levels of column", - ifelse(length(cols) == 1, "", "s"), - " ", vector_and(cols, quotes = "`", sort = FALSE), - " because this value was not an existing factor level.") + warning_( + "in `eucast_rules()`: value \"", to, "\" added to the factor levels of column", + ifelse(length(cols) == 1, "", "s"), + " ", vector_and(cols, quotes = "`", sort = FALSE), + " because this value was not an existing factor level." + ) txt_warning() warned <- FALSE } else { @@ -990,48 +1115,56 @@ edit_rsi <- function(x, }, error = function(e) { txt_error() - stop(paste0("In row(s) ", paste(rows[1:min(length(rows), 10)], collapse = ","), - ifelse(length(rows) > 10, "...", ""), - " while writing value '", to, - "' to column(s) `", paste(cols, collapse = "`, `"), - "`:\n", e$message), - call. = FALSE) + stop(paste0( + "In row(s) ", paste(rows[1:min(length(rows), 10)], collapse = ","), + ifelse(length(rows) > 10, "...", ""), + " while writing value '", to, + "' to column(s) `", paste(cols, collapse = "`, `"), + "`:\n", e$message + ), + call. = FALSE + ) } ) - + track_changes$output <- new_edits if ((info == TRUE | verbose == TRUE) && !isTRUE(all.equal(x, track_changes$output))) { get_original_rows <- function(rowids) { as.integer(rownames(original_data[which(original_data$.rowid %in% rowids), , drop = FALSE])) } for (i in seq_len(length(cols))) { - verbose_new <- data.frame(rowid = new_edits[rows, ".rowid", drop = TRUE], - col = cols[i], - mo_fullname = new_edits[rows, "fullname", drop = TRUE], - old = x[rows, cols[i], drop = TRUE], - new = to, - rule = font_stripstyle(rule[1]), - rule_group = font_stripstyle(rule[2]), - rule_name = font_stripstyle(rule[3]), - rule_source = font_stripstyle(rule[4]), - stringsAsFactors = FALSE) - colnames(verbose_new) <- c("rowid", "col", "mo_fullname", "old", "new", - "rule", "rule_group", "rule_name", "rule_source") + verbose_new <- data.frame( + rowid = new_edits[rows, ".rowid", drop = TRUE], + col = cols[i], + mo_fullname = new_edits[rows, "fullname", drop = TRUE], + old = x[rows, cols[i], drop = TRUE], + new = to, + rule = font_stripstyle(rule[1]), + rule_group = font_stripstyle(rule[2]), + rule_name = font_stripstyle(rule[3]), + rule_source = font_stripstyle(rule[4]), + stringsAsFactors = FALSE + ) + colnames(verbose_new) <- c( + "rowid", "col", "mo_fullname", "old", "new", + "rule", "rule_group", "rule_name", "rule_source" + ) verbose_new <- verbose_new %pm>% pm_filter(old != new | is.na(old) | is.na(new) & !is.na(old)) # save changes to data set 'verbose_info' track_changes$verbose_info <- rbind(track_changes$verbose_info, - verbose_new, - stringsAsFactors = FALSE) + verbose_new, + stringsAsFactors = FALSE + ) # count adds and changes track_changes$added <- track_changes$added + verbose_new %pm>% pm_filter(is.na(old)) %pm>% - pm_pull(rowid) %pm>% - get_original_rows() %pm>% + pm_pull(rowid) %pm>% + get_original_rows() %pm>% length() track_changes$changed <- track_changes$changed + verbose_new %pm>% pm_filter(!is.na(old)) %pm>% - pm_pull(rowid) %pm>% - get_original_rows() %pm>% + pm_pull(rowid) %pm>% + get_original_rows() %pm>% length() } } @@ -1045,26 +1178,32 @@ eucast_dosage <- function(ab, administration = "iv", version_breakpoints = 11.0) meet_criteria(ab, allow_class = c("character", "numeric", "integer", "factor")) meet_criteria(administration, allow_class = "character", is_in = dosage$administration[!is.na(dosage$administration)], has_length = 1) meet_criteria(version_breakpoints, allow_class = c("numeric", "integer"), has_length = 1, is_in = as.double(names(EUCAST_VERSION_BREAKPOINTS))) - + # show used version_breakpoints number once per session (pkg_env will reload every session) - if (message_not_thrown_before("eucast_dosage", "v", gsub("[^0-9]", "", version_breakpoints), entire_session = TRUE)) { - message_("Dosages for antimicrobial drugs, as meant for ", - format_eucast_version_nr(version_breakpoints, markdown = FALSE), ". ", - font_red("This note will be shown once per session.")) + if (message_not_thrown_before("eucast_dosage", "v", gsub("[^0-9]", "", version_breakpoints), entire_session = TRUE)) { + message_( + "Dosages for antimicrobial drugs, as meant for ", + format_eucast_version_nr(version_breakpoints, markdown = FALSE), ". ", + font_red("This note will be shown once per session.") + ) } - + ab <- as.ab(ab) lst <- vector("list", length = length(ab)) for (i in seq_len(length(ab))) { df <- AMR::dosage[which(AMR::dosage$ab == ab[i] & AMR::dosage$administration == administration), , drop = FALSE] - lst[[i]] <- list(ab = "", - name = "", - standard_dosage = ifelse("standard_dosage" %in% df$type, - df[which(df$type == "standard_dosage"), "original_txt", drop = TRUE], - NA_character_), - high_dosage = ifelse("high_dosage" %in% df$type, - df[which(df$type == "high_dosage"), "original_txt", drop = TRUE], - NA_character_)) + lst[[i]] <- list( + ab = "", + name = "", + standard_dosage = ifelse("standard_dosage" %in% df$type, + df[which(df$type == "standard_dosage"), "original_txt", drop = TRUE], + NA_character_ + ), + high_dosage = ifelse("high_dosage" %in% df$type, + df[which(df$type == "high_dosage"), "original_txt", drop = TRUE], + NA_character_ + ) + ) } out <- do.call("rbind", lapply(lst, as.data.frame, stringsAsFactors = FALSE)) rownames(out) <- NULL diff --git a/R/first_isolate.R b/R/first_isolate.R index 4842c96ae..869e9fb9b 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -34,7 +34,7 @@ #' @param col_specimen column name of the specimen type or group #' @param col_icu column name of the logicals (`TRUE`/`FALSE`) whether a ward or department is an Intensive Care Unit (ICU). This can also be a [logical] vector with the same length as rows in `x`. #' @param col_keyantimicrobials (only useful when `method = "phenotype-based"`) column name of the key antimicrobials to determine first isolates, see [key_antimicrobials()]. Defaults to the first column that starts with 'key' followed by 'ab' or 'antibiotics' or 'antimicrobials' (case insensitive). Use `col_keyantimicrobials = FALSE` to prevent this. Can also be the output of [key_antimicrobials()]. -#' @param episode_days episode in days after which a genus/species combination will be determined as 'first isolate' again. The default of 365 days is based on the guideline by CLSI, see *Source*. +#' @param episode_days episode in days after which a genus/species combination will be determined as 'first isolate' again. The default of 365 days is based on the guideline by CLSI, see *Source*. #' @param testcodes_exclude a [character] vector with test codes that should be excluded (case-insensitive) #' @param icu_exclude a [logical] to indicate whether ICU isolates should be excluded (rows with value `TRUE` in the column set with `col_icu`) #' @param specimen_group value in the column set with `col_specimen` to filter on @@ -46,22 +46,22 @@ #' @param include_unknown a [logical] to indicate whether 'unknown' microorganisms should be included too, i.e. microbial code `"UNKNOWN"`, which defaults to `FALSE`. For WHONET users, this means that all records with organism code `"con"` (*contamination*) will be excluded at default. Isolates with a microbial ID of `NA` will always be excluded as first isolate. #' @param include_untested_rsi a [logical] to indicate whether also rows without antibiotic results are still eligible for becoming a first isolate. Use `include_untested_rsi = FALSE` to always return `FALSE` for such rows. This checks the data set for columns of class `` and consequently requires transforming columns with antibiotic results using [as.rsi()] first. #' @param ... arguments passed on to [first_isolate()] when using [filter_first_isolate()], otherwise arguments passed on to [key_antimicrobials()] (such as `universal`, `gram_negative`, `gram_positive`) -#' @details +#' @details #' To conduct epidemiological analyses on antimicrobial resistance data, only so-called first isolates should be included to prevent overestimation and underestimation of antimicrobial resistance. Different methods can be used to do so, see below. -#' +#' #' These functions are context-aware. This means that the `x` argument can be left blank if used inside a [data.frame] call, see *Examples*. -#' +#' #' The [first_isolate()] function is a wrapper around the [is_new_episode()] function, but more efficient for data sets containing microorganism codes or names. -#' +#' #' All isolates with a microbial ID of `NA` will be excluded as first isolate. -#' +#' #' ## Different methods -#' -#' According to Hindler *et al.* (2007, \doi{10.1086/511864}), there are different methods (algorithms) to select first isolates with increasing reliability: isolate-based, patient-based, episode-based and phenotype-based. All methods select on a combination of the taxonomic genus and species (not subspecies). -#' +#' +#' According to Hindler *et al.* (2007, \doi{10.1086/511864}), there are different methods (algorithms) to select first isolates with increasing reliability: isolate-based, patient-based, episode-based and phenotype-based. All methods select on a combination of the taxonomic genus and species (not subspecies). +#' #' All mentioned methods are covered in the [first_isolate()] function: -#' -#' +#' +#' #' | **Method** | **Function to apply** | #' |--------------------------------------------------|-------------------------------------------------------| #' | **Isolate-based** | `first_isolate(x, method = "isolate-based")` | @@ -82,53 +82,53 @@ #' | *(= first isolate per phenotype)* | | #' | - Major difference in any antimicrobial result | - `first_isolate(x, type = "points")` | #' | - Any difference in key antimicrobial results | - `first_isolate(x, type = "keyantimicrobials")` | -#' +#' #' ### Isolate-based -#' +#' #' This method does not require any selection, as all isolates should be included. It does, however, respect all arguments set in the [first_isolate()] function. For example, the default setting for `include_unknown` (`FALSE`) will omit selection of rows without a microbial ID. -#' +#' #' ### Patient-based -#' +#' #' To include every genus-species combination per patient once, set the `episode_days` to `Inf`. Although often inappropriate, this method makes sure that no duplicate isolates are selected from the same patient. In a large longitudinal data set, this could mean that isolates are *excluded* that were found years after the initial isolate. -#' +#' #' ### Episode-based -#' +#' #' To include every genus-species combination per patient episode once, set the `episode_days` to a sensible number of days. Depending on the type of analysis, this could be 14, 30, 60 or 365. Short episodes are common for analysing specific hospital or ward data, long episodes are common for analysing regional and national data. -#' +#' #' This is the most common method to correct for duplicate isolates. Patients are categorised into episodes based on their ID and dates (e.g., the date of specimen receipt or laboratory result). While this is a common method, it does not take into account antimicrobial test results. This means that e.g. a methicillin-resistant *Staphylococcus aureus* (MRSA) isolate cannot be differentiated from a wildtype *Staphylococcus aureus* isolate. -#' +#' #' ### Phenotype-based -#' +#' #' This is a more reliable method, since it also *weighs* the antibiogram (antimicrobial test results) yielding so-called 'first weighted isolates'. There are two different methods to weigh the antibiogram: -#' +#' #' 1. Using `type = "points"` and argument `points_threshold` (default) -#' +#' #' This method weighs *all* antimicrobial agents available in the data set. Any difference from I to S or R (or vice versa) counts as `0.5` points, a difference from S to R (or vice versa) counts as `1` point. When the sum of points exceeds `points_threshold`, which defaults to `2`, an isolate will be selected as a first weighted isolate. -#' +#' #' All antimicrobials are internally selected using the [all_antimicrobials()] function. The output of this function does not need to be passed to the [first_isolate()] function. -#' -#' +#' +#' #' 2. Using `type = "keyantimicrobials"` and argument `ignore_I` -#' -#' This method only weighs specific antimicrobial agents, called *key antimicrobials*. Any difference from S to R (or vice versa) in these key antimicrobials will select an isolate as a first weighted isolate. With `ignore_I = FALSE`, also differences from I to S or R (or vice versa) will lead to this. -#' +#' +#' This method only weighs specific antimicrobial agents, called *key antimicrobials*. Any difference from S to R (or vice versa) in these key antimicrobials will select an isolate as a first weighted isolate. With `ignore_I = FALSE`, also differences from I to S or R (or vice versa) will lead to this. +#' #' Key antimicrobials are internally selected using the [key_antimicrobials()] function, but can also be added manually as a variable to the data and set in the `col_keyantimicrobials` argument. Another option is to pass the output of the [key_antimicrobials()] function directly to the `col_keyantimicrobials` argument. -#' -#' +#' +#' #' The default method is phenotype-based (using `type = "points"`) and episode-based (using `episode_days = 365`). This makes sure that every genus-species combination is selected per patient once per year, while taking into account all antimicrobial test results. If no antimicrobial test results are available in the data set, only the episode-based method is applied at default. #' @rdname first_isolate #' @seealso [key_antimicrobials()] #' @export #' @return A [logical] vector #' @source Methodology of this function is strictly based on: -#' +#' #' - **M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition**, 2014, *Clinical and Laboratory Standards Institute (CLSI)*. . -#' +#' #' - Hindler JF and Stelling J (2007). **Analysis and Presentation of Cumulative Antibiograms: A New Consensus Guideline from the Clinical and Laboratory Standards Institute.** Clinical Infectious Diseases, 44(6), 867-873. \doi{10.1086/511864} #' @examples #' # `example_isolates` is a data set available in the AMR package. #' # See ?example_isolates. -#' +#' #' example_isolates[first_isolate(), ] #' \donttest{ #' # get all first Gram-negatives @@ -138,22 +138,20 @@ #' # filter on first isolates using dplyr: #' example_isolates %>% #' filter(first_isolate()) -#' #' } #' if (require("dplyr")) { -#' +#' #' # short-hand version: #' example_isolates %>% #' filter_first_isolate(info = FALSE) -#' #' } #' if (require("dplyr")) { -#' -#' # flag the first isolates per group: -#' example_isolates %>% -#' group_by(ward) %>% -#' mutate(first = first_isolate()) %>% -#' select(ward, date, patient, mo, first) +#' +#' # flag the first isolates per group: +#' example_isolates %>% +#' group_by(ward) %>% +#' mutate(first = first_isolate()) %>% +#' select(ward, date, patient, mo, first) #' } #' } first_isolate <- function(x = NULL, @@ -176,7 +174,6 @@ first_isolate <- function(x = NULL, include_unknown = FALSE, include_untested_rsi = TRUE, ...) { - dots <- unlist(list(...)) if (length(dots) != 0) { # backwards compatibility with old arguments @@ -188,7 +185,7 @@ first_isolate <- function(x = NULL, col_keyantimicrobials <- dots[which(dots.names == "col_keyantibiotics")] } } - + if (is_null_or_grouped_tbl(x)) { # when `x` is left blank, auto determine it (get_current_data() also contains dplyr::cur_data_all()) # is also fix for using a grouped df as input (a dot as first argument) @@ -240,36 +237,43 @@ first_isolate <- function(x = NULL, meet_criteria(info, allow_class = "logical", has_length = 1) meet_criteria(include_unknown, allow_class = "logical", has_length = 1) meet_criteria(include_untested_rsi, allow_class = "logical", has_length = 1) - + # remove data.table, grouping from tibbles, etc. x <- as.data.frame(x, stringsAsFactors = FALSE) - - any_col_contains_rsi <- any(vapply(FUN.VALUE = logical(1), - X = x, - # check only first 10,000 rows - FUN = function(x) any(as.character(x[1:10000]) %in% c("R", "S", "I"), na.rm = TRUE), - USE.NAMES = FALSE)) + + any_col_contains_rsi <- any(vapply( + FUN.VALUE = logical(1), + X = x, + # check only first 10,000 rows + FUN = function(x) any(as.character(x[1:10000]) %in% c("R", "S", "I"), na.rm = TRUE), + USE.NAMES = FALSE + )) if (method == "phenotype-based" & !any_col_contains_rsi) { method <- "episode-based" } if (info == TRUE & message_not_thrown_before("first_isolate", "method")) { - message_(paste0("Determining first isolates ", - ifelse(method %in% c("episode-based", "phenotype-based"), - ifelse(is.infinite(episode_days), - "without a specified episode length", - paste("using an episode length of", episode_days, "days")), - "")), - as_note = FALSE, - add_fn = font_black) + message_(paste0( + "Determining first isolates ", + ifelse(method %in% c("episode-based", "phenotype-based"), + ifelse(is.infinite(episode_days), + "without a specified episode length", + paste("using an episode length of", episode_days, "days") + ), + "" + ) + ), + as_note = FALSE, + add_fn = font_black + ) } - + # try to find columns based on type # -- mo if (is.null(col_mo)) { col_mo <- search_type_in_df(x = x, type = "mo", info = info) stop_if(is.null(col_mo), "`col_mo` must be set") } - + # methods ---- if (method == "isolate-based") { episode_days <- Inf @@ -300,13 +304,13 @@ first_isolate <- function(x = NULL, } } } - + # -- date if (is.null(col_date)) { col_date <- search_type_in_df(x = x, type = "date", info = info) stop_if(is.null(col_date), "`col_date` must be set") } - + # -- patient id if (is.null(col_patient_id)) { if (all(c("First name", "Last name", "Sex") %in% colnames(x))) { @@ -324,78 +328,86 @@ first_isolate <- function(x = NULL, if (is.null(col_specimen) & !is.null(specimen_group)) { col_specimen <- search_type_in_df(x = x, type = "specimen", info = info) } - + # check if columns exist check_columns_existance <- function(column, tblname = x) { if (!is.null(column)) { stop_ifnot(column %in% colnames(tblname), - "Column '", column, "' not found.", call = FALSE) + "Column '", column, "' not found.", + call = FALSE + ) } } - + check_columns_existance(col_date) check_columns_existance(col_patient_id) check_columns_existance(col_mo) check_columns_existance(col_testcode) check_columns_existance(col_keyantimicrobials) - + # convert dates to Date dates <- as.Date(x[, col_date, drop = TRUE]) dates[is.na(dates)] <- as.Date("1970-01-01") x[, col_date] <- dates - + # create original row index x$newvar_row_index <- seq_len(nrow(x)) x$newvar_mo <- as.mo(x[, col_mo, drop = TRUE]) x$newvar_genus_species <- paste(mo_genus(x$newvar_mo), mo_species(x$newvar_mo)) x$newvar_date <- x[, col_date, drop = TRUE] x$newvar_patient_id <- x[, col_patient_id, drop = TRUE] - + if (is.null(col_testcode)) { testcodes_exclude <- NULL } # remove testcodes if (!is.null(testcodes_exclude) & info == TRUE & message_not_thrown_before("first_isolate", "excludingtestcodes")) { message_("Excluding test codes: ", vector_and(testcodes_exclude, quotes = TRUE), - add_fn = font_black, - as_note = FALSE) + add_fn = font_black, + as_note = FALSE + ) } - + if (is.null(col_specimen)) { specimen_group <- NULL } - + # filter on specimen group and keyantibiotics when they are filled in if (!is.null(specimen_group)) { check_columns_existance(col_specimen, x) if (info == TRUE & message_not_thrown_before("first_isolate", "excludingspecimen")) { message_("Excluding other than specimen group '", specimen_group, "'", - add_fn = font_black, - as_note = FALSE) + add_fn = font_black, + as_note = FALSE + ) } } if (!is.null(col_keyantimicrobials)) { x$newvar_key_ab <- x[, col_keyantimicrobials, drop = TRUE] } - + if (is.null(testcodes_exclude)) { testcodes_exclude <- "" } - + # arrange data to the right sorting if (is.null(specimen_group)) { - x <- x[order(x$newvar_patient_id, - x$newvar_genus_species, - x$newvar_date), ] + x <- x[order( + x$newvar_patient_id, + x$newvar_genus_species, + x$newvar_date + ), ] rownames(x) <- NULL row.start <- 1 row.end <- nrow(x) } else { # filtering on specimen and only analyse these rows to save time - x <- x[order(pm_pull(x, col_specimen), - x$newvar_patient_id, - x$newvar_genus_species, - x$newvar_date), ] + x <- x[order( + pm_pull(x, col_specimen), + x$newvar_patient_id, + x$newvar_genus_species, + x$newvar_date + ), ] rownames(x) <- NULL suppressWarnings( row.start <- which(x %pm>% pm_pull(col_specimen) == specimen_group) %pm>% min(na.rm = TRUE) @@ -404,95 +416,111 @@ first_isolate <- function(x = NULL, row.end <- which(x %pm>% pm_pull(col_specimen) == specimen_group) %pm>% max(na.rm = TRUE) ) } - + # speed up - return immediately if obvious if (abs(row.start) == Inf | abs(row.end) == Inf) { if (info == TRUE) { message_("=> Found ", font_bold("no isolates"), - add_fn = font_black, - as_note = FALSE) + add_fn = font_black, + as_note = FALSE + ) } return(rep(FALSE, nrow(x))) } if (row.start == row.end) { if (info == TRUE) { - message_("=> Found ", font_bold("1 first isolate"), ", as the data only contained 1 row", - add_fn = font_black, - as_note = FALSE) + message_("=> Found ", font_bold("1 first isolate"), ", as the data only contained 1 row", + add_fn = font_black, + as_note = FALSE + ) } return(TRUE) } if (length(c(row.start:row.end)) == pm_n_distinct(x[c(row.start:row.end), col_mo, drop = TRUE])) { if (info == TRUE) { message_("=> Found ", font_bold(paste(length(c(row.start:row.end)), "first isolates")), - ", as all isolates were different microbial species", - add_fn = font_black, - as_note = FALSE) + ", as all isolates were different microbial species", + add_fn = font_black, + as_note = FALSE + ) } return(rep(TRUE, length(c(row.start:row.end)))) } - + # did find some isolates - add new index numbers of rows x$newvar_row_index_sorted <- seq_len(nrow(x)) - + scope.size <- nrow(x[which(x$newvar_row_index_sorted %in% c(row.start + 1:row.end) & - !is.na(x$newvar_mo)), , drop = FALSE]) - + !is.na(x$newvar_mo)), , drop = FALSE]) + # Analysis of first isolate ---- if (!is.null(col_keyantimicrobials)) { if (info == TRUE & message_not_thrown_before("first_isolate", "type")) { if (type == "keyantimicrobials") { message_("Basing inclusion on key antimicrobials, ", - ifelse(ignore_I == FALSE, "not ", ""), - "ignoring I", - add_fn = font_black, - as_note = FALSE) + ifelse(ignore_I == FALSE, "not ", ""), + "ignoring I", + add_fn = font_black, + as_note = FALSE + ) } if (type == "points") { message_("Basing inclusion on all antimicrobial results, using a points threshold of ", - points_threshold, - add_fn = font_black, - as_note = FALSE) + points_threshold, + add_fn = font_black, + as_note = FALSE + ) } } } - + x$other_pat_or_mo <- ifelse(x$newvar_patient_id == pm_lag(x$newvar_patient_id) & - x$newvar_genus_species == pm_lag(x$newvar_genus_species), - FALSE, - TRUE) - + x$newvar_genus_species == pm_lag(x$newvar_genus_species), + FALSE, + TRUE + ) + x$episode_group <- paste(x$newvar_patient_id, x$newvar_genus_species) - x$more_than_episode_ago <- unlist(lapply(split(x$newvar_date, - x$episode_group), - exec_episode, # this will skip meet_criteria() in is_new_episode(), saving time - type = "logical", - episode_days = episode_days), - use.names = FALSE) - + x$more_than_episode_ago <- unlist(lapply(split( + x$newvar_date, + x$episode_group + ), + exec_episode, # this will skip meet_criteria() in is_new_episode(), saving time + type = "logical", + episode_days = episode_days + ), + use.names = FALSE + ) + if (!is.null(col_keyantimicrobials)) { # with key antibiotics - x$other_key_ab <- !antimicrobials_equal(y = x$newvar_key_ab, - z = pm_lag(x$newvar_key_ab), - type = type, - ignore_I = ignore_I, - points_threshold = points_threshold) - x$newvar_first_isolate <- pm_if_else(x$newvar_row_index_sorted >= row.start & - x$newvar_row_index_sorted <= row.end & - x$newvar_genus_species != "" & - (x$other_pat_or_mo | x$more_than_episode_ago | x$other_key_ab), - TRUE, - FALSE) + x$other_key_ab <- !antimicrobials_equal( + y = x$newvar_key_ab, + z = pm_lag(x$newvar_key_ab), + type = type, + ignore_I = ignore_I, + points_threshold = points_threshold + ) + x$newvar_first_isolate <- pm_if_else( + x$newvar_row_index_sorted >= row.start & + x$newvar_row_index_sorted <= row.end & + x$newvar_genus_species != "" & + (x$other_pat_or_mo | x$more_than_episode_ago | x$other_key_ab), + TRUE, + FALSE + ) } else { # no key antibiotics - x$newvar_first_isolate <- pm_if_else(x$newvar_row_index_sorted >= row.start & - x$newvar_row_index_sorted <= row.end & - x$newvar_genus_species != "" & - (x$other_pat_or_mo | x$more_than_episode_ago), - TRUE, - FALSE) + x$newvar_first_isolate <- pm_if_else( + x$newvar_row_index_sorted >= row.start & + x$newvar_row_index_sorted <= row.end & + x$newvar_genus_species != "" & + (x$other_pat_or_mo | x$more_than_episode_ago), + TRUE, + FALSE + ) } - + # first one as TRUE x[row.start, "newvar_first_isolate"] <- TRUE # no tests that should be included, or ICU @@ -502,19 +530,21 @@ first_isolate <- function(x = NULL, if (!is.null(col_icu)) { if (icu_exclude == TRUE) { message_("Excluding ", format(sum(!col_icu, na.rm = TRUE), big.mark = ","), " isolates from ICU.", - add_fn = font_black, - as_note = FALSE) + add_fn = font_black, + as_note = FALSE + ) x[which(col_icu), "newvar_first_isolate"] <- FALSE } else { message_("Including isolates from ICU.", - add_fn = font_black, - as_note = FALSE) + add_fn = font_black, + as_note = FALSE + ) } } - + decimal.mark <- getOption("OutDec") big.mark <- ifelse(decimal.mark != ",", ",", ".") - + if (info == TRUE) { # print group name if used in dplyr::group_by() cur_group <- import_fn("cur_group", "dplyr", error_on_fail = FALSE) @@ -532,41 +562,50 @@ first_isolate <- function(x = NULL, } }) message_("\nGroup: ", paste0(names(group), " = ", group, collapse = ", "), "\n", - as_note = FALSE, - add_fn = font_red) + as_note = FALSE, + add_fn = font_red + ) } } } - + # handle empty microorganisms if (any(x$newvar_mo == "UNKNOWN", na.rm = TRUE) & info == TRUE) { - message_(ifelse(include_unknown == TRUE, "Included ", "Excluded "), - format(sum(x$newvar_mo == "UNKNOWN", na.rm = TRUE), - decimal.mark = decimal.mark, big.mark = big.mark), - " isolates with a microbial ID 'UNKNOWN' (in column '", font_bold(col_mo), "')") + message_( + ifelse(include_unknown == TRUE, "Included ", "Excluded "), + format(sum(x$newvar_mo == "UNKNOWN", na.rm = TRUE), + decimal.mark = decimal.mark, big.mark = big.mark + ), + " isolates with a microbial ID 'UNKNOWN' (in column '", font_bold(col_mo), "')" + ) } x[which(x$newvar_mo == "UNKNOWN"), "newvar_first_isolate"] <- include_unknown - + # exclude all NAs if (any(is.na(x$newvar_mo)) & info == TRUE) { - message_("Excluded ", format(sum(is.na(x$newvar_mo), na.rm = TRUE), - decimal.mark = decimal.mark, big.mark = big.mark), - " isolates with a microbial ID 'NA' (in column '", font_bold(col_mo), "')") + message_( + "Excluded ", format(sum(is.na(x$newvar_mo), na.rm = TRUE), + decimal.mark = decimal.mark, big.mark = big.mark + ), + " isolates with a microbial ID 'NA' (in column '", font_bold(col_mo), "')" + ) } x[which(is.na(x$newvar_mo)), "newvar_first_isolate"] <- FALSE - + # handle isolates without antibiogram if (include_untested_rsi == FALSE && any(is.rsi(x))) { - rsi_all_NA <- which(unname(vapply(FUN.VALUE = logical(1), - as.data.frame(t(x[, is.rsi(x), drop = FALSE])), - function(rsi_values) all(is.na(rsi_values))))) + rsi_all_NA <- which(unname(vapply( + FUN.VALUE = logical(1), + as.data.frame(t(x[, is.rsi(x), drop = FALSE])), + function(rsi_values) all(is.na(rsi_values)) + ))) x[rsi_all_NA, "newvar_first_isolate"] <- FALSE } - + # arrange back according to original sorting again x <- x[order(x$newvar_row_index), , drop = FALSE] rownames(x) <- NULL - + if (info == TRUE) { n_found <- sum(x$newvar_first_isolate, na.rm = TRUE) p_found_total <- percentage(n_found / nrow(x[which(!is.na(x$newvar_mo)), , drop = FALSE]), digits = 1) @@ -579,20 +618,25 @@ first_isolate <- function(x = NULL, } # mark up number of found n_found <- format(n_found, big.mark = big.mark, decimal.mark = decimal.mark) - message_(paste0("=> Found ", - font_bold(paste0(n_found, - ifelse(method == "isolate-based", "", paste0(" '", method, "'")), - " first isolates")), - " (", - ifelse(p_found_total != p_found_scope, - paste0(p_found_scope, " within scope and "), - ""), - p_found_total, " of total where a microbial ID was available)"), - add_fn = font_black, as_note = FALSE) + message_(paste0( + "=> Found ", + font_bold(paste0( + n_found, + ifelse(method == "isolate-based", "", paste0(" '", method, "'")), + " first isolates" + )), + " (", + ifelse(p_found_total != p_found_scope, + paste0(p_found_scope, " within scope and "), + "" + ), + p_found_total, " of total where a microbial ID was available)" + ), + add_fn = font_black, as_note = FALSE + ) } - + x$newvar_first_isolate - } #' @rdname first_isolate @@ -616,14 +660,16 @@ filter_first_isolate <- function(x = NULL, meet_criteria(episode_days, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = FALSE) method <- coerce_method(method) meet_criteria(method, allow_class = "character", has_length = 1, is_in = c("phenotype-based", "episode-based", "patient-based", "isolate-based")) - - subset(x, first_isolate(x = x, - col_date = col_date, - col_patient_id = col_patient_id, - col_mo = col_mo, - episode_days = episode_days, - method = method, - ...)) + + subset(x, first_isolate( + x = x, + col_date = col_date, + col_patient_id = col_patient_id, + col_mo = col_mo, + episode_days = episode_days, + method = method, + ... + )) } coerce_method <- function(method) { diff --git a/R/g.test.R b/R/g.test.R index eb23c3d5e..9ff4264d5 100755 --- a/R/g.test.R +++ b/R/g.test.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -36,7 +36,7 @@ #' In the contingency table case simulation is done by random sampling from the set of all contingency tables with given marginals, and works only if the marginals are strictly positive. Note that this is not the usual sampling situation assumed for a chi-squared test (such as the *G*-test) but rather that for Fisher's exact test. #' #' In the goodness-of-fit case simulation is done by random sampling from the discrete distribution specified by `p`, each sample being of size `n = sum(x)`. This simulation is done in \R and may be slow. -#' +#' #' ## *G*-test Of Goodness-of-Fit (Likelihood Ratio Test) #' Use the *G*-test of goodness-of-fit when you have one nominal variable with two or more values (such as male and female, or red, pink and white flowers). You compare the observed counts of numbers of observations in each category with the expected counts, which you calculate using some kind of theoretical expectation (such as a 1:1 sex ratio or a 1:2:1 ratio in a genetic cross). #' @@ -57,9 +57,9 @@ #' Unlike the exact test of goodness-of-fit ([fisher.test()]), the *G*-test does not directly calculate the probability of obtaining the observed results or something more extreme. Instead, like almost all statistical tests, the *G*-test has an intermediate step; it uses the data to calculate a test statistic that measures how far the observed data are from the null expectation. You then use a mathematical relationship, in this case the chi-square distribution, to estimate the probability of obtaining that value of the test statistic. #' #' The *G*-test uses the log of the ratio of two likelihoods as the test statistic, which is why it is also called a likelihood ratio test or log-likelihood ratio test. The formula to calculate a *G*-statistic is: -#' +#' #' \eqn{G = 2 * sum(x * log(x / E))} -#' +#' #' where `E` are the expected values. Since this is chi-square distributed, the p value can be calculated in \R with: #' ``` #' p <- stats::pchisq(G, df, lower.tail = FALSE) @@ -111,92 +111,112 @@ g.test <- function(x, p = rep(1 / length(x), length(x)), rescale.p = FALSE) { DNAME <- deparse(substitute(x)) - if (is.data.frame(x)) + if (is.data.frame(x)) { x <- as.matrix(x) + } if (is.matrix(x)) { - if (min(dim(x)) == 1L) + if (min(dim(x)) == 1L) { x <- as.vector(x) + } } if (!is.matrix(x) && !is.null(y)) { - if (length(x) != length(y)) + if (length(x) != length(y)) { stop("'x' and 'y' must have the same length") + } DNAME2 <- deparse(substitute(y)) xname <- if (length(DNAME) > 1L || nchar(DNAME, "w") > - 30) + 30) { "" - else DNAME + } else { + DNAME + } yname <- if (length(DNAME2) > 1L || nchar(DNAME2, "w") > - 30) + 30) { "" - else DNAME2 + } else { + DNAME2 + } OK <- complete.cases(x, y) x <- factor(x[OK]) y <- factor(y[OK]) - if ((nlevels(x) < 2L) || (nlevels(y) < 2L)) + if ((nlevels(x) < 2L) || (nlevels(y) < 2L)) { stop("'x' and 'y' must have at least 2 levels") + } x <- table(x, y) names(dimnames(x)) <- c(xname, yname) - DNAME <- paste(paste(DNAME, collapse = "\n"), "and", - paste(DNAME2, collapse = "\n")) + DNAME <- paste( + paste(DNAME, collapse = "\n"), "and", + paste(DNAME2, collapse = "\n") + ) } - if (any(x < 0) || any(is.na((x)))) # this last one was anyNA, but only introduced in R 3.1.0 + if (any(x < 0) || any(is.na((x)))) { # this last one was anyNA, but only introduced in R 3.1.0 stop("all entries of 'x' must be nonnegative and finite") - if ((n <- sum(x)) == 0) + } + if ((n <- sum(x)) == 0) { stop("at least one entry of 'x' must be positive") - - + } + + if (is.matrix(x)) { METHOD <- "G-test of independence" nr <- as.integer(nrow(x)) nc <- as.integer(ncol(x)) - if (is.na(nr) || is.na(nc) || is.na(nr * nc)) + if (is.na(nr) || is.na(nc) || is.na(nr * nc)) { stop("invalid nrow(x) or ncol(x)", domain = NA) + } # add fisher.test suggestion - if (nr == 2 && nc == 2) + if (nr == 2 && nc == 2) { warning("`fisher.test()` is always more reliable for 2x2 tables and although much slower, often only takes seconds.") + } sr <- rowSums(x) sc <- colSums(x) E <- outer(sr, sc, "*") / n - v <- function(r, c, n) c * r * (n - r) * (n - c) / n ^ 3 + v <- function(r, c, n) c * r * (n - r) * (n - c) / n^3 V <- outer(sr, sc, v, n) dimnames(E) <- dimnames(x) - + STATISTIC <- 2 * sum(x * log(x / E)) # sum((abs(x - E) - YATES)^2/E) for chisq.test PARAMETER <- (nr - 1L) * (nc - 1L) PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE) - - } - else { - if (length(dim(x)) > 2L) + } else { + if (length(dim(x)) > 2L) { stop("invalid 'x'") - if (length(x) == 1L) + } + if (length(x) == 1L) { stop("'x' must at least have 2 elements") - if (length(x) != length(p)) + } + if (length(x) != length(p)) { stop("'x' and 'p' must have the same number of elements") - if (any(p < 0)) + } + if (any(p < 0)) { stop("probabilities must be non-negative.") + } if (abs(sum(p) - 1) > sqrt(.Machine$double.eps)) { - if (rescale.p) + if (rescale.p) { p <- p / sum(p) - else stop("probabilities must sum to 1.") + } else { + stop("probabilities must sum to 1.") + } } METHOD <- "G-test of goodness-of-fit (likelihood ratio test)" E <- n * p V <- n * p * (1 - p) STATISTIC <- 2 * sum(x * log(x / E)) # sum((x - E)^2/E) for chisq.test names(E) <- names(x) - + PARAMETER <- length(x) - 1 PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE) - } names(STATISTIC) <- "X-squared" names(PARAMETER) <- "df" - if (any(E < 5) && is.finite(PARAMETER)) + if (any(E < 5) && is.finite(PARAMETER)) { warning("G-statistic approximation may be incorrect due to E < 5") - - structure(list(statistic = STATISTIC, argument = PARAMETER, - p.value = PVAL, method = METHOD, data.name = DNAME, - observed = x, expected = E, residuals = (x - E) / sqrt(E), - stdres = (x - E) / sqrt(V)), class = "htest") + } + + structure(list( + statistic = STATISTIC, argument = PARAMETER, + p.value = PVAL, method = METHOD, data.name = DNAME, + observed = x, expected = E, residuals = (x - E) / sqrt(E), + stdres = (x - E) / sqrt(V) + ), class = "htest") } diff --git a/R/ggplot_pca.R b/R/ggplot_pca.R index e7b00172a..91f884476 100755 --- a/R/ggplot_pca.R +++ b/R/ggplot_pca.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -48,8 +48,8 @@ #' @param base_textsize the text size for all plot elements except the labels and arrows #' @param ... arguments passed on to functions #' @source The [ggplot_pca()] function is based on the `ggbiplot()` function from the `ggbiplot` package by Vince Vu, as found on GitHub: (retrieved: 2 March 2020, their latest commit: [`7325e88`](https://github.com/vqv/ggbiplot/commit/7325e880485bea4c07465a0304c470608fffb5d9); 12 February 2015). -#' -#' As per their GPL-2 licence that demands documentation of code changes, the changes made based on the source code were: +#' +#' As per their GPL-2 licence that demands documentation of code changes, the changes made based on the source code were: #' 1. Rewritten code to remove the dependency on packages `plyr`, `scales` and `grid` #' 2. Parametrised more options, like arrow and ellipse settings #' 3. Hardened all input possibilities by defining the exact type of user input for every argument @@ -59,30 +59,32 @@ #' @details The colours for labels and points can be changed by adding another scale layer for colour, such as `scale_colour_viridis_d()` and `scale_colour_brewer()`. #' @rdname ggplot_pca #' @export -#' @examples +#' @examples #' # `example_isolates` is a data set available in the AMR package. #' # See ?example_isolates. #' #' \donttest{ #' if (require("dplyr")) { -#' # calculate the resistance per group first -#' resistance_data <- example_isolates %>% -#' group_by(order = mo_order(mo), # group on anything, like order -#' genus = mo_genus(mo)) %>% # and genus as we do here; -#' filter(n() >= 30) %>% # filter on only 30 results per group -#' summarise_if(is.rsi, resistance) # then get resistance of all drugs -#' +#' # calculate the resistance per group first +#' resistance_data <- example_isolates %>% +#' group_by( +#' order = mo_order(mo), # group on anything, like order +#' genus = mo_genus(mo) +#' ) %>% # and genus as we do here; +#' filter(n() >= 30) %>% # filter on only 30 results per group +#' summarise_if(is.rsi, resistance) # then get resistance of all drugs +#' #' # now conduct PCA for certain antimicrobial agents -#' pca_result <- resistance_data %>% -#' pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT) -#' +#' pca_result <- resistance_data %>% +#' pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT) +#' #' summary(pca_result) -#' +#' #' # old base R plotting method: #' biplot(pca_result) #' # new ggplot2 plotting method using this package: #' ggplot_pca(pca_result) -#' +#' #' if (require("ggplot2")) { #' ggplot_pca(pca_result) + #' scale_colour_viridis_d() + @@ -112,7 +114,6 @@ ggplot_pca <- function(x, arrows_alpha = 0.75, base_textsize = 10, ...) { - stop_ifnot_installed("ggplot2") meet_criteria(x, allow_class = c("prcomp", "princomp", "PCA", "lda")) meet_criteria(choices, allow_class = c("numeric", "integer"), has_length = 2, is_positive = TRUE, is_finite = TRUE) @@ -135,17 +136,19 @@ ggplot_pca <- function(x, meet_criteria(arrows_textangled, allow_class = "logical", has_length = 1) meet_criteria(arrows_alpha, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE) meet_criteria(base_textsize, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE) - - calculations <- pca_calculations(pca_model = x, - groups = groups, - groups_missing = missing(groups), - labels = labels, - labels_missing = missing(labels), - choices = choices, - scale = scale, - pc.biplot = pc.biplot, - ellipse_prob = ellipse_prob, - labels_text_placement = labels_text_placement) + + calculations <- pca_calculations( + pca_model = x, + groups = groups, + groups_missing = missing(groups), + labels = labels, + labels_missing = missing(labels), + choices = choices, + scale = scale, + pc.biplot = pc.biplot, + ellipse_prob = ellipse_prob, + labels_text_placement = labels_text_placement + ) choices <- calculations$choices df.u <- calculations$df.u df.v <- calculations$df.v @@ -153,111 +156,141 @@ ggplot_pca <- function(x, groups <- calculations$groups group_name <- calculations$group_name labels <- calculations$labels - + # Append the proportion of explained variance to the axis labels if ((1 - as.integer(scale)) == 0) { u.axis.labs <- paste0("Standardised PC", choices) } else { u.axis.labs <- paste0("PC", choices) } - u.axis.labs <- paste0(u.axis.labs, - paste0("\n(explained var: ", - percentage(x$sdev[choices] ^ 2 / sum(x$sdev ^ 2)), - ")")) - + u.axis.labs <- paste0( + u.axis.labs, + paste0( + "\n(explained var: ", + percentage(x$sdev[choices]^2 / sum(x$sdev^2)), + ")" + ) + ) + # Score Labels if (!is.null(labels)) { df.u$labels <- labels } - + # Grouping variable if (!is.null(groups)) { df.u$groups <- groups } - + # Base plot - g <- ggplot2::ggplot(data = df.u, - ggplot2::aes(x = xvar, y = yvar)) + - ggplot2::xlab(u.axis.labs[1]) + - ggplot2::ylab(u.axis.labs[2]) + - ggplot2::expand_limits(x = c(-1.15, 1.15), - y = c(-1.15, 1.15)) - + g <- ggplot2::ggplot( + data = df.u, + ggplot2::aes(x = xvar, y = yvar) + ) + + ggplot2::xlab(u.axis.labs[1]) + + ggplot2::ylab(u.axis.labs[2]) + + ggplot2::expand_limits( + x = c(-1.15, 1.15), + y = c(-1.15, 1.15) + ) + # Draw either labels or points if (!is.null(df.u$labels)) { if (!is.null(df.u$groups)) { g <- g + ggplot2::geom_point(ggplot2::aes(colour = groups), - alpha = points_alpha, - size = points_size) + + alpha = points_alpha, + size = points_size + ) + ggplot2::geom_text(ggplot2::aes(label = labels, colour = groups), - nudge_y = -0.05, - size = labels_textsize) + + nudge_y = -0.05, + size = labels_textsize + ) + ggplot2::labs(colour = group_name) } else { - g <- g + ggplot2::geom_point(alpha = points_alpha, - size = points_size) + + g <- g + ggplot2::geom_point( + alpha = points_alpha, + size = points_size + ) + ggplot2::geom_text(ggplot2::aes(label = labels), - nudge_y = -0.05, - size = labels_textsize) + nudge_y = -0.05, + size = labels_textsize + ) } } else { if (!is.null(df.u$groups)) { g <- g + ggplot2::geom_point(ggplot2::aes(colour = groups), - alpha = points_alpha, - size = points_size) + + alpha = points_alpha, + size = points_size + ) + ggplot2::labs(colour = group_name) } else { - g <- g + ggplot2::geom_point(alpha = points_alpha, - size = points_size) + g <- g + ggplot2::geom_point( + alpha = points_alpha, + size = points_size + ) } } - + # Overlay a concentration ellipse if there are groups if (!is.null(df.u$groups) & !is.null(ell) & isTRUE(ellipse)) { - g <- g + ggplot2::geom_path(data = ell, - ggplot2::aes(colour = groups, group = groups), - size = ellipse_size, - alpha = points_alpha) + g <- g + ggplot2::geom_path( + data = ell, + ggplot2::aes(colour = groups, group = groups), + size = ellipse_size, + alpha = points_alpha + ) } - + # Label the variable axes if (arrows == TRUE) { - g <- g + ggplot2::geom_segment(data = df.v, - ggplot2::aes(x = 0, y = 0, xend = xvar, yend = yvar), - arrow = ggplot2::arrow(length = ggplot2::unit(0.5, "picas"), - angle = 20, - ends = "last", - type = "open"), - colour = arrows_colour, - size = arrows_size, - alpha = arrows_alpha) + g <- g + ggplot2::geom_segment( + data = df.v, + ggplot2::aes(x = 0, y = 0, xend = xvar, yend = yvar), + arrow = ggplot2::arrow( + length = ggplot2::unit(0.5, "picas"), + angle = 20, + ends = "last", + type = "open" + ), + colour = arrows_colour, + size = arrows_size, + alpha = arrows_alpha + ) if (arrows_textangled == TRUE) { - g <- g + ggplot2::geom_text(data = df.v, - ggplot2::aes(label = varname, x = xvar, y = yvar, angle = angle, hjust = hjust), - colour = arrows_colour, - size = arrows_textsize, - alpha = arrows_alpha) + g <- g + ggplot2::geom_text( + data = df.v, + ggplot2::aes(label = varname, x = xvar, y = yvar, angle = angle, hjust = hjust), + colour = arrows_colour, + size = arrows_textsize, + alpha = arrows_alpha + ) } else { - g <- g + ggplot2::geom_text(data = df.v, - ggplot2::aes(label = varname, x = xvar, y = yvar, hjust = hjust), - colour = arrows_colour, - size = arrows_textsize, - alpha = arrows_alpha) + g <- g + ggplot2::geom_text( + data = df.v, + ggplot2::aes(label = varname, x = xvar, y = yvar, hjust = hjust), + colour = arrows_colour, + size = arrows_textsize, + alpha = arrows_alpha + ) } } - + # Add caption label about total explained variance - g <- g + ggplot2::labs(caption = paste0("Total explained variance: ", - percentage(sum(x$sdev[choices] ^ 2 / sum(x$sdev ^ 2))))) - + g <- g + ggplot2::labs(caption = paste0( + "Total explained variance: ", + percentage(sum(x$sdev[choices]^2 / sum(x$sdev^2))) + )) + # mark-up nicely g <- g + ggplot2::theme_minimal(base_size = base_textsize) + - ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = "grey85"), - panel.grid.minor = ggplot2::element_blank(), - # centre title and subtitle - plot.title = ggplot2::element_text(hjust = 0.5), - plot.subtitle = ggplot2::element_text(hjust = 0.5)) - + ggplot2::theme( + panel.grid.major = ggplot2::element_line(colour = "grey85"), + panel.grid.minor = ggplot2::element_blank(), + # centre title and subtitle + plot.title = ggplot2::element_text(hjust = 0.5), + plot.subtitle = ggplot2::element_text(hjust = 0.5) + ) + g } @@ -272,17 +305,19 @@ pca_calculations <- function(pca_model, pc.biplot = TRUE, ellipse_prob = 0.68, labels_text_placement = 1.5) { - non_numeric_cols <- attributes(pca_model)$non_numeric_cols if (groups_missing) { groups <- tryCatch(non_numeric_cols[[1]], - error = function(e) NULL) + error = function(e) NULL + ) group_name <- tryCatch(colnames(non_numeric_cols[1]), - error = function(e) NULL) + error = function(e) NULL + ) } if (labels_missing) { labels <- tryCatch(non_numeric_cols[[2]], - error = function(e) NULL) + error = function(e) NULL + ) } if (!is.null(groups) & is.null(labels)) { # turn them around @@ -290,7 +325,7 @@ pca_calculations <- function(pca_model, groups <- NULL group_name <- NULL } - + # Recover the SVD if (inherits(pca_model, "prcomp")) { nobs.factor <- sqrt(nrow(pca_model$x) - 1) @@ -315,66 +350,72 @@ pca_calculations <- function(pca_model, } else { stop("Expected an object of class prcomp, princomp, PCA, or lda") } - + # Scores choices <- pmin(choices, ncol(u)) obs.scale <- 1 - as.integer(scale) - df.u <- as.data.frame(sweep(u[, choices], 2, d[choices] ^ obs.scale, FUN = "*"), - stringsAsFactors = FALSE) - + df.u <- as.data.frame(sweep(u[, choices], 2, d[choices]^obs.scale, FUN = "*"), + stringsAsFactors = FALSE + ) + # Directions - v <- sweep(v, 2, d ^ as.integer(scale), FUN = "*") + v <- sweep(v, 2, d^as.integer(scale), FUN = "*") df.v <- as.data.frame(v[, choices], - stringsAsFactors = FALSE) - + stringsAsFactors = FALSE + ) + names(df.u) <- c("xvar", "yvar") names(df.v) <- names(df.u) - + if (isTRUE(pc.biplot)) { df.u <- df.u * nobs.factor } - - # Scale the radius of the correlation circle so that it corresponds to + + # Scale the radius of the correlation circle so that it corresponds to # a data ellipse for the standardized PC scores circle_prob <- 0.69 - r <- sqrt(qchisq(circle_prob, df = 2)) * prod(colMeans(df.u ^ 2)) ^ (0.25) - + r <- sqrt(qchisq(circle_prob, df = 2)) * prod(colMeans(df.u^2))^(0.25) + # Scale directions - v.scale <- rowSums(v ^ 2) + v.scale <- rowSums(v^2) df.v <- r * df.v / sqrt(max(v.scale)) - + # Grouping variable if (!is.null(groups)) { df.u$groups <- groups } - + df.v$varname <- rownames(v) - + # Variables for text label placement df.v$angle <- with(df.v, (180 / pi) * atan(yvar / xvar)) df.v$hjust <- with(df.v, (1 - labels_text_placement * sign(xvar)) / 2) - + if (!is.null(df.u$groups)) { theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) circle <- cbind(cos(theta), sin(theta)) - + df.groups <- lapply(unique(df.u$groups), function(g, df = df.u) { x <- df[which(df$groups == g), , drop = FALSE] if (nrow(x) <= 2) { - return(data.frame(X1 = numeric(0), - X2 = numeric(0), - groups = character(0), - stringsAsFactors = FALSE)) + return(data.frame( + X1 = numeric(0), + X2 = numeric(0), + groups = character(0), + stringsAsFactors = FALSE + )) } sigma <- var(cbind(x$xvar, x$yvar)) mu <- c(mean(x$xvar), mean(x$yvar)) ed <- sqrt(qchisq(ellipse_prob, df = 2)) data.frame(sweep(circle %*% chol(sigma) * ed, - MARGIN = 2, - STATS = mu, - FUN = "+"), - groups = x$groups[1], - stringsAsFactors = FALSE) + MARGIN = 2, + STATS = mu, + FUN = "+" + ), + groups = x$groups[1], + stringsAsFactors = FALSE + ) }) ell <- do.call(rbind, df.groups) if (NROW(ell) == 0) { @@ -385,13 +426,14 @@ pca_calculations <- function(pca_model, } else { ell <- NULL } - - list(choices = choices, - df.u = df.u, - df.v = df.v, - ell = ell, - groups = groups, - group_name = group_name, - labels = labels + + list( + choices = choices, + df.u = df.u, + df.v = df.v, + ell = ell, + groups = groups, + group_name = group_name, + labels = labels ) } diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index 8be36d2dd..e05243f70 100755 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -67,14 +67,13 @@ #' @examples #' \donttest{ #' if (require("ggplot2") && require("dplyr")) { -#' +#' #' # get antimicrobial results for drugs against a UTI: #' ggplot(example_isolates %>% select(AMX, NIT, FOS, TMP, CIP)) + #' geom_rsi() -#' #' } #' if (require("ggplot2") && require("dplyr")) { -#' +#' #' # prettify the plot using some additional functions: #' df <- example_isolates %>% select(AMX, NIT, FOS, TMP, CIP) #' ggplot(df) + @@ -83,35 +82,33 @@ #' scale_rsi_colours() + #' labels_rsi_count() + #' theme_rsi() -#' #' } #' if (require("ggplot2") && require("dplyr")) { -#' +#' #' # or better yet, simplify this using the wrapper function - a single command: #' example_isolates %>% #' select(AMX, NIT, FOS, TMP, CIP) %>% #' ggplot_rsi() -#' #' } #' if (require("ggplot2") && require("dplyr")) { -#' +#' #' # get only proportions and no counts: #' example_isolates %>% #' select(AMX, NIT, FOS, TMP, CIP) %>% #' ggplot_rsi(datalabels = FALSE) -#' #' } #' if (require("ggplot2") && require("dplyr")) { -#' +#' #' # add other ggplot2 arguments as you like: #' example_isolates %>% #' select(AMX, NIT, FOS, TMP, CIP) %>% -#' ggplot_rsi(width = 0.5, -#' colour = "black", -#' size = 1, -#' linetype = 2, -#' alpha = 0.25) -#' +#' ggplot_rsi( +#' width = 0.5, +#' colour = "black", +#' size = 1, +#' linetype = 2, +#' alpha = 0.25 +#' ) #' } #' if (require("ggplot2") && require("dplyr")) { #' @@ -119,55 +116,57 @@ #' example_isolates %>% #' select(AMX) %>% #' ggplot_rsi(colours = c(SI = "yellow")) -#' #' } #' if (require("ggplot2") && require("dplyr")) { #' #' # but you can also use the built-in colour-blind friendly colours for #' # your plots, where "S" is green, "I" is yellow and "R" is red: -#' data.frame(x = c("Value1", "Value2", "Value3"), -#' y = c(1, 2, 3), -#' z = c("Value4", "Value5", "Value6")) %>% +#' data.frame( +#' x = c("Value1", "Value2", "Value3"), +#' y = c(1, 2, 3), +#' z = c("Value4", "Value5", "Value6") +#' ) %>% #' ggplot() + #' geom_col(aes(x = x, y = y, fill = z)) + #' scale_rsi_colours(Value4 = "S", Value5 = "I", Value6 = "R") -#' #' } #' if (require("ggplot2") && require("dplyr")) { -#' +#' #' # resistance of ciprofloxacine per age group #' example_isolates %>% #' mutate(first_isolate = first_isolate()) %>% -#' filter(first_isolate == TRUE, -#' mo == as.mo("Escherichia coli")) %>% +#' filter( +#' first_isolate == TRUE, +#' mo == as.mo("Escherichia coli") +#' ) %>% #' # age_groups() is also a function in this AMR package: #' group_by(age_group = age_groups(age)) %>% #' select(age_group, CIP) %>% #' ggplot_rsi(x = "age_group") -#' #' } #' if (require("ggplot2") && require("dplyr")) { -#' +#' #' # a shorter version which also adjusts data label colours: #' example_isolates %>% #' select(AMX, NIT, FOS, TMP, CIP) %>% #' ggplot_rsi(colours = FALSE) -#' #' } #' if (require("ggplot2") && require("dplyr")) { #' #' # it also supports groups (don't forget to use the group var on `x` or `facet`): #' example_isolates %>% -#' filter(mo_is_gram_negative(), ward != "Outpatient") %>% +#' filter(mo_is_gram_negative(), ward != "Outpatient") %>% #' # select only UTI-specific drugs #' select(ward, AMX, NIT, FOS, TMP, CIP) %>% #' group_by(ward) %>% -#' ggplot_rsi(x = "ward", -#' facet = "antibiotic", -#' nrow = 1, -#' title = "AMR of Anti-UTI Drugs Per Ward", -#' x.title = "Ward", -#' datalabels = FALSE) +#' ggplot_rsi( +#' x = "ward", +#' facet = "antibiotic", +#' nrow = 1, +#' title = "AMR of Anti-UTI Drugs Per Ward", +#' x.title = "Ward", +#' datalabels = FALSE +#' ) #' } #' } ggplot_rsi <- function(data, @@ -184,11 +183,13 @@ ggplot_rsi <- function(data, minimum = 30, language = get_AMR_locale(), nrow = NULL, - colours = c(S = "#3CAEA3", - SI = "#3CAEA3", - I = "#F6D55C", - IR = "#ED553B", - R = "#ED553B"), + colours = c( + S = "#3CAEA3", + SI = "#3CAEA3", + I = "#F6D55C", + IR = "#ED553B", + R = "#ED553B" + ), datalabels = TRUE, datalabels.size = 2.5, datalabels.colour = "grey15", @@ -198,7 +199,6 @@ ggplot_rsi <- function(data, x.title = "Antimicrobial", y.title = "Proportion", ...) { - stop_ifnot_installed("ggplot2") meet_criteria(data, allow_class = "data.frame", contains_column_class = "rsi") meet_criteria(position, allow_class = "character", has_length = 1, is_in = c("fill", "stack", "dodge"), allow_NULL = TRUE) @@ -241,48 +241,54 @@ ggplot_rsi <- function(data, if (facet %in% c("NULL", "")) { facet <- NULL } - + if (is.null(position)) { position <- "fill" } - + p <- ggplot2::ggplot(data = data) + - geom_rsi(position = position, x = x, fill = fill, translate_ab = translate_ab, - minimum = minimum, language = language, - combine_SI = combine_SI, combine_IR = combine_IR, ...) + + geom_rsi( + position = position, x = x, fill = fill, translate_ab = translate_ab, + minimum = minimum, language = language, + combine_SI = combine_SI, combine_IR = combine_IR, ... + ) + theme_rsi() - + if (fill == "interpretation") { p <- p + scale_rsi_colours(colours = colours) } - + if (identical(position, "fill")) { # proportions, so use y scale with percentage p <- p + scale_y_percent(breaks = breaks, limits = limits) } - + if (datalabels == TRUE) { - p <- p + labels_rsi_count(position = position, - x = x, - translate_ab = translate_ab, - minimum = minimum, - language = language, - combine_SI = combine_SI, - combine_IR = combine_IR, - datalabels.size = datalabels.size, - datalabels.colour = datalabels.colour) + p <- p + labels_rsi_count( + position = position, + x = x, + translate_ab = translate_ab, + minimum = minimum, + language = language, + combine_SI = combine_SI, + combine_IR = combine_IR, + datalabels.size = datalabels.size, + datalabels.colour = datalabels.colour + ) } - + if (!is.null(facet)) { p <- p + facet_rsi(facet = facet, nrow = nrow) } - - p <- p + ggplot2::labs(title = title, - subtitle = subtitle, - caption = caption, - x = x.title, - y = y.title) - + + p <- p + ggplot2::labs( + title = title, + subtitle = subtitle, + caption = caption, + x = x.title, + y = y.title + ) + p } @@ -296,7 +302,7 @@ geom_rsi <- function(position = NULL, language = get_AMR_locale(), combine_SI = TRUE, combine_IR = FALSE, - ...) { + ...) { x <- x[1] stop_ifnot_installed("ggplot2") stop_if(is.data.frame(position), "`position` is invalid. Did you accidentally use '%>%' instead of '+'?") @@ -308,16 +314,16 @@ geom_rsi <- function(position = NULL, meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(combine_SI, allow_class = "logical", has_length = 1) meet_criteria(combine_IR, allow_class = "logical", has_length = 1) - + y <- "value" if (missing(position) | is.null(position)) { position <- "fill" } - + if (identical(position, "fill")) { position <- ggplot2::position_fill(vjust = 0.5, reverse = TRUE) } - + # we work with aes_string later on x_deparse <- deparse(substitute(x)) if (x_deparse != "x") { @@ -326,21 +332,23 @@ geom_rsi <- function(position = NULL, if (x %like% '".*"') { x <- substr(x, 2, nchar(x) - 1) } - + if (tolower(x) %in% tolower(c("ab", "abx", "antibiotics"))) { x <- "antibiotic" } else if (tolower(x) %in% tolower(c("SIR", "RSI", "interpretations", "result"))) { x <- "interpretation" } - + ggplot2::geom_col( data = function(x) { - rsi_df(data = x, - translate_ab = translate_ab, - language = language, - minimum = minimum, - combine_SI = combine_SI, - combine_IR = combine_IR) + rsi_df( + data = x, + translate_ab = translate_ab, + language = language, + minimum = minimum, + combine_SI = combine_SI, + combine_IR = combine_IR + ) }, mapping = ggplot2::aes_string(x = x, y = y, fill = fill), position = position, @@ -355,7 +363,7 @@ facet_rsi <- function(facet = c("interpretation", "antibiotic"), nrow = NULL) { stop_ifnot_installed("ggplot2") meet_criteria(facet, allow_class = "character", has_length = 1) meet_criteria(nrow, allow_class = c("numeric", "integer"), has_length = 1, allow_NULL = TRUE, is_positive = TRUE, is_finite = TRUE) - + # we work with aes_string later on facet_deparse <- deparse(substitute(facet)) if (facet_deparse != "facet") { @@ -364,13 +372,13 @@ facet_rsi <- function(facet = c("interpretation", "antibiotic"), nrow = NULL) { if (facet %like% '".*"') { facet <- substr(facet, 2, nchar(facet) - 1) } - + if (tolower(facet) %in% tolower(c("SIR", "RSI", "interpretations", "result"))) { facet <- "interpretation" } else if (tolower(facet) %in% tolower(c("ab", "abx", "antibiotics"))) { facet <- "antibiotic" } - + ggplot2::facet_wrap(facets = facet, scales = "free_x", nrow = nrow) } @@ -380,13 +388,15 @@ scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) { stop_ifnot_installed("ggplot2") meet_criteria(breaks, allow_class = c("numeric", "integer")) meet_criteria(limits, allow_class = c("numeric", "integer"), has_length = 2, allow_NULL = TRUE, allow_NA = TRUE) - + if (all(breaks[breaks != 0] > 1)) { breaks <- breaks / 100 } - ggplot2::scale_y_continuous(breaks = breaks, - labels = percentage(breaks), - limits = limits) + ggplot2::scale_y_continuous( + breaks = breaks, + labels = percentage(breaks), + limits = limits + ) } #' @rdname ggplot_rsi @@ -397,11 +407,13 @@ scale_rsi_colours <- function(..., meet_criteria(aesthetics, allow_class = "character", is_in = c("alpha", "colour", "color", "fill", "linetype", "shape", "size")) # behaviour until AMR pkg v1.5.0 and also when coming from ggplot_rsi() if ("colours" %in% names(list(...))) { - original_cols <- c(S = "#3CAEA3", - SI = "#3CAEA3", - I = "#F6D55C", - IR = "#ED553B", - R = "#ED553B") + original_cols <- c( + S = "#3CAEA3", + SI = "#3CAEA3", + I = "#F6D55C", + IR = "#ED553B", + R = "#ED553B" + ) colours <- replace(original_cols, names(list(...)$colours), list(...)$colours) # limits = force is needed in ggplot2 3.3.4 and 3.3.5, see here; # https://github.com/tidyverse/ggplot2/issues/4511#issuecomment-866185530 @@ -410,28 +422,42 @@ scale_rsi_colours <- function(..., if (identical(unlist(list(...)), FALSE)) { return(invisible()) } - - names_susceptible <- c("S", "SI", "IS", "S+I", "I+S", "susceptible", "Susceptible", - unique(TRANSLATIONS[which(TRANSLATIONS$pattern == "Susceptible"), - "replacement", drop = TRUE])) - names_incr_exposure <- c("I", "intermediate", "increased exposure", "incr. exposure", - "Increased exposure", "Incr. exposure", "Susceptible, incr. exp.", - unique(TRANSLATIONS[which(TRANSLATIONS$pattern == "Intermediate"), - "replacement", drop = TRUE]), - unique(TRANSLATIONS[which(TRANSLATIONS$pattern == "Susceptible, incr. exp."), - "replacement", drop = TRUE])) - names_resistant <- c("R", "IR", "RI", "R+I", "I+R", "resistant", "Resistant", - unique(TRANSLATIONS[which(TRANSLATIONS$pattern == "Resistant"), - "replacement", drop = TRUE])) - + + names_susceptible <- c( + "S", "SI", "IS", "S+I", "I+S", "susceptible", "Susceptible", + unique(TRANSLATIONS[which(TRANSLATIONS$pattern == "Susceptible"), + "replacement", + drop = TRUE + ]) + ) + names_incr_exposure <- c( + "I", "intermediate", "increased exposure", "incr. exposure", + "Increased exposure", "Incr. exposure", "Susceptible, incr. exp.", + unique(TRANSLATIONS[which(TRANSLATIONS$pattern == "Intermediate"), + "replacement", + drop = TRUE + ]), + unique(TRANSLATIONS[which(TRANSLATIONS$pattern == "Susceptible, incr. exp."), + "replacement", + drop = TRUE + ]) + ) + names_resistant <- c( + "R", "IR", "RI", "R+I", "I+R", "resistant", "Resistant", + unique(TRANSLATIONS[which(TRANSLATIONS$pattern == "Resistant"), + "replacement", + drop = TRUE + ]) + ) + susceptible <- rep("#3CAEA3", length(names_susceptible)) names(susceptible) <- names_susceptible incr_exposure <- rep("#F6D55C", length(names_incr_exposure)) names(incr_exposure) <- names_incr_exposure resistant <- rep("#ED553B", length(names_resistant)) names(resistant) <- names_resistant - - original_cols = c(susceptible, incr_exposure, resistant) + + original_cols <- c(susceptible, incr_exposure, resistant) dots <- c(...) # replace S, I, R as colours: scale_rsi_colours(mydatavalue = "S") dots[dots == "S"] <- "#3CAEA3" @@ -448,12 +474,14 @@ scale_rsi_colours <- function(..., theme_rsi <- function() { stop_ifnot_installed("ggplot2") ggplot2::theme_minimal(base_size = 10) + - ggplot2::theme(panel.grid.major.x = ggplot2::element_blank(), - panel.grid.minor = ggplot2::element_blank(), - panel.grid.major.y = ggplot2::element_line(colour = "grey75"), - # center title and subtitle - plot.title = ggplot2::element_text(hjust = 0.5), - plot.subtitle = ggplot2::element_text(hjust = 0.5)) + ggplot2::theme( + panel.grid.major.x = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.grid.major.y = ggplot2::element_line(colour = "grey75"), + # center title and subtitle + plot.title = ggplot2::element_text(hjust = 0.5), + plot.subtitle = ggplot2::element_text(hjust = 0.5) + ) } #' @rdname ggplot_rsi @@ -477,7 +505,7 @@ labels_rsi_count <- function(position = NULL, meet_criteria(combine_IR, allow_class = "logical", has_length = 1) meet_criteria(datalabels.size, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = TRUE) meet_criteria(datalabels.colour, allow_class = "character", has_length = 1) - + if (is.null(position)) { position <- "fill" } @@ -485,26 +513,32 @@ labels_rsi_count <- function(position = NULL, position <- ggplot2::position_fill(vjust = 0.5, reverse = TRUE) } x_name <- x - ggplot2::geom_text(mapping = ggplot2::aes_string(label = "lbl", - x = x, - y = "value"), - position = position, - inherit.aes = FALSE, - size = datalabels.size, - colour = datalabels.colour, - lineheight = 0.75, - data = function(x) { - transformed <- rsi_df(data = x, - translate_ab = translate_ab, - combine_SI = combine_SI, - combine_IR = combine_IR, - minimum = minimum, - language = language) - transformed$gr <- transformed[, x_name, drop = TRUE] - transformed %pm>% - pm_group_by(gr) %pm>% - pm_mutate(lbl = paste0("n=", isolates)) %pm>% - pm_ungroup() %pm>% - pm_select(-gr) - }) + ggplot2::geom_text( + mapping = ggplot2::aes_string( + label = "lbl", + x = x, + y = "value" + ), + position = position, + inherit.aes = FALSE, + size = datalabels.size, + colour = datalabels.colour, + lineheight = 0.75, + data = function(x) { + transformed <- rsi_df( + data = x, + translate_ab = translate_ab, + combine_SI = combine_SI, + combine_IR = combine_IR, + minimum = minimum, + language = language + ) + transformed$gr <- transformed[, x_name, drop = TRUE] + transformed %pm>% + pm_group_by(gr) %pm>% + pm_mutate(lbl = paste0("n=", isolates)) %pm>% + pm_ungroup() %pm>% + pm_select(-gr) + } + ) } diff --git a/R/guess_ab_col.R b/R/guess_ab_col.R index da005f813..b9cb20664 100755 --- a/R/guess_ab_col.R +++ b/R/guess_ab_col.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -34,8 +34,10 @@ #' @return A column name of `x`, or `NULL` when no result is found. #' @export #' @examples -#' df <- data.frame(amox = "S", -#' tetr = "R") +#' df <- data.frame( +#' amox = "S", +#' tetr = "R" +#' ) #' #' guess_ab_col(df, "amoxicillin") #' # [1] "amox" @@ -47,8 +49,10 @@ #' # [1] "tetr" #' #' # WHONET codes -#' df <- data.frame(AMP_ND10 = "R", -#' AMC_ED20 = "S") +#' df <- data.frame( +#' AMP_ND10 = "R", +#' AMC_ED20 = "S" +#' ) #' guess_ab_col(df, "ampicillin") #' # [1] "AMP_ND10" #' guess_ab_col(df, "J01CR02") @@ -57,8 +61,10 @@ #' # [1] "AMC_ED20" #' #' # Longer names take precendence: -#' df <- data.frame(AMP_ED2 = "S", -#' AMP_ED20 = "S") +#' df <- data.frame( +#' AMP_ED2 = "S", +#' AMP_ED20 = "S" +#' ) #' guess_ab_col(df, "ampicillin") #' # [1] "AMP_ED20" guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE, only_rsi_columns = FALSE) { @@ -66,30 +72,35 @@ guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE, only_r meet_criteria(search_string, allow_class = "character", has_length = 1, allow_NULL = TRUE) meet_criteria(verbose, allow_class = "logical", has_length = 1) meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) - + if (is.null(x) & is.null(search_string)) { return(as.name("guess_ab_col")) } else { meet_criteria(search_string, allow_class = "character", has_length = 1, allow_NULL = FALSE) } - - all_found <- get_column_abx(x, info = verbose, only_rsi_columns = only_rsi_columns, - verbose = verbose, fn = "guess_ab_col") + + all_found <- get_column_abx(x, + info = verbose, only_rsi_columns = only_rsi_columns, + verbose = verbose, fn = "guess_ab_col" + ) search_string.ab <- suppressWarnings(as.ab(search_string)) ab_result <- unname(all_found[names(all_found) == search_string.ab]) - + if (length(ab_result) == 0) { if (verbose == TRUE) { message_("No column found as input for ", search_string, - " (", ab_name(search_string, language = NULL, tolower = TRUE), ").", - add_fn = font_black, - as_note = FALSE) + " (", ab_name(search_string, language = NULL, tolower = TRUE), ").", + add_fn = font_black, + as_note = FALSE + ) } return(NULL) } else { if (verbose == TRUE) { - message_("Using column '", font_bold(ab_result), "' as input for ", search_string, - " (", ab_name(search_string, language = NULL, tolower = TRUE), ").") + message_( + "Using column '", font_bold(ab_result), "' as input for ", search_string, + " (", ab_name(search_string, language = NULL, tolower = TRUE), ")." + ) } return(ab_result) } @@ -106,16 +117,20 @@ get_column_abx <- function(x, reuse_previous_result = TRUE, fn = NULL) { # check if retrieved before, then get it from package environment - if (isTRUE(reuse_previous_result) && identical(unique_call_id(entire_session = FALSE, - match_fn = fn), - pkg_env$get_column_abx.call)) { + if (isTRUE(reuse_previous_result) && identical( + unique_call_id( + entire_session = FALSE, + match_fn = fn + ), + pkg_env$get_column_abx.call + )) { # so within the same call, within the same environment, we got here again. # but we could've come from another function within the same call, so now only check the columns that changed - + # first remove the columns that are not existing anymore previous <- pkg_env$get_column_abx.out current <- previous[previous %in% colnames(x)] - + # then compare columns in current call with columns in original call new_cols <- colnames(x)[!colnames(x) %in% pkg_env$get_column_abx.checked_cols] if (length(new_cols) > 0) { @@ -125,7 +140,7 @@ get_column_abx <- function(x, # order according to columns in current call current <- current[match(colnames(x)[colnames(x) %in% current], current)] } - + # update pkg environment to improve speed on next run pkg_env$get_column_abx.out <- current pkg_env$get_column_abx.checked_cols <- colnames(x) @@ -133,7 +148,7 @@ get_column_abx <- function(x, # and return right values return(pkg_env$get_column_abx.out) } - + meet_criteria(x, allow_class = "data.frame") meet_criteria(soft_dependencies, allow_class = "character", allow_NULL = TRUE) meet_criteria(hard_dependencies, allow_class = "character", allow_NULL = TRUE) @@ -141,11 +156,11 @@ get_column_abx <- function(x, meet_criteria(info, allow_class = "logical", has_length = 1) meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) meet_criteria(sort, allow_class = "logical", has_length = 1) - + if (info == TRUE) { message_("Auto-guessing columns suitable for analysis", appendLF = FALSE, as_note = FALSE) } - + x <- as.data.frame(x, stringsAsFactors = FALSE) x.bak <- x if (only_rsi_columns == TRUE) { @@ -156,8 +171,9 @@ get_column_abx <- function(x, # only test maximum of 10,000 values per column if (info == TRUE) { message_(" (using only ", font_bold("the first 10,000 rows"), ")...", - appendLF = FALSE, - as_note = FALSE) + appendLF = FALSE, + as_note = FALSE + ) } x <- x[1:10000, , drop = FALSE] } else if (info == TRUE) { @@ -165,32 +181,36 @@ get_column_abx <- function(x, } # only check columns that are a valid AB code, ATC code, name, abbreviation or synonym, - # or already have the class (as.rsi) + # or already have the class (as.rsi) # and that they have no more than 50% invalid values vectr_antibiotics <- unlist(AB_lookup$generalised_all) vectr_antibiotics <- vectr_antibiotics[!is.na(vectr_antibiotics) & nchar(vectr_antibiotics) >= 3] - x_columns <- vapply(FUN.VALUE = character(1), - colnames(x), - function(col, df = x) { - if (generalise_antibiotic_name(col) %in% vectr_antibiotics || - is.rsi(x[, col, drop = TRUE]) || - is.rsi.eligible(x[, col, drop = TRUE], threshold = 0.5) - ) { - return(col) - } else { - return(NA_character_) - } - }, USE.NAMES = FALSE) - + x_columns <- vapply( + FUN.VALUE = character(1), + colnames(x), + function(col, df = x) { + if (generalise_antibiotic_name(col) %in% vectr_antibiotics || + is.rsi(x[, col, drop = TRUE]) || + is.rsi.eligible(x[, col, drop = TRUE], threshold = 0.5) + ) { + return(col) + } else { + return(NA_character_) + } + }, USE.NAMES = FALSE + ) + x_columns <- x_columns[!is.na(x_columns)] x <- x[, x_columns, drop = FALSE] # without drop = FALSE, x will become a vector when x_columns is length 1 - df_trans <- data.frame(colnames = colnames(x), - abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)), - stringsAsFactors = FALSE) + df_trans <- data.frame( + colnames = colnames(x), + abcode = suppressWarnings(as.ab(colnames(x), info = FALSE)), + stringsAsFactors = FALSE + ) df_trans <- df_trans[!is.na(df_trans$abcode), , drop = FALSE] out <- as.character(df_trans$colnames) names(out) <- df_trans$abcode - + # add from self-defined dots (...): # such as get_column_abx(example_isolates %>% rename(thisone = AMX), amox = "thisone") all_okay <- TRUE @@ -204,8 +224,9 @@ get_column_abx <- function(x, message_(" WARNING", add_fn = list(font_yellow, font_bold), as_note = FALSE) } warning_("Invalid antibiotic reference(s): ", vector_and(names(dots)[is.na(newnames)], quotes = FALSE), - call = FALSE, - immediate = TRUE) + call = FALSE, + immediate = TRUE + ) all_okay <- FALSE } unexisting_cols <- which(!vapply(FUN.VALUE = logical(1), dots, function(col) all(col %in% x_columns))) @@ -214,7 +235,8 @@ get_column_abx <- function(x, message_(" ERROR", add_fn = list(font_red, font_bold), as_note = FALSE) } stop_("Column(s) not found: ", vector_and(unlist(dots[[unexisting_cols]]), quotes = FALSE), - call = FALSE) + call = FALSE + ) all_okay <- FALSE } # turn all NULLs to NAs @@ -226,7 +248,7 @@ get_column_abx <- function(x, # delete NAs, this will make e.g. eucast_rules(... TMP = NULL) work to prevent TMP from being used out <- out[!is.na(out)] } - + if (length(out) == 0) { if (info == TRUE & all_okay == TRUE) { message_("No columns found.") @@ -236,7 +258,7 @@ get_column_abx <- function(x, pkg_env$get_column_abx.out <- out return(out) } - + # sort on name if (sort == TRUE) { out <- out[order(names(out), out)] @@ -246,7 +268,7 @@ get_column_abx <- function(x, if (length(duplicates) > 0) { all_okay <- FALSE } - + if (info == TRUE) { if (all_okay == TRUE) { message_(" OK.", add_fn = list(font_green, font_bold), as_note = FALSE) @@ -255,27 +277,32 @@ get_column_abx <- function(x, } for (i in seq_len(length(out))) { if (verbose == TRUE & !names(out[i]) %in% names(duplicates)) { - message_("Using column '", font_bold(out[i]), "' as input for ", names(out)[i], - " (", ab_name(names(out)[i], tolower = TRUE, language = NULL), ").") + message_( + "Using column '", font_bold(out[i]), "' as input for ", names(out)[i], + " (", ab_name(names(out)[i], tolower = TRUE, language = NULL), ")." + ) } if (names(out[i]) %in% names(duplicates)) { already_set_as <- out[unname(out) == unname(out[i])][1L] - warning_(paste0("Column '", font_bold(out[i]), "' will not be used for ", - names(out)[i], " (", ab_name(names(out)[i], tolower = TRUE, language = NULL), ")", - ", as it is already set for ", - names(already_set_as), " (", ab_name(names(already_set_as), tolower = TRUE, language = NULL), ")"), - add_fn = font_red, - immediate = verbose) + warning_(paste0( + "Column '", font_bold(out[i]), "' will not be used for ", + names(out)[i], " (", ab_name(names(out)[i], tolower = TRUE, language = NULL), ")", + ", as it is already set for ", + names(already_set_as), " (", ab_name(names(already_set_as), tolower = TRUE, language = NULL), ")" + ), + add_fn = font_red, + immediate = verbose + ) } } } - + out <- out[!duplicated(names(out))] out <- out[!duplicated(unname(out))] if (sort == TRUE) { out <- out[order(names(out), out)] } - + if (!is.null(hard_dependencies)) { hard_dependencies <- unique(hard_dependencies) if (!all(hard_dependencies %in% names(out))) { @@ -290,14 +317,19 @@ get_column_abx <- function(x, if (info == TRUE & !all(soft_dependencies %in% names(out))) { # missing a soft dependency may lower the reliability missing <- soft_dependencies[!soft_dependencies %in% names(out)] - missing_msg <- vector_and(paste0(ab_name(missing, tolower = TRUE, language = NULL), - " (", font_bold(missing, collapse = NULL), ")"), - quotes = FALSE) - message_("Reliability would be improved if these antimicrobial results would be available too: ", - missing_msg) + missing_msg <- vector_and(paste0( + ab_name(missing, tolower = TRUE, language = NULL), + " (", font_bold(missing, collapse = NULL), ")" + ), + quotes = FALSE + ) + message_( + "Reliability would be improved if these antimicrobial results would be available too: ", + missing_msg + ) } } - + pkg_env$get_column_abx.call <- unique_call_id(entire_session = FALSE, match_fn = fn) pkg_env$get_column_abx.checked_cols <- colnames(x.bak) pkg_env$get_column_abx.out <- out @@ -306,12 +338,12 @@ get_column_abx <- function(x, get_ab_from_namespace <- function(x, cols_ab) { # cols_ab comes from get_column_abx() - + x <- trimws(unique(toupper(unlist(strsplit(x, ","))))) x_new <- character() for (val in x) { if (paste0("AB_", val) %in% ls(envir = asNamespace("AMR"))) { - # antibiotic group names, as defined in data-raw/pre-commit-hook.R, such as `AB_CARBAPENEMS` + # antibiotic group names, as defined in data-raw/_pre_commit_hook.R, such as `AB_CARBAPENEMS` val <- eval(parse(text = paste0("AB_", val)), envir = asNamespace("AMR")) } else if (val %in% AB_lookup$ab) { # separate drugs, such as `AMX` @@ -333,7 +365,10 @@ generate_warning_abs_missing <- function(missing, any = FALSE) { } else { any_txt <- c("", "are") } - warning_(paste0("Introducing NAs since", any_txt[1], " these antimicrobials ", any_txt[2], " required: ", - vector_and(missing, quotes = FALSE)), - immediate = TRUE) + warning_(paste0( + "Introducing NAs since", any_txt[1], " these antimicrobials ", any_txt[2], " required: ", + vector_and(missing, quotes = FALSE) + ), + immediate = TRUE + ) } diff --git a/R/italicise_taxonomy.R b/R/italicise_taxonomy.R index 3dd76d76e..4edcbadab 100644 --- a/R/italicise_taxonomy.R +++ b/R/italicise_taxonomy.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,21 +24,21 @@ # ==================================================================== # #' Italicise Taxonomic Families, Genera, Species, Subspecies -#' +#' #' According to the binomial nomenclature, the lowest four taxonomic levels (family, genus, species, subspecies) should be printed in italics. This function finds taxonomic names within strings and makes them italic. #' @param string a [character] (vector) #' @param type type of conversion of the taxonomic names, either "markdown" or "ansi", see *Details* -#' @details +#' @details #' This function finds the taxonomic names and makes them italic based on the [microorganisms] data set. -#' +#' #' The taxonomic names can be italicised using markdown (the default) by adding `*` before and after the taxonomic names, or using ANSI colours by adding `\033[3m` before and `\033[23m` after the taxonomic names. If multiple ANSI colours are not available, no conversion will occur. -#' +#' #' This function also supports abbreviation of the genus if it is followed by a species, such as "E. coli" and "K. pneumoniae ozaenae". #' @export #' @examples #' italicise_taxonomy("An overview of Staphylococcus aureus isolates") #' italicise_taxonomy("An overview of S. aureus isolates") -#' +#' #' cat(italicise_taxonomy("An overview of S. aureus isolates", type = "ansi")) italicise_taxonomy <- function(string, type = c("markdown", "ansi")) { if (missing(type)) { @@ -46,7 +46,7 @@ italicise_taxonomy <- function(string, type = c("markdown", "ansi")) { } meet_criteria(string, allow_class = "character") meet_criteria(type, allow_class = "character", has_length = 1, is_in = c("markdown", "ansi")) - + if (type == "markdown") { before <- "*" after <- "*" @@ -57,57 +57,70 @@ italicise_taxonomy <- function(string, type = c("markdown", "ansi")) { before <- "\033[3m" after <- "\033[23m" } - - vapply(FUN.VALUE = character(1), - string, - function(s) { - s_split <- unlist(strsplit(s, " ")) - - search_strings <- gsub("[^a-zA-Z-]", "", s_split) - - ind_species <- search_strings != "" & - search_strings %in% MO_lookup[which(MO_lookup$rank %in% c("family", - "genus", - "species", - "subspecies", - "infraspecies", - "subsp.")), - "species", - drop = TRUE] - - ind_fullname <- search_strings != "" & - search_strings %in% c(MO_lookup[which(MO_lookup$rank %in% c("family", - "genus", - "species", - "subspecies", - "infraspecies", - "subsp.")), - "fullname", - drop = TRUE], - MO_lookup[which(MO_lookup$rank %in% c("family", - "genus", - "species", - "subspecies", - "infraspecies", - "subsp.")), - "subspecies", - drop = TRUE]) - # also support E. coli, add "E." to indices - has_previous_genera_abbr <- s_split[which(ind_species) - 1] %like_case% "^[A-Z][.]?$" - ind_species <- c(which(ind_species), which(ind_species)[has_previous_genera_abbr] - 1) - - ind <- c(ind_species, which(ind_fullname)) - - s_split[ind] <- paste0(before, s_split[ind], after) - s_paste <- paste(s_split, collapse = " ") - - # clean up a bit - s_paste <- gsub(paste0(after, " ", before), " ", s_paste, fixed = TRUE) - - s_paste - }, - USE.NAMES = FALSE) + vapply( + FUN.VALUE = character(1), + string, + function(s) { + s_split <- unlist(strsplit(s, " ")) + + search_strings <- gsub("[^a-zA-Z-]", "", s_split) + + ind_species <- search_strings != "" & + search_strings %in% MO_lookup[which(MO_lookup$rank %in% c( + "family", + "genus", + "species", + "subspecies", + "infraspecies", + "subsp." + )), + "species", + drop = TRUE + ] + + ind_fullname <- search_strings != "" & + search_strings %in% c( + MO_lookup[which(MO_lookup$rank %in% c( + "family", + "genus", + "species", + "subspecies", + "infraspecies", + "subsp." + )), + "fullname", + drop = TRUE + ], + MO_lookup[which(MO_lookup$rank %in% c( + "family", + "genus", + "species", + "subspecies", + "infraspecies", + "subsp." + )), + "subspecies", + drop = TRUE + ] + ) + + # also support E. coli, add "E." to indices + has_previous_genera_abbr <- s_split[which(ind_species) - 1] %like_case% "^[A-Z][.]?$" + ind_species <- c(which(ind_species), which(ind_species)[has_previous_genera_abbr] - 1) + + ind <- c(ind_species, which(ind_fullname)) + + s_split[ind] <- paste0(before, s_split[ind], after) + s_paste <- paste(s_split, collapse = " ") + + # clean up a bit + s_paste <- gsub(paste0(after, " ", before), " ", s_paste, fixed = TRUE) + + s_paste + }, + USE.NAMES = FALSE + ) } #' @rdname italicise_taxonomy diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R index 47448d11c..ac1e64963 100755 --- a/R/join_microorganisms.R +++ b/R/join_microorganisms.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -33,8 +33,8 @@ #' @param by a variable to join by - if left empty will search for a column with class [`mo`] (created with [as.mo()]) or will be `"mo"` if that column name exists in `x`, could otherwise be a column name of `x` with values that exist in `microorganisms$mo` (such as `by = "bacteria_id"`), or another column in [microorganisms] (but then it should be named, like `by = c("bacteria_id" = "fullname")`) #' @param suffix if there are non-joined duplicate variables in `x` and `y`, these suffixes will be added to the output to disambiguate them. Should be a [character] vector of length 2. #' @param ... ignored, only in place to allow future extensions -#' @details **Note:** As opposed to the `join()` functions of `dplyr`, [character] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix. -#' +#' @details **Note:** As opposed to the `join()` functions of `dplyr`, [character] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix. +#' #' If the `dplyr` package is installed, their join functions will be used. Otherwise, the much slower [merge()] and [interaction()] functions from base \R will be used. #' @return a [data.frame] #' @export @@ -42,21 +42,27 @@ #' left_join_microorganisms(as.mo("K. pneumoniae")) #' left_join_microorganisms("B_KLBSL_PNMN") #' -#' df <- data.frame(date = seq(from = as.Date("2018-01-01"), -#' to = as.Date("2018-01-07"), -#' by = 1), -#' bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR", -#' "E. coli", "E. coli", "E. coli")), -#' stringsAsFactors = FALSE) +#' df <- data.frame( +#' date = seq( +#' from = as.Date("2018-01-01"), +#' to = as.Date("2018-01-07"), +#' by = 1 +#' ), +#' bacteria = as.mo(c( +#' "S. aureus", "MRSA", "MSSA", "STAAUR", +#' "E. coli", "E. coli", "E. coli" +#' )), +#' stringsAsFactors = FALSE +#' ) #' colnames(df) -#' +#' #' df_joined <- left_join_microorganisms(df, "bacteria") #' colnames(df_joined) -#' +#' #' \donttest{ #' if (require("dplyr")) { #' example_isolates %>% -#' left_join_microorganisms() %>% +#' left_join_microorganisms() %>% #' colnames() #' } #' } @@ -64,7 +70,7 @@ inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { meet_criteria(x, allow_class = c("data.frame", "character")) meet_criteria(by, allow_class = "character", allow_NULL = TRUE) meet_criteria(suffix, allow_class = "character", has_length = 2) - + join_microorganisms(type = "inner_join", x = x, by = by, suffix = suffix, ...) } @@ -74,7 +80,7 @@ left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { meet_criteria(x, allow_class = c("data.frame", "character")) meet_criteria(by, allow_class = "character", allow_NULL = TRUE) meet_criteria(suffix, allow_class = "character", has_length = 2) - + join_microorganisms(type = "left_join", x = x, by = by, suffix = suffix, ...) } @@ -84,7 +90,7 @@ right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { meet_criteria(x, allow_class = c("data.frame", "character")) meet_criteria(by, allow_class = "character", allow_NULL = TRUE) meet_criteria(suffix, allow_class = "character", has_length = 2) - + join_microorganisms(type = "right_join", x = x, by = by, suffix = suffix, ...) } @@ -94,7 +100,7 @@ full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { meet_criteria(x, allow_class = c("data.frame", "character")) meet_criteria(by, allow_class = "character", allow_NULL = TRUE) meet_criteria(suffix, allow_class = "character", has_length = 2) - + join_microorganisms(type = "full_join", x = x, by = by, suffix = suffix, ...) } @@ -103,7 +109,7 @@ full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { semi_join_microorganisms <- function(x, by = NULL, ...) { meet_criteria(x, allow_class = c("data.frame", "character")) meet_criteria(by, allow_class = "character", allow_NULL = TRUE) - + join_microorganisms(type = "semi_join", x = x, by = by, ...) } @@ -112,13 +118,13 @@ semi_join_microorganisms <- function(x, by = NULL, ...) { anti_join_microorganisms <- function(x, by = NULL, ...) { meet_criteria(x, allow_class = c("data.frame", "character")) meet_criteria(by, allow_class = "character", allow_NULL = TRUE) - + join_microorganisms(type = "anti_join", x = x, by = by, ...) } join_microorganisms <- function(type, x, by, suffix, ...) { check_dataset_integrity() - + if (!is.data.frame(x)) { if (pkg_is_available("tibble", also_load = FALSE)) { x <- import_fn("tibble", "tibble")(mo = x) @@ -143,12 +149,12 @@ join_microorganisms <- function(type, x, by, suffix, ...) { } else { x[, by] <- as.mo(x[, by, drop = TRUE]) } - + if (is.null(names(by))) { # will always be joined to microorganisms$mo, so add name to that by <- stats::setNames("mo", by) } - + # use dplyr if available - it's much faster than poorman alternatives dplyr_join <- import_fn(name = type, pkg = "dplyr", error_on_fail = FALSE) if (!is.null(dplyr_join)) { @@ -162,7 +168,7 @@ join_microorganisms <- function(type, x, by, suffix, ...) { } else { joined <- join_fn(x = x, y = AMR::microorganisms, by = by, ...) } - + if ("join.mo" %in% colnames(joined)) { if ("mo" %in% colnames(joined)) { ind_mo <- which(colnames(joined) %in% c("mo", "join.mo")) @@ -172,10 +178,10 @@ join_microorganisms <- function(type, x, by, suffix, ...) { colnames(joined)[colnames(joined) == "join.mo"] <- "mo" } } - + if (type %like% "full|left|right|inner" && NROW(joined) > NROW(x)) { warning_("in `", type, "_join()`: the newly joined data set contains ", nrow(joined) - nrow(x), " rows more than the number of rows of `x`.") } - + as_original_data_class(joined, class(x.bak)) } diff --git a/R/key_antimicrobials.R b/R/key_antimicrobials.R index c7d622b3f..258206a3f 100755 --- a/R/key_antimicrobials.R +++ b/R/key_antimicrobials.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -35,15 +35,15 @@ #' @param antifungal names of antifungal agents for **fungi**, case-insensitive. Set to `NULL` to ignore. See *Details* for the default agents. #' @param only_rsi_columns a [logical] to indicate whether only columns must be included that were transformed to class `` (see [as.rsi()]) on beforehand (defaults to `FALSE`) #' @param ... ignored, only in place to allow future extensions -#' @details +#' @details #' The [key_antimicrobials()] and [all_antimicrobials()] functions are context-aware. This means that the `x` argument can be left blank if used inside a [data.frame] call, see *Examples*. -#' +#' #' The function [key_antimicrobials()] returns a [character] vector with 12 antimicrobial results for every isolate. The function [all_antimicrobials()] returns a [character] vector with all antimicrobial results for every isolate. These vectors can then be compared using [antimicrobials_equal()], to check if two isolates have generally the same antibiogram. Missing and invalid values are replaced with a dot (`"."`) by [key_antimicrobials()] and ignored by [antimicrobials_equal()]. -#' +#' #' Please see the [first_isolate()] function how these important functions enable the 'phenotype-based' method for determination of first isolates. #' #' The default antimicrobial agents used for **all rows** (set in `universal`) are: -#' +#' #' - Ampicillin #' - Amoxicillin/clavulanic acid #' - Cefuroxime @@ -52,7 +52,7 @@ #' - Trimethoprim/sulfamethoxazole #' #' The default antimicrobial agents used for **Gram-negative bacteria** (set in `gram_negative`) are: -#' +#' #' - Cefotaxime #' - Ceftazidime #' - Colistin @@ -61,17 +61,17 @@ #' - Tobramycin #' #' The default antimicrobial agents used for **Gram-positive bacteria** (set in `gram_positive`) are: -#' +#' #' - Erythromycin #' - Oxacillin #' - Rifampin #' - Teicoplanin #' - Tetracycline #' - Vancomycin -#' -#' +#' +#' #' The default antimicrobial agents used for **fungi** (set in `antifungal`) are: -#' +#' #' - Anidulafungin #' - Caspofungin #' - Fluconazole @@ -84,7 +84,7 @@ #' @examples #' # `example_isolates` is a data set available in the AMR package. #' # See ?example_isolates. -#' +#' #' # output of the `key_antimicrobials()` function could be like this: #' strainA <- "SSSRR.S.R..S" #' strainB <- "SSSIRSSSRSSS" @@ -107,7 +107,7 @@ #' # and first WEIGHTED isolates #' first_weighted = first_isolate(col_keyantimicrobials = "keyab") #' ) -#' +#' #' # Check the difference in this data set, 'weighted' results in more isolates: #' sum(my_patients$first_regular, na.rm = TRUE) #' sum(my_patients$first_weighted, na.rm = TRUE) @@ -115,14 +115,22 @@ #' } key_antimicrobials <- function(x = NULL, col_mo = NULL, - universal = c("ampicillin", "amoxicillin/clavulanic acid", "cefuroxime", - "piperacillin/tazobactam", "ciprofloxacin", "trimethoprim/sulfamethoxazole"), - gram_negative = c("gentamicin", "tobramycin", "colistin", - "cefotaxime", "ceftazidime", "meropenem"), - gram_positive = c("vancomycin", "teicoplanin", "tetracycline", - "erythromycin", "oxacillin", "rifampin"), - antifungal = c("anidulafungin", "caspofungin", "fluconazole", - "miconazole", "nystatin", "voriconazole"), + universal = c( + "ampicillin", "amoxicillin/clavulanic acid", "cefuroxime", + "piperacillin/tazobactam", "ciprofloxacin", "trimethoprim/sulfamethoxazole" + ), + gram_negative = c( + "gentamicin", "tobramycin", "colistin", + "cefotaxime", "ceftazidime", "meropenem" + ), + gram_positive = c( + "vancomycin", "teicoplanin", "tetracycline", + "erythromycin", "oxacillin", "rifampin" + ), + antifungal = c( + "anidulafungin", "caspofungin", "fluconazole", + "miconazole", "nystatin", "voriconazole" + ), only_rsi_columns = FALSE, ...) { if (is_null_or_grouped_tbl(x)) { @@ -137,11 +145,11 @@ key_antimicrobials <- function(x = NULL, meet_criteria(gram_positive, allow_class = "character", allow_NULL = TRUE) meet_criteria(antifungal, allow_class = "character", allow_NULL = TRUE) meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) - + # force regular data.frame, not a tibble or data.table x <- as.data.frame(x, stringsAsFactors = FALSE) cols <- get_column_abx(x, info = FALSE, only_rsi_columns = only_rsi_columns, fn = "key_antimicrobials") - + # try to find columns based on type # -- mo if (is.null(col_mo)) { @@ -156,68 +164,79 @@ key_antimicrobials <- function(x = NULL, gramstain <- mo_gramstain(x.mo, language = NULL) kingdom <- mo_kingdom(x.mo, language = NULL) } - + AMR_string <- function(x, values, name, filter, cols = cols) { if (is.null(values)) { return(rep(NA_character_, length(which(filter)))) } - + values_old_length <- length(values) values <- as.ab(values, flag_multiple_results = FALSE, info = FALSE) values <- cols[names(cols) %in% values] values_new_length <- length(values) - + if (values_new_length < values_old_length & - any(filter, na.rm = TRUE) & - message_not_thrown_before("key_antimicrobials", name)) { - warning_("in `key_antimicrobials()`: ", - ifelse(values_new_length == 0, - "No columns available ", - paste0("Only using ", values_new_length, " out of ", values_old_length, " defined columns ")), - "as key antimicrobials for ", name, "s. See ?key_antimicrobials.") + any(filter, na.rm = TRUE) & + message_not_thrown_before("key_antimicrobials", name)) { + warning_( + "in `key_antimicrobials()`: ", + ifelse(values_new_length == 0, + "No columns available ", + paste0("Only using ", values_new_length, " out of ", values_old_length, " defined columns ") + ), + "as key antimicrobials for ", name, "s. See ?key_antimicrobials." + ) } - + generate_antimcrobials_string(x[which(filter), c(universal, values), drop = FALSE]) } - + if (is.null(universal)) { universal <- character(0) } else { universal <- as.ab(universal, flag_multiple_results = FALSE, info = FALSE) universal <- cols[names(cols) %in% universal] } - + key_ab <- rep(NA_character_, nrow(x)) - - key_ab[which(gramstain == "Gram-negative")] <- AMR_string(x = x, - values = gram_negative, - name = "Gram-negative", - filter = gramstain == "Gram-negative", - cols = cols) - - key_ab[which(gramstain == "Gram-positive")] <- AMR_string(x = x, - values = gram_positive, - name = "Gram-positive", - filter = gramstain == "Gram-positive", - cols = cols) - - key_ab[which(kingdom == "Fungi")] <- AMR_string(x = x, - values = antifungal, - name = "antifungal", - filter = kingdom == "Fungi", - cols = cols) - + + key_ab[which(gramstain == "Gram-negative")] <- AMR_string( + x = x, + values = gram_negative, + name = "Gram-negative", + filter = gramstain == "Gram-negative", + cols = cols + ) + + key_ab[which(gramstain == "Gram-positive")] <- AMR_string( + x = x, + values = gram_positive, + name = "Gram-positive", + filter = gramstain == "Gram-positive", + cols = cols + ) + + key_ab[which(kingdom == "Fungi")] <- AMR_string( + x = x, + values = antifungal, + name = "antifungal", + filter = kingdom == "Fungi", + cols = cols + ) + # back-up - only use `universal` - key_ab[which(is.na(key_ab))] <- AMR_string(x = x, - values = character(0), - name = "", - filter = is.na(key_ab), - cols = cols) - + key_ab[which(is.na(key_ab))] <- AMR_string( + x = x, + values = character(0), + name = "", + filter = is.na(key_ab), + cols = cols + ) + if (length(unique(key_ab)) == 1) { warning_("in `key_antimicrobials()`: no distinct key antibiotics determined.") } - + key_ab } @@ -233,13 +252,15 @@ all_antimicrobials <- function(x = NULL, } meet_criteria(x, allow_class = "data.frame") # also checks dimensions to be >0 meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) - + # force regular data.frame, not a tibble or data.table x <- as.data.frame(x, stringsAsFactors = FALSE) - cols <- get_column_abx(x, only_rsi_columns = only_rsi_columns, info = FALSE, - sort = FALSE, fn = "all_antimicrobials") - - generate_antimcrobials_string(x[ , cols, drop = FALSE]) + cols <- get_column_abx(x, + only_rsi_columns = only_rsi_columns, info = FALSE, + sort = FALSE, fn = "all_antimicrobials" + ) + + generate_antimcrobials_string(x[, cols, drop = FALSE]) } generate_antimcrobials_string <- function(df) { @@ -249,26 +270,32 @@ generate_antimcrobials_string <- function(df) { if (NROW(df) == 0) { return(character(0)) } - tryCatch({ - do.call(paste0, - lapply(as.list(df), - function(x) { - x <- toupper(as.character(x)) - x[!x %in% c("R", "S", "I")] <- "." - paste(x) - })) - }, - error = function(e) rep(strrep(".", NCOL(df)), NROW(df))) + tryCatch( + { + do.call( + paste0, + lapply( + as.list(df), + function(x) { + x <- toupper(as.character(x)) + x[!x %in% c("R", "S", "I")] <- "." + paste(x) + } + ) + ) + }, + error = function(e) rep(strrep(".", NCOL(df)), NROW(df)) + ) } #' @rdname key_antimicrobials #' @export antimicrobials_equal <- function(y, - z, - type = c("points", "keyantimicrobials"), - ignore_I = TRUE, - points_threshold = 2, - ...) { + z, + type = c("points", "keyantimicrobials"), + ignore_I = TRUE, + points_threshold = 2, + ...) { meet_criteria(y, allow_class = "character") meet_criteria(z, allow_class = "character") stop_if(missing(type), "argument \"type\" is missing, with no default") @@ -289,10 +316,10 @@ antimicrobials_equal <- function(y, uniq <- unique(c(y, z)) uniq_list <- lapply(uniq, key2rsi) names(uniq_list) <- uniq - + y <- uniq_list[match(y, names(uniq_list))] z <- uniq_list[match(z, names(uniq_list))] - + determine_equality <- function(a, b, type, points_threshold, ignore_I) { if (length(a) != length(b)) { # incomparable, so not equal @@ -302,7 +329,7 @@ antimicrobials_equal <- function(y, NA_ind <- which(is.na(a) | is.na(b)) a[NA_ind] <- NA_real_ b[NA_ind] <- NA_real_ - + if (type == "points") { # count points for every single character: # - no change is 0 points @@ -320,14 +347,18 @@ antimicrobials_equal <- function(y, all(a == b, na.rm = TRUE) } } - out <- unlist(mapply(FUN = determine_equality, - y, - z, - MoreArgs = list(type = type, - points_threshold = points_threshold, - ignore_I = ignore_I), - SIMPLIFY = FALSE, - USE.NAMES = FALSE)) + out <- unlist(mapply( + FUN = determine_equality, + y, + z, + MoreArgs = list( + type = type, + points_threshold = points_threshold, + ignore_I = ignore_I + ), + SIMPLIFY = FALSE, + USE.NAMES = FALSE + )) out[is.na(y) | is.na(z)] <- NA out } diff --git a/R/kurtosis.R b/R/kurtosis.R index 87f49795c..41cd7763b 100755 --- a/R/kurtosis.R +++ b/R/kurtosis.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -32,7 +32,7 @@ #' @seealso [skewness()] #' @rdname kurtosis #' @export -#' @examples +#' @examples #' kurtosis(rnorm(10000)) #' kurtosis(rnorm(10000), excess = TRUE) kurtosis <- function(x, na.rm = FALSE, excess = FALSE) { diff --git a/R/like.R b/R/like.R index 7cb4021f2..172700141 100755 --- a/R/like.R +++ b/R/like.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -39,7 +39,7 @@ #' * Support multiple patterns #' * Check if `pattern` is a valid regular expression and sets `fixed = TRUE` if not, to greatly improve speed (vectorised over `pattern`) #' * Always use compatibility with Perl unless `fixed = TRUE`, to greatly improve speed -#' +#' #' Using RStudio? The `%like%`/`%unlike%` functions can also be directly inserted in your code from the Addins menu and can have its own keyboard shortcut like `Shift+Ctrl+L` or `Shift+Cmd+L` (see menu `Tools` > `Modify Keyboard Shortcuts...`). If you keep pressing your shortcut, the inserted text will be iterated over `%like%` -> `%unlike%` -> `%like_case%` -> `%unlike_case%`. #' @source Idea from the [`like` function from the `data.table` package](https://github.com/Rdatatable/data.table/blob/ec1259af1bf13fc0c96a1d3f9e84d55d8106a9a4/R/like.R), although altered as explained in *Details*. #' @seealso [grepl()] @@ -49,20 +49,20 @@ #' b <- "TEST" #' a %like% b #' b %like% a -#' +#' #' # also supports multiple patterns #' a <- c("Test case", "Something different", "Yet another thing") -#' b <- c( "case", "diff", "yet") +#' b <- c("case", "diff", "yet") #' a %like% b #' a %unlike% b -#' +#' #' a[1] %like% b #' a %like% b[1] -#' +#' #' \donttest{ #' # get isolates whose name start with 'Entero' (case-insensitive) #' example_isolates[which(mo_name() %like% "^entero"), ] -#' +#' #' if (require("dplyr")) { #' example_isolates %>% #' filter(mo_name() %like% "^ent") @@ -72,7 +72,7 @@ like <- function(x, pattern, ignore.case = TRUE) { meet_criteria(x, allow_NA = TRUE) meet_criteria(pattern, allow_NA = FALSE) meet_criteria(ignore.case, allow_class = "logical", has_length = 1) - + if (all(is.na(x))) { return(rep(FALSE, length(x))) } @@ -96,18 +96,22 @@ like <- function(x, pattern, ignore.case = TRUE) { if (length(x) == 1) { x <- rep(x, length(pattern)) } else if (length(pattern) != length(x)) { - stop_("arguments `x` and `pattern` must be of same length, or either one must be 1 ", - "(`x` has length ", length(x), " and `pattern` has length ", length(pattern), ")") + stop_( + "arguments `x` and `pattern` must be of same length, or either one must be 1 ", + "(`x` has length ", length(x), " and `pattern` has length ", length(pattern), ")" + ) } unlist( - mapply(FUN = grepl, - x = x, - pattern = pattern, - fixed = fixed, - perl = !fixed, - MoreArgs = list(ignore.case = FALSE), - SIMPLIFY = FALSE, - USE.NAMES = FALSE) + mapply( + FUN = grepl, + x = x, + pattern = pattern, + fixed = fixed, + perl = !fixed, + MoreArgs = list(ignore.case = FALSE), + SIMPLIFY = FALSE, + USE.NAMES = FALSE + ) ) } } diff --git a/R/mdro.R b/R/mdro.R index 0ae413ae7..c6c652692 100755 --- a/R/mdro.R +++ b/R/mdro.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -35,74 +35,74 @@ #' @param combine_SI a [logical] to indicate whether all values of S and I must be merged into one, so resistance is only considered when isolates are R, not I. As this is the default behaviour of the [mdro()] function, it follows the redefinition by EUCAST about the interpretation of I (increased exposure) in 2019, see section 'Interpretation of S, I and R' below. When using `combine_SI = FALSE`, resistance is considered when isolates are R or I. #' @param verbose a [logical] to turn Verbose mode on and off (default is off). In Verbose mode, the function does not return the MDRO results, but instead returns a data set in logbook form with extensive info about which isolates would be MDRO-positive, or why they are not. #' @inheritSection eucast_rules Antibiotics -#' @details +#' @details #' These functions are context-aware. This means that the `x` argument can be left blank if used inside a [data.frame] call, see *Examples*. -#' +#' #' For the `pct_required_classes` argument, values above 1 will be divided by 100. This is to support both fractions (`0.75` or `3/4`) and percentages (`75`). -#' +#' #' **Note:** Every test that involves the Enterobacteriaceae family, will internally be performed using its newly named *order* Enterobacterales, since the Enterobacteriaceae family has been taxonomically reclassified by Adeolu *et al.* in 2016. Before that, Enterobacteriaceae was the only family under the Enterobacteriales (with an i) order. All species under the old Enterobacteriaceae family are still under the new Enterobacterales (without an i) order, but divided into multiple families. The way tests are performed now by this [mdro()] function makes sure that results from before 2016 and after 2016 are identical. -#' +#' #' @section Supported International / National Guidelines: -#' +#' #' Currently supported guidelines are (case-insensitive): -#' +#' #' * `guideline = "CMI2012"` (default) #' #' Magiorakos AP, Srinivasan A *et al.* "Multidrug-resistant, extensively drug-resistant and pandrug-resistant bacteria: an international expert proposal for interim standard definitions for acquired resistance." Clinical Microbiology and Infection (2012) ([link](https://www.clinicalmicrobiologyandinfection.com/article/S1198-743X(14)61632-3/fulltext)) -#' +#' #' * `guideline = "EUCAST3.3"` (or simply `guideline = "EUCAST"`) #' #' The European international guideline - EUCAST Expert Rules Version 3.3 "Intrinsic Resistance and Unusual Phenotypes" ([link](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/2021/Intrinsic_Resistance_and_Unusual_Phenotypes_Tables_v3.3_20211018.pdf)) -#' +#' #' * `guideline = "EUCAST3.2"` #' #' The European international guideline - EUCAST Expert Rules Version 3.2 "Intrinsic Resistance and Unusual Phenotypes" ([link](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/2020/Intrinsic_Resistance_and_Unusual_Phenotypes_Tables_v3.2_20200225.pdf)) -#' +#' #' * `guideline = "EUCAST3.1"` #' #' The European international guideline - EUCAST Expert Rules Version 3.1 "Intrinsic Resistance and Exceptional Phenotypes Tables" ([link](https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf)) -#' +#' #' * `guideline = "TB"` #' #' The international guideline for multi-drug resistant tuberculosis - World Health Organization "Companion handbook to the WHO guidelines for the programmatic management of drug-resistant tuberculosis" ([link](https://www.who.int/publications/i/item/9789241548809)) -#' +#' #' * `guideline = "MRGN"` #' #' The German national guideline - Mueller et al. (2015) Antimicrobial Resistance and Infection Control 4:7; \doi{10.1186/s13756-015-0047-6} -#' +#' #' * `guideline = "BRMO"` #' #' The Dutch national guideline - Rijksinstituut voor Volksgezondheid en Milieu "WIP-richtlijn BRMO (Bijzonder Resistente Micro-Organismen) (ZKH)" ([link](https://www.rivm.nl/wip-richtlijn-brmo-bijzonder-resistente-micro-organismen-zkh)) -#' +#' #' Please suggest your own (country-specific) guidelines by letting us know: . -#' +#' #' @section Using Custom Guidelines: -#' +#' #' Custom guidelines can be set with the [custom_mdro_guideline()] function. This is of great importance if you have custom rules to determine MDROs in your hospital, e.g., rules that are dependent on ward, state of contact isolation or other variables in your data. -#' +#' #' If you are familiar with the [`case_when()`][dplyr::case_when()] function of the `dplyr` package, you will recognise the input method to set your own rules. Rules must be set using what \R considers to be the 'formula notation'. The rule is written *before* the tilde (`~`) and the consequence of the rule is written *after* the tilde: -#' +#' #' ``` #' custom <- custom_mdro_guideline(CIP == "R" & age > 60 ~ "Elderly Type A", #' ERY == "R" & age > 60 ~ "Elderly Type B") #' ``` -#' -#' If a row/an isolate matches the first rule, the value after the first `~` (in this case *'Elderly Type A'*) will be set as MDRO value. Otherwise, the second rule will be tried and so on. The number of rules is unlimited. -#' +#' +#' If a row/an isolate matches the first rule, the value after the first `~` (in this case *'Elderly Type A'*) will be set as MDRO value. Otherwise, the second rule will be tried and so on. The number of rules is unlimited. +#' #' You can print the rules set in the console for an overview. Colours will help reading it if your console supports colours. -#' +#' #' ``` #' custom #' #> A set of custom MDRO rules: #' #> 1. CIP is "R" and age is higher than 60 -> Elderly Type A #' #> 2. ERY is "R" and age is higher than 60 -> Elderly Type B #' #> 3. Otherwise -> Negative -#' #> +#' #> #' #> Unmatched rows will return NA. #' ``` -#' +#' #' The outcome of the function can be used for the `guideline` argument in the [mdro()] function: -#' +#' #' ``` #' x <- mdro(example_isolates, #' guideline = custom) @@ -110,18 +110,18 @@ #' #> Negative Elderly Type A Elderly Type B #' #> 1070 198 732 #' ``` -#' +#' #' Rules can also be combined with other custom rules by using [c()]: -#' +#' #' ``` #' x <- mdro(example_isolates, -#' guideline = c(custom, +#' guideline = c(custom, #' custom_mdro_guideline(ERY == "R" & age > 50 ~ "Elderly Type C"))) #' table(x) -#' #> Negative Elderly Type A Elderly Type B Elderly Type C +#' #> Negative Elderly Type A Elderly Type B Elderly Type C #' #> 961 198 732 109 #' ``` -#' +#' #' The rules set (the `custom` object in this case) could be exported to a shared file location using [saveRDS()] if you collaborate with multiple users. The custom rules set could then be imported using [readRDS()]. #' @inheritSection as.rsi Interpretation of R and S/I #' @return @@ -142,24 +142,26 @@ #' out <- mdro(example_isolates, guideline = "EUCAST") #' str(out) #' table(out) -#' +#' #' out <- mdro(example_isolates, -#' guideline = custom_mdro_guideline(AMX == "R" ~ "Custom MDRO 1", -#' VAN == "R" ~ "Custom MDRO 2")) +#' guideline = custom_mdro_guideline( +#' AMX == "R" ~ "Custom MDRO 1", +#' VAN == "R" ~ "Custom MDRO 2" +#' ) +#' ) #' table(out) -#' +#' #' \donttest{ #' if (require("dplyr")) { #' example_isolates %>% #' mdro() %>% #' table() -#' +#' #' # no need to define `x` when used inside dplyr verbs: #' example_isolates %>% #' mutate(MDRO = mdro()) %>% #' pull(MDRO) %>% #' table() -#' #' } #' } mdro <- function(x = NULL, @@ -187,19 +189,21 @@ mdro <- function(x = NULL, meet_criteria(combine_SI, allow_class = "logical", has_length = 1) meet_criteria(verbose, allow_class = "logical", has_length = 1) meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) - + check_dataset_integrity() - + info.bak <- info # don't thrown info's more than once per call if (isTRUE(info)) { info <- message_not_thrown_before("mdro") } - + if (interactive() & verbose == TRUE & info == TRUE) { - txt <- paste0("WARNING: In Verbose mode, the mdro() function does not return the MDRO results, but instead returns a data set in logbook form with extensive info about which isolates would be MDRO-positive, or why they are not.", - "\n\nThis may overwrite your existing data if you use e.g.:", - "\ndata <- mdro(data, verbose = TRUE)\n\nDo you want to continue?") + txt <- paste0( + "WARNING: In Verbose mode, the mdro() function does not return the MDRO results, but instead returns a data set in logbook form with extensive info about which isolates would be MDRO-positive, or why they are not.", + "\n\nThis may overwrite your existing data if you use e.g.:", + "\ndata <- mdro(data, verbose = TRUE)\n\nDo you want to continue?" + ) showQuestion <- import_fn("showQuestion", "rstudioapi", error_on_fail = FALSE) if (!is.null(showQuestion)) { q_continue <- showQuestion("Using verbose = TRUE with mdro()", txt) @@ -211,7 +215,7 @@ mdro <- function(x = NULL, return(x) } } - + group_msg <- "" if (info.bak == TRUE) { # print group name if used in dplyr::group_by() @@ -236,27 +240,30 @@ mdro <- function(x = NULL, # force regular [data.frame], not a tibble or data.table x <- as.data.frame(x, stringsAsFactors = FALSE) - + if (pct_required_classes > 1) { # allow pct_required_classes = 75 -> pct_required_classes = 0.75 pct_required_classes <- pct_required_classes / 100 } - + if (!is.null(list(...)$country)) { warning_("in `mdro()`: using `country` is deprecated, use `guideline` instead. See ?mdro") guideline <- list(...)$country } - + guideline.bak <- guideline if (is.list(guideline)) { # Custom MDRO guideline --------------------------------------------------- stop_ifnot(inherits(guideline, "custom_mdro_guideline"), "use `custom_mdro_guideline()` to create custom guidelines") if (info == TRUE) { - txt <- paste0("Determining MDROs based on custom rules", - ifelse(isTRUE(attributes(guideline)$as_factor), - paste0(", resulting in factor levels: ", paste0(attributes(guideline)$values, collapse = " < ")), - ""), - ".") + txt <- paste0( + "Determining MDROs based on custom rules", + ifelse(isTRUE(attributes(guideline)$as_factor), + paste0(", resulting in factor levels: ", paste0(attributes(guideline)$values, collapse = " < ")), + "" + ), + "." + ) txt <- word_wrap(txt) cat(txt, "\n", sep = "") } @@ -266,18 +273,22 @@ mdro <- function(x = NULL, if (sum(!is.na(x$MDRO)) == 0) { cat(word_wrap(font_bold(paste0("=> Found 0 MDROs since no isolates are covered by the custom guideline")))) } else { - cat(word_wrap(font_bold(paste0("=> Found ", sum(x$MDRO != "Negative", na.rm = TRUE), - " custom defined MDROs out of ", sum(!is.na(x$MDRO)), - " isolates (", - trimws(percentage(sum(x$MDRO != "Negative", na.rm = TRUE) / sum(!is.na(x$MDRO)))), - ")\n")))) + cat(word_wrap(font_bold(paste0( + "=> Found ", sum(x$MDRO != "Negative", na.rm = TRUE), + " custom defined MDROs out of ", sum(!is.na(x$MDRO)), + " isolates (", + trimws(percentage(sum(x$MDRO != "Negative", na.rm = TRUE) / sum(!is.na(x$MDRO)))), + ")\n" + )))) } } if (verbose == TRUE) { - return(x[, c("row_number", - "MDRO", - "reason", - "columns_nonsusceptible")]) + return(x[, c( + "row_number", + "MDRO", + "reason", + "columns_nonsusceptible" + )]) } else { return(x$MDRO) } @@ -297,58 +308,58 @@ mdro <- function(x = NULL, if (guideline == "de") { guideline <- "mrgn" } - stop_ifnot(guideline %in% c("brmo", "mrgn", "eucast3.1", "eucast3.2", "eucast3.3", "tb", "cmi2012"), - "invalid guideline: ", guideline.bak) + stop_ifnot( + guideline %in% c("brmo", "mrgn", "eucast3.1", "eucast3.2", "eucast3.3", "tb", "cmi2012"), + "invalid guideline: ", guideline.bak + ) guideline <- list(code = guideline) - + # try to find columns based on type # -- mo if (is.null(col_mo)) { col_mo <- search_type_in_df(x = x, type = "mo", info = info) } if (is.null(col_mo) & guideline$code == "tb") { - message_("No column found as input for `col_mo`, ", - font_bold(paste0("assuming all rows contain ", font_italic("Mycobacterium tuberculosis"), "."))) + message_( + "No column found as input for `col_mo`, ", + font_bold(paste0("assuming all rows contain ", font_italic("Mycobacterium tuberculosis"), ".")) + ) x$mo <- as.mo("Mycobacterium tuberculosis") # consider overkill at all times: MO_lookup[which(MO_lookup$fullname == "Mycobacterium tuberculosis"), "mo", drop = TRUE] col_mo <- "mo" } stop_if(is.null(col_mo), "`col_mo` must be set") - + if (guideline$code == "cmi2012") { guideline$name <- "Multidrug-resistant, extensively drug-resistant and pandrug-resistant bacteria: an international expert proposal for interim standard definitions for acquired resistance." guideline$author <- "Magiorakos AP, Srinivasan A, Carey RB, ..., Vatopoulos A, Weber JT, Monnet DL" guideline$version <- NA guideline$source_url <- "Clinical Microbiology and Infection 18:3, 2012; doi: 10.1111/j.1469-0691.2011.03570.x" guideline$type <- "MDRs/XDRs/PDRs" - } else if (guideline$code == "eucast3.1") { guideline$name <- "EUCAST Expert Rules, \"Intrinsic Resistance and Exceptional Phenotypes Tables\"" guideline$author <- "EUCAST (European Committee on Antimicrobial Susceptibility Testing)" guideline$version <- "3.1, 2016" guideline$source_url <- "https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf" guideline$type <- "EUCAST Exceptional Phenotypes" - } else if (guideline$code == "eucast3.2") { guideline$name <- "EUCAST Expert Rules, \"Intrinsic Resistance and Unusual Phenotypes\"" guideline$author <- "EUCAST (European Committee on Antimicrobial Susceptibility Testing)" guideline$version <- "3.2, February 2020" guideline$source_url <- "https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/2020/Intrinsic_Resistance_and_Unusual_Phenotypes_Tables_v3.2_20200225.pdf" guideline$type <- "EUCAST Unusual Phenotypes" - } else if (guideline$code == "eucast3.3") { guideline$name <- "EUCAST Expert Rules, \"Intrinsic Resistance and Unusual Phenotypes\"" guideline$author <- "EUCAST (European Committee on Antimicrobial Susceptibility Testing)" guideline$version <- "3.3, October 2021" guideline$source_url <- "https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/2021/Intrinsic_Resistance_and_Unusual_Phenotypes_Tables_v3.3_20211018.pdf" guideline$type <- "EUCAST Unusual Phenotypes" - } else if (guideline$code == "tb") { guideline$name <- "Companion handbook to the WHO guidelines for the programmatic management of drug-resistant tuberculosis" guideline$author <- "WHO (World Health Organization)" guideline$version <- "WHO/HTM/TB/2014.11, 2014" guideline$source_url <- "https://www.who.int/publications/i/item/9789241548809" guideline$type <- "MDR-TB's" - + # support per country: } else if (guideline$code == "mrgn") { guideline$name <- "Cross-border comparison of the Dutch and German guidelines on multidrug-resistant Gram-negative microorganisms" @@ -356,7 +367,6 @@ mdro <- function(x = NULL, guideline$version <- NA guideline$source_url <- "Antimicrobial Resistance and Infection Control 4:7, 2015; doi: 10.1186/s13756-015-0047-6" guideline$type <- "MRGNs" - } else if (guideline$code == "brmo") { guideline$name <- "WIP-Richtlijn Bijzonder Resistente Micro-organismen (BRMO)" guideline$author <- "RIVM (Rijksinstituut voor de Volksgezondheid)" @@ -366,222 +376,243 @@ mdro <- function(x = NULL, } else { stop("This guideline is currently unsupported: ", guideline$code, call. = FALSE) } - + if (guideline$code == "cmi2012") { - cols_ab <- get_column_abx(x = x, - soft_dependencies = c( - # [table] 1 (S aureus): - "GEN", - "RIF", - "CPT", - "OXA", - "CIP", - "MFX", - "SXT", - "FUS", - "VAN", - "TEC", - "TLV", - "TGC", - "CLI", - "DAP", - "ERY", - "LNZ", - "CHL", - "FOS", - "QDA", - "TCY", - "DOX", - "MNO", - # [table] 2 (Enterococcus) - "GEH", - "STH", - "IPM", - "MEM", - "DOR", - "CIP", - "LVX", - "MFX", - "VAN", - "TEC", - "TGC", - "DAP", - "LNZ", - "AMP", - "QDA", - "DOX", - "MNO", - # [table] 3 (Enterobacteriaceae) - "GEN", - "TOB", - "AMK", - "NET", - "CPT", - "TCC", - "TZP", - "ETP", - "IPM", - "MEM", - "DOR", - "CZO", - "CXM", - "CTX", - "CAZ", - "FEP", - "FOX", - "CTT", - "CIP", - "SXT", - "TGC", - "ATM", - "AMP", - "AMC", - "SAM", - "CHL", - "FOS", - "COL", - "TCY", - "DOX", - "MNO", - # [table] 4 (Pseudomonas) - "GEN", - "TOB", - "AMK", - "NET", - "IPM", - "MEM", - "DOR", - "CAZ", - "FEP", - "CIP", - "LVX", - "TCC", - "TZP", - "ATM", - "FOS", - "COL", - "PLB", - # [table] 5 (Acinetobacter) - "GEN", - "TOB", - "AMK", - "NET", - "IPM", - "MEM", - "DOR", - "CIP", - "LVX", - "TZP", - "TCC", - "CTX", - "CRO", - "CAZ", - "FEP", - "SXT", - "SAM", - "COL", - "PLB", - "TCY", - "DOX", - "MNO"), - verbose = verbose, - info = info, - only_rsi_columns = only_rsi_columns, - fn = "mdro", - ...) + cols_ab <- get_column_abx( + x = x, + soft_dependencies = c( + # [table] 1 (S aureus): + "GEN", + "RIF", + "CPT", + "OXA", + "CIP", + "MFX", + "SXT", + "FUS", + "VAN", + "TEC", + "TLV", + "TGC", + "CLI", + "DAP", + "ERY", + "LNZ", + "CHL", + "FOS", + "QDA", + "TCY", + "DOX", + "MNO", + # [table] 2 (Enterococcus) + "GEH", + "STH", + "IPM", + "MEM", + "DOR", + "CIP", + "LVX", + "MFX", + "VAN", + "TEC", + "TGC", + "DAP", + "LNZ", + "AMP", + "QDA", + "DOX", + "MNO", + # [table] 3 (Enterobacteriaceae) + "GEN", + "TOB", + "AMK", + "NET", + "CPT", + "TCC", + "TZP", + "ETP", + "IPM", + "MEM", + "DOR", + "CZO", + "CXM", + "CTX", + "CAZ", + "FEP", + "FOX", + "CTT", + "CIP", + "SXT", + "TGC", + "ATM", + "AMP", + "AMC", + "SAM", + "CHL", + "FOS", + "COL", + "TCY", + "DOX", + "MNO", + # [table] 4 (Pseudomonas) + "GEN", + "TOB", + "AMK", + "NET", + "IPM", + "MEM", + "DOR", + "CAZ", + "FEP", + "CIP", + "LVX", + "TCC", + "TZP", + "ATM", + "FOS", + "COL", + "PLB", + # [table] 5 (Acinetobacter) + "GEN", + "TOB", + "AMK", + "NET", + "IPM", + "MEM", + "DOR", + "CIP", + "LVX", + "TZP", + "TCC", + "CTX", + "CRO", + "CAZ", + "FEP", + "SXT", + "SAM", + "COL", + "PLB", + "TCY", + "DOX", + "MNO" + ), + verbose = verbose, + info = info, + only_rsi_columns = only_rsi_columns, + fn = "mdro", + ... + ) } else if (guideline$code == "eucast3.2") { - cols_ab <- get_column_abx(x = x, - soft_dependencies = c("AMP", - "AMX", - "CIP", - "DAL", - "DAP", - "ERV", - "FDX", - "GEN", - "LNZ", - "MEM", - "MTR", - "OMC", - "ORI", - "PEN", - "QDA", - "RIF", - "TEC", - "TGC", - "TLV", - "TOB", - "TZD", - "VAN"), - verbose = verbose, - info = info, - only_rsi_columns = only_rsi_columns, - fn = "mdro", - ...) + cols_ab <- get_column_abx( + x = x, + soft_dependencies = c( + "AMP", + "AMX", + "CIP", + "DAL", + "DAP", + "ERV", + "FDX", + "GEN", + "LNZ", + "MEM", + "MTR", + "OMC", + "ORI", + "PEN", + "QDA", + "RIF", + "TEC", + "TGC", + "TLV", + "TOB", + "TZD", + "VAN" + ), + verbose = verbose, + info = info, + only_rsi_columns = only_rsi_columns, + fn = "mdro", + ... + ) } else if (guideline$code == "eucast3.3") { - cols_ab <- get_column_abx(x = x, - soft_dependencies = c("AMP", - "AMX", - "CIP", - "DAL", - "DAP", - "ERV", - "FDX", - "GEN", - "LNZ", - "MEM", - "MTR", - "OMC", - "ORI", - "PEN", - "QDA", - "RIF", - "TEC", - "TGC", - "TLV", - "TOB", - "TZD", - "VAN"), - verbose = verbose, - info = info, - only_rsi_columns = only_rsi_columns, - fn = "mdro", - ...) + cols_ab <- get_column_abx( + x = x, + soft_dependencies = c( + "AMP", + "AMX", + "CIP", + "DAL", + "DAP", + "ERV", + "FDX", + "GEN", + "LNZ", + "MEM", + "MTR", + "OMC", + "ORI", + "PEN", + "QDA", + "RIF", + "TEC", + "TGC", + "TLV", + "TOB", + "TZD", + "VAN" + ), + verbose = verbose, + info = info, + only_rsi_columns = only_rsi_columns, + fn = "mdro", + ... + ) } else if (guideline$code == "tb") { - cols_ab <- get_column_abx(x = x, - soft_dependencies = c("CAP", - "ETH", - "GAT", - "INH", - "PZA", - "RIF", - "RIB", - "RFP"), - verbose = verbose, - info = info, - only_rsi_columns = only_rsi_columns, - fn = "mdro", - ...) + cols_ab <- get_column_abx( + x = x, + soft_dependencies = c( + "CAP", + "ETH", + "GAT", + "INH", + "PZA", + "RIF", + "RIB", + "RFP" + ), + verbose = verbose, + info = info, + only_rsi_columns = only_rsi_columns, + fn = "mdro", + ... + ) } else if (guideline$code == "mrgn") { - cols_ab <- get_column_abx(x = x, - soft_dependencies = c("PIP", - "CTX", - "CAZ", - "IPM", - "MEM", - "CIP"), - verbose = verbose, - info = info, - only_rsi_columns = only_rsi_columns, - fn = "mdro", - ...) + cols_ab <- get_column_abx( + x = x, + soft_dependencies = c( + "PIP", + "CTX", + "CAZ", + "IPM", + "MEM", + "CIP" + ), + verbose = verbose, + info = info, + only_rsi_columns = only_rsi_columns, + fn = "mdro", + ... + ) } else { - cols_ab <- get_column_abx(x = x, - verbose = verbose, - info = info, - only_rsi_columns = only_rsi_columns, - fn = "mdro", - ...) + cols_ab <- get_column_abx( + x = x, + verbose = verbose, + info = info, + only_rsi_columns = only_rsi_columns, + fn = "mdro", + ... + ) } if (!"AMP" %in% names(cols_ab) & "AMX" %in% names(cols_ab)) { # ampicillin column is missing, but amoxicillin is available @@ -735,13 +766,13 @@ mdro <- function(x = NULL, abx_tb <- abx_tb[!is.na(abx_tb)] stop_if(guideline$code == "tb" & length(abx_tb) == 0, "no antimycobacterials found in data set") # nolint end - + if (combine_SI == TRUE) { search_result <- "R" } else { search_result <- c("R", "I") } - + if (info == TRUE) { if (combine_SI == TRUE) { cat(font_red("\nOnly results with 'R' are considered as resistance. Use `combine_SI = FALSE` to also consider 'I' as resistance.\n")) @@ -749,15 +780,18 @@ mdro <- function(x = NULL, cat(font_red("\nResults with 'R' or 'I' are considered as resistance. Use `combine_SI = TRUE` to only consider 'R' as resistance.\n")) } cat("\n", word_wrap("Determining multidrug-resistant organisms (MDRO), according to:"), "\n", - word_wrap(paste0(font_bold("Guideline: "), font_italic(guideline$name)), extra_indent = 11, as_note = FALSE), "\n", - word_wrap(paste0(font_bold("Author(s): "), guideline$author), extra_indent = 11, as_note = FALSE), "\n", - ifelse(!is.na(guideline$version), - paste0(word_wrap(paste0(font_bold("Version: "), guideline$version), extra_indent = 11, as_note = FALSE), "\n"), - ""), - paste0(font_bold("Source: "), guideline$source_url), - "\n\n", sep = "") + word_wrap(paste0(font_bold("Guideline: "), font_italic(guideline$name)), extra_indent = 11, as_note = FALSE), "\n", + word_wrap(paste0(font_bold("Author(s): "), guideline$author), extra_indent = 11, as_note = FALSE), "\n", + ifelse(!is.na(guideline$version), + paste0(word_wrap(paste0(font_bold("Version: "), guideline$version), extra_indent = 11, as_note = FALSE), "\n"), + "" + ), + paste0(font_bold("Source: "), guideline$source_url), + "\n\n", + sep = "" + ) } - + ab_missing <- function(ab) { isTRUE(ab %in% c(NULL, NA)) | length(ab) == 0 } @@ -769,7 +803,7 @@ mdro <- function(x = NULL, out[is.na(out)] <- FALSE out } - + # antibiotic classes # nolint start aminoglycosides <- c(TOB, GEN) @@ -780,47 +814,62 @@ mdro <- function(x = NULL, carbapenems <- c(DOR, ETP, IPM, MEM, MEV) fluoroquinolones <- c(CIP, ENX, FLE, GAT, GEM, GRX, LVX, LOM, MFX, NOR, OFX, PAZ, PEF, PRU, RFL, SPX, TMX, TVA) # nolint end - + # helper function for editing the table trans_tbl <- function(to, rows, cols, any_all) { cols <- cols[!ab_missing(cols)] cols <- cols[!is.na(cols)] if (length(rows) > 0 & length(cols) > 0) { - x[, cols] <- as.data.frame(lapply(x[, cols, drop = FALSE], - function(col) as.rsi(col)), - stringsAsFactors = FALSE) - x[rows, "columns_nonsusceptible"] <<- vapply(FUN.VALUE = character(1), - rows, - function(row, group_vct = cols) { - cols_nonsus <- vapply(FUN.VALUE = logical(1), - x[row, group_vct, drop = FALSE], - function(y) y %in% search_result) - paste(sort(c(unlist(strsplit(x[row, "columns_nonsusceptible", drop = TRUE], ", ")), - names(cols_nonsus)[cols_nonsus])), - collapse = ", ") - }) - + x[, cols] <- as.data.frame(lapply( + x[, cols, drop = FALSE], + function(col) as.rsi(col) + ), + stringsAsFactors = FALSE + ) + x[rows, "columns_nonsusceptible"] <<- vapply( + FUN.VALUE = character(1), + rows, + function(row, group_vct = cols) { + cols_nonsus <- vapply( + FUN.VALUE = logical(1), + x[row, group_vct, drop = FALSE], + function(y) y %in% search_result + ) + paste(sort(c( + unlist(strsplit(x[row, "columns_nonsusceptible", drop = TRUE], ", ")), + names(cols_nonsus)[cols_nonsus] + )), + collapse = ", " + ) + } + ) + if (any_all == "any") { search_function <- any } else if (any_all == "all") { search_function <- all } x_transposed <- as.list(as.data.frame(t(x[, cols, drop = FALSE]), - stringsAsFactors = FALSE)) - rows_affected <- vapply(FUN.VALUE = logical(1), - x_transposed, - function(y) search_function(y %in% search_result, na.rm = TRUE)) + stringsAsFactors = FALSE + )) + rows_affected <- vapply( + FUN.VALUE = logical(1), + x_transposed, + function(y) search_function(y %in% search_result, na.rm = TRUE) + ) rows_affected <- x[which(rows_affected), "row_number", drop = TRUE] rows_to_change <- rows[rows %in% rows_affected] x[rows_to_change, "MDRO"] <<- to - x[rows_to_change, "reason"] <<- paste0(any_all, - " of the required antibiotics ", - ifelse(any_all == "any", "is", "are"), - " R", - ifelse(!isTRUE(combine_SI), " or I", "")) + x[rows_to_change, "reason"] <<- paste0( + any_all, + " of the required antibiotics ", + ifelse(any_all == "any", "is", "are"), + " R", + ifelse(!isTRUE(combine_SI), " or I", "") + ) } } - + trans_tbl2 <- function(txt, rows, lst) { if (info == TRUE) { message_(txt, "...", appendLF = FALSE, as_note = FALSE) @@ -828,48 +877,63 @@ mdro <- function(x = NULL, if (length(rows) > 0) { # function specific for the CMI paper of 2012 (Magiorakos et al.) lst_vector <- unlist(lst)[!is.na(unlist(lst))] - x[, lst_vector] <- as.data.frame(lapply(x[, lst_vector, drop = FALSE], - function(col) as.rsi(col)), - stringsAsFactors = FALSE) + x[, lst_vector] <- as.data.frame(lapply( + x[, lst_vector, drop = FALSE], + function(col) as.rsi(col) + ), + stringsAsFactors = FALSE + ) x[rows, "classes_in_guideline"] <<- length(lst) - x[rows, "classes_available"] <<- vapply(FUN.VALUE = double(1), - rows, - function(row, group_tbl = lst) { - sum(vapply(FUN.VALUE = logical(1), - group_tbl, - function(group) any(unlist(x[row, group[!is.na(group)], drop = TRUE]) %in% c("S", "I", "R")))) - }) - + x[rows, "classes_available"] <<- vapply( + FUN.VALUE = double(1), + rows, + function(row, group_tbl = lst) { + sum(vapply( + FUN.VALUE = logical(1), + group_tbl, + function(group) any(unlist(x[row, group[!is.na(group)], drop = TRUE]) %in% c("S", "I", "R")) + )) + } + ) + if (verbose == TRUE) { - x[rows, "columns_nonsusceptible"] <<- vapply(FUN.VALUE = character(1), - rows, - function(row, group_vct = lst_vector) { - cols_nonsus <- vapply(FUN.VALUE = logical(1), x[row, group_vct, drop = FALSE], function(y) y %in% search_result) - paste(sort(names(cols_nonsus)[cols_nonsus]), collapse = ", ") - }) + x[rows, "columns_nonsusceptible"] <<- vapply( + FUN.VALUE = character(1), + rows, + function(row, group_vct = lst_vector) { + cols_nonsus <- vapply(FUN.VALUE = logical(1), x[row, group_vct, drop = FALSE], function(y) y %in% search_result) + paste(sort(names(cols_nonsus)[cols_nonsus]), collapse = ", ") + } + ) } - x[rows, "classes_affected"] <<- vapply(FUN.VALUE = double(1), - rows, - function(row, group_tbl = lst) { - sum(vapply(FUN.VALUE = logical(1), - group_tbl, - function(group) { - any(unlist(x[row, group[!is.na(group)], drop = TRUE]) %in% search_result, na.rm = TRUE) - }), - na.rm = TRUE) - }) + x[rows, "classes_affected"] <<- vapply( + FUN.VALUE = double(1), + rows, + function(row, group_tbl = lst) { + sum(vapply( + FUN.VALUE = logical(1), + group_tbl, + function(group) { + any(unlist(x[row, group[!is.na(group)], drop = TRUE]) %in% search_result, na.rm = TRUE) + } + ), + na.rm = TRUE + ) + } + ) # for PDR; all agents are R (or I if combine_SI = FALSE) x_transposed <- as.list(as.data.frame(t(x[rows, lst_vector, drop = FALSE]), - stringsAsFactors = FALSE)) + stringsAsFactors = FALSE + )) row_filter <- vapply(FUN.VALUE = logical(1), x_transposed, function(y) all(y %in% search_result, na.rm = TRUE)) x[which(row_filter), "classes_affected"] <<- 999 } - + if (info == TRUE) { message_(" OK.", add_fn = list(font_green, font_bold), as_note = FALSE) } } - + x[, col_mo] <- as.mo(as.character(x[, col_mo, drop = TRUE])) # rename col_mo to prevent interference with joined columns colnames(x)[colnames(x) == col_mo] <- ".col_mo" @@ -880,12 +944,12 @@ mdro <- function(x = NULL, x$row_number <- seq_len(nrow(x)) x$reason <- paste0("not covered by ", toupper(guideline$code), " guideline") x$columns_nonsusceptible <- "" - + if (guideline$code == "cmi2012") { # CMI, 2012 --------------------------------------------------------------- # Non-susceptible = R and I # (see header 'Approaches to Creating Definitions for MDR, XDR and PDR' in paper) - + # take amoxicillin if ampicillin is unavailable if (is.na(AMP) & !is.na(AMX)) { if (verbose == TRUE) { @@ -906,468 +970,595 @@ mdro <- function(x = NULL, } CTX <- CRO } - + # intrinsic resistant must not be considered for the determination of MDR, # so let's just remove them, meticulously following the paper x[which(x$genus == "Enterococcus" & x$species == "faecium"), ab_NA(IPM)] <- NA x[which(x$genus == "Enterococcus" & x$species == "faecalis"), ab_NA(QDA)] <- NA - x[which((x$genus == "Providencia" & x$species == "rettgeri") - | (x$genus == "Providencia" & x$species == "stuartii")), ab_NA(c(GEN, TOB, NET))] <- NA + x[which((x$genus == "Providencia" & x$species == "rettgeri") | + (x$genus == "Providencia" & x$species == "stuartii")), ab_NA(c(GEN, TOB, NET))] <- NA x[which(x$genus == "Escherichia" & x$species == "hermannii"), ab_NA(c(TCC, TZP))] <- NA - x[which((x$genus == "Citrobacter" & x$species == "freundii") - | (x$genus == "Enterobacter" & x$species == "aerogenes") - | (x$genus == "Klebsiella" & x$species == "aerogenes") # new name (2017) - | (x$genus == "Enterobacter" & x$species == "cloacae") - | (x$genus == "Hafnia" & x$species == "alvei") - | (x$genus == "Morganella" & x$species == "morganii") - | (x$genus == "Proteus" & x$species == "penneri") - | (x$genus == "Proteus" & x$species == "vulgaris") - | (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(CZO)] <- NA - x[which((x$genus == "Morganella" & x$species == "morganii") - | (x$genus == "Proteus" & x$species == "penneri") - | (x$genus == "Proteus" & x$species == "vulgaris") - | (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(CXM)] <- NA - x[which((x$genus == "Morganella" & x$species == "morganii") - | (x$genus == "Proteus" & x$species == "mirabilis") - | (x$genus == "Proteus" & x$species == "penneri") - | (x$genus == "Proteus" & x$species == "vulgaris") - | (x$genus == "Providencia" & x$species == "rettgeri") - | (x$genus == "Providencia" & x$species == "stuartii")), ab_NA(TGC)] <- NA - x[which((x$genus == "Citrobacter" & x$species == "koseri") - | (x$genus == "Citrobacter" & x$species == "freundii") - | (x$genus == "Enterobacter" & x$species == "aerogenes") - | (x$genus == "Klebsiella" & x$species == "aerogenes") # new name (2017) - | (x$genus == "Enterobacter" & x$species == "cloacae") - | (x$genus == "Escherichia" & x$species == "hermannii") - | (x$genus == "Hafnia" & x$species == "alvei") - | (x$genus == "Klebsiella") - | (x$genus == "Morganella" & x$species == "morganii") - | (x$genus == "Proteus" & x$species == "penneri") - | (x$genus == "Proteus" & x$species == "vulgaris") - | (x$genus == "Providencia" & x$species == "rettgeri") - | (x$genus == "Providencia" & x$species == "stuartii") - | (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(AMP)] <- NA - x[which((x$genus == "Citrobacter" & x$species == "freundii") - | (x$genus == "Enterobacter" & x$species == "aerogenes") - | (x$genus == "Klebsiella" & x$species == "aerogenes") # new name (2017) - | (x$genus == "Enterobacter" & x$species == "cloacae") - | (x$genus == "Hafnia" & x$species == "alvei") - | (x$genus == "Morganella" & x$species == "morganii") - | (x$genus == "Providencia" & x$species == "rettgeri") - | (x$genus == "Providencia" & x$species == "stuartii") - | (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(AMC)] <- NA - x[which((x$genus == "Citrobacter" & x$species == "freundii") - | (x$genus == "Citrobacter" & x$species == "koseri") - | (x$genus == "Enterobacter" & x$species == "aerogenes") - | (x$genus == "Klebsiella" & x$species == "aerogenes") # new name (2017) - | (x$genus == "Enterobacter" & x$species == "cloacae") - | (x$genus == "Hafnia" & x$species == "alvei") - | (x$genus == "Providencia" & x$species == "rettgeri") - | (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(SAM)] <- NA - x[which((x$genus == "Morganella" & x$species == "morganii") - | (x$genus == "Proteus" & x$species == "mirabilis") - | (x$genus == "Proteus" & x$species == "penneri") - | (x$genus == "Proteus" & x$species == "vulgaris") - | (x$genus == "Providencia" & x$species == "rettgeri") - | (x$genus == "Providencia" & x$species == "stuartii") - | (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(COL)] <- NA - x[which((x$genus == "Morganella" & x$species == "morganii") - | (x$genus == "Proteus" & x$species == "mirabilis") - | (x$genus == "Proteus" & x$species == "penneri") - | (x$genus == "Proteus" & x$species == "vulgaris") - | (x$genus == "Providencia" & x$species == "rettgeri") - | (x$genus == "Providencia" & x$species == "stuartii")), ab_NA(TCY)] <- NA - x[which((x$genus == "Morganella" & x$species == "morganii") - | (x$genus == "Proteus" & x$species == "penneri") - | (x$genus == "Proteus" & x$species == "vulgaris") - | (x$genus == "Providencia" & x$species == "rettgeri") - | (x$genus == "Providencia" & x$species == "stuartii")), ab_NA(c(DOX, MNO))] <- NA - + x[which((x$genus == "Citrobacter" & x$species == "freundii") | + (x$genus == "Enterobacter" & x$species == "aerogenes") | + (x$genus == "Klebsiella" & x$species == "aerogenes") # new name (2017) + | (x$genus == "Enterobacter" & x$species == "cloacae") | + (x$genus == "Hafnia" & x$species == "alvei") | + (x$genus == "Morganella" & x$species == "morganii") | + (x$genus == "Proteus" & x$species == "penneri") | + (x$genus == "Proteus" & x$species == "vulgaris") | + (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(CZO)] <- NA + x[which((x$genus == "Morganella" & x$species == "morganii") | + (x$genus == "Proteus" & x$species == "penneri") | + (x$genus == "Proteus" & x$species == "vulgaris") | + (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(CXM)] <- NA + x[which((x$genus == "Morganella" & x$species == "morganii") | + (x$genus == "Proteus" & x$species == "mirabilis") | + (x$genus == "Proteus" & x$species == "penneri") | + (x$genus == "Proteus" & x$species == "vulgaris") | + (x$genus == "Providencia" & x$species == "rettgeri") | + (x$genus == "Providencia" & x$species == "stuartii")), ab_NA(TGC)] <- NA + x[which((x$genus == "Citrobacter" & x$species == "koseri") | + (x$genus == "Citrobacter" & x$species == "freundii") | + (x$genus == "Enterobacter" & x$species == "aerogenes") | + (x$genus == "Klebsiella" & x$species == "aerogenes") # new name (2017) + | (x$genus == "Enterobacter" & x$species == "cloacae") | + (x$genus == "Escherichia" & x$species == "hermannii") | + (x$genus == "Hafnia" & x$species == "alvei") | + (x$genus == "Klebsiella") | + (x$genus == "Morganella" & x$species == "morganii") | + (x$genus == "Proteus" & x$species == "penneri") | + (x$genus == "Proteus" & x$species == "vulgaris") | + (x$genus == "Providencia" & x$species == "rettgeri") | + (x$genus == "Providencia" & x$species == "stuartii") | + (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(AMP)] <- NA + x[which((x$genus == "Citrobacter" & x$species == "freundii") | + (x$genus == "Enterobacter" & x$species == "aerogenes") | + (x$genus == "Klebsiella" & x$species == "aerogenes") # new name (2017) + | (x$genus == "Enterobacter" & x$species == "cloacae") | + (x$genus == "Hafnia" & x$species == "alvei") | + (x$genus == "Morganella" & x$species == "morganii") | + (x$genus == "Providencia" & x$species == "rettgeri") | + (x$genus == "Providencia" & x$species == "stuartii") | + (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(AMC)] <- NA + x[which((x$genus == "Citrobacter" & x$species == "freundii") | + (x$genus == "Citrobacter" & x$species == "koseri") | + (x$genus == "Enterobacter" & x$species == "aerogenes") | + (x$genus == "Klebsiella" & x$species == "aerogenes") # new name (2017) + | (x$genus == "Enterobacter" & x$species == "cloacae") | + (x$genus == "Hafnia" & x$species == "alvei") | + (x$genus == "Providencia" & x$species == "rettgeri") | + (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(SAM)] <- NA + x[which((x$genus == "Morganella" & x$species == "morganii") | + (x$genus == "Proteus" & x$species == "mirabilis") | + (x$genus == "Proteus" & x$species == "penneri") | + (x$genus == "Proteus" & x$species == "vulgaris") | + (x$genus == "Providencia" & x$species == "rettgeri") | + (x$genus == "Providencia" & x$species == "stuartii") | + (x$genus == "Serratia" & x$species == "marcescens")), ab_NA(COL)] <- NA + x[which((x$genus == "Morganella" & x$species == "morganii") | + (x$genus == "Proteus" & x$species == "mirabilis") | + (x$genus == "Proteus" & x$species == "penneri") | + (x$genus == "Proteus" & x$species == "vulgaris") | + (x$genus == "Providencia" & x$species == "rettgeri") | + (x$genus == "Providencia" & x$species == "stuartii")), ab_NA(TCY)] <- NA + x[which((x$genus == "Morganella" & x$species == "morganii") | + (x$genus == "Proteus" & x$species == "penneri") | + (x$genus == "Proteus" & x$species == "vulgaris") | + (x$genus == "Providencia" & x$species == "rettgeri") | + (x$genus == "Providencia" & x$species == "stuartii")), ab_NA(c(DOX, MNO))] <- NA + x$classes_in_guideline <- NA_integer_ x$classes_available <- NA_integer_ x$classes_affected <- NA_integer_ - + # now add the MDR levels to the data - trans_tbl(2, - which(x$genus == "Staphylococcus" & x$species == "aureus"), - c(OXA, FOX), - "any") - trans_tbl2(paste("Table 1 -", font_italic("Staphylococcus aureus")), - which(x$genus == "Staphylococcus" & x$species == "aureus"), - list(GEN, - RIF, - CPT, - c(OXA, FOX), - c(CIP, MFX), - SXT, - FUS, - c(VAN, TEC, TLV), - TGC, - CLI, - DAP, - ERY, - LNZ, - CHL, - FOS, - QDA, - c(TCY, DOX, MNO))) - trans_tbl2(paste("Table 2 -", font_italic("Enterococcus"), "spp."), - which(x$genus == "Enterococcus"), - list(GEH, - STH, - c(IPM, MEM, DOR), - c(CIP, LVX, MFX), - c(VAN, TEC), - TGC, - DAP, - LNZ, - AMP, - QDA, - c(DOX, MNO))) - trans_tbl2(paste0("Table 3 - ", font_italic("Enterobacteriaceae")), - # this new order was previously 'Enterobacteriales' and contained only the family 'Enterobacteriaceae': - which(x$order == "Enterobacterales"), - list(c(GEN, TOB, AMK, NET), - CPT, - c(TCC, TZP), - c(ETP, IPM, MEM, DOR), - CZO, - CXM, - c(CTX, CAZ, FEP), - c(FOX, CTT), - CIP, - SXT, - TGC, - ATM, - AMP, - c(AMC, SAM), - CHL, - FOS, - COL, - c(TCY, DOX, MNO))) - trans_tbl2(paste("Table 4 -", font_italic("Pseudomonas aeruginosa")), - which(x$genus == "Pseudomonas" & x$species == "aeruginosa"), - list(c(GEN, TOB, AMK, NET), - c(IPM, MEM, DOR), - c(CAZ, FEP), - c(CIP, LVX), - c(TCC, TZP), - ATM, - FOS, - c(COL, PLB))) - trans_tbl2(paste("Table 5 -", font_italic("Acinetobacter"), "spp."), - which(x$genus == "Acinetobacter"), - list(c(GEN, TOB, AMK, NET), - c(IPM, MEM, DOR), - c(CIP, LVX), - c(TZP, TCC), - c(CTX, CRO, CAZ, FEP), - SXT, - SAM, - c(COL, PLB), - c(TCY, DOX, MNO))) - - # now set MDROs: + trans_tbl( + 2, + which(x$genus == "Staphylococcus" & x$species == "aureus"), + c(OXA, FOX), + "any" + ) + trans_tbl2( + paste("Table 1 -", font_italic("Staphylococcus aureus")), + which(x$genus == "Staphylococcus" & x$species == "aureus"), + list( + GEN, + RIF, + CPT, + c(OXA, FOX), + c(CIP, MFX), + SXT, + FUS, + c(VAN, TEC, TLV), + TGC, + CLI, + DAP, + ERY, + LNZ, + CHL, + FOS, + QDA, + c(TCY, DOX, MNO) + ) + ) + trans_tbl2( + paste("Table 2 -", font_italic("Enterococcus"), "spp."), + which(x$genus == "Enterococcus"), + list( + GEH, + STH, + c(IPM, MEM, DOR), + c(CIP, LVX, MFX), + c(VAN, TEC), + TGC, + DAP, + LNZ, + AMP, + QDA, + c(DOX, MNO) + ) + ) + trans_tbl2( + paste0("Table 3 - ", font_italic("Enterobacteriaceae")), + # this new order was previously 'Enterobacteriales' and contained only the family 'Enterobacteriaceae': + which(x$order == "Enterobacterales"), + list( + c(GEN, TOB, AMK, NET), + CPT, + c(TCC, TZP), + c(ETP, IPM, MEM, DOR), + CZO, + CXM, + c(CTX, CAZ, FEP), + c(FOX, CTT), + CIP, + SXT, + TGC, + ATM, + AMP, + c(AMC, SAM), + CHL, + FOS, + COL, + c(TCY, DOX, MNO) + ) + ) + trans_tbl2( + paste("Table 4 -", font_italic("Pseudomonas aeruginosa")), + which(x$genus == "Pseudomonas" & x$species == "aeruginosa"), + list( + c(GEN, TOB, AMK, NET), + c(IPM, MEM, DOR), + c(CAZ, FEP), + c(CIP, LVX), + c(TCC, TZP), + ATM, + FOS, + c(COL, PLB) + ) + ) + trans_tbl2( + paste("Table 5 -", font_italic("Acinetobacter"), "spp."), + which(x$genus == "Acinetobacter"), + list( + c(GEN, TOB, AMK, NET), + c(IPM, MEM, DOR), + c(CIP, LVX), + c(TZP, TCC), + c(CTX, CRO, CAZ, FEP), + SXT, + SAM, + c(COL, PLB), + c(TCY, DOX, MNO) + ) + ) + + # now set MDROs: # MDR (=2): >=3 classes affected x[which(x$classes_affected >= 3), "MDRO"] <- 2 if (verbose == TRUE) { - x[which(x$classes_affected >= 3), "reason"] <- paste0("at least 3 classes contain R", - ifelse(!isTRUE(combine_SI), " or I", ""), ": ", - x$classes_affected[which(x$classes_affected >= 3)], - " out of ", x$classes_available[which(x$classes_affected >= 3)], " available classes") + x[which(x$classes_affected >= 3), "reason"] <- paste0( + "at least 3 classes contain R", + ifelse(!isTRUE(combine_SI), " or I", ""), ": ", + x$classes_affected[which(x$classes_affected >= 3)], + " out of ", x$classes_available[which(x$classes_affected >= 3)], " available classes" + ) } - + # XDR (=3): all but <=2 classes affected x[which((x$classes_in_guideline - x$classes_affected) <= 2), "MDRO"] <- 3 if (verbose == TRUE) { - x[which(x$MDRO == 3), "reason"] <- paste0("less than 3 classes remain susceptible (", x$classes_in_guideline[which((x$classes_in_guideline - x$classes_affected) <= 2)] - x$classes_affected[which(x$MDRO == 3)], - " out of ", x$classes_in_guideline[which(x$MDRO == 3)], " classes)") + x[which(x$MDRO == 3), "reason"] <- paste0( + "less than 3 classes remain susceptible (", x$classes_in_guideline[which((x$classes_in_guideline - x$classes_affected) <= 2)] - x$classes_affected[which(x$MDRO == 3)], + " out of ", x$classes_in_guideline[which(x$MDRO == 3)], " classes)" + ) } - - # PDR (=4): all agents are R + + # PDR (=4): all agents are R x[which(x$classes_affected == 999 & x$classes_in_guideline == x$classes_available), "MDRO"] <- 4 if (verbose == TRUE) { - x[which(x$MDRO == 4), "reason"] <- paste("all antibiotics in all", - x$classes_in_guideline[which(x$MDRO == 4)], - "classes were tested R", - ifelse(!isTRUE(combine_SI), " or I", "")) + x[which(x$MDRO == 4), "reason"] <- paste( + "all antibiotics in all", + x$classes_in_guideline[which(x$MDRO == 4)], + "classes were tested R", + ifelse(!isTRUE(combine_SI), " or I", "") + ) } - + # not enough classes available x[which(x$MDRO %in% c(1, 3) & x$classes_available < floor(x$classes_in_guideline * pct_required_classes)), "MDRO"] <- -1 if (verbose == TRUE) { - x[which(x$MDRO == -1), "reason"] <- paste0("not enough classes available: ", x$classes_available[which(x$MDRO == -1)], - " of required ", (floor(x$classes_in_guideline * pct_required_classes))[which(x$MDRO == -1)], - " (~", percentage(pct_required_classes), " of ", x$classes_in_guideline[which(x$MDRO == -1)], ")") + x[which(x$MDRO == -1), "reason"] <- paste0( + "not enough classes available: ", x$classes_available[which(x$MDRO == -1)], + " of required ", (floor(x$classes_in_guideline * pct_required_classes))[which(x$MDRO == -1)], + " (~", percentage(pct_required_classes), " of ", x$classes_in_guideline[which(x$MDRO == -1)], ")" + ) } - + # add antibiotic names of resistant ones to verbose output - } - + if (guideline$code == "eucast3.1") { # EUCAST 3.1 -------------------------------------------------------------- # Table 5 - trans_tbl(3, - which(x$order == "Enterobacterales" - | (x$genus == "Pseudomonas" & x$species == "aeruginosa") - | x$genus == "Acinetobacter"), - COL, - "all") - trans_tbl(3, - which(x$genus == "Salmonella" & x$species == "Typhi"), - c(carbapenems, fluoroquinolones), - "any") - trans_tbl(3, - which(x$genus == "Haemophilus" & x$species == "influenzae"), - c(cephalosporins_3rd, carbapenems, fluoroquinolones), - "any") - trans_tbl(3, - which(x$genus == "Moraxella" & x$species == "catarrhalis"), - c(cephalosporins_3rd, fluoroquinolones), - "any") - trans_tbl(3, - which(x$genus == "Neisseria" & x$species == "meningitidis"), - c(cephalosporins_3rd, fluoroquinolones), - "any") - trans_tbl(3, - which(x$genus == "Neisseria" & x$species == "gonorrhoeae"), - AZM, - "any") + trans_tbl( + 3, + which(x$order == "Enterobacterales" | + (x$genus == "Pseudomonas" & x$species == "aeruginosa") | + x$genus == "Acinetobacter"), + COL, + "all" + ) + trans_tbl( + 3, + which(x$genus == "Salmonella" & x$species == "Typhi"), + c(carbapenems, fluoroquinolones), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Haemophilus" & x$species == "influenzae"), + c(cephalosporins_3rd, carbapenems, fluoroquinolones), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Moraxella" & x$species == "catarrhalis"), + c(cephalosporins_3rd, fluoroquinolones), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Neisseria" & x$species == "meningitidis"), + c(cephalosporins_3rd, fluoroquinolones), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Neisseria" & x$species == "gonorrhoeae"), + AZM, + "any" + ) # Table 6 - trans_tbl(3, - which(x$fullname %like% "^(Coagulase-negative|Staphylococcus (aureus|epidermidis|hominis|haemolyticus|intermedius|pseudointermedius))"), - c(VAN, TEC, DAP, LNZ, QDA, TGC), - "any") - trans_tbl(3, - which(x$genus == "Corynebacterium"), - c(VAN, TEC, DAP, LNZ, QDA, TGC), - "any") - trans_tbl(3, - which(x$genus == "Streptococcus" & x$species == "pneumoniae"), - c(carbapenems, VAN, TEC, DAP, LNZ, QDA, TGC, RIF), - "any") - trans_tbl(3, # Sr. groups A/B/C/G - which(x$fullname %like% "^Streptococcus (group (A|B|C|G)|pyogenes|agalactiae|equisimilis|equi|zooepidemicus|dysgalactiae|anginosus)"), - c(PEN, cephalosporins, VAN, TEC, DAP, LNZ, QDA, TGC), - "any") - trans_tbl(3, - which(x$genus == "Enterococcus"), - c(DAP, LNZ, TGC, TEC), - "any") - trans_tbl(3, - which(x$genus == "Enterococcus" & x$species == "faecalis"), - c(AMP, AMX), - "any") + trans_tbl( + 3, + which(x$fullname %like% "^(Coagulase-negative|Staphylococcus (aureus|epidermidis|hominis|haemolyticus|intermedius|pseudointermedius))"), + c(VAN, TEC, DAP, LNZ, QDA, TGC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Corynebacterium"), + c(VAN, TEC, DAP, LNZ, QDA, TGC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Streptococcus" & x$species == "pneumoniae"), + c(carbapenems, VAN, TEC, DAP, LNZ, QDA, TGC, RIF), + "any" + ) + trans_tbl( + 3, # Sr. groups A/B/C/G + which(x$fullname %like% "^Streptococcus (group (A|B|C|G)|pyogenes|agalactiae|equisimilis|equi|zooepidemicus|dysgalactiae|anginosus)"), + c(PEN, cephalosporins, VAN, TEC, DAP, LNZ, QDA, TGC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Enterococcus"), + c(DAP, LNZ, TGC, TEC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Enterococcus" & x$species == "faecalis"), + c(AMP, AMX), + "any" + ) # Table 7 - trans_tbl(3, - which(x$genus == "Bacteroides"), - MTR, - "any") - trans_tbl(3, - which(x$genus == "Clostridium" & x$species == "difficile"), - c(MTR, VAN), - "any") + trans_tbl( + 3, + which(x$genus == "Bacteroides"), + MTR, + "any" + ) + trans_tbl( + 3, + which(x$genus == "Clostridium" & x$species == "difficile"), + c(MTR, VAN), + "any" + ) } - + if (guideline$code == "eucast3.2") { # EUCAST 3.2 -------------------------------------------------------------- # Table 6 - trans_tbl(3, - which((x$order == "Enterobacterales" & - !x$family == "Morganellaceae" & - !(x$genus == "Serratia" & x$species == "marcescens")) - | (x$genus == "Pseudomonas" & x$species == "aeruginosa") - | x$genus == "Acinetobacter"), - COL, - "all") - trans_tbl(3, - which(x$genus == "Salmonella" & x$species == "Typhi"), - c(carbapenems), - "any") - trans_tbl(3, - which(x$genus == "Haemophilus" & x$species == "influenzae"), - c(cephalosporins_3rd, carbapenems, fluoroquinolones), - "any") - trans_tbl(3, - which(x$genus == "Moraxella" & x$species == "catarrhalis"), - c(cephalosporins_3rd, fluoroquinolones), - "any") - trans_tbl(3, - which(x$genus == "Neisseria" & x$species == "meningitidis"), - c(cephalosporins_3rd, fluoroquinolones), - "any") - trans_tbl(3, - which(x$genus == "Neisseria" & x$species == "gonorrhoeae"), - SPT, - "any") + trans_tbl( + 3, + which((x$order == "Enterobacterales" & + !x$family == "Morganellaceae" & + !(x$genus == "Serratia" & x$species == "marcescens")) | + (x$genus == "Pseudomonas" & x$species == "aeruginosa") | + x$genus == "Acinetobacter"), + COL, + "all" + ) + trans_tbl( + 3, + which(x$genus == "Salmonella" & x$species == "Typhi"), + c(carbapenems), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Haemophilus" & x$species == "influenzae"), + c(cephalosporins_3rd, carbapenems, fluoroquinolones), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Moraxella" & x$species == "catarrhalis"), + c(cephalosporins_3rd, fluoroquinolones), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Neisseria" & x$species == "meningitidis"), + c(cephalosporins_3rd, fluoroquinolones), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Neisseria" & x$species == "gonorrhoeae"), + SPT, + "any" + ) # Table 7 - trans_tbl(3, - which(x$genus == "Staphylococcus" & x$species == "aureus"), - c(VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), - "any") - trans_tbl(3, - which(x$mo %in% MO_CONS), # coagulase-negative Staphylococcus - c(VAN, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), - "any") - trans_tbl(3, - which(x$genus == "Corynebacterium"), - c(VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC), - "any") - trans_tbl(3, - which(x$genus == "Streptococcus" & x$species == "pneumoniae"), - c(carbapenems, VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC, RIF), - "any") - trans_tbl(3, # Sr. groups A/B/C/G - which(x$mo %in% MO_STREP_ABCG), - c(PEN, cephalosporins, VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), - "any") - trans_tbl(3, - which(x$genus == "Enterococcus"), - c(DAP, LNZ, TGC, ERV, OMC, TEC), - "any") - trans_tbl(3, - which(x$genus == "Enterococcus" & x$species == "faecalis"), - c(AMP, AMX), - "any") + trans_tbl( + 3, + which(x$genus == "Staphylococcus" & x$species == "aureus"), + c(VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), + "any" + ) + trans_tbl( + 3, + which(x$mo %in% MO_CONS), # coagulase-negative Staphylococcus + c(VAN, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Corynebacterium"), + c(VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Streptococcus" & x$species == "pneumoniae"), + c(carbapenems, VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC, RIF), + "any" + ) + trans_tbl( + 3, # Sr. groups A/B/C/G + which(x$mo %in% MO_STREP_ABCG), + c(PEN, cephalosporins, VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Enterococcus"), + c(DAP, LNZ, TGC, ERV, OMC, TEC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Enterococcus" & x$species == "faecalis"), + c(AMP, AMX), + "any" + ) # Table 8 - trans_tbl(3, - which(x$genus == "Bacteroides"), - MTR, - "any") - trans_tbl(3, - which(x$genus == "Clostridium" & x$species == "difficile"), - c(MTR, VAN, FDX), - "any") + trans_tbl( + 3, + which(x$genus == "Bacteroides"), + MTR, + "any" + ) + trans_tbl( + 3, + which(x$genus == "Clostridium" & x$species == "difficile"), + c(MTR, VAN, FDX), + "any" + ) } - + if (guideline$code == "eucast3.3") { # EUCAST 3.3 -------------------------------------------------------------- # note: this guideline is equal to EUCAST 3.2 - no MDRO insights changed # Table 6 - trans_tbl(3, - which((x$order == "Enterobacterales" & - !x$family == "Morganellaceae" & - !(x$genus == "Serratia" & x$species == "marcescens")) - | (x$genus == "Pseudomonas" & x$species == "aeruginosa") - | x$genus == "Acinetobacter"), - COL, - "all") - trans_tbl(3, - which(x$genus == "Salmonella" & x$species == "Typhi"), - c(carbapenems), - "any") - trans_tbl(3, - which(x$genus == "Haemophilus" & x$species == "influenzae"), - c(cephalosporins_3rd, carbapenems, fluoroquinolones), - "any") - trans_tbl(3, - which(x$genus == "Moraxella" & x$species == "catarrhalis"), - c(cephalosporins_3rd, fluoroquinolones), - "any") - trans_tbl(3, - which(x$genus == "Neisseria" & x$species == "meningitidis"), - c(cephalosporins_3rd, fluoroquinolones), - "any") - trans_tbl(3, - which(x$genus == "Neisseria" & x$species == "gonorrhoeae"), - SPT, - "any") + trans_tbl( + 3, + which((x$order == "Enterobacterales" & + !x$family == "Morganellaceae" & + !(x$genus == "Serratia" & x$species == "marcescens")) | + (x$genus == "Pseudomonas" & x$species == "aeruginosa") | + x$genus == "Acinetobacter"), + COL, + "all" + ) + trans_tbl( + 3, + which(x$genus == "Salmonella" & x$species == "Typhi"), + c(carbapenems), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Haemophilus" & x$species == "influenzae"), + c(cephalosporins_3rd, carbapenems, fluoroquinolones), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Moraxella" & x$species == "catarrhalis"), + c(cephalosporins_3rd, fluoroquinolones), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Neisseria" & x$species == "meningitidis"), + c(cephalosporins_3rd, fluoroquinolones), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Neisseria" & x$species == "gonorrhoeae"), + SPT, + "any" + ) # Table 7 - trans_tbl(3, - which(x$genus == "Staphylococcus" & x$species == "aureus"), - c(VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), - "any") - trans_tbl(3, - which(x$mo %in% MO_CONS), # coagulase-negative Staphylococcus - c(VAN, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), - "any") - trans_tbl(3, - which(x$genus == "Corynebacterium"), - c(VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC), - "any") - trans_tbl(3, - which(x$genus == "Streptococcus" & x$species == "pneumoniae"), - c(carbapenems, VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC, RIF), - "any") - trans_tbl(3, # Sr. groups A/B/C/G - which(x$mo %in% MO_STREP_ABCG), - c(PEN, cephalosporins, VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), - "any") - trans_tbl(3, - which(x$genus == "Enterococcus"), - c(DAP, LNZ, TGC, ERV, OMC, TEC), - "any") - trans_tbl(3, - which(x$genus == "Enterococcus" & x$species == "faecalis"), - c(AMP, AMX), - "any") + trans_tbl( + 3, + which(x$genus == "Staphylococcus" & x$species == "aureus"), + c(VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), + "any" + ) + trans_tbl( + 3, + which(x$mo %in% MO_CONS), # coagulase-negative Staphylococcus + c(VAN, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Corynebacterium"), + c(VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Streptococcus" & x$species == "pneumoniae"), + c(carbapenems, VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC, RIF), + "any" + ) + trans_tbl( + 3, # Sr. groups A/B/C/G + which(x$mo %in% MO_STREP_ABCG), + c(PEN, cephalosporins, VAN, TEC, TLV, DAL, ORI, DAP, LNZ, TZD, QDA, TGC, ERV, OMC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Enterococcus"), + c(DAP, LNZ, TGC, ERV, OMC, TEC), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Enterococcus" & x$species == "faecalis"), + c(AMP, AMX), + "any" + ) # Table 8 - trans_tbl(3, - which(x$genus == "Bacteroides"), - MTR, - "any") - trans_tbl(3, - which(x$genus == "Clostridium" & x$species == "difficile"), - c(MTR, VAN, FDX), - "any") + trans_tbl( + 3, + which(x$genus == "Bacteroides"), + MTR, + "any" + ) + trans_tbl( + 3, + which(x$genus == "Clostridium" & x$species == "difficile"), + c(MTR, VAN, FDX), + "any" + ) } - + if (guideline$code == "mrgn") { # Germany ----------------------------------------------------------------- - + # Table 1 - trans_tbl(2, # 3MRGN - which((x$order == "Enterobacterales" | # following in fact the old Enterobacteriaceae classification - (x$genus == "Acinetobacter" & x$species == "baumannii")) & - try_ab(x[, PIP, drop = TRUE] == "R") & - (try_ab(x[, CTX, drop = TRUE] == "R") | try_ab(x[, CAZ, drop = TRUE] == "R")) & - (try_ab(x[, IPM, drop = TRUE] != "R") | try_ab(x[, MEM, drop = TRUE] != "R")) & - try_ab(x[, CIP, drop = TRUE] == "R")), - c(PIP, CTX, CAZ, IPM, MEM, CIP), - "any") - - trans_tbl(3, # 4MRGN, overwrites 3MRGN if applicable - which((x$order == "Enterobacterales" | # following in fact the old Enterobacteriaceae classification - (x$genus == "Acinetobacter" & x$species == "baumannii")) & - try_ab(x[, PIP, drop = TRUE] == "R") & - (try_ab(x[, CTX, drop = TRUE] == "R") | try_ab(x[, CAZ, drop = TRUE] == "R")) & - (try_ab(x[, IPM, drop = TRUE] == "R") | try_ab(x[, MEM, drop = TRUE] == "R")) & - try_ab(x[, CIP, drop = TRUE] == "R")), - c(PIP, CTX, CAZ, IPM, MEM, CIP), - "any") - - trans_tbl(3, # 4MRGN, overwrites 3MRGN if applicable - which((x$order == "Enterobacterales" | # following in fact the old Enterobacteriaceae classification - (x$genus == "Acinetobacter" & x$species == "baumannii")) & - (try_ab(x[, IPM, drop = TRUE] == "R") | try_ab(x[, MEM, drop = TRUE] == "R"))), - c(IPM, MEM), - "any") - - trans_tbl(2, # 3MRGN, if only 1 group is S - which(x$genus == "Pseudomonas" & x$species == "aeruginosa" & - try_ab(x[, PIP, drop = TRUE] == "S") + - try_ab(x[, CTX, drop = TRUE] == "S") + - try_ab(x[, CAZ, drop = TRUE] == "S") + - try_ab(x[, IPM, drop = TRUE] == "S") + - try_ab(x[, MEM, drop = TRUE] == "S") + - try_ab(x[, CIP, drop = TRUE] == "S") == 1), - c(PIP, CTX, CAZ, IPM, MEM, CIP), - "any") - - trans_tbl(3, # 4MRGN otherwise - which((x$genus == "Pseudomonas" & x$species == "aeruginosa") & - try_ab(x[, PIP, drop = TRUE] == "R") & - (try_ab(x[, CTX, drop = TRUE] == "R") | try_ab(x[, CAZ, drop = TRUE] == "R")) & - (try_ab(x[, IPM, drop = TRUE] == "R") | try_ab(x[, MEM, drop = TRUE] == "R")) & - try_ab(x[, CIP, drop = TRUE] == "R")), - c(PIP, CTX, CAZ, IPM, MEM, CIP), - "any") - + trans_tbl( + 2, # 3MRGN + which((x$order == "Enterobacterales" | # following in fact the old Enterobacteriaceae classification + (x$genus == "Acinetobacter" & x$species == "baumannii")) & + try_ab(x[, PIP, drop = TRUE] == "R") & + (try_ab(x[, CTX, drop = TRUE] == "R") | try_ab(x[, CAZ, drop = TRUE] == "R")) & + (try_ab(x[, IPM, drop = TRUE] != "R") | try_ab(x[, MEM, drop = TRUE] != "R")) & + try_ab(x[, CIP, drop = TRUE] == "R")), + c(PIP, CTX, CAZ, IPM, MEM, CIP), + "any" + ) + + trans_tbl( + 3, # 4MRGN, overwrites 3MRGN if applicable + which((x$order == "Enterobacterales" | # following in fact the old Enterobacteriaceae classification + (x$genus == "Acinetobacter" & x$species == "baumannii")) & + try_ab(x[, PIP, drop = TRUE] == "R") & + (try_ab(x[, CTX, drop = TRUE] == "R") | try_ab(x[, CAZ, drop = TRUE] == "R")) & + (try_ab(x[, IPM, drop = TRUE] == "R") | try_ab(x[, MEM, drop = TRUE] == "R")) & + try_ab(x[, CIP, drop = TRUE] == "R")), + c(PIP, CTX, CAZ, IPM, MEM, CIP), + "any" + ) + + trans_tbl( + 3, # 4MRGN, overwrites 3MRGN if applicable + which((x$order == "Enterobacterales" | # following in fact the old Enterobacteriaceae classification + (x$genus == "Acinetobacter" & x$species == "baumannii")) & + (try_ab(x[, IPM, drop = TRUE] == "R") | try_ab(x[, MEM, drop = TRUE] == "R"))), + c(IPM, MEM), + "any" + ) + + trans_tbl( + 2, # 3MRGN, if only 1 group is S + which(x$genus == "Pseudomonas" & x$species == "aeruginosa" & + try_ab(x[, PIP, drop = TRUE] == "S") + + try_ab(x[, CTX, drop = TRUE] == "S") + + try_ab(x[, CAZ, drop = TRUE] == "S") + + try_ab(x[, IPM, drop = TRUE] == "S") + + try_ab(x[, MEM, drop = TRUE] == "S") + + try_ab(x[, CIP, drop = TRUE] == "S") == 1), + c(PIP, CTX, CAZ, IPM, MEM, CIP), + "any" + ) + + trans_tbl( + 3, # 4MRGN otherwise + which((x$genus == "Pseudomonas" & x$species == "aeruginosa") & + try_ab(x[, PIP, drop = TRUE] == "R") & + (try_ab(x[, CTX, drop = TRUE] == "R") | try_ab(x[, CAZ, drop = TRUE] == "R")) & + (try_ab(x[, IPM, drop = TRUE] == "R") | try_ab(x[, MEM, drop = TRUE] == "R")) & + try_ab(x[, CIP, drop = TRUE] == "R")), + c(PIP, CTX, CAZ, IPM, MEM, CIP), + "any" + ) + x[which(x$MDRO == 2), "reason"] <- "3MRGN" x[which(x$MDRO == 3), "reason"] <- "4MRGN" } - + if (guideline$code == "brmo") { # Netherlands ------------------------------------------------------------- aminoglycosides <- aminoglycosides[!is.na(aminoglycosides)] @@ -1380,43 +1571,55 @@ mdro <- function(x = NULL, if (length(ESBLs) != 2) { ESBLs <- character(0) } - + # Table 1 - trans_tbl(3, - which(x$order == "Enterobacterales"), # following in fact the old Enterobacteriaceae classification - c(aminoglycosides, fluoroquinolones), - "all") - - trans_tbl(2, - which(x$order == "Enterobacterales"), # following in fact the old Enterobacteriaceae classification - carbapenems, - "any") - - trans_tbl(2, - which(x$order == "Enterobacterales"), # following in fact the old Enterobacteriaceae classification - ESBLs, - "all") - + trans_tbl( + 3, + which(x$order == "Enterobacterales"), # following in fact the old Enterobacteriaceae classification + c(aminoglycosides, fluoroquinolones), + "all" + ) + + trans_tbl( + 2, + which(x$order == "Enterobacterales"), # following in fact the old Enterobacteriaceae classification + carbapenems, + "any" + ) + + trans_tbl( + 2, + which(x$order == "Enterobacterales"), # following in fact the old Enterobacteriaceae classification + ESBLs, + "all" + ) + # Table 2 - trans_tbl(2, - which(x$genus == "Acinetobacter"), - c(carbapenems), - "any") - trans_tbl(3, - which(x$genus == "Acinetobacter"), - c(aminoglycosides, fluoroquinolones), - "all") - - trans_tbl(3, - which(x$genus == "Stenotrophomonas" & x$species == "maltophilia"), - SXT, - "all") - - if (!ab_missing(MEM) & !ab_missing(IPM) - & !ab_missing(GEN) & !ab_missing(TOB) - & !ab_missing(CIP) - & !ab_missing(CAZ) - & !ab_missing(TZP)) { + trans_tbl( + 2, + which(x$genus == "Acinetobacter"), + c(carbapenems), + "any" + ) + trans_tbl( + 3, + which(x$genus == "Acinetobacter"), + c(aminoglycosides, fluoroquinolones), + "all" + ) + + trans_tbl( + 3, + which(x$genus == "Stenotrophomonas" & x$species == "maltophilia"), + SXT, + "all" + ) + + if (!ab_missing(MEM) & !ab_missing(IPM) & + !ab_missing(GEN) & !ab_missing(TOB) & + !ab_missing(CIP) & + !ab_missing(CAZ) & + !ab_missing(TZP)) { x$psae <- 0 x[which(x[, MEM, drop = TRUE] == "R" | x[, IPM, drop = TRUE] == "R"), "psae"] <- 1 + x[which(x[, MEM, drop = TRUE] == "R" | x[, IPM, drop = TRUE] == "R"), "psae"] x[which(x[, GEN, drop = TRUE] == "R" & x[, TOB, drop = TRUE] == "R"), "psae"] <- 1 + x[which(x[, GEN, drop = TRUE] == "R" & x[, TOB, drop = TRUE] == "R"), "psae"] @@ -1426,29 +1629,38 @@ mdro <- function(x = NULL, } else { x$psae <- 0 } - trans_tbl(3, - which(x$genus == "Pseudomonas" & x$species == "aeruginosa" & x$psae >= 3), - c(CAZ, CIP, GEN, IPM, MEM, TOB, TZP), - "any") + trans_tbl( + 3, + which(x$genus == "Pseudomonas" & x$species == "aeruginosa" & x$psae >= 3), + c(CAZ, CIP, GEN, IPM, MEM, TOB, TZP), + "any" + ) x[which( - x$genus == "Pseudomonas" & x$species == "aeruginosa" - & x$psae >= 3), "reason"] <- paste0("at least 3 classes contain R", ifelse(!isTRUE(combine_SI), " or I", "")) - + x$genus == "Pseudomonas" & x$species == "aeruginosa" & + x$psae >= 3 + ), "reason"] <- paste0("at least 3 classes contain R", ifelse(!isTRUE(combine_SI), " or I", "")) + # Table 3 - trans_tbl(3, - which(x$genus == "Streptococcus" & x$species == "pneumoniae"), - PEN, - "all") - trans_tbl(3, - which(x$genus == "Streptococcus" & x$species == "pneumoniae"), - VAN, - "all") - trans_tbl(3, - which(x$genus == "Enterococcus" & x$species == "faecium"), - c(PEN, VAN), - "all") + trans_tbl( + 3, + which(x$genus == "Streptococcus" & x$species == "pneumoniae"), + PEN, + "all" + ) + trans_tbl( + 3, + which(x$genus == "Streptococcus" & x$species == "pneumoniae"), + VAN, + "all" + ) + trans_tbl( + 3, + which(x$genus == "Enterococcus" & x$species == "faecium"), + c(PEN, VAN), + "all" + ) } - + if (guideline$code == "tb") { # Tuberculosis ------------------------------------------------------------ prepare_drug <- function(ab) { @@ -1485,7 +1697,7 @@ mdro <- function(x = NULL, ab != "R" } } - + x$mono_count <- 0 x[drug_is_R(INH), "mono_count"] <- x[drug_is_R(INH), "mono_count", drop = TRUE] + 1 x[drug_is_R(RIF), "mono_count"] <- x[drug_is_R(RIF), "mono_count", drop = TRUE] + 1 @@ -1493,7 +1705,7 @@ mdro <- function(x = NULL, x[drug_is_R(PZA), "mono_count"] <- x[drug_is_R(PZA), "mono_count", drop = TRUE] + 1 x[drug_is_R(RIB), "mono_count"] <- x[drug_is_R(RIB), "mono_count", drop = TRUE] + 1 x[drug_is_R(RFP), "mono_count"] <- x[drug_is_R(RFP), "mono_count", drop = TRUE] + 1 - + x$mono <- x$mono_count > 0 x$poly <- x$mono_count > 1 & drug_is_not_R(RIF) & drug_is_not_R(INH) x$mdr <- drug_is_R(RIF) & drug_is_R(INH) @@ -1501,46 +1713,57 @@ mdro <- function(x = NULL, x$second <- drug_is_R(CAP) | drug_is_R(KAN) | drug_is_R(AMK) x$xdr <- x$mdr & x$xdr & x$second x$MDRO <- ifelse(x$xdr, 5, - ifelse(x$mdr, 4, - ifelse(x$poly, 3, - ifelse(x$mono, 2, - 1)))) + ifelse(x$mdr, 4, + ifelse(x$poly, 3, + ifelse(x$mono, 2, + 1 + ) + ) + ) + ) # keep all real TB, make other species NA x$MDRO <- ifelse(x$fullname == "Mycobacterium tuberculosis", x$MDRO, NA_real_) x$reason <- "PDR/MDR/XDR criteria were met" } - + # some more info on negative results if (verbose == TRUE) { if (guideline$code == "cmi2012") { - x[which(x$MDRO == 1 & !is.na(x$classes_affected)), "reason"] <- paste0(x$classes_affected[which(x$MDRO == 1 & !is.na(x$classes_affected))], - " of ", - x$classes_available[which(x$MDRO == 1 & !is.na(x$classes_affected))], - " available classes contain R", - ifelse(!isTRUE(combine_SI), " or I", ""), - " (3 required for MDR)") + x[which(x$MDRO == 1 & !is.na(x$classes_affected)), "reason"] <- paste0( + x$classes_affected[which(x$MDRO == 1 & !is.na(x$classes_affected))], + " of ", + x$classes_available[which(x$MDRO == 1 & !is.na(x$classes_affected))], + " available classes contain R", + ifelse(!isTRUE(combine_SI), " or I", ""), + " (3 required for MDR)" + ) } else { x[which(x$MDRO == 1), "reason"] <- "too few antibiotics are R" } } - + if (info.bak == TRUE) { cat(group_msg) if (sum(!is.na(x$MDRO)) == 0) { cat(font_bold(paste0("=> Found 0 MDROs since no isolates are covered by the guideline"))) } else { - cat(font_bold(paste0("=> Found ", sum(x$MDRO %in% c(2:5), na.rm = TRUE), " ", guideline$type, " out of ", sum(!is.na(x$MDRO)), - " isolates (", trimws(percentage(sum(x$MDRO %in% c(2:5), na.rm = TRUE) / sum(!is.na(x$MDRO)))), ")"))) + cat(font_bold(paste0( + "=> Found ", sum(x$MDRO %in% c(2:5), na.rm = TRUE), " ", guideline$type, " out of ", sum(!is.na(x$MDRO)), + " isolates (", trimws(percentage(sum(x$MDRO %in% c(2:5), na.rm = TRUE) / sum(!is.na(x$MDRO)))), ")" + ))) } } - + # Fill in blanks ---- # for rows that have no results x_transposed <- as.list(as.data.frame(t(x[, cols_ab, drop = FALSE]), - stringsAsFactors = FALSE)) - rows_empty <- which(vapply(FUN.VALUE = logical(1), - x_transposed, - function(y) all(is.na(y)))) + stringsAsFactors = FALSE + )) + rows_empty <- which(vapply( + FUN.VALUE = logical(1), + x_transposed, + function(y) all(is.na(y)) + )) if (length(rows_empty) > 0) { cat(font_italic(paste0(" (", length(rows_empty), " isolates had no test results)\n"))) x[rows_empty, "MDRO"] <- NA @@ -1548,71 +1771,92 @@ mdro <- function(x = NULL, } else { cat("\n") } - + # Results ---- if (guideline$code == "cmi2012") { if (any(x$MDRO == -1, na.rm = TRUE)) { if (message_not_thrown_before("mdro", "availability")) { - warning_("in `mdro()`: NA introduced for isolates where the available percentage of antimicrobial classes was below ", - percentage(pct_required_classes), " (set with `pct_required_classes`)") + warning_( + "in `mdro()`: NA introduced for isolates where the available percentage of antimicrobial classes was below ", + percentage(pct_required_classes), " (set with `pct_required_classes`)" + ) } # set these -1s to NA x[which(x$MDRO == -1), "MDRO"] <- NA_integer_ } - x$MDRO <- factor(x = x$MDRO, - levels = 1:4, - labels = c("Negative", "Multi-drug-resistant (MDR)", - "Extensively drug-resistant (XDR)", "Pandrug-resistant (PDR)"), - ordered = TRUE) + x$MDRO <- factor( + x = x$MDRO, + levels = 1:4, + labels = c( + "Negative", "Multi-drug-resistant (MDR)", + "Extensively drug-resistant (XDR)", "Pandrug-resistant (PDR)" + ), + ordered = TRUE + ) } else if (guideline$code == "tb") { - x$MDRO <- factor(x = x$MDRO, - levels = 1:5, - labels = c("Negative", "Mono-resistant", "Poly-resistant", - "Multi-drug-resistant", "Extensively drug-resistant"), - ordered = TRUE) + x$MDRO <- factor( + x = x$MDRO, + levels = 1:5, + labels = c( + "Negative", "Mono-resistant", "Poly-resistant", + "Multi-drug-resistant", "Extensively drug-resistant" + ), + ordered = TRUE + ) } else if (guideline$code == "mrgn") { - x$MDRO <- factor(x = x$MDRO, - levels = 1:3, - labels = c("Negative", "3MRGN", "4MRGN"), - ordered = TRUE) + x$MDRO <- factor( + x = x$MDRO, + levels = 1:3, + labels = c("Negative", "3MRGN", "4MRGN"), + ordered = TRUE + ) } else { - x$MDRO <- factor(x = x$MDRO, - levels = 1:3, - labels = c("Negative", "Positive, unconfirmed", "Positive"), - ordered = TRUE) + x$MDRO <- factor( + x = x$MDRO, + levels = 1:3, + labels = c("Negative", "Positive, unconfirmed", "Positive"), + ordered = TRUE + ) } - + if (verbose == TRUE) { colnames(x)[colnames(x) == col_mo] <- "microorganism" x$microorganism <- mo_name(x$microorganism, language = NULL) - x[, c("row_number", - "microorganism", - "MDRO", - "reason", - "columns_nonsusceptible"), - drop = FALSE] + x[, c( + "row_number", + "microorganism", + "MDRO", + "reason", + "columns_nonsusceptible" + ), + drop = FALSE + ] } else { x$MDRO } - } #' @rdname mdro #' @export custom_mdro_guideline <- function(..., as_factor = TRUE) { meet_criteria(as_factor, allow_class = "logical", has_length = 1) - + dots <- tryCatch(list(...), - error = function(e) "error") - stop_if(identical(dots, "error"), - "rules must be a valid formula inputs (e.g., using '~'), see `?mdro`") + error = function(e) "error" + ) + stop_if( + identical(dots, "error"), + "rules must be a valid formula inputs (e.g., using '~'), see `?mdro`" + ) n_dots <- length(dots) stop_if(n_dots == 0, "no custom rules were set. Please read the documentation using `?mdro`.") out <- vector("list", n_dots) for (i in seq_len(n_dots)) { - stop_ifnot(inherits(dots[[i]], "formula"), - "rule ", i, " must be a valid formula input (e.g., using '~'), see `?mdro`") - + stop_ifnot( + inherits(dots[[i]], "formula"), + "rule ", i, " must be a valid formula input (e.g., using '~'), see `?mdro`" + ) + # Query qry <- dots[[i]][[2]] if (inherits(qry, "call")) { @@ -1628,14 +1872,14 @@ custom_mdro_guideline <- function(..., as_factor = TRUE) { qry <- gsub(" *([&|+-/*^><==]+) *", " \\1 ", qry) qry <- gsub("'", "\"", qry, fixed = TRUE) out[[i]]$query <- as.expression(qry) - + # Value val <- tryCatch(eval(dots[[i]][[3]]), error = function(e) NULL) stop_if(is.null(val), "rule ", i, " must return a valid value, it now returns an error: ", tryCatch(eval(dots[[i]][[3]]), error = function(e) e$message)) stop_if(length(val) > 1, "rule ", i, " must return a value of length 1, not ", length(val)) out[[i]]$value <- as.character(val) } - + names(out) <- paste0("rule", seq_len(n_dots)) out <- set_clean_class(out, new_class = c("custom_mdro_guideline", "list")) attr(out, "values") <- unname(c("Negative", vapply(FUN.VALUE = character(1), unclass(out), function(x) x$value))) @@ -1657,8 +1901,9 @@ c.custom_mdro_guideline <- function(x, ..., as_factor = NULL) { } for (g in list(...)) { stop_ifnot(inherits(g, "custom_mdro_guideline"), - "for combining custom MDRO guidelines, all rules must be created with `custom_mdro_guideline()`", - call = FALSE) + "for combining custom MDRO guidelines, all rules must be created with `custom_mdro_guideline()`", + call = FALSE + ) vals <- attributes(x)$values if (!all(attributes(g)$values %in% vals)) { vals <- unname(unique(c(vals, attributes(g)$values))) @@ -1707,27 +1952,33 @@ run_custom_mdro_guideline <- function(df, guideline, info) { reasons <- character(length = NROW(df)) for (i in seq_len(n_dots)) { qry <- tryCatch(eval(parse(text = guideline[[i]]$query), envir = df, enclos = parent.frame()), - error = function(e) { - pkg_env$err_msg <- e$message - return("error") - }) + error = function(e) { + pkg_env$err_msg <- e$message + return("error") + } + ) if (identical(qry, "error")) { - warning_("in `custom_mdro_guideline()`: rule ", i, - " (`", as.character(guideline[[i]]$query), "`) was ignored because of this error message: ", - pkg_env$err_msg, - call = FALSE, - add_fn = font_red) + warning_("in `custom_mdro_guideline()`: rule ", i, + " (`", as.character(guideline[[i]]$query), "`) was ignored because of this error message: ", + pkg_env$err_msg, + call = FALSE, + add_fn = font_red + ) next } - stop_ifnot(is.logical(qry), "in custom_mdro_guideline(): rule ", i, " (`", guideline[[i]]$query, - "`) must return `TRUE` or `FALSE`, not ", - format_class(class(qry), plural = FALSE), call = FALSE) - + stop_ifnot(is.logical(qry), "in custom_mdro_guideline(): rule ", i, " (`", guideline[[i]]$query, + "`) must return `TRUE` or `FALSE`, not ", + format_class(class(qry), plural = FALSE), + call = FALSE + ) + new_mdros <- which(qry == TRUE & out == "") - + if (info == TRUE) { - cat(word_wrap("- Custom MDRO rule ", i, ": `", as.character(guideline[[i]]$query), - "` (", length(new_mdros), " rows matched)"), "\n", sep = "") + cat(word_wrap( + "- Custom MDRO rule ", i, ": `", as.character(guideline[[i]]$query), + "` (", length(new_mdros), " rows matched)" + ), "\n", sep = "") } val <- guideline[[i]]$value out[new_mdros] <- val @@ -1735,22 +1986,26 @@ run_custom_mdro_guideline <- function(df, guideline, info) { } out[out == ""] <- "Negative" reasons[out == "Negative"] <- "no rules matched" - + if (isTRUE(attributes(guideline)$as_factor)) { out <- factor(out, levels = attributes(guideline)$values, ordered = TRUE) } - + columns_nonsusceptible <- as.data.frame(t(df[, is.rsi(df), drop = FALSE] == "R")) - columns_nonsusceptible <- vapply(FUN.VALUE = character(1), - columns_nonsusceptible, - function(x) paste0(rownames(columns_nonsusceptible)[which(x)], collapse = " ")) + columns_nonsusceptible <- vapply( + FUN.VALUE = character(1), + columns_nonsusceptible, + function(x) paste0(rownames(columns_nonsusceptible)[which(x)], collapse = " ") + ) columns_nonsusceptible[is.na(out)] <- NA_character_ - - data.frame(row_number = seq_len(NROW(df)), - MDRO = out, - reason = reasons, - columns_nonsusceptible = columns_nonsusceptible, - stringsAsFactors = FALSE) + + data.frame( + row_number = seq_len(NROW(df)), + MDRO = out, + reason = reasons, + columns_nonsusceptible = columns_nonsusceptible, + stringsAsFactors = FALSE + ) } #' @rdname mdro @@ -1758,8 +2013,10 @@ run_custom_mdro_guideline <- function(df, guideline, info) { brmo <- function(x = NULL, only_rsi_columns = FALSE, ...) { meet_criteria(x, allow_class = "data.frame", allow_NULL = TRUE) meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) - stop_if("guideline" %in% names(list(...)), - "argument `guideline` must not be set since this is a guideline-specific function") + stop_if( + "guideline" %in% names(list(...)), + "argument `guideline` must not be set since this is a guideline-specific function" + ) mdro(x = x, only_rsi_columns = only_rsi_columns, guideline = "BRMO", ...) } @@ -1768,8 +2025,10 @@ brmo <- function(x = NULL, only_rsi_columns = FALSE, ...) { mrgn <- function(x = NULL, only_rsi_columns = FALSE, ...) { meet_criteria(x, allow_class = "data.frame", allow_NULL = TRUE) meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) - stop_if("guideline" %in% names(list(...)), - "argument `guideline` must not be set since this is a guideline-specific function") + stop_if( + "guideline" %in% names(list(...)), + "argument `guideline` must not be set since this is a guideline-specific function" + ) mdro(x = x, only_rsi_columns = only_rsi_columns, guideline = "MRGN", ...) } @@ -1778,8 +2037,10 @@ mrgn <- function(x = NULL, only_rsi_columns = FALSE, ...) { mdr_tb <- function(x = NULL, only_rsi_columns = FALSE, ...) { meet_criteria(x, allow_class = "data.frame", allow_NULL = TRUE) meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) - stop_if("guideline" %in% names(list(...)), - "argument `guideline` must not be set since this is a guideline-specific function") + stop_if( + "guideline" %in% names(list(...)), + "argument `guideline` must not be set since this is a guideline-specific function" + ) mdro(x = x, only_rsi_columns = only_rsi_columns, guideline = "TB", ...) } @@ -1788,8 +2049,10 @@ mdr_tb <- function(x = NULL, only_rsi_columns = FALSE, ...) { mdr_cmi2012 <- function(x = NULL, only_rsi_columns = FALSE, ...) { meet_criteria(x, allow_class = "data.frame", allow_NULL = TRUE) meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) - stop_if("guideline" %in% names(list(...)), - "argument `guideline` must not be set since this is a guideline-specific function") + stop_if( + "guideline" %in% names(list(...)), + "argument `guideline` must not be set since this is a guideline-specific function" + ) mdro(x = x, only_rsi_columns = only_rsi_columns, guideline = "CMI2012", ...) } @@ -1798,7 +2061,9 @@ mdr_cmi2012 <- function(x = NULL, only_rsi_columns = FALSE, ...) { eucast_exceptional_phenotypes <- function(x = NULL, only_rsi_columns = FALSE, ...) { meet_criteria(x, allow_class = "data.frame", allow_NULL = TRUE) meet_criteria(only_rsi_columns, allow_class = "logical", has_length = 1) - stop_if("guideline" %in% names(list(...)), - "argument `guideline` must not be set since this is a guideline-specific function") + stop_if( + "guideline" %in% names(list(...)), + "argument `guideline` must not be set since this is a guideline-specific function" + ) mdro(x = x, only_rsi_columns = only_rsi_columns, guideline = "EUCAST", ...) } diff --git a/R/mic.R b/R/mic.R index 6ffadfd2c..0d2c1d7b6 100755 --- a/R/mic.R +++ b/R/mic.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -25,20 +25,42 @@ # these are allowed MIC values and will become [factor] levels ops <- c("<", "<=", "", ">=", ">") -valid_mic_levels <- c(c(t(vapply(FUN.VALUE = character(9), ops, - function(x) paste0(x, "0.00", 1:9)))), - unique(c(t(vapply(FUN.VALUE = character(104), ops, - function(x) paste0(x, sort(as.double(paste0("0.0", - sort(c(1:99, 125, 128, 256, 512, 625)))))))))), - unique(c(t(vapply(FUN.VALUE = character(103), ops, - function(x) paste0(x, sort(as.double(paste0("0.", - c(1:99, 125, 128, 256, 512))))))))), - c(t(vapply(FUN.VALUE = character(10), ops, - function(x) paste0(x, sort(c(1:9, 1.5)))))), - c(t(vapply(FUN.VALUE = character(45), ops, - function(x) paste0(x, c(10:98)[9:98 %% 2 == TRUE])))), - c(t(vapply(FUN.VALUE = character(17), ops, - function(x) paste0(x, sort(c(2 ^ c(7:11), 192, 80 * c(2:12)))))))) +valid_mic_levels <- c( + c(t(vapply( + FUN.VALUE = character(9), ops, + function(x) paste0(x, "0.00", 1:9) + ))), + unique(c(t(vapply( + FUN.VALUE = character(104), ops, + function(x) { + paste0(x, sort(as.double(paste0( + "0.0", + sort(c(1:99, 125, 128, 256, 512, 625)) + )))) + } + )))), + unique(c(t(vapply( + FUN.VALUE = character(103), ops, + function(x) { + paste0(x, sort(as.double(paste0( + "0.", + c(1:99, 125, 128, 256, 512) + )))) + } + )))), + c(t(vapply( + FUN.VALUE = character(10), ops, + function(x) paste0(x, sort(c(1:9, 1.5))) + ))), + c(t(vapply( + FUN.VALUE = character(45), ops, + function(x) paste0(x, c(10:98)[9:98 %% 2 == TRUE]) + ))), + c(t(vapply( + FUN.VALUE = character(17), ops, + function(x) paste0(x, sort(c(2^c(7:11), 192, 80 * c(2:12)))) + ))) +) #' Transform Input to Minimum Inhibitory Concentrations (MIC) #' @@ -48,32 +70,32 @@ valid_mic_levels <- c(c(t(vapply(FUN.VALUE = character(9), ops, #' @param na.rm a [logical] indicating whether missing values should be removed #' @param ... arguments passed on to methods #' @details To interpret MIC values as RSI values, use [as.rsi()] on MIC values. It supports guidelines from EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`) and CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`). -#' +#' #' This class for MIC values is a quite a special data type: formally it is an ordered [factor] with valid MIC values as [factor] levels (to make sure only valid MIC values are retained), but for any mathematical operation it acts as decimal numbers: -#' +#' #' ``` #' x <- random_mic(10) #' x #' #> Class #' #> [1] 16 1 8 8 64 >=128 0.0625 32 32 16 -#' +#' #' is.factor(x) #' #> [1] TRUE -#' +#' #' x[1] * 2 #' #> [1] 32 -#' +#' #' median(x) #' #> [1] 26 #' ``` -#' +#' #' This makes it possible to maintain operators that often come with MIC values, such ">=" and "<=", even when filtering using [numeric] values in data analysis, e.g.: -#' +#' #' ``` #' x[x > 4] #' #> Class #' #> [1] 16 8 8 64 >=128 32 32 16 -#' +#' #' df <- data.frame(x, hospital = "A") #' subset(df, x > 4) # or with dplyr: df %>% filter(x > 4) #' #> x hospital @@ -84,11 +106,11 @@ valid_mic_levels <- c(c(t(vapply(FUN.VALUE = character(9), ops, #' #> 9 32 A #' #> 10 16 A #' ``` -#' +#' #' The following [generic functions][groupGeneric()] are implemented for the MIC class: `!`, `!=`, `%%`, `%/%`, `&`, `*`, `+`, `-`, `/`, `<`, `<=`, `==`, `>`, `>=`, `^`, `|`, [abs()], [acos()], [acosh()], [all()], [any()], [asin()], [asinh()], [atan()], [atanh()], [ceiling()], [cos()], [cosh()], [cospi()], [cummax()], [cummin()], [cumprod()], [cumsum()], [digamma()], [exp()], [expm1()], [floor()], [gamma()], [lgamma()], [log()], [log1p()], [log2()], [log10()], [max()], [mean()], [min()], [prod()], [range()], [round()], [sign()], [signif()], [sin()], [sinh()], [sinpi()], [sqrt()], [sum()], [tan()], [tanh()], [tanpi()], [trigamma()] and [trunc()]. Some functions of the `stats` package are also implemented: [median()], [quantile()], [mad()], [IQR()], [fivenum()]. Also, [boxplot.stats()] is supported. Since [sd()] and [var()] are non-generic functions, these could not be extended. Use [mad()] as an alternative, or use e.g. `sd(as.numeric(x))` where `x` is your vector of MIC values. -#' +#' #' Using [as.double()] or [as.numeric()] on MIC values will remove the operators and return a numeric vector. Do **not** use [as.integer()] on MIC values as by the \R convention on [factor]s, it will return the index of the factor levels (which is often useless for regular users). -#' +#' #' Use [droplevels()] to drop unused levels. At default, it will return a plain factor. Use `droplevels(..., as.mic = TRUE)` to maintain the `` class. #' @return Ordered [factor] with additional class [`mic`], that in mathematical operations acts as decimal numbers. Bare in mind that the outcome of any mathematical operation on MICs will return a [numeric] value. #' @aliases mic @@ -101,26 +123,30 @@ valid_mic_levels <- c(c(t(vapply(FUN.VALUE = character(9), ops, #' #' # this can also coerce combined MIC/RSI values: #' as.mic("<=0.002; S") -#' +#' #' # mathematical processing treats MICs as numeric values #' fivenum(mic_data) #' quantile(mic_data) #' all(mic_data < 512) #' #' # interpret MIC values -#' as.rsi(x = as.mic(2), -#' mo = as.mo("Streptococcus pneumoniae"), -#' ab = "AMX", -#' guideline = "EUCAST") -#' as.rsi(x = as.mic(c(0.01, 2, 4, 8)), -#' mo = as.mo("Streptococcus pneumoniae"), -#' ab = "AMX", -#' guideline = "EUCAST") +#' as.rsi( +#' x = as.mic(2), +#' mo = as.mo("Streptococcus pneumoniae"), +#' ab = "AMX", +#' guideline = "EUCAST" +#' ) +#' as.rsi( +#' x = as.mic(c(0.01, 2, 4, 8)), +#' mo = as.mo("Streptococcus pneumoniae"), +#' ab = "AMX", +#' guideline = "EUCAST" +#' ) #' #' # plot MIC values, see ?plot #' plot(mic_data) #' plot(mic_data, mo = "E. coli", ab = "cipro") -#' +#' #' if (require("ggplot2")) { #' autoplot(mic_data, mo = "E. coli", ab = "cipro") #' } @@ -133,7 +159,7 @@ valid_mic_levels <- c(c(t(vapply(FUN.VALUE = character(9), ops, as.mic <- function(x, na.rm = FALSE) { meet_criteria(x, allow_class = c("mic", "character", "numeric", "integer", "factor"), allow_NA = TRUE) meet_criteria(na.rm, allow_class = "logical", has_length = 1) - + if (is.mic(x)) { x } else { @@ -143,7 +169,7 @@ as.mic <- function(x, na.rm = FALSE) { } x[trimws(x) == ""] <- NA x.bak <- x - + # comma to period x <- gsub(",", ".", x, fixed = TRUE) # transform Unicode for >= and <= @@ -177,27 +203,30 @@ as.mic <- function(x, na.rm = FALSE) { x <- gsub("[.]$", "", x, perl = TRUE) # trim it x <- trimws(x) - + ## previously unempty values now empty - should return a warning later on x[x.bak != "" & x == ""] <- "invalid" - + na_before <- x[is.na(x) | x == ""] %pm>% length() x[!x %in% valid_mic_levels] <- NA na_after <- x[is.na(x) | x == ""] %pm>% length() - + if (na_before != na_after) { list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>% unique() %pm>% sort() %pm>% vector_and(quotes = TRUE) warning_("in `as.mic()`: ", na_after - na_before, " results truncated (", - round(((na_after - na_before) / length(x)) * 100), - "%) that were invalid MICs: ", - list_missing, call = FALSE) + round(((na_after - na_before) / length(x)) * 100), + "%) that were invalid MICs: ", + list_missing, + call = FALSE + ) } - + set_clean_class(factor(x, levels = valid_mic_levels, ordered = TRUE), - new_class = c("mic", "ordered", "factor")) + new_class = c("mic", "ordered", "factor") + ) } } @@ -206,7 +235,8 @@ all_valid_mics <- function(x) { return(FALSE) } x_mic <- tryCatch(suppressWarnings(as.mic(x[!is.na(x)])), - error = function(e) NA) + error = function(e) NA + ) !any(is.na(x_mic)) && !all(is.na(x)) } @@ -215,7 +245,8 @@ all_valid_mics <- function(x) { #' @format NULL #' @export NA_mic_ <- set_clean_class(factor(NA, levels = valid_mic_levels, ordered = TRUE), - new_class = c("mic", "ordered", "factor")) + new_class = c("mic", "ordered", "factor") +) #' @rdname as.mic #' @export @@ -271,8 +302,10 @@ type_sum.mic <- function(x, ...) { #' @noRd print.mic <- function(x, ...) { cat("Class ", - ifelse(length(levels(x)) < length(valid_mic_levels), font_red(" with dropped levels"), ""), - "\n", sep = "") + ifelse(length(levels(x)) < length(valid_mic_levels), font_red(" with dropped levels"), ""), + "\n", + sep = "" + ) print(as.character(x), quote = FALSE) att <- attributes(x) if ("na.action" %in% names(att)) { @@ -378,12 +411,12 @@ hist.mic <- function(x, ...) { get_skimmers.mic <- function(column) { skimr::sfl( skim_type = "mic", - p0 = ~stats::quantile(., probs = 0, na.rm = TRUE, names = FALSE), - p25 = ~stats::quantile(., probs = 0.25, na.rm = TRUE, names = FALSE), - p50 = ~stats::quantile(., probs = 0.5, na.rm = TRUE, names = FALSE), - p75 = ~stats::quantile(., probs = 0.75, na.rm = TRUE, names = FALSE), - p100 = ~stats::quantile(., probs = 1, na.rm = TRUE, names = FALSE), - hist = ~skimr::inline_hist(log2(stats::na.omit(.)), 5) + p0 = ~ stats::quantile(., probs = 0, na.rm = TRUE, names = FALSE), + p25 = ~ stats::quantile(., probs = 0.25, na.rm = TRUE, names = FALSE), + p50 = ~ stats::quantile(., probs = 0.5, na.rm = TRUE, names = FALSE), + p75 = ~ stats::quantile(., probs = 0.75, na.rm = TRUE, names = FALSE), + p100 = ~ stats::quantile(., probs = 1, na.rm = TRUE, names = FALSE), + hist = ~ skimr::inline_hist(log2(stats::na.omit(.)), 5) ) } @@ -679,7 +712,7 @@ is_lower <- function(el) { #' @export #' @noRd `^.mic` <- function(e1, e2) { - as.double(e1) ^ as.double(e2) + as.double(e1)^as.double(e2) } #' @method %% mic diff --git a/R/mo.R b/R/mo.R index cd4005ff4..952d0dd09 100755 --- a/R/mo.R +++ b/R/mo.R @@ -127,9 +127,9 @@ #' as.mo("Staphylococcus aureus") #' as.mo("Staphylococcus aureus (MRSA)") #' as.mo("Zthafilokkoockus oureuz") # handles incorrect spelling -#' as.mo("MRSA") # Methicillin Resistant S. aureus -#' as.mo("VISA") # Vancomycin Intermediate S. aureus -#' as.mo("VRSA") # Vancomycin Resistant S. aureus +#' as.mo("MRSA") # Methicillin Resistant S. aureus +#' as.mo("VISA") # Vancomycin Intermediate S. aureus +#' as.mo("VRSA") # Vancomycin Resistant S. aureus #' as.mo(115329001) # SNOMED CT code #' #' # Dyslexia is no problem - these all work: @@ -142,15 +142,15 @@ #' as.mo("GAS") # Group A Streptococci #' as.mo("GBS") # Group B Streptococci #' -#' as.mo("S. epidermidis") # will remain species: B_STPHY_EPDR -#' as.mo("S. epidermidis", Becker = TRUE) # will not remain species: B_STPHY_CONS +#' as.mo("S. epidermidis") # will remain species: B_STPHY_EPDR +#' as.mo("S. epidermidis", Becker = TRUE) # will not remain species: B_STPHY_CONS #' -#' as.mo("S. pyogenes") # will remain species: B_STRPT_PYGN +#' as.mo("S. pyogenes") # will remain species: B_STRPT_PYGN #' as.mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPT_GRPA #' #' # All mo_* functions use as.mo() internally too (see ?mo_property): -#' mo_genus("E. coli") # returns "Escherichia" -#' mo_gramstain("E. coli") # returns "Gram negative" +#' mo_genus("E. coli") # returns "Escherichia" +#' mo_gramstain("E. coli") # returns "Gram negative" #' mo_is_intrinsic_resistant("E. coli", "vanco") # returns TRUE #' } as.mo <- function(x, @@ -170,71 +170,72 @@ as.mo <- function(x, meet_criteria(ignore_pattern, allow_class = "character", has_length = 1, allow_NULL = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(info, allow_class = "logical", has_length = 1) - + check_dataset_integrity() - - if (tryCatch(all(x[!is.na(x)] %in% MO_lookup$mo) - & isFALSE(Becker) - & isFALSE(Lancefield), error = function(e) FALSE)) { + + if (tryCatch(all(x[!is.na(x)] %in% MO_lookup$mo) & + isFALSE(Becker) & + isFALSE(Lancefield), error = function(e) FALSE)) { # don't look into valid MO codes, just return them # is.mo() won't work - MO codes might change between package versions return(set_clean_class(x, new_class = c("mo", "character"))) } - + # start off with replaced language-specific non-ASCII characters with ASCII characters x <- parse_and_convert(x) # replace mo codes used in older package versions x <- replace_old_mo_codes(x, property = "mo") # ignore cases that match the ignore pattern x <- replace_ignore_pattern(x, ignore_pattern) - + # WHONET: xxx = no growth x[tolower(as.character(paste0(x, ""))) %in% c("", "xxx", "na", "nan")] <- NA_character_ # Laboratory systems: remove (translated) entries like "no growth", etc. x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_ x[trimws2(x) %like% paste0("^(", translate_into_language("no|not", language = language), ") [a-z]+")] <- "UNKNOWN" uncertainty_level <- translate_allow_uncertain(allow_uncertain) - - if (tryCatch(all(x == "" | gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE) %in% MO_lookup$fullname_lower, na.rm = TRUE) - & isFALSE(Becker) - & isFALSE(Lancefield), error = function(e) FALSE)) { + + if (tryCatch(all(x == "" | gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE) %in% MO_lookup$fullname_lower, na.rm = TRUE) & + isFALSE(Becker) & + isFALSE(Lancefield), error = function(e) FALSE)) { # to improve speed, special case for taxonomically correct full names (case-insensitive) return(set_clean_class(MO_lookup[match(gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE), MO_lookup$fullname_lower), "mo", drop = TRUE], - new_class = c("mo", "character"))) + new_class = c("mo", "character") + )) } - - if (!is.null(reference_df) - && check_validity_mo_source(reference_df) - && isFALSE(Becker) - && isFALSE(Lancefield) - && all(x %in% unlist(reference_df), na.rm = TRUE)) { - + + if (!is.null(reference_df) && + check_validity_mo_source(reference_df) && + isFALSE(Becker) && + isFALSE(Lancefield) && + all(x %in% unlist(reference_df), na.rm = TRUE)) { reference_df <- repair_reference_df(reference_df) suppressWarnings( y <- data.frame(x = x, stringsAsFactors = FALSE) %pm>% pm_left_join(reference_df, by = "x") %pm>% - pm_pull(mo) + pm_pull(mo) ) - - } else if (all(x[!is.na(x)] %in% MO_lookup$mo) - & isFALSE(Becker) - & isFALSE(Lancefield)) { + } else if (all(x[!is.na(x)] %in% MO_lookup$mo) & + isFALSE(Becker) & + isFALSE(Lancefield)) { y <- x - } else { # will be checked for mo class in validation and uses exec_as.mo internally if necessary - y <- mo_validate(x = x, property = "mo", - Becker = Becker, Lancefield = Lancefield, - allow_uncertain = uncertainty_level, - reference_df = reference_df, - ignore_pattern = ignore_pattern, - language = language, - info = info, - ...) + y <- mo_validate( + x = x, property = "mo", + Becker = Becker, Lancefield = Lancefield, + allow_uncertain = uncertainty_level, + reference_df = reference_df, + ignore_pattern = ignore_pattern, + language = language, + info = info, + ... + ) } - + set_clean_class(y, - new_class = c("mo", "character")) + new_class = c("mo", "character") + ) } #' @rdname as.mo @@ -280,13 +281,13 @@ exec_as.mo <- function(x, meet_criteria(actual_uncertainty, allow_class = "numeric", has_length = 1) meet_criteria(actual_input, allow_class = "character", allow_NULL = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + check_dataset_integrity() - + if (isTRUE(debug) && initial_search == TRUE) { time_start_tracking() } - + lookup <- function(needle, column = property, haystack = reference_data_to_use, @@ -295,25 +296,28 @@ exec_as.mo <- function(x, initial = initial_search, uncertainty = actual_uncertainty, input_actual = actual_input) { - if (!is.null(input_actual)) { input <- input_actual } else { input <- tryCatch(x_backup[i], error = function(e) "") } - + # `column` can be NULL for all columns, or a selection # returns a [character] (vector) - if `column` > length 1 then with columns as names if (isTRUE(debug_mode)) { - cat(font_silver("Looking up: ", substitute(needle), collapse = ""), - "\n ", time_track()) + cat( + font_silver("Looking up: ", substitute(needle), collapse = ""), + "\n ", time_track() + ) } if (length(column) == 1) { res_df <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), , drop = FALSE] if (NROW(res_df) > 1 & uncertainty != -1) { # sort the findings on matching score - scores <- mo_matching_score(x = input, - n = res_df[, "fullname", drop = TRUE]) + scores <- mo_matching_score( + x = input, + n = res_df[, "fullname", drop = TRUE] + ) res_df <- res_df[order(scores, decreasing = TRUE), , drop = FALSE] } res <- as.character(res_df[, column, drop = TRUE]) @@ -329,11 +333,14 @@ exec_as.mo <- function(x, if ((length(res) > n | uncertainty > 1) & uncertainty != -1) { # save the other possible results as well, but not for forced certain results (then uncertainty == -1) uncertainties <<- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = uncertainty, - input = input, - result_mo = res_df[1, "mo", drop = TRUE], - candidates = as.character(res_df[, "fullname", drop = TRUE])), - stringsAsFactors = FALSE) + format_uncertainty_as_df( + uncertainty_level = uncertainty, + input = input, + result_mo = res_df[1, "mo", drop = TRUE], + candidates = as.character(res_df[, "fullname", drop = TRUE]) + ), + stringsAsFactors = FALSE + ) } res[seq_len(min(n, length(res)))] } @@ -358,40 +365,42 @@ exec_as.mo <- function(x, res } } - + # start off with replaced language-specific non-ASCII characters with ASCII characters x <- parse_and_convert(x) # replace mo codes used in older package versions x <- replace_old_mo_codes(x, property) # ignore cases that match the ignore pattern x <- replace_ignore_pattern(x, ignore_pattern) - + # WHONET: xxx = no growth x[tolower(as.character(paste0(x, ""))) %in% c("", "xxx", "na", "nan")] <- NA_character_ # Laboratory systems: remove (translated) entries like "no growth", etc. x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_ x[trimws2(x) %like% paste0("^(", translate_into_language("no|not", language = language), ") [a-z]+")] <- "UNKNOWN" - + if (initial_search == TRUE) { # keep track of time - give some hints to improve speed if it takes a long time start_time <- Sys.time() - + pkg_env$mo_failures <- NULL pkg_env$mo_uncertainties <- NULL pkg_env$mo_renamed <- NULL } pkg_env$mo_renamed_last_run <- NULL - + failures <- character(0) uncertainty_level <- translate_allow_uncertain(allow_uncertain) - uncertainties <- data.frame(uncertainty = integer(0), - input = character(0), - fullname = character(0), - renamed_to = character(0), - mo = character(0), - candidates = character(0), - stringsAsFactors = FALSE) - + uncertainties <- data.frame( + uncertainty = integer(0), + input = character(0), + fullname = character(0), + renamed_to = character(0), + mo = character(0), + candidates = character(0), + stringsAsFactors = FALSE + ) + x_input <- x # already strip leading and trailing spaces x <- trimws(x) @@ -399,58 +408,58 @@ exec_as.mo <- function(x, x <- unique(x) # remove empty values (to later fill them in again with NAs) # ("xxx" is WHONET code for 'no growth') - x <- x[!is.na(x) - & !is.null(x) - & !identical(x, "") - & !identical(x, "xxx")] - + x <- x[!is.na(x) & + !is.null(x) & + !identical(x, "") & + !identical(x, "xxx")] + # defined df to check for if (!is.null(reference_df)) { check_validity_mo_source(reference_df) reference_df <- repair_reference_df(reference_df) } - + # all empty if (all(identical(trimws(x_input), "") | is.na(x_input) | length(x) == 0)) { if (property == "mo") { return(set_clean_class(rep(NA_character_, length(x_input)), - new_class = c("mo", "character"))) + new_class = c("mo", "character") + )) } else { return(rep(NA_character_, length(x_input))) } - } else if (all(x %in% reference_df[, 1, drop = TRUE][[1]])) { # all in reference df colnames(reference_df)[1] <- "x" suppressWarnings( x <- MO_lookup[match(reference_df[match(x, reference_df$x), "mo", drop = TRUE], MO_lookup$mo), property, drop = TRUE] ) - } else if (all(x %in% reference_data_to_use$mo)) { x <- MO_lookup[match(x, MO_lookup$mo), property, drop = TRUE] - } else if (all(tolower(x) %in% reference_data_to_use$fullname_lower)) { # we need special treatment for very prevalent full names, they are likely! # e.g. as.mo("Staphylococcus aureus") x <- MO_lookup[match(tolower(x), MO_lookup$fullname_lower), property, drop = TRUE] - } else if (all(x %in% reference_data_to_use$fullname)) { # we need special treatment for very prevalent full names, they are likely! # e.g. as.mo("Staphylococcus aureus") x <- MO_lookup[match(x, MO_lookup$fullname), property, drop = TRUE] - } else if (all(toupper(x) %in% microorganisms.codes$code)) { # commonly used MO codes - x <- MO_lookup[match(microorganisms.codes[match(toupper(x), - microorganisms.codes$code), - "mo", - drop = TRUE], - MO_lookup$mo), - property, - drop = TRUE] - + x <- MO_lookup[match( + microorganisms.codes[match( + toupper(x), + microorganisms.codes$code + ), + "mo", + drop = TRUE + ], + MO_lookup$mo + ), + property, + drop = TRUE + ] } else if (!all(x %in% microorganisms[, property, drop = TRUE])) { - strip_whitespace <- function(x, dyslexia_mode) { # all whitespaces (tab, new lines, etc.) should be one space # and spaces before and after should be left blank @@ -463,7 +472,7 @@ exec_as.mo <- function(x, } trimmed } - + x_backup_untouched <- x x <- strip_whitespace(x, dyslexia_mode) # translate 'unknown' names back to English @@ -473,46 +482,52 @@ exec_as.mo <- function(x, for (l in langs) { for (i in seq_len(nrow(trns))) { if (!is.na(trns[i, l, drop = TRUE])) { - x <- gsub(pattern = trns[i, l, drop = TRUE], - replacement = trns$pattern[i], - x = x, - ignore.case = TRUE, - perl = TRUE) + x <- gsub( + pattern = trns[i, l, drop = TRUE], + replacement = trns$pattern[i], + x = x, + ignore.case = TRUE, + perl = TRUE + ) } } } } - + # remove spp and species x <- gsub("(^| )[ .]*(spp|ssp|ss|sp|subsp|subspecies|biovar|biotype|serovar|species)[ .]*( |$)", "", x, ignore.case = TRUE, perl = TRUE) x <- strip_whitespace(x, dyslexia_mode) - + x_backup <- x - + # from here on case-insensitive x <- tolower(x) - + x_backup[x %like_case% "^(fungus|fungi)$"] <- "(unknown fungus)" # will otherwise become the kingdom x_backup[x_backup_untouched == "Fungi"] <- "Fungi" # is literally the kingdom - + # Fill in fullnames and MO codes directly known_names <- tolower(x_backup) %in% MO_lookup$fullname_lower x[known_names] <- MO_lookup[match(tolower(x_backup)[known_names], MO_lookup$fullname_lower), property, drop = TRUE] known_codes_mo <- toupper(x_backup) %in% MO_lookup$mo x[known_codes_mo] <- MO_lookup[match(toupper(x_backup)[known_codes_mo], MO_lookup$mo), property, drop = TRUE] known_codes_lis <- toupper(x_backup) %in% microorganisms.codes$code - x[known_codes_lis] <- MO_lookup[match(microorganisms.codes[match(toupper(x_backup)[known_codes_lis], - microorganisms.codes$code), "mo", drop = TRUE], - MO_lookup$mo), property, drop = TRUE] + x[known_codes_lis] <- MO_lookup[match( + microorganisms.codes[match( + toupper(x_backup)[known_codes_lis], + microorganisms.codes$code + ), "mo", drop = TRUE], + MO_lookup$mo + ), property, drop = TRUE] already_known <- known_names | known_codes_mo | known_codes_lis - + # now only continue where the right taxonomic output is not already known if (any(!already_known)) { x_known <- x[already_known] - + # when ending in SPE instead of SPP and preceded by 2-4 characters x <- gsub("^([a-z]{2,4})(spe.?)$", "\\1", x, perl = TRUE) - + x_backup_without_spp <- x # translate to English for supported languages of mo_property x <- gsub("(gruppe|groep|grupo|gruppo|groupe)", "group", x, perl = TRUE) @@ -552,13 +567,21 @@ exec_as.mo <- function(x, x <- gsub("u+", "u+", x, perl = TRUE) # allow any ending of -um, -us, -ium, -icum, -ius, -icus, -ica, -ia and -a (needs perl for the negative backward lookup): x <- gsub("(u\\+\\(c\\|k\\|q\\|qu\\+\\|s\\|z\\|x\\|ks\\)\\+)(?![a-z])", - "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE) + "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, + perl = TRUE + ) x <- gsub("(\\[iy\\]\\+\\(c\\|k\\|q\\|qu\\+\\|s\\|z\\|x\\|ks\\)\\+a\\+)(?![a-z])", - "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE) + "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, + perl = TRUE + ) x <- gsub("(\\[iy\\]\\+u\\+m)(?![a-z])", - "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, perl = TRUE) + "(u[s|m]|[iy][ck]?u[ms]|[iy]?[ck]?a)", x, + perl = TRUE + ) x <- gsub("(\\[iy\\]\\+a\\+)(?![a-z])", - "([iy]*a+|[iy]+a*)", x, perl = TRUE) + "([iy]*a+|[iy]+a*)", x, + perl = TRUE + ) x <- gsub("e+", "e+", x, perl = TRUE) x <- gsub("o+", "o+", x, perl = TRUE) x <- gsub("(.)\\1+", "\\1+", x, perl = TRUE) @@ -580,7 +603,7 @@ exec_as.mo <- function(x, # make sure to remove regex overkill (will lead to errors) x <- gsub("++", "+", x, fixed = TRUE) x <- gsub("?+", "?", x, fixed = TRUE) - + x_trimmed <- x x_trimmed_without_group <- gsub(" gro.u.p$", "", x_trimmed, perl = TRUE) # remove last part from "-" or "/" @@ -590,11 +613,11 @@ exec_as.mo <- function(x, x <- gsub("[ .]+", ".*", x, perl = TRUE) # add start en stop regex x <- paste0("^", x, "$") - + x_withspaces_start_only <- paste0("^", x_withspaces) x_withspaces_end_only <- paste0(x_withspaces, "$") x_withspaces_start_end <- paste0("^", x_withspaces, "$") - + if (isTRUE(debug)) { cat(paste0(font_blue("x"), ' "', x, '"\n')) cat(paste0(font_blue("x_withspaces_start_only"), ' "', x_withspaces_start_only, '"\n')) @@ -605,25 +628,24 @@ exec_as.mo <- function(x, cat(paste0(font_blue("x_trimmed"), ' "', x_trimmed, '"\n')) cat(paste0(font_blue("x_trimmed_without_group"), ' "', x_trimmed_without_group, '"\n')) } - + if (initial_search == TRUE) { progress <- progress_ticker(n = length(x[!already_known]), n_min = 25, print = info) # start if n >= 25 on.exit(close(progress)) } - + for (i in which(!already_known)) { - if (initial_search == TRUE) { progress$tick() } - + # valid MO code ---- found <- lookup(mo == toupper(x_backup[i])) if (!is.na(found)) { x[i] <- found[1L] next } - + # valid fullname ---- found <- lookup(fullname_lower %in% gsub("[^a-zA-Z0-9_. -]", "", tolower(c(x_backup[i], x_backup_without_spp[i])), perl = TRUE)) # added the gsub() for "(unknown fungus)", since fullname_lower does not contain brackets @@ -631,11 +653,12 @@ exec_as.mo <- function(x, x[i] <- found[1L] next } - + # old fullname ---- found <- lookup(fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])), - column = NULL, # all columns - haystack = MO.old_lookup) + column = NULL, # all columns + haystack = MO.old_lookup + ) if (!all(is.na(found))) { # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: # mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning) @@ -646,28 +669,34 @@ exec_as.mo <- function(x, x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup) } pkg_env$mo_renamed_last_run <- found["fullname"] - was_renamed(name_old = found["fullname"], - name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), - ref_old = found["ref"], - ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), - mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) + was_renamed( + name_old = found["fullname"], + name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), + ref_old = found["ref"], + ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), + mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup) + ) next } - + if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)" | tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) { # empty and nonsense values, ignore without warning x[i] <- lookup(mo == "UNKNOWN") next } - + # exact SNOMED code ---- if (x_backup[i] %like_case% "^[0-9]+$") { - snomed_found <- unlist(lapply(reference_data_to_use$snomed, - function(s) if (x_backup[i] %in% s) { - TRUE - } else { - FALSE - })) + snomed_found <- unlist(lapply( + reference_data_to_use$snomed, + function(s) { + if (x_backup[i] %in% s) { + TRUE + } else { + FALSE + } + } + )) if (sum(snomed_found, na.rm = TRUE) > 0) { found <- reference_data_to_use[snomed_found == TRUE, property, drop = TRUE][[1]] if (!is.na(found)) { @@ -676,22 +705,24 @@ exec_as.mo <- function(x, } } } - + # very probable: is G. species ---- found <- lookup(g_species %in% gsub("[^a-z0-9/ \\-]+", "", - tolower(c(x_backup[i], x_backup_without_spp[i])), perl = TRUE)) + tolower(c(x_backup[i], x_backup_without_spp[i])), + perl = TRUE + )) if (!is.na(found)) { x[i] <- found[1L] next } - + # WHONET and other common LIS codes ---- found <- microorganisms.codes[which(microorganisms.codes$code %in% toupper(c(x_backup_untouched[i], x_backup[i], x_backup_without_spp[i]))), "mo", drop = TRUE][1L] if (!is.na(found)) { x[i] <- lookup(mo == found) next } - + # user-defined reference ---- if (!is.null(reference_df)) { if (x_backup[i] %in% reference_df[, 1, drop = TRUE]) { @@ -701,16 +732,16 @@ exec_as.mo <- function(x, next } } - + # WHONET: xxx = no growth if (tolower(as.character(paste0(x_backup_without_spp[i], ""))) %in% c("", "xxx", "na", "nan")) { x[i] <- NA_character_ next } - + # check for very small input, but ignore the O antigens of E. coli - if (nchar(gsub("[^a-zA-Z]", "", x_trimmed[i])) < 3 - & toupper(x_backup_without_spp[i]) %unlike_case% "O?(26|103|104|104|111|121|145|157)") { + if (nchar(gsub("[^a-zA-Z]", "", x_trimmed[i])) < 3 & + toupper(x_backup_without_spp[i]) %unlike_case% "O?(26|103|104|104|111|121|145|157)") { # fewer than 3 chars and not looked for species, add as failure x[i] <- lookup(mo == "UNKNOWN") if (initial_search == TRUE) { @@ -718,27 +749,27 @@ exec_as.mo <- function(x, } next } - + if (x_backup_without_spp[i] %like_case% "(virus|viridae)") { # there is no fullname like virus or viridae, so don't try to coerce it x[i] <- NA_character_ next } - + # translate known trivial abbreviations to genus + species ---- - if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA", "BORSA", "GISA") - | x_backup_without_spp[i] %like_case% "(^| )(mrsa|mssa|visa|vrsa|borsa|gisa|la-?mrsa|ca-?mrsa)( |$)") { + if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA", "BORSA", "GISA") | + x_backup_without_spp[i] %like_case% "(^| )(mrsa|mssa|visa|vrsa|borsa|gisa|la-?mrsa|ca-?mrsa)( |$)") { x[i] <- lookup(fullname == "Staphylococcus aureus", uncertainty = -1) next } - if (toupper(x_backup_without_spp[i]) %in% c("MRSE", "MSSE") - | x_backup_without_spp[i] %like_case% "(^| )(mrse|msse)( |$)") { + if (toupper(x_backup_without_spp[i]) %in% c("MRSE", "MSSE") | + x_backup_without_spp[i] %like_case% "(^| )(mrse|msse)( |$)") { x[i] <- lookup(fullname == "Staphylococcus epidermidis", uncertainty = -1) next } - if (toupper(x_backup_without_spp[i]) == "VRE" - | x_backup_without_spp[i] %like_case% "(^| )vre " - | x_backup_without_spp[i] %like_case% "(enterococci|enterokok|enterococo)[a-z]*?$") { + if (toupper(x_backup_without_spp[i]) == "VRE" | + x_backup_without_spp[i] %like_case% "(^| )vre " | + x_backup_without_spp[i] %like_case% "(enterococci|enterokok|enterococo)[a-z]*?$") { x[i] <- lookup(genus == "Enterococcus", uncertainty = -1) next } @@ -755,13 +786,13 @@ exec_as.mo <- function(x, # - STEC (Shiga-toxin producing E. coli) # - UPEC (Uropathogenic E. coli) if (toupper(x_backup_without_spp[i]) %in% c("AIEC", "ATEC", "DAEC", "EAEC", "EHEC", "EIEC", "EPEC", "ETEC", "NMEC", "STEC", "UPEC") - # also support O-antigens of E. coli: O26, O103, O104, O111, O121, O145, O157 - | x_backup_without_spp[i] %like_case% "o?(26|103|104|111|121|145|157)") { + # also support O-antigens of E. coli: O26, O103, O104, O111, O121, O145, O157 + | x_backup_without_spp[i] %like_case% "o?(26|103|104|111|121|145|157)") { x[i] <- lookup(fullname == "Escherichia coli", uncertainty = -1) next } - if (toupper(x_backup_without_spp[i]) == "MRPA" - | x_backup_without_spp[i] %like_case% "(^| )mrpa( |$)") { + if (toupper(x_backup_without_spp[i]) == "MRPA" | + x_backup_without_spp[i] %like_case% "(^| )mrpa( |$)") { # multi resistant P. aeruginosa x[i] <- lookup(fullname == "Pseudomonas aeruginosa", uncertainty = -1) next @@ -771,8 +802,8 @@ exec_as.mo <- function(x, x[i] <- lookup(fullname == "Stenotrophomonas maltophilia", uncertainty = -1) next } - if (toupper(x_backup_without_spp[i]) %in% c("PISP", "PRSP", "VISP", "VRSP") - | x_backup_without_spp[i] %like_case% "(^| )(pisp|prsp|visp|vrsp)( |$)") { + if (toupper(x_backup_without_spp[i]) %in% c("PISP", "PRSP", "VISP", "VRSP") | + x_backup_without_spp[i] %like_case% "(^| )(pisp|prsp|visp|vrsp)( |$)") { # peni I, peni R, vanco I, vanco R: S. pneumoniae x[i] <- lookup(fullname == "Streptococcus pneumoniae", uncertainty = -1) next @@ -780,25 +811,28 @@ exec_as.mo <- function(x, if (x_backup_without_spp[i] %like_case% "^g[abcdfghk]s$") { # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) x[i] <- lookup(mo == toupper(gsub("g([abcdfghk])s", - "B_STRPT_GRP\\1", - x_backup_without_spp[i], - perl = TRUE)), uncertainty = -1) + "B_STRPT_GRP\\1", + x_backup_without_spp[i], + perl = TRUE + )), uncertainty = -1) next } if (x_backup_without_spp[i] %like_case% "(streptococ|streptokok).* [abcdfghk]$") { # Streptococci in different languages, like "estreptococos grupo B" x[i] <- lookup(mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", - "B_STRPT_GRP\\2", - x_backup_without_spp[i], - perl = TRUE)), uncertainty = -1) + "B_STRPT_GRP\\2", + x_backup_without_spp[i], + perl = TRUE + )), uncertainty = -1) next } if (x_backup_without_spp[i] %like_case% "group [abcdfghk] (streptococ|streptokok|estreptococ)") { # Streptococci in different languages, like "Group A Streptococci" x[i] <- lookup(mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", - "B_STRPT_GRP\\1", - x_backup_without_spp[i], - perl = TRUE)), uncertainty = -1) + "B_STRPT_GRP\\1", + x_backup_without_spp[i], + perl = TRUE + )), uncertainty = -1) next } if (x_backup_without_spp[i] %like_case% "ha?emoly.*strep") { @@ -807,45 +841,45 @@ exec_as.mo <- function(x, next } # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) - if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" - | x_trimmed[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" - | x_backup_without_spp[i] %like_case% "[ck]o?ns[^a-z]?$") { + if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" | + x_trimmed[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" | + x_backup_without_spp[i] %like_case% "[ck]o?ns[^a-z]?$") { # coerce S. coagulase negative x[i] <- lookup(mo == "B_STPHY_CONS", uncertainty = -1) next } - if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] positie?[vf]" - | x_trimmed[i] %like_case% "[ck]oagulas[ea] positie?[vf]" - | x_backup_without_spp[i] %like_case% "[ck]o?ps[^a-z]?$") { + if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] positie?[vf]" | + x_trimmed[i] %like_case% "[ck]oagulas[ea] positie?[vf]" | + x_backup_without_spp[i] %like_case% "[ck]o?ps[^a-z]?$") { # coerce S. coagulase positive x[i] <- lookup(mo == "B_STPHY_COPS", uncertainty = -1) next } # streptococcal groups: milleri and viridans - if (x_trimmed[i] %like_case% "strepto.* mil+er+i" - | x_backup_without_spp[i] %like_case% "strepto.* mil+er+i" - | x_backup_without_spp[i] %like_case% "mgs[^a-z]?$") { + if (x_trimmed[i] %like_case% "strepto.* mil+er+i" | + x_backup_without_spp[i] %like_case% "strepto.* mil+er+i" | + x_backup_without_spp[i] %like_case% "mgs[^a-z]?$") { # Milleri Group Streptococcus (MGS) x[i] <- lookup(mo == "B_STRPT_MILL", uncertainty = -1) next } - if (x_trimmed[i] %like_case% "strepto.* viridans" - | x_backup_without_spp[i] %like_case% "strepto.* viridans" - | x_backup_without_spp[i] %like_case% "vgs[^a-z]?$") { + if (x_trimmed[i] %like_case% "strepto.* viridans" | + x_backup_without_spp[i] %like_case% "strepto.* viridans" | + x_backup_without_spp[i] %like_case% "vgs[^a-z]?$") { # Viridans Group Streptococcus (VGS) x[i] <- lookup(mo == "B_STRPT_VIRI", uncertainty = -1) next } - if (x_backup_without_spp[i] %like_case% "gram[ -]?neg.*" - | x_backup_without_spp[i] %like_case% "negatie?[vf]" - | x_trimmed[i] %like_case% "gram[ -]?neg.*") { + if (x_backup_without_spp[i] %like_case% "gram[ -]?neg.*" | + x_backup_without_spp[i] %like_case% "negatie?[vf]" | + x_trimmed[i] %like_case% "gram[ -]?neg.*") { # coerce Gram negatives x[i] <- lookup(mo == "B_GRAMN", uncertainty = -1) next } - if (x_backup_without_spp[i] %like_case% "gram[ -]?pos.*" - | x_backup_without_spp[i] %like_case% "positie?[vf]" - | x_trimmed[i] %like_case% "gram[ -]?pos.*") { + if (x_backup_without_spp[i] %like_case% "gram[ -]?pos.*" | + x_backup_without_spp[i] %like_case% "positie?[vf]" | + x_trimmed[i] %like_case% "gram[ -]?pos.*") { # coerce Gram positives x[i] <- lookup(mo == "B_GRAMP", uncertainty = -1) next @@ -855,26 +889,29 @@ exec_as.mo <- function(x, x[i] <- lookup(genus == "Mycobacterium", uncertainty = -1) next } - + if (x_backup_without_spp[i] %like_case% "salmonella [a-z]+ ?.*") { if (x_backup_without_spp[i] %like_case% "salmonella group") { # Salmonella Group A to Z, just return S. species for now x[i] <- lookup(genus == "Salmonella", uncertainty = -1) next } else if (x_backup[i] %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & - x_backup[i] %unlike% "t[iy](ph|f)[iy]") { + x_backup[i] %unlike% "t[iy](ph|f)[iy]") { # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica # except for S. typhi, S. paratyphi, S. typhimurium x[i] <- lookup(fullname == "Salmonella enterica", uncertainty = -1) uncertainties <- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = 1, - input = x_backup[i], - result_mo = lookup(fullname == "Salmonella enterica", "mo", uncertainty = -1)), - stringsAsFactors = FALSE) + format_uncertainty_as_df( + uncertainty_level = 1, + input = x_backup[i], + result_mo = lookup(fullname == "Salmonella enterica", "mo", uncertainty = -1) + ), + stringsAsFactors = FALSE + ) next } } - + # trivial names known to the field: if ("meningococcus" %like_case% x_trimmed[i]) { # coerce Neisseria meningitidis @@ -891,7 +928,7 @@ exec_as.mo <- function(x, x[i] <- lookup(fullname == "Streptococcus pneumoniae", uncertainty = -1) next } - + if (x_backup[i] %in% pkg_env$mo_failed) { # previously failed already in this session ---- # (at this point the latest reference_df has also been checked) @@ -901,7 +938,7 @@ exec_as.mo <- function(x, } next } - + # NOW RUN THROUGH DIFFERENT PREVALENCE LEVELS check_per_prevalence <- function(data_to_check, data.old_to_check, @@ -912,14 +949,15 @@ exec_as.mo <- function(x, e.x_withspaces_start_only, f.x_withspaces_end_only, g.x_backup_without_spp) { - + # FIRST TRY FULLNAMES AND CODES ---- # if only genus is available, return only genus - + if (all(c(x[i], b.x_trimmed) %unlike_case% " ")) { if (nchar(g.x_backup_without_spp) >= 6) { found <- lookup(fullname_lower %like_case% paste0("^", unregex(g.x_backup_without_spp), "[a-z]+"), - haystack = data_to_check) + haystack = data_to_check + ) if (!is.na(found)) { x[i] <- found[1L] return(x[i]) @@ -927,7 +965,7 @@ exec_as.mo <- function(x, } # rest of genus only is in allow_uncertain part. } - + # allow no codes less than 4 characters long, was already checked for WHONET earlier if (nchar(g.x_backup_without_spp) < 4) { x[i] <- lookup(mo == "UNKNOWN") @@ -936,83 +974,95 @@ exec_as.mo <- function(x, } return(x[i]) } - + # try probable: trimmed version of fullname ---- found <- lookup(fullname_lower %in% tolower(g.x_backup_without_spp), - haystack = data_to_check) + haystack = data_to_check + ) if (!is.na(found)) { return(found[1L]) } - + # try any match keeping spaces ---- if (nchar(g.x_backup_without_spp) >= 6) { found <- lookup(fullname_lower %like_case% d.x_withspaces_start_end, - haystack = data_to_check) + haystack = data_to_check + ) if (!is.na(found)) { return(found[1L]) } } - + # try any match keeping spaces, not ending with $ ---- found <- lookup(fullname_lower %like_case% paste0(trimws(e.x_withspaces_start_only), " "), - haystack = data_to_check) + haystack = data_to_check + ) if (!is.na(found)) { return(found[1L]) } if (nchar(g.x_backup_without_spp) >= 6) { found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, - haystack = data_to_check) + haystack = data_to_check + ) if (!is.na(found)) { return(found[1L]) } } - + # try any match keeping spaces, not start with ^ ---- found <- lookup(fullname_lower %like_case% paste0(" ", trimws(f.x_withspaces_end_only)), - haystack = data_to_check) + haystack = data_to_check + ) if (!is.na(found)) { return(found[1L]) } - + # try a trimmed version if (nchar(g.x_backup_without_spp) >= 6) { found <- lookup(fullname_lower %like_case% b.x_trimmed | - fullname_lower %like_case% c.x_trimmed_without_group, - haystack = data_to_check) + fullname_lower %like_case% c.x_trimmed_without_group, + haystack = data_to_check + ) if (!is.na(found)) { return(found[1L]) } } - - + + # try splitting of characters in the middle and then find ID ---- # only when text length is 6 or lower # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus if (nchar(g.x_backup_without_spp) <= 6) { x_length <- nchar(g.x_backup_without_spp) - x_split <- paste0("^", - g.x_backup_without_spp %pm>% substr(1, x_length / 2), - ".* ", - g.x_backup_without_spp %pm>% substr((x_length / 2) + 1, x_length)) + x_split <- paste0( + "^", + g.x_backup_without_spp %pm>% substr(1, x_length / 2), + ".* ", + g.x_backup_without_spp %pm>% substr((x_length / 2) + 1, x_length) + ) found <- lookup(fullname_lower %like_case% x_split, - haystack = data_to_check) + haystack = data_to_check + ) if (!is.na(found)) { return(found[1L]) } } - + # try splitting of characters in the middle and then find ID based on old names ---- # only when text length is 6 or lower # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus if (nchar(g.x_backup_without_spp) <= 6) { x_length <- nchar(g.x_backup_without_spp) - x_split <- paste0("^", - g.x_backup_without_spp %pm>% substr(1, x_length / 2), - ".* ", - g.x_backup_without_spp %pm>% substr((x_length / 2) + 1, x_length)) + x_split <- paste0( + "^", + g.x_backup_without_spp %pm>% substr(1, x_length / 2), + ".* ", + g.x_backup_without_spp %pm>% substr((x_length / 2) + 1, x_length) + ) found <- lookup(fullname_lower %like_case% x_split, - haystack = MO.old_lookup, - column = NULL) + haystack = MO.old_lookup, + column = NULL + ) if (!all(is.na(found))) { # it's an old name, so return it if (property == "ref") { @@ -1021,29 +1071,33 @@ exec_as.mo <- function(x, x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup) } pkg_env$mo_renamed_last_run <- found["fullname"] - was_renamed(name_old = found["fullname"], - name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), - ref_old = found["ref"], - ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), - mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) + was_renamed( + name_old = found["fullname"], + name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), + ref_old = found["ref"], + ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), + mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup) + ) return(x[i]) } } - + # try fullname without start and without nchar limit of >= 6 ---- # like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, - haystack = data_to_check) + haystack = data_to_check + ) if (!is.na(found)) { return(found[1L]) } - + # MISCELLANEOUS ---- - + # look for old taxonomic names ---- found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, - column = NULL, # all columns - haystack = data.old_to_check) + column = NULL, # all columns + haystack = data.old_to_check + ) if (!all(is.na(found))) { # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: # mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning) @@ -1054,14 +1108,16 @@ exec_as.mo <- function(x, x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup) } pkg_env$mo_renamed_last_run <- found["fullname"] - was_renamed(name_old = found["fullname"], - name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), - ref_old = found["ref"], - ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), - mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) + was_renamed( + name_old = found["fullname"], + name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), + ref_old = found["ref"], + ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), + mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup) + ) return(x[i]) } - + # check for uncertain results ---- uncertain_fn <- function(a.x_backup, b.x_trimmed, @@ -1070,16 +1126,15 @@ exec_as.mo <- function(x, f.x_withspaces_end_only, g.x_backup_without_spp, uncertain.reference_data_to_use) { - if (uncertainty_level == 0) { # do not allow uncertainties return(NA_character_) } - + # UNCERTAINTY LEVEL 1 ---- if (uncertainty_level >= 1) { now_checks_for_uncertainty_level <- 1 - + # (1) look again for old taxonomic names, now for G. species ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (1) look again for old taxonomic names, now for G. species\n")) @@ -1088,9 +1143,10 @@ exec_as.mo <- function(x, message("Running '", d.x_withspaces_start_end, "' and '", e.x_withspaces_start_only, "'") } found <- lookup(fullname_lower %like_case% d.x_withspaces_start_end | - fullname_lower %like_case% e.x_withspaces_start_only, - column = NULL, # all columns - haystack = data.old_to_check) + fullname_lower %like_case% e.x_withspaces_start_only, + column = NULL, # all columns + haystack = data.old_to_check + ) if (!all(is.na(found)) & nchar(g.x_backup_without_spp) >= 6) { if (property == "ref") { # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: @@ -1100,20 +1156,25 @@ exec_as.mo <- function(x, } else { x <- lookup(fullname == found["fullname_new"], haystack = MO_lookup) } - was_renamed(name_old = found["fullname"], - name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), - ref_old = found["ref"], - ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), - mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) + was_renamed( + name_old = found["fullname"], + name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), + ref_old = found["ref"], + ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), + mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup) + ) pkg_env$mo_renamed_last_run <- found["fullname"] uncertainties <<- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, - input = a.x_backup, - result_mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)), - stringsAsFactors = FALSE) + format_uncertainty_as_df( + uncertainty_level = now_checks_for_uncertainty_level, + input = a.x_backup, + result_mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup) + ), + stringsAsFactors = FALSE + ) return(x) } - + # (2) Try with misspelled input ---- # just rerun with dyslexia_mode = TRUE will used the extensive regex part above if (isTRUE(debug)) { @@ -1131,17 +1192,18 @@ exec_as.mo <- function(x, if (!empty_result(found)) { found_result <- found uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE + ) found <- lookup(mo == found) return(found) } } - + # UNCERTAINTY LEVEL 2 ---- if (uncertainty_level >= 2) { now_checks_for_uncertainty_level <- 2 - + # (3) look for genus only, part of name ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (3) look for genus only, part of name\n")) @@ -1153,20 +1215,24 @@ exec_as.mo <- function(x, } # not when input is like Genustext, because then Neospora would lead to Actinokineospora found <- lookup(fullname_lower %like_case% paste(b.x_trimmed, "species"), - haystack = uncertain.reference_data_to_use) + haystack = uncertain.reference_data_to_use + ) if (!is.na(found)) { found_result <- found found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, - input = a.x_backup, - result_mo = found_result), - stringsAsFactors = FALSE) + format_uncertainty_as_df( + uncertainty_level = now_checks_for_uncertainty_level, + input = a.x_backup, + result_mo = found_result + ), + stringsAsFactors = FALSE + ) return(found) } } } - + # (4) strip values between brackets ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (4) strip values between brackets\n")) @@ -1185,12 +1251,13 @@ exec_as.mo <- function(x, if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) { found_result <- found uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE + ) found <- lookup(mo == found) return(found) } - + # (5) inverse input ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (5) inverse input\n")) @@ -1199,7 +1266,7 @@ exec_as.mo <- function(x, if (isTRUE(debug)) { message("Running '", a.x_backup_inversed, "'") } - + # first try without dyslexia mode found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_inversed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) if (empty_result(found)) { @@ -1209,23 +1276,26 @@ exec_as.mo <- function(x, if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) { found_result <- found uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE + ) found <- lookup(mo == found) return(found) } - + # (6) remove non-taxonomic prefix and suffix ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (6) remove non-taxonomic prefix and suffix\n")) } x_without_nontax <- gsub("(^[a-zA-Z]+[./-]+[a-zA-Z]+[^a-zA-Z]* )([a-zA-Z.]+ [a-zA-Z]+.*)", - "\\2", a.x_backup, perl = TRUE) + "\\2", a.x_backup, + perl = TRUE + ) x_without_nontax <- gsub("( *[(].*[)] *)[^a-zA-Z]*$", "", x_without_nontax, perl = TRUE) if (isTRUE(debug)) { message("Running '", x_without_nontax, "'") } - + # first try without dyslexia mode found <- suppressMessages(suppressWarnings(exec_as.mo(x_without_nontax, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = x_without_nontax))) if (empty_result(found)) { @@ -1238,12 +1308,13 @@ exec_as.mo <- function(x, uncertain_df$input <- a.x_backup found_result <- found uncertainties <<- rbind(uncertainties, - uncertain_df, - stringsAsFactors = FALSE) + uncertain_df, + stringsAsFactors = FALSE + ) found <- lookup(mo == found) return(found) } - + # (7) try to strip off half an element from end and check the remains ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (7) try to strip off half an element from end and check the remains\n")) @@ -1268,8 +1339,9 @@ exec_as.mo <- function(x, if (!empty_result(found)) { found_result <- found uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE + ) found <- lookup(mo == found) return(found) } @@ -1293,12 +1365,13 @@ exec_as.mo <- function(x, # then with dyslexia mode found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) } - + if (!empty_result(found)) { found_result <- found uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE + ) found <- lookup(mo == found) return(found) } @@ -1314,10 +1387,13 @@ exec_as.mo <- function(x, found_result <- found found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, - input = a.x_backup, - result_mo = found_result), - stringsAsFactors = FALSE) + format_uncertainty_as_df( + uncertainty_level = now_checks_for_uncertainty_level, + input = a.x_backup, + result_mo = found_result + ), + stringsAsFactors = FALSE + ) return(found) } if (b.x_trimmed %like_case% "(fungus|fungi)" & b.x_trimmed %unlike_case% "fungiphrya") { @@ -1325,10 +1401,13 @@ exec_as.mo <- function(x, found_result <- found found <- lookup(mo == found) uncertainties <<- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, - input = a.x_backup, - result_mo = found_result), - stringsAsFactors = FALSE) + format_uncertainty_as_df( + uncertainty_level = now_checks_for_uncertainty_level, + input = a.x_backup, + result_mo = found_result + ), + stringsAsFactors = FALSE + ) return(found) } # (10) try to strip off one element from start and check the remains (only allow >= 2-part name outcome) ---- @@ -1353,8 +1432,9 @@ exec_as.mo <- function(x, # uncertainty level 2 only if searched part contains a space (otherwise it will be found with lvl 3) if (x_strip_collapsed %like_case% " ") { uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE + ) found <- lookup(mo == found) return(found) } @@ -1362,11 +1442,11 @@ exec_as.mo <- function(x, } } } - + # UNCERTAINTY LEVEL 3 ---- if (uncertainty_level >= 3) { now_checks_for_uncertainty_level <- 3 - + # (11) try to strip off one element from start and check the remains (any text size) ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (11) try to strip off one element from start and check the remains (any text size)\n")) @@ -1387,8 +1467,9 @@ exec_as.mo <- function(x, if (!empty_result(found)) { found_result <- found uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE + ) found <- lookup(mo == found) return(found) } @@ -1414,14 +1495,15 @@ exec_as.mo <- function(x, if (!empty_result(found)) { found_result <- found uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE + ) found <- lookup(mo == found) return(found) } } } - + # (13) part of a name (very unlikely match) ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (13) part of a name (very unlikely match)\n")) @@ -1434,65 +1516,70 @@ exec_as.mo <- function(x, if (!is.na(found)) { found_result <- lookup(mo == found) uncertainties <<- rbind(uncertainties, - attr(found, which = "uncertainties", exact = TRUE), - stringsAsFactors = FALSE) + attr(found, which = "uncertainties", exact = TRUE), + stringsAsFactors = FALSE + ) found <- lookup(mo == found) return(found) } } } - - + + # didn't found in uncertain results too return(NA_character_) } - + # uncertain results - x[i] <- uncertain_fn(a.x_backup = a.x_backup, - b.x_trimmed = b.x_trimmed, - d.x_withspaces_start_end = d.x_withspaces_start_end, - e.x_withspaces_start_only = e.x_withspaces_start_only, - f.x_withspaces_end_only = f.x_withspaces_end_only, - g.x_backup_without_spp = g.x_backup_without_spp, - uncertain.reference_data_to_use = MO_lookup) + x[i] <- uncertain_fn( + a.x_backup = a.x_backup, + b.x_trimmed = b.x_trimmed, + d.x_withspaces_start_end = d.x_withspaces_start_end, + e.x_withspaces_start_only = e.x_withspaces_start_only, + f.x_withspaces_end_only = f.x_withspaces_end_only, + g.x_backup_without_spp = g.x_backup_without_spp, + uncertain.reference_data_to_use = MO_lookup + ) if (!empty_result(x[i])) { return(x[i]) } - + # didn't found any return(NA_character_) } - + # CHECK ALL IN ONE GO ---- - x[i] <- check_per_prevalence(data_to_check = MO_lookup, - data.old_to_check = MO.old_lookup, - a.x_backup = x_backup[i], - b.x_trimmed = x_trimmed[i], - c.x_trimmed_without_group = x_trimmed_without_group[i], - d.x_withspaces_start_end = x_withspaces_start_end[i], - e.x_withspaces_start_only = x_withspaces_start_only[i], - f.x_withspaces_end_only = x_withspaces_end_only[i], - g.x_backup_without_spp = x_backup_without_spp[i]) + x[i] <- check_per_prevalence( + data_to_check = MO_lookup, + data.old_to_check = MO.old_lookup, + a.x_backup = x_backup[i], + b.x_trimmed = x_trimmed[i], + c.x_trimmed_without_group = x_trimmed_without_group[i], + d.x_withspaces_start_end = x_withspaces_start_end[i], + e.x_withspaces_start_only = x_withspaces_start_only[i], + f.x_withspaces_end_only = x_withspaces_end_only[i], + g.x_backup_without_spp = x_backup_without_spp[i] + ) if (!empty_result(x[i])) { next } - + # no results found: make them UNKNOWN ---- x[i] <- lookup(mo == "UNKNOWN", uncertainty = -1) if (initial_search == TRUE) { failures <- c(failures, x_backup_untouched[i]) } } - + if (initial_search == TRUE) { close(progress) } - + if (isTRUE(debug) && initial_search == TRUE) { cat("Ended search", time_track(), "\n") } - - + + # handling failures ---- failures <- failures[!failures %in% c(NA, NULL, NaN)] if (length(failures) > 0 & initial_search == TRUE) { @@ -1505,20 +1592,25 @@ exec_as.mo <- function(x, x_input_clean <- trimws2(x_input) total_failures <- length(x_input_clean[as.character(x_input_clean) %in% as.character(failures) & !x_input %in% c(NA, NULL, NaN)]) total_n <- length(x_input[!x_input %in% c(NA, NULL, NaN)]) - msg <- paste0(nr2char(pm_n_distinct(failures)), " unique ", plural[1], - " (covering ", percentage(total_failures / total_n), - ") could not be coerced and ", plural[3], " considered 'unknown'") + msg <- paste0( + nr2char(pm_n_distinct(failures)), " unique ", plural[1], + " (covering ", percentage(total_failures / total_n), + ") could not be coerced and ", plural[3], " considered 'unknown'" + ) if (pm_n_distinct(failures) <= 10) { msg <- paste0(msg, ": ", vector_and(failures, quotes = TRUE)) } - msg <- paste0(msg, - ".\nUse `mo_failures()` to review ", plural[2], ". Edit the `allow_uncertain` argument if needed (see ?as.mo).\n", - "You can also use your own reference data with set_mo_source() or directly, e.g.:\n", - ' as.mo("mycode", reference_df = data.frame(own = "mycode", mo = "', MO_lookup$mo[match("Escherichia coli", MO_lookup$fullname)], '"))\n', - ' mo_name("mycode", reference_df = data.frame(own = "mycode", mo = "', MO_lookup$mo[match("Escherichia coli", MO_lookup$fullname)], '"))\n') + msg <- paste0( + msg, + ".\nUse `mo_failures()` to review ", plural[2], ". Edit the `allow_uncertain` argument if needed (see ?as.mo).\n", + "You can also use your own reference data with set_mo_source() or directly, e.g.:\n", + ' as.mo("mycode", reference_df = data.frame(own = "mycode", mo = "', MO_lookup$mo[match("Escherichia coli", MO_lookup$fullname)], '"))\n', + ' mo_name("mycode", reference_df = data.frame(own = "mycode", mo = "', MO_lookup$mo[match("Escherichia coli", MO_lookup$fullname)], '"))\n' + ) warning_(paste0("\nin `as.mo()`: ", msg), - add_fn = font_red, - immediate = TRUE) # thus will always be shown, even if >= warnings + add_fn = font_red, + immediate = TRUE + ) # thus will always be shown, even if >= warnings } # handling uncertainties ---- if (NROW(uncertainties) > 0 & initial_search == TRUE) { @@ -1530,21 +1622,26 @@ exec_as.mo <- function(x, plural <- c("s", "these uncertainties") } if (length(uncertainties$input) <= 3) { - examples <- vector_and(paste0('"', uncertainties$input, - '" (assuming ', font_italic(uncertainties$fullname, collapse = NULL), ")"), - quotes = FALSE) + examples <- vector_and(paste0( + '"', uncertainties$input, + '" (assuming ', font_italic(uncertainties$fullname, collapse = NULL), ")" + ), + quotes = FALSE + ) } else { examples <- paste0(nr2char(length(uncertainties$input)), " microorganism", plural[1]) } - msg <- paste0("Function `as.mo()` is uncertain about ", examples, - ". Run `mo_uncertainties()` to review ", plural[2], ".") + msg <- paste0( + "Function `as.mo()` is uncertain about ", examples, + ". Run `mo_uncertainties()` to review ", plural[2], "." + ) message_(msg) } } x[already_known] <- x_known } } - + # Becker ---- if (Becker == TRUE | Becker == "all") { # warn when species found that are not in: @@ -1552,33 +1649,35 @@ exec_as.mo <- function(x, # - Becker et al. 2019, PMID 30872103 # - Becker et al. 2020, PMID 32056452 post_Becker <- c("caledonicus", "canis", "durrellii", "lloydii", "roterodami") - + # nolint start # comment below code if all staphylococcal species are categorised as CoNS/CoPS if (any(x %in% MO_lookup[which(MO_lookup$species %in% post_Becker), property, drop = TRUE])) { if (message_not_thrown_before("as.mo", "becker")) { warning_("in `as.mo()`: Becker ", font_italic("et al."), " (2014, 2019, 2020) does not contain these species named after their publication: ", - font_italic(paste("S.", - sort(mo_species(unique(x[x %in% MO_lookup[which(MO_lookup$species %in% post_Becker), property, drop = TRUE]]))), - collapse = ", ")), - ". Categorisation to CoNS/CoPS was taken from the original scientific publication(s).", - immediate = TRUE) + font_italic(paste("S.", + sort(mo_species(unique(x[x %in% MO_lookup[which(MO_lookup$species %in% post_Becker), property, drop = TRUE]]))), + collapse = ", " + )), + ". Categorisation to CoNS/CoPS was taken from the original scientific publication(s).", + immediate = TRUE + ) } } # nolint end - + # 'MO_CONS' and 'MO_COPS' are vectors created in R/zzz.R CoNS <- MO_lookup[which(MO_lookup$mo %in% MO_CONS), property, drop = TRUE] x[x %in% CoNS] <- lookup(mo == "B_STPHY_CONS", uncertainty = -1) - + CoPS <- MO_lookup[which(MO_lookup$mo %in% MO_COPS), property, drop = TRUE] x[x %in% CoPS] <- lookup(mo == "B_STPHY_COPS", uncertainty = -1) - + if (Becker == "all") { x[x %in% lookup(fullname %like_case% "^Staphylococcus aureus", n = Inf)] <- lookup(mo == "B_STPHY_COPS", uncertainty = -1) } } - + # Lancefield ---- if (Lancefield == TRUE | Lancefield == "all") { # group A - S. pyogenes @@ -1587,8 +1686,9 @@ exec_as.mo <- function(x, x[x %in% lookup(genus == "Streptococcus" & species == "agalactiae", n = Inf)] <- lookup(fullname == "Streptococcus group B", uncertainty = -1) # group C x[x %in% lookup(genus == "Streptococcus" & - species %in% c("equisimilis", "equi", "zooepidemicus", "dysgalactiae"), - n = Inf)] <- lookup(fullname == "Streptococcus group C", uncertainty = -1) + species %in% c("equisimilis", "equi", "zooepidemicus", "dysgalactiae"), + n = Inf + )] <- lookup(fullname == "Streptococcus group C", uncertainty = -1) if (Lancefield == "all") { # all Enterococci x[x %in% lookup(genus == "Enterococcus", n = Inf)] <- lookup(fullname == "Streptococcus group D", uncertainty = -1) @@ -1600,37 +1700,40 @@ exec_as.mo <- function(x, # group K - S. salivarius x[x %in% lookup(genus == "Streptococcus" & species == "salivarius", n = Inf)] <- lookup(fullname == "Streptococcus group K", uncertainty = -1) } - + # Wrap up ---------------------------------------------------------------- - + # comply to x, which is also unique and without empty values - x_input_unique_nonempty <- unique(x_input[!is.na(x_input) - & !is.null(x_input) - & !identical(x_input, "") - & !identical(x_input, "xxx")]) - + x_input_unique_nonempty <- unique(x_input[!is.na(x_input) & + !is.null(x_input) & + !identical(x_input, "") & + !identical(x_input, "xxx")]) + x <- x[match(x_input, x_input_unique_nonempty)] if (property == "mo") { x <- set_clean_class(x, new_class = c("mo", "character")) } - + # keep track of time end_time <- Sys.time() - + if (length(mo_renamed()) > 0) { print(mo_renamed()) } - + if (initial_search == FALSE) { # we got here from uncertain_fn(). if (NROW(uncertainties) == 0) { # the stripped/transformed version of x_backup is apparently a full hit, like with: as.mo("Escherichia (hello there) coli") uncertainties <- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = actual_uncertainty, - input = actual_input, - result_mo = x, - candidates = ""), - stringsAsFactors = FALSE) + format_uncertainty_as_df( + uncertainty_level = actual_uncertainty, + input = actual_input, + result_mo = x, + candidates = "" + ), + stringsAsFactors = FALSE + ) } # this will save the uncertain items as attribute, so they can be bound to `uncertainties` in the uncertain_fn() function x <- structure(x, uncertainties = uncertainties) @@ -1640,21 +1743,27 @@ exec_as.mo <- function(x, if (delta_time >= 30) { message_("Using `as.mo()` took ", round(delta_time), " seconds, which is a long time. Some suggestions to improve speed include:") message_(word_wrap("- Try to use as many valid taxonomic names as possible for your input.", - extra_indent = 2), - as_note = FALSE) + extra_indent = 2 + ), + as_note = FALSE + ) message_(word_wrap("- Save the output and use it as input for future calculations, e.g. create a new variable to your data using `as.mo()`. All functions in this package that rely on microorganism codes will automatically use that new column where possible. All `mo_*()` functions also do not require you to set their `x` argument as long as you have a column of class .", - extra_indent = 2), - as_note = FALSE) + extra_indent = 2 + ), + as_note = FALSE + ) message_(word_wrap("- Use `set_mo_source()` to continually transform your organisation codes to microorganisms codes used by this package, see `?mo_source`.", - extra_indent = 2), - as_note = FALSE) + extra_indent = 2 + ), + as_note = FALSE + ) } } - + if (isTRUE(debug) && initial_search == TRUE) { cat("Finished function", time_track(), "\n") } - + x } @@ -1663,17 +1772,20 @@ empty_result <- function(x) { } was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "") { - newly_set <- data.frame(old_name = name_old, - old_ref = ref_old, - new_name = name_new, - new_ref = ref_new, - mo = mo, - stringsAsFactors = FALSE) + newly_set <- data.frame( + old_name = name_old, + old_ref = ref_old, + new_name = name_new, + new_ref = ref_new, + mo = mo, + stringsAsFactors = FALSE + ) already_set <- pkg_env$mo_renamed if (!is.null(already_set)) { - pkg_env$mo_renamed = rbind(already_set, - newly_set, - stringsAsFactors = FALSE) + pkg_env$mo_renamed <- rbind(already_set, + newly_set, + stringsAsFactors = FALSE + ) } else { pkg_env$mo_renamed <- newly_set } @@ -1691,14 +1803,16 @@ format_uncertainty_as_df <- function(uncertainty_level, fullname <- MO_lookup[match(result_mo, MO_lookup$mo), "fullname", drop = TRUE][1] renamed_to <- NA_character_ } - data.frame(uncertainty = uncertainty_level, - input = input, - fullname = fullname, - renamed_to = renamed_to, - mo = result_mo, - # save max 26 entries: the one to be chosen and 25 more - candidates = if (length(candidates) > 1) paste(candidates[c(2:min(26, length(candidates)))], collapse = ", ") else "", - stringsAsFactors = FALSE) + data.frame( + uncertainty = uncertainty_level, + input = input, + fullname = fullname, + renamed_to = renamed_to, + mo = result_mo, + # save max 26 entries: the one to be chosen and 25 more + candidates = if (length(candidates) > 1) paste(candidates[c(2:min(26, length(candidates)))], collapse = ", ") else "", + stringsAsFactors = FALSE + ) } # will be exported using s3_register() in R/zzz.R @@ -1708,43 +1822,49 @@ pillar_shaft.mo <- function(x, ...) { out[!is.na(x)] <- gsub("^([A-Z]+_)(.*)", paste0(font_subtle("\\1"), "\\2"), out[!is.na(x)], perl = TRUE) # and grey out every _ out[!is.na(x)] <- gsub("_", font_subtle("_"), out[!is.na(x)]) - + # markup NA and UNKNOWN out[is.na(x)] <- font_na(" NA") out[x == "UNKNOWN"] <- font_na(" UNKNOWN") - + df <- tryCatch(get_current_data(arg_name = "x", call = 0), - error = function(e) NULL) + error = function(e) NULL + ) if (!is.null(df)) { mo_cols <- vapply(FUN.VALUE = logical(1), df, is.mo) } else { mo_cols <- NULL } - - if (!all(x[!is.na(x)] %in% MO_lookup$mo) | - (!is.null(df) && !all(unlist(df[, which(mo_cols), drop = FALSE]) %in% MO_lookup$mo))) { + + if (!all(x[!is.na(x)] %in% MO_lookup$mo) | + (!is.null(df) && !all(unlist(df[, which(mo_cols), drop = FALSE]) %in% MO_lookup$mo))) { # markup old mo codes - out[!x %in% MO_lookup$mo] <- font_italic(font_na(x[!x %in% MO_lookup$mo], - collapse = NULL), - collapse = NULL) + out[!x %in% MO_lookup$mo] <- font_italic(font_na(x[!x %in% MO_lookup$mo], + collapse = NULL + ), + collapse = NULL + ) # throw a warning with the affected column name(s) if (!is.null(mo_cols)) { col <- paste0("Column ", vector_or(colnames(df)[mo_cols], quotes = TRUE, sort = FALSE)) } else { col <- "The data" } - warning_(col, " contains old MO codes (from a previous AMR package version). ", - "Please update your MO codes with `as.mo()`.") + warning_( + col, " contains old MO codes (from a previous AMR package version). ", + "Please update your MO codes with `as.mo()`." + ) } - + # make it always fit exactly max_char <- max(nchar(x)) if (is.na(max_char)) { max_char <- 7 } create_pillar_column(out, - align = "left", - width = max_char + ifelse(any(x %in% c(NA, "UNKNOWN")), 2, 0)) + align = "left", + width = max_char + ifelse(any(x %in% c(NA, "UNKNOWN")), 2, 0) + ) } # will be exported using s3_register() in R/zzz.R @@ -1766,32 +1886,42 @@ freq.mo <- function(x, ...) { .add_header = list( `Gram-negative` = paste0( format(sum(grams == "Gram-negative", na.rm = TRUE), - big.mark = ",", - decimal.mark = "."), + big.mark = ",", + decimal.mark = "." + ), " (", percentage(sum(grams == "Gram-negative", na.rm = TRUE) / length(grams), - digits = digits), - ")"), + digits = digits + ), + ")" + ), `Gram-positive` = paste0( format(sum(grams == "Gram-positive", na.rm = TRUE), - big.mark = ",", - decimal.mark = "."), + big.mark = ",", + decimal.mark = "." + ), " (", percentage(sum(grams == "Gram-positive", na.rm = TRUE) / length(grams), - digits = digits), - ")"), + digits = digits + ), + ")" + ), `Nr. of genera` = pm_n_distinct(mo_genus(x_noNA, language = NULL)), - `Nr. of species` = pm_n_distinct(paste(mo_genus(x_noNA, language = NULL), - mo_species(x_noNA, language = NULL))))) + `Nr. of species` = pm_n_distinct(paste( + mo_genus(x_noNA, language = NULL), + mo_species(x_noNA, language = NULL) + )) + ) + ) } # will be exported using s3_register() in R/zzz.R get_skimmers.mo <- function(column) { skimr::sfl( skim_type = "mo", - unique_total = ~length(unique(stats::na.omit(.))), - gram_negative = ~sum(mo_is_gram_negative(.), na.rm = TRUE), - gram_positive = ~sum(mo_is_gram_positive(.), na.rm = TRUE), - top_genus = ~names(sort(-table(mo_genus(stats::na.omit(.), language = NULL))))[1L], - top_species = ~names(sort(-table(mo_name(stats::na.omit(.), language = NULL))))[1L] + unique_total = ~ length(unique(stats::na.omit(.))), + gram_negative = ~ sum(mo_is_gram_negative(.), na.rm = TRUE), + gram_positive = ~ sum(mo_is_gram_positive(.), na.rm = TRUE), + top_genus = ~ names(sort(-table(mo_genus(stats::na.omit(.), language = NULL))))[1L], + top_species = ~ names(sort(-table(mo_name(stats::na.omit(.), language = NULL))))[1L] ) } @@ -1807,8 +1937,10 @@ print.mo <- function(x, print.shortnames = FALSE, ...) { x <- as.character(x) names(x) <- x_names if (!all(x[!is.na(x)] %in% MO_lookup$mo)) { - warning_("Some MO codes are from a previous AMR package version. ", - "Please update the MO codes with `as.mo()`.") + warning_( + "Some MO codes are from a previous AMR package version. ", + "Please update the MO codes with `as.mo()`." + ) } print.default(x, quote = FALSE) } @@ -1821,12 +1953,14 @@ summary.mo <- function(object, ...) { x <- as.mo(object) # force again, could be mo from older pkg version top <- as.data.frame(table(x), responseName = "n", stringsAsFactors = FALSE) top_3 <- top[order(-top$n), 1, drop = TRUE][1:3] - value <- c("Class" = "mo", - "" = length(x[is.na(x)]), - "Unique" = pm_n_distinct(x[!is.na(x)]), - "#1" = top_3[1], - "#2" = top_3[2], - "#3" = top_3[3]) + value <- c( + "Class" = "mo", + "" = length(x[is.na(x)]), + "Unique" = pm_n_distinct(x[!is.na(x)]), + "#1" = top_3[1], + "#2" = top_3[2], + "#3" = top_3[3] + ) class(value) <- c("summaryDefault", "table") value } @@ -1836,8 +1970,10 @@ summary.mo <- function(object, ...) { #' @noRd as.data.frame.mo <- function(x, ...) { if (!all(x[!is.na(x)] %in% MO_lookup$mo)) { - warning_("The data contains old MO codes (from a previous AMR package version). ", - "Please update your MO codes with `as.mo()`.") + warning_( + "The data contains old MO codes (from a previous AMR package version). ", + "Please update your MO codes with `as.mo()`." + ) } nm <- deparse1(substitute(x)) if (!"nm" %in% names(list(...))) { @@ -1921,9 +2057,11 @@ mo_uncertainties <- function() { if (is.null(pkg_env$mo_uncertainties)) { return(NULL) } - set_clean_class(as.data.frame(pkg_env$mo_uncertainties, - stringsAsFactors = FALSE), - new_class = c("mo_uncertainties", "data.frame")) + set_clean_class(as.data.frame(pkg_env$mo_uncertainties, + stringsAsFactors = FALSE + ), + new_class = c("mo_uncertainties", "data.frame") + ) } #' @method print mo_uncertainties @@ -1934,51 +2072,70 @@ print.mo_uncertainties <- function(x, ...) { return(NULL) } cat(word_wrap("Matching scores", ifelse(has_colour(), " (in blue)", ""), " are based on pathogenicity in humans and the resemblance between the input and the full taxonomic name. See `?mo_matching_score`.\n\n", add_fn = font_blue)) - + txt <- "" for (i in seq_len(nrow(x))) { if (x[i, ]$candidates != "") { candidates <- unlist(strsplit(x[i, ]$candidates, ", ", fixed = TRUE)) scores <- mo_matching_score(x = x[i, ]$input, n = candidates) n_candidates <- length(candidates) - + candidates_formatted <- font_italic(candidates, collapse = NULL) scores_formatted <- trimws(formatC(round(scores, 3), format = "f", digits = 3)) - + # sort on descending scores candidates_formatted <- candidates_formatted[order(1 - scores)] scores_formatted <- scores_formatted[order(1 - scores)] - - candidates <- word_wrap(paste0("Also matched: ", - vector_and(paste0(candidates_formatted, - font_blue(paste0(" (", scores_formatted, ")"), collapse = NULL)), - quotes = FALSE, sort = FALSE), - ifelse(n_candidates > 25, - paste0(" [showing first 25 of ", n_candidates, "]"), - "")), - extra_indent = nchar("Also matched: ")) + + candidates <- word_wrap(paste0( + "Also matched: ", + vector_and(paste0( + candidates_formatted, + font_blue(paste0(" (", scores_formatted, ")"), collapse = NULL) + ), + quotes = FALSE, sort = FALSE + ), + ifelse(n_candidates > 25, + paste0(" [showing first 25 of ", n_candidates, "]"), + "" + ) + ), + extra_indent = nchar("Also matched: ") + ) } else { candidates <- "" } - score <- trimws(formatC(round(mo_matching_score(x = x[i, ]$input, - n = x[i, ]$fullname), - 3), - format = "f", digits = 3)) + score <- trimws(formatC(round( + mo_matching_score( + x = x[i, ]$input, + n = x[i, ]$fullname + ), + 3 + ), + format = "f", digits = 3 + )) txt <- paste(txt, - paste0( - strwrap( - paste0('"', x[i, ]$input, '"', - " -> ", - paste0(font_bold(font_italic(x[i, ]$fullname)), - ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""), - " (", x[i, ]$mo, - ", ", font_blue(score), - ") ")), - width = 0.98 * getOption("width"), - exdent = nchar(x[i, ]$input) + 6), - collapse = "\n"), - candidates, - sep = "\n") + paste0( + strwrap( + paste0( + '"', x[i, ]$input, '"', + " -> ", + paste0( + font_bold(font_italic(x[i, ]$fullname)), + ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""), + " (", x[i, ]$mo, + ", ", font_blue(score), + ") " + ) + ), + width = 0.98 * getOption("width"), + exdent = nchar(x[i, ]$input) + 6 + ), + collapse = "\n" + ), + candidates, + sep = "\n" + ) txt <- paste0(gsub("\n\n", "\n", txt), "\n\n") } cat(txt) @@ -1994,8 +2151,10 @@ mo_renamed <- function() { items <- pm_distinct(items, old_name, .keep_all = TRUE) } set_clean_class(as.data.frame(items, - stringsAsFactors = FALSE), - new_class = c("mo_renamed", "data.frame")) + stringsAsFactors = FALSE + ), + new_class = c("mo_renamed", "data.frame") + ) } #' @method print mo_renamed @@ -2006,26 +2165,33 @@ print.mo_renamed <- function(x, ...) { return(invisible()) } for (i in seq_len(nrow(x))) { - message_(font_italic(x$old_name[i]), - ifelse(x$old_ref[i] %in% c("", NA), - "", - paste0(" (", gsub("et al.", font_italic("et al."), x$old_ref[i]), ")")), - " was renamed ", - ifelse(!x$new_ref[i] %in% c("", NA) && as.integer(gsub("[^0-9]", "", x$new_ref[i])) < as.integer(gsub("[^0-9]", "", x$old_ref[i])), - font_bold("back to "), - ""), - font_italic(x$new_name[i]), - ifelse(x$new_ref[i] %in% c("", NA), - "", - paste0(" (", gsub("et al.", font_italic("et al."), x$new_ref[i]), ")")), - " [", x$mo[i], "]") + message_( + font_italic(x$old_name[i]), + ifelse(x$old_ref[i] %in% c("", NA), + "", + paste0(" (", gsub("et al.", font_italic("et al."), x$old_ref[i]), ")") + ), + " was renamed ", + ifelse(!x$new_ref[i] %in% c("", NA) && as.integer(gsub("[^0-9]", "", x$new_ref[i])) < as.integer(gsub("[^0-9]", "", x$old_ref[i])), + font_bold("back to "), + "" + ), + font_italic(x$new_name[i]), + ifelse(x$new_ref[i] %in% c("", NA), + "", + paste0(" (", gsub("et al.", font_italic("et al."), x$new_ref[i]), ")") + ), + " [", x$mo[i], "]" + ) } } nr2char <- function(x) { if (x %in% c(1:10)) { - v <- c("one" = 1, "two" = 2, "three" = 3, "four" = 4, "five" = 5, - "six" = 6, "seven" = 7, "eight" = 8, "nine" = 9, "ten" = 10) + v <- c( + "one" = 1, "two" = 2, "three" = 3, "four" = 4, "five" = 5, + "six" = 6, "seven" = 7, "eight" = 8, "nine" = 9, "ten" = 10 + ) names(v[x]) } else { x @@ -2045,15 +2211,19 @@ translate_allow_uncertain <- function(allow_uncertain) { allow_uncertain[tolower(allow_uncertain) == "all"] <- 3 allow_uncertain <- as.integer(allow_uncertain) stop_ifnot(allow_uncertain %in% c(0:3), - '`allow_uncertain` must be a number between 0 (or "none") and 3 (or "all"), or TRUE (= 2) or FALSE (= 0)', call = FALSE) + '`allow_uncertain` must be a number between 0 (or "none") and 3 (or "all"), or TRUE (= 2) or FALSE (= 0)', + call = FALSE + ) } allow_uncertain } get_mo_failures_uncertainties_renamed <- function() { - remember <- list(failures = pkg_env$mo_failures, - uncertainties = pkg_env$mo_uncertainties, - renamed = pkg_env$mo_renamed) + remember <- list( + failures = pkg_env$mo_failures, + uncertainties = pkg_env$mo_uncertainties, + renamed = pkg_env$mo_renamed + ) # empty them, otherwise mo_shortname("Chlamydophila psittaci") will give 3 notes pkg_env$mo_failures <- NULL pkg_env$mo_uncertainties <- NULL @@ -2072,28 +2242,31 @@ trimws2 <- function(x) { } parse_and_convert <- function(x) { - tryCatch({ - if (!is.null(dim(x))) { - if (NCOL(x) > 2) { - stop("a maximum of two columns is allowed", call. = FALSE) - } else if (NCOL(x) == 2) { - # support Tidyverse selection like: df %>% select(colA, colB) - # paste these columns together - x <- as.data.frame(x, stringsAsFactors = FALSE) - colnames(x) <- c("A", "B") - x <- paste(x$A, x$B) - } else { - # support Tidyverse selection like: df %>% select(colA) - x <- as.data.frame(x, stringsAsFactors = FALSE)[[1]] + tryCatch( + { + if (!is.null(dim(x))) { + if (NCOL(x) > 2) { + stop("a maximum of two columns is allowed", call. = FALSE) + } else if (NCOL(x) == 2) { + # support Tidyverse selection like: df %>% select(colA, colB) + # paste these columns together + x <- as.data.frame(x, stringsAsFactors = FALSE) + colnames(x) <- c("A", "B") + x <- paste(x$A, x$B) + } else { + # support Tidyverse selection like: df %>% select(colA) + x <- as.data.frame(x, stringsAsFactors = FALSE)[[1]] + } } - } - parsed <- iconv(as.character(x), to = "UTF-8") - parsed[is.na(parsed) & !is.na(x)] <- iconv(x[is.na(parsed) & !is.na(x)], from = "Latin1", to = "ASCII//TRANSLIT") - parsed <- gsub('"', "", parsed, fixed = TRUE) - parsed <- gsub(" +", " ", parsed, perl = TRUE) - parsed <- trimws(parsed) - parsed - }, error = function(e) stop(e$message, call. = FALSE)) # this will also be thrown when running `as.mo(no_existing_object)` + parsed <- iconv(as.character(x), to = "UTF-8") + parsed[is.na(parsed) & !is.na(x)] <- iconv(x[is.na(parsed) & !is.na(x)], from = "Latin1", to = "ASCII//TRANSLIT") + parsed <- gsub('"', "", parsed, fixed = TRUE) + parsed <- gsub(" +", " ", parsed, perl = TRUE) + parsed <- trimws(parsed) + parsed + }, + error = function(e) stop(e$message, call. = FALSE) + ) # this will also be thrown when running `as.mo(no_existing_object)` parsed } @@ -2107,27 +2280,29 @@ replace_old_mo_codes <- function(x, property) { affected_unique <- unique(affected) all_direct_matches <- TRUE # find their new codes, once per code - solved_unique <- unlist(lapply(strsplit(affected_unique, ""), - function(m) { - kingdom <- paste0("^", m[1]) - name <- m[3:length(m)] - name[name == "_"] <- " " - name <- tolower(paste0(name, ".*", collapse = "")) - name <- gsub(" .*", " ", name, fixed = TRUE) - name <- paste0("^", name) - results <- MO_lookup$mo[MO_lookup$kingdom %like_case% kingdom & - MO_lookup$fullname_lower %like_case% name] - if (length(results) > 1) { - all_direct_matches <<- FALSE - } else if (length(results) == 0) { - # not found, so now search in old taxonomic names - results <- MO.old_lookup$fullname_new[MO.old_lookup$fullname_lower %like% name] - if (length(results) > 0) { - results <- MO_lookup$mo[match(results, MO_lookup$fullname)] - } - } - results[1L] - }), use.names = FALSE) + solved_unique <- unlist(lapply( + strsplit(affected_unique, ""), + function(m) { + kingdom <- paste0("^", m[1]) + name <- m[3:length(m)] + name[name == "_"] <- " " + name <- tolower(paste0(name, ".*", collapse = "")) + name <- gsub(" .*", " ", name, fixed = TRUE) + name <- paste0("^", name) + results <- MO_lookup$mo[MO_lookup$kingdom %like_case% kingdom & + MO_lookup$fullname_lower %like_case% name] + if (length(results) > 1) { + all_direct_matches <<- FALSE + } else if (length(results) == 0) { + # not found, so now search in old taxonomic names + results <- MO.old_lookup$fullname_new[MO.old_lookup$fullname_lower %like% name] + if (length(results) > 0) { + results <- MO_lookup$mo[match(results, MO_lookup$fullname)] + } + } + results[1L] + } + ), use.names = FALSE) solved <- solved_unique[match(affected, affected_unique)] # assign on places where a match was found x[ind] <- solved @@ -2141,22 +2316,27 @@ replace_old_mo_codes <- function(x, property) { n_unique <- "" } if (property != "mo") { - warning_("in `mo_", property, "()`: the input contained ", n_matched, - " old MO code", ifelse(n_matched == 1, "", "s"), - " (", n_unique, "from a previous AMR package version). ", - "Please update your MO codes with `as.mo()` to increase speed.") + warning_( + "in `mo_", property, "()`: the input contained ", n_matched, + " old MO code", ifelse(n_matched == 1, "", "s"), + " (", n_unique, "from a previous AMR package version). ", + "Please update your MO codes with `as.mo()` to increase speed." + ) } else { - warning_("in `as.mo()`: the input contained ", n_matched, - " old MO code", ifelse(n_matched == 1, "", "s"), - " (", n_unique, "from a previous AMR package version). ", - n_solved, " old MO code", ifelse(n_solved == 1, "", "s"), - ifelse(n_solved == 1, " was", " were"), - ifelse(all_direct_matches, " updated ", font_bold(" guessed ")), - "to ", ifelse(n_solved == 1, "a ", ""), - "currently used MO code", ifelse(n_solved == 1, "", "s"), - ifelse(n_unsolved > 0, - paste0(" and ", n_unsolved, " old MO code", ifelse(n_unsolved == 1, "", "s"), " could not be updated."), - ".")) + warning_( + "in `as.mo()`: the input contained ", n_matched, + " old MO code", ifelse(n_matched == 1, "", "s"), + " (", n_unique, "from a previous AMR package version). ", + n_solved, " old MO code", ifelse(n_solved == 1, "", "s"), + ifelse(n_solved == 1, " was", " were"), + ifelse(all_direct_matches, " updated ", font_bold(" guessed ")), + "to ", ifelse(n_solved == 1, "a ", ""), + "currently used MO code", ifelse(n_solved == 1, "", "s"), + ifelse(n_unsolved > 0, + paste0(" and ", n_unsolved, " old MO code", ifelse(n_unsolved == 1, "", "s"), " could not be updated."), + "." + ) + ) } } x @@ -2166,8 +2346,10 @@ replace_ignore_pattern <- function(x, ignore_pattern) { if (!is.null(ignore_pattern) && !identical(trimws2(ignore_pattern), "")) { ignore_cases <- x %like% ignore_pattern if (sum(ignore_cases) > 0) { - message_("The following input was ignored by `ignore_pattern = \"", ignore_pattern, "\"`: ", - vector_and(x[ignore_cases], quotes = TRUE)) + message_( + "The following input was ignored by `ignore_pattern = \"", ignore_pattern, "\"`: ", + vector_and(x[ignore_cases], quotes = TRUE) + ) x[ignore_cases] <- NA_character_ } } @@ -2178,19 +2360,19 @@ repair_reference_df <- function(reference_df) { # has valid own reference_df reference_df <- reference_df %pm>% pm_filter(!is.na(mo)) - + # keep only first two columns, second must be mo if (colnames(reference_df)[1] == "mo") { reference_df <- reference_df %pm>% pm_select(2, "mo") } else { reference_df <- reference_df %pm>% pm_select(1, "mo") } - + # remove factors, just keep characters colnames(reference_df)[1] <- "x" reference_df[, "x"] <- as.character(reference_df[, "x", drop = TRUE]) reference_df[, "mo"] <- as.character(reference_df[, "mo", drop = TRUE]) - + # some MO codes might be old reference_df[, "mo"] <- as.mo(reference_df[, "mo", drop = TRUE]) reference_df diff --git a/R/mo_matching_score.R b/R/mo_matching_score.R index 00bd5b6f2..4ecc6ef28 100755 --- a/R/mo_matching_score.R +++ b/R/mo_matching_score.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,57 +24,63 @@ # ==================================================================== # #' Calculate the Matching Score for Microorganisms -#' -#' This algorithm is used by [as.mo()] and all the [`mo_*`][mo_property()] functions to determine the most probable match of taxonomic records based on user input. +#' +#' This algorithm is used by [as.mo()] and all the [`mo_*`][mo_property()] functions to determine the most probable match of taxonomic records based on user input. #' @author Dr Matthijs Berends #' @param x Any user input value(s) #' @param n A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms] #' @section Matching Score for Microorganisms: #' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as: -#' +#' #' \ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}} -#' +#' #' where: -#' +#' #' * \ifelse{html}{\out{x is the user input;}}{\eqn{x} is the user input;} #' * \ifelse{html}{\out{n is a taxonomic name (genus, species, and subspecies);}}{\eqn{n} is a taxonomic name (genus, species, and subspecies);} #' * \ifelse{html}{\out{ln is the length of n;}}{l_n is the length of \eqn{n};} #' * \ifelse{html}{\out{lev is the Levenshtein distance function, which counts any insertion, deletion and substitution as 1 that is needed to change x into n;}}{lev is the Levenshtein distance function, which counts any insertion, deletion and substitution as 1 that is needed to change \eqn{x} into \eqn{n};} #' * \ifelse{html}{\out{pn is the human pathogenic prevalence group of n, as described below;}}{p_n is the human pathogenic prevalence group of \eqn{n}, as described below;} #' * \ifelse{html}{\out{kn is the taxonomic kingdom of n, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.}}{l_n is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.} -#' +#' #' The grouping into human pathogenic prevalence (\eqn{p}) is based on experience from several microbiological laboratories in the Netherlands in conjunction with international reports on pathogen prevalence. **Group 1** (most prevalent microorganisms) consists of all microorganisms where the taxonomic class is Gammaproteobacteria or where the taxonomic genus is *Enterococcus*, *Staphylococcus* or *Streptococcus*. This group consequently contains all common Gram-negative bacteria, such as *Pseudomonas* and *Legionella* and all species within the order Enterobacterales. **Group 2** consists of all microorganisms where the taxonomic phylum is Proteobacteria, Firmicutes, Actinobacteria or Sarcomastigophora, or where the taxonomic genus is *Absidia*, *Acremonium*, *Actinotignum*, *Alternaria*, *Anaerosalibacter*, *Apophysomyces*, *Arachnia*, *Aspergillus*, *Aureobacterium*, *Aureobasidium*, *Bacteroides*, *Basidiobolus*, *Beauveria*, *Blastocystis*, *Branhamella*, *Calymmatobacterium*, *Candida*, *Capnocytophaga*, *Catabacter*, *Chaetomium*, *Chryseobacterium*, *Chryseomonas*, *Chrysonilia*, *Cladophialophora*, *Cladosporium*, *Conidiobolus*, *Cryptococcus*, *Curvularia*, *Exophiala*, *Exserohilum*, *Flavobacterium*, *Fonsecaea*, *Fusarium*, *Fusobacterium*, *Hendersonula*, *Hypomyces*, *Koserella*, *Lelliottia*, *Leptosphaeria*, *Leptotrichia*, *Malassezia*, *Malbranchea*, *Mortierella*, *Mucor*, *Mycocentrospora*, *Mycoplasma*, *Nectria*, *Ochroconis*, *Oidiodendron*, *Phoma*, *Piedraia*, *Pithomyces*, *Pityrosporum*, *Prevotella*, *Pseudallescheria*, *Rhizomucor*, *Rhizopus*, *Rhodotorula*, *Scolecobasidium*, *Scopulariopsis*, *Scytalidium*, *Sporobolomyces*, *Stachybotrys*, *Stomatococcus*, *Treponema*, *Trichoderma*, *Trichophyton*, *Trichosporon*, *Tritirachium* or *Ureaplasma*. **Group 3** consists of all other microorganisms. -#' +#' #' All characters in \eqn{x} and \eqn{n} are ignored that are other than A-Z, a-z, 0-9, spaces and parentheses. -#' -#' All matches are sorted descending on their matching score and for all user input values, the top match will be returned. This will lead to the effect that e.g., `"E. coli"` will return the microbial ID of *Escherichia coli* (\eqn{m = `r round(mo_matching_score("E. coli", "Escherichia coli"), 3)`}, a highly prevalent microorganism found in humans) and not *Entamoeba coli* (\eqn{m = `r round(mo_matching_score("E. coli", "Entamoeba coli"), 3)`}, a less prevalent microorganism in humans), although the latter would alphabetically come first. -#' +#' +#' All matches are sorted descending on their matching score and for all user input values, the top match will be returned. This will lead to the effect that e.g., `"E. coli"` will return the microbial ID of *Escherichia coli* (\eqn{m = `r round(mo_matching_score("E. coli", "Escherichia coli"), 3)`}, a highly prevalent microorganism found in humans) and not *Entamoeba coli* (\eqn{m = `r round(mo_matching_score("E. coli", "Entamoeba coli"), 3)`}, a less prevalent microorganism in humans), although the latter would alphabetically come first. +#' #' Since `AMR` version 1.8.1, common microorganism abbreviations are ignored in determining the matching score. These abbreviations are currently: `r vector_and(pkg_env$mo_field_abbreviations, quotes = FALSE)`. #' @export #' @inheritSection AMR Reference Data Publicly Available -#' @examples +#' @examples #' as.mo("E. coli") #' mo_uncertainties() -#' -#' mo_matching_score(x = "E. coli", -#' n = c("Escherichia coli", "Entamoeba coli")) +#' +#' mo_matching_score( +#' x = "E. coli", +#' n = c("Escherichia coli", "Entamoeba coli") +#' ) mo_matching_score <- function(x, n) { meet_criteria(x, allow_class = c("character", "data.frame", "list")) meet_criteria(n, allow_class = "character") - + x <- parse_and_convert(x) # no dots and other non-whitespace characters x <- gsub("[^a-zA-Z0-9 \\(\\)]+", "", x) - + # remove abbreviations known to the field - x <- gsub(paste0("(^|[^a-z0-9]+)(", - paste0(pkg_env$mo_field_abbreviations, collapse = "|"), - ")([^a-z0-9]+|$)"), - "", x, perl = TRUE, ignore.case = TRUE) - + x <- gsub(paste0( + "(^|[^a-z0-9]+)(", + paste0(pkg_env$mo_field_abbreviations, collapse = "|"), + ")([^a-z0-9]+|$)" + ), + "", x, + perl = TRUE, ignore.case = TRUE + ) + # only keep one space x <- gsub(" +", " ", x) - + # n is always a taxonomically valid full name if (length(n) == 1) { n <- rep(n, length(x)) @@ -82,7 +88,7 @@ mo_matching_score <- function(x, n) { if (length(x) == 1) { x <- rep(x, length(n)) } - + # length of fullname l_n <- nchar(n) lev <- double(length = length(x)) @@ -97,7 +103,7 @@ mo_matching_score <- function(x, n) { p_n <- MO_lookup[match(n, MO_lookup$fullname), "prevalence", drop = TRUE] # kingdom index (Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5) k_n <- MO_lookup[match(n, MO_lookup$fullname), "kingdom_index", drop = TRUE] - + # matching score: (l_n - 0.5 * l_n.lev) / (l_n * p_n * k_n) } diff --git a/R/mo_property.R b/R/mo_property.R index eeb87b29d..182569370 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -42,15 +42,15 @@ #' Since the top-level of the taxonomy is sometimes referred to as 'kingdom' and sometimes as 'domain', the functions [mo_kingdom()] and [mo_domain()] return the exact same results. #' #' The Gram stain - [mo_gramstain()] - will be determined based on the taxonomic kingdom and phylum. According to Cavalier-Smith (2002, [PMID 11837318](https://pubmed.ncbi.nlm.nih.gov/11837318)), who defined subkingdoms Negibacteria and Posibacteria, only these phyla are Posibacteria: Actinobacteria, Chloroflexi, Firmicutes and Tenericutes. These bacteria are considered Gram-positive, except for members of the class Negativicutes which are Gram-negative. Members of other bacterial phyla are all considered Gram-negative. Species outside the kingdom of Bacteria will return a value `NA`. Functions [mo_is_gram_negative()] and [mo_is_gram_positive()] always return `TRUE` or `FALSE` (except when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria. -#' +#' #' Determination of yeasts - [mo_is_yeast()] - will be based on the taxonomic kingdom and class. *Budding yeasts* are fungi of the phylum Ascomycetes, class Saccharomycetes (also called Hemiascomycetes). *True yeasts* are aggregated into the underlying order Saccharomycetales. Thus, for all microorganisms that are fungi and member of the taxonomic class Saccharomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (except when the input is `NA` or the MO code is `UNKNOWN`). -#' +#' #' Intrinsic resistance - [mo_is_intrinsic_resistant()] - will be determined based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.3)`. The [mo_is_intrinsic_resistant()] functions can be vectorised over arguments `x` (input for microorganisms) and over `ab` (input for antibiotics). #' #' All output [will be translated][translate] where possible. #' #' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species. -#' +#' #' SNOMED codes - [mo_snomed()] - are from the `r SNOMED_VERSION$current_source`. See *Source* and the [microorganisms] data set for more info. #' @inheritSection mo_matching_score Matching Score for Microorganisms #' @inheritSection catalogue_of_life Catalogue of Life @@ -139,30 +139,32 @@ #' mo_type("Klebsiella pneumoniae") #' #' mo_fullname("S. pyogenes", -#' Lancefield = TRUE, -#' language = "de") +#' Lancefield = TRUE, +#' language = "de" +#' ) #' mo_fullname("S. pyogenes", -#' Lancefield = TRUE, -#' language = "nl") +#' Lancefield = TRUE, +#' language = "nl" +#' ) #' #' #' # other -------------------------------------------------------------------- -#' +#' #' mo_is_yeast(c("Candida", "Trichophyton", "Klebsiella")) -#' +#' #' # gram stains and intrinsic resistance can be used as a filter in dplyr verbs #' if (require("dplyr")) { #' example_isolates %>% #' filter(mo_is_gram_positive()) -#' +#' #' example_isolates %>% #' filter(mo_is_intrinsic_resistant(ab = "vanco")) #' } -#' -#' +#' +#' #' # get a list with the complete taxonomy (from kingdom to subspecies) #' mo_taxonomy("Klebsiella pneumoniae") -#' +#' #' # get a list with the taxonomy, the authors, Gram-stain, #' # SNOMED codes, and URL to the online database #' mo_info("Klebsiella pneumoniae") @@ -174,11 +176,12 @@ mo_name <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_into_language(mo_validate(x = x, property = "fullname", language = language, ...), - language = language, - only_unknown = FALSE, - only_affect_mo_names = TRUE) + language = language, + only_unknown = FALSE, + only_affect_mo_names = TRUE + ) } #' @rdname mo_property @@ -194,20 +197,20 @@ mo_shortname <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x.mo <- as.mo(x, language = language, ...) - + metadata <- get_mo_failures_uncertainties_renamed() - + replace_empty <- function(x) { x[x == ""] <- "spp." x } - + # get first char of genus and complete species in English genera <- mo_genus(x.mo, language = NULL) shortnames <- paste0(substr(genera, 1, 1), ". ", replace_empty(mo_species(x.mo, language = NULL))) - + # exceptions for where no species is known shortnames[shortnames %like% ".[.] spp[.]"] <- genera[shortnames %like% ".[.] spp[.]"] # exceptions for staphylococci @@ -217,7 +220,7 @@ mo_shortname <- function(x, language = get_AMR_locale(), ...) { shortnames[shortnames %like% "S. group [ABCDFGHK]"] <- paste0("G", gsub("S. group ([ABCDFGHK])", "\\1", shortnames[shortnames %like% "S. group [ABCDFGHK]"], perl = TRUE), "S") # unknown species etc. shortnames[shortnames %like% "unknown"] <- paste0("(", trimws(gsub("[^a-zA-Z -]", "", shortnames[shortnames %like% "unknown"], perl = TRUE)), ")") - + shortnames[is.na(x.mo)] <- NA_character_ load_mo_failures_uncertainties_renamed(metadata) translate_into_language(shortnames, language = language, only_unknown = FALSE, only_affect_mo_names = TRUE) @@ -234,7 +237,7 @@ mo_subspecies <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_into_language(mo_validate(x = x, property = "subspecies", language = language, ...), language = language, only_unknown = TRUE) } @@ -247,7 +250,7 @@ mo_species <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_into_language(mo_validate(x = x, property = "species", language = language, ...), language = language, only_unknown = TRUE) } @@ -260,7 +263,7 @@ mo_genus <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_into_language(mo_validate(x = x, property = "genus", language = language, ...), language = language, only_unknown = TRUE) } @@ -273,7 +276,7 @@ mo_family <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_into_language(mo_validate(x = x, property = "family", language = language, ...), language = language, only_unknown = TRUE) } @@ -286,7 +289,7 @@ mo_order <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_into_language(mo_validate(x = x, property = "order", language = language, ...), language = language, only_unknown = TRUE) } @@ -299,7 +302,7 @@ mo_class <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_into_language(mo_validate(x = x, property = "class", language = language, ...), language = language, only_unknown = TRUE) } @@ -312,7 +315,7 @@ mo_phylum <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_into_language(mo_validate(x = x, property = "phylum", language = language, ...), language = language, only_unknown = TRUE) } @@ -325,7 +328,7 @@ mo_kingdom <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_into_language(mo_validate(x = x, property = "kingdom", language = language, ...), language = language, only_unknown = TRUE) } @@ -342,7 +345,7 @@ mo_type <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x.mo <- as.mo(x, language = language, ...) out <- mo_kingdom(x.mo, language = NULL) out[which(mo_is_yeast(x.mo))] <- "Yeasts" @@ -358,24 +361,26 @@ mo_gramstain <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x.mo <- as.mo(x, language = language, ...) metadata <- get_mo_failures_uncertainties_renamed() - + x <- rep(NA_character_, length(x)) # make all bacteria Gram negative x[mo_kingdom(x.mo) == "Bacteria"] <- "Gram-negative" # overwrite these 4 phyla with Gram-positives # Source: https://itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=956097 (Cavalier-Smith, 2002) - x[(mo_phylum(x.mo) %in% c("Actinobacteria", - "Chloroflexi", - "Firmicutes", - "Tenericutes") & - # but class Negativicutes (of phylum Firmicutes) are Gram-negative! - mo_class(x.mo) != "Negativicutes") - # and of course our own ID for Gram-positives - | x.mo == "B_GRAMP"] <- "Gram-positive" - + x[(mo_phylum(x.mo) %in% c( + "Actinobacteria", + "Chloroflexi", + "Firmicutes", + "Tenericutes" + ) & + # but class Negativicutes (of phylum Firmicutes) are Gram-negative! + mo_class(x.mo) != "Negativicutes") + # and of course our own ID for Gram-positives + | x.mo == "B_GRAMP"] <- "Gram-positive" + load_mo_failures_uncertainties_renamed(metadata) translate_into_language(x, language = language, only_unknown = FALSE) } @@ -389,7 +394,7 @@ mo_is_gram_negative <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x.mo <- as.mo(x, language = language, ...) metadata <- get_mo_failures_uncertainties_renamed() grams <- mo_gramstain(x.mo, language = NULL) @@ -408,7 +413,7 @@ mo_is_gram_positive <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x.mo <- as.mo(x, language = language, ...) metadata <- get_mo_failures_uncertainties_renamed() grams <- mo_gramstain(x.mo, language = NULL) @@ -427,15 +432,15 @@ mo_is_yeast <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x.mo <- as.mo(x, language = language, ...) metadata <- get_mo_failures_uncertainties_renamed() - + x.kingdom <- mo_kingdom(x.mo, language = NULL) x.class <- mo_class(x.mo, language = NULL) - + load_mo_failures_uncertainties_renamed(metadata) - + out <- rep(FALSE, length(x)) out[x.kingdom == "Fungi" & x.class == "Saccharomycetes"] <- TRUE out[x.mo %in% c(NA_character_, "UNKNOWN")] <- NA @@ -452,10 +457,10 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(ab, allow_NA = FALSE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- as.mo(x, language = language, ...) ab <- as.ab(ab, language = NULL, flag_multiple_results = FALSE, info = FALSE) - + if (length(x) == 1 & length(ab) > 1) { x <- rep(x, length(ab)) } else if (length(ab) == 1 & length(x) > 1) { @@ -464,14 +469,16 @@ mo_is_intrinsic_resistant <- function(x, ab, language = get_AMR_locale(), ...) { if (length(x) != length(ab)) { stop_("length of `x` and `ab` must be equal, or one of them must be of length 1.") } - + # show used version number once per session (pkg_env will reload every session) if (message_not_thrown_before("mo_is_intrinsic_resistant", "version.mo", entire_session = TRUE)) { - message_("Determining intrinsic resistance based on ", - format_eucast_version_nr(3.3, markdown = FALSE), ". ", - font_red("This note will be shown once per session.")) + message_( + "Determining intrinsic resistance based on ", + format_eucast_version_nr(3.3, markdown = FALSE), ". ", + font_red("This note will be shown once per session.") + ) } - + # runs against internal vector: INTRINSIC_R (see zzz.R) paste(x, ab) %in% INTRINSIC_R } @@ -485,7 +492,7 @@ mo_snomed <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + mo_validate(x = x, property = "snomed", language = language, ...) } @@ -498,7 +505,7 @@ mo_ref <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + mo_validate(x = x, property = "ref", language = language, ...) } @@ -511,7 +518,7 @@ mo_authors <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- mo_validate(x = x, property = "ref", language = language, ...) # remove last 4 digits and presumably the comma and space that preceed them x[!is.na(x)] <- gsub(",? ?[0-9]{4}", "", x[!is.na(x)], perl = TRUE) @@ -527,7 +534,7 @@ mo_year <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- mo_validate(x = x, property = "ref", language = language, ...) # get last 4 digits x[!is.na(x)] <- gsub(".*([0-9]{4})$", "\\1", x[!is.na(x)], perl = TRUE) @@ -543,7 +550,7 @@ mo_lpsn <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + mo_validate(x = x, property = "species_id", language = language, ...) } @@ -556,32 +563,34 @@ mo_rank <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + mo_validate(x = x, property = "rank", language = language, ...) } #' @rdname mo_property #' @export -mo_taxonomy <- function(x, language = get_AMR_locale(), ...) { +mo_taxonomy <- function(x, language = get_AMR_locale(), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_taxonomy") } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- as.mo(x, language = language, ...) metadata <- get_mo_failures_uncertainties_renamed() - - out <- list(kingdom = mo_kingdom(x, language = language), - phylum = mo_phylum(x, language = language), - class = mo_class(x, language = language), - order = mo_order(x, language = language), - family = mo_family(x, language = language), - genus = mo_genus(x, language = language), - species = mo_species(x, language = language), - subspecies = mo_subspecies(x, language = language)) - + + out <- list( + kingdom = mo_kingdom(x, language = language), + phylum = mo_phylum(x, language = language), + class = mo_class(x, language = language), + order = mo_order(x, language = language), + family = mo_family(x, language = language), + genus = mo_genus(x, language = language), + species = mo_species(x, language = language), + subspecies = mo_subspecies(x, language = language) + ) + load_mo_failures_uncertainties_renamed(metadata) out } @@ -595,10 +604,10 @@ mo_synonyms <- function(x, language = get_AMR_locale(), ...) { } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- as.mo(x, language = language, ...) metadata <- get_mo_failures_uncertainties_renamed() - + IDs <- mo_name(x = x, language = NULL) syns <- lapply(IDs, function(newname) { res <- sort(microorganisms.old[which(microorganisms.old$fullname_new == newname), "fullname", drop = TRUE]) @@ -614,38 +623,43 @@ mo_synonyms <- function(x, language = get_AMR_locale(), ...) { } else { result <- unlist(syns) } - + load_mo_failures_uncertainties_renamed(metadata) result } #' @rdname mo_property #' @export -mo_info <- function(x, language = get_AMR_locale(), ...) { +mo_info <- function(x, language = get_AMR_locale(), ...) { if (missing(x)) { # this tries to find the data and an column x <- find_mo_col(fn = "mo_info") } meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- as.mo(x, language = language, ...) metadata <- get_mo_failures_uncertainties_renamed() - - info <- lapply(x, function(y) - c(mo_taxonomy(y, language = language), - list(synonyms = mo_synonyms(y), - gramstain = mo_gramstain(y, language = language), - url = unname(mo_url(y, open = FALSE)), - ref = mo_ref(y), - snomed = unlist(mo_snomed(y))))) + + info <- lapply(x, function(y) { + c( + mo_taxonomy(y, language = language), + list( + synonyms = mo_synonyms(y), + gramstain = mo_gramstain(y, language = language), + url = unname(mo_url(y, open = FALSE)), + ref = mo_ref(y), + snomed = unlist(mo_snomed(y)) + ) + ) + }) if (length(info) > 1) { names(info) <- mo_name(x) result <- info } else { result <- info[[1L]] } - + load_mo_failures_uncertainties_renamed(metadata) result } @@ -660,30 +674,31 @@ mo_url <- function(x, open = FALSE, language = get_AMR_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(open, allow_class = "logical", has_length = 1) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x.mo <- as.mo(x = x, language = language, ... = ...) metadata <- get_mo_failures_uncertainties_renamed() - + df <- microorganisms[match(x.mo, microorganisms$mo), c("mo", "fullname", "source", "kingdom", "rank"), drop = FALSE] df$url <- ifelse(df$source == "LPSN", - paste0(CATALOGUE_OF_LIFE$url_LPSN, "/species/", gsub(" ", "-", tolower(df$fullname), fixed = TRUE)), - paste0(CATALOGUE_OF_LIFE$url_CoL, "/data/search?type=EXACT&q=", gsub(" ", "%20", df$fullname, fixed = TRUE))) - + paste0(CATALOGUE_OF_LIFE$url_LPSN, "/species/", gsub(" ", "-", tolower(df$fullname), fixed = TRUE)), + paste0(CATALOGUE_OF_LIFE$url_CoL, "/data/search?type=EXACT&q=", gsub(" ", "%20", df$fullname, fixed = TRUE)) + ) + genera <- which(df$kingdom == "Bacteria" & df$rank == "genus") df$url[genera] <- gsub("/species/", "/genus/", df$url[genera], fixed = TRUE) subsp <- which(df$kingdom == "Bacteria" & df$rank %in% c("subsp.", "infraspecies")) df$url[subsp] <- gsub("/species/", "/subspecies/", df$url[subsp], fixed = TRUE) - + u <- df$url names(u) <- df$fullname - + if (isTRUE(open)) { if (length(u) > 1) { warning_("in `mo_url()`: only the first URL will be opened, as `browseURL()` only suports one string.") } utils::browseURL(u[1L]) } - + load_mo_failures_uncertainties_renamed(metadata) u } @@ -699,7 +714,7 @@ mo_property <- function(x, property = "fullname", language = get_AMR_locale(), . meet_criteria(x, allow_NA = TRUE) meet_criteria(property, allow_class = "character", has_length = 1, is_in = colnames(microorganisms)) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_into_language(mo_validate(x = x, property = property, language = language, ...), language = language, only_unknown = TRUE) } @@ -715,22 +730,22 @@ mo_validate <- function(x, property, language, ...) { Lancefield <- FALSE } has_Becker_or_Lancefield <- Becker %in% c(TRUE, "all") | Lancefield %in% c(TRUE, "all") - + if (tryCatch(all(x[!is.na(x)] %in% MO_lookup$mo) & !has_Becker_or_Lancefield, error = function(e) FALSE)) { # special case for mo_* functions where class is already x <- MO_lookup[match(x, MO_lookup$mo), property, drop = TRUE] - } else { # try to catch an error when inputting an invalid argument # so the 'call.' can be set to FALSE tryCatch(x[1L] %in% MO_lookup[1, property, drop = TRUE], - error = function(e) stop(e$message, call. = FALSE)) - + error = function(e) stop(e$message, call. = FALSE) + ) + if (!all(x[!is.na(x)] %in% MO_lookup[, property, drop = TRUE]) | has_Becker_or_Lancefield) { x <- exec_as.mo(x, property = property, language = language, ...) } } - + if (property == "mo") { return(set_clean_class(x, new_class = c("mo", "character"))) } else if (property == "species_id") { @@ -747,9 +762,12 @@ find_mo_col <- function(fn) { # which is useful when functions are used within dplyr verbs df <- get_current_data(arg_name = "x", call = -3) # will return an error if not found mo <- NULL - try({ - mo <- suppressMessages(search_type_in_df(df, "mo")) - }, silent = TRUE) + try( + { + mo <- suppressMessages(search_type_in_df(df, "mo")) + }, + silent = TRUE + ) if (!is.null(df) && !is.null(mo) && is.data.frame(df)) { if (message_not_thrown_before(fn = fn)) { message_("Using column '", font_bold(mo), "' as input for `", fn, "()`") diff --git a/R/mo_source.R b/R/mo_source.R index 4aa83d5ca..49ffe59d3 100644 --- a/R/mo_source.R +++ b/R/mo_source.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -36,17 +36,17 @@ #' @details The reference file can be a text file separated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an \R object file (extension '.rds'). To use an Excel file, you will need to have the `readxl` package installed. #' #' [set_mo_source()] will check the file for validity: it must be a [data.frame], must have a column named `"mo"` which contains values from [`microorganisms$mo`][microorganisms] or [`microorganisms$fullname`][microorganisms] and must have a reference column with your own defined values. If all tests pass, [set_mo_source()] will read the file into \R and will ask to export it to `"~/mo_source.rds"`. The CRAN policy disallows packages to write to the file system, although '*exceptions may be allowed in interactive sessions if the package obtains confirmation from the user*'. For this reason, this function only works in interactive sessions so that the user can **specifically confirm and allow** that this file will be created. The destination of this file can be set with the `destination` argument and defaults to the user's home directory. It can also be set as an \R option, using `options(AMR_mo_source = "my/location/file.rds")`. -#' -#' The created compressed data file `"mo_source.rds"` will be used at default for MO determination (function [as.mo()] and consequently all `mo_*` functions like [mo_genus()] and [mo_gramstain()]). The location and timestamp of the original file will be saved as an [attribute][base::attributes()] to the compressed data file. -#' +#' +#' The created compressed data file `"mo_source.rds"` will be used at default for MO determination (function [as.mo()] and consequently all `mo_*` functions like [mo_genus()] and [mo_gramstain()]). The location and timestamp of the original file will be saved as an [attribute][base::attributes()] to the compressed data file. +#' #' The function [get_mo_source()] will return the data set by reading `"mo_source.rds"` with [readRDS()]. If the original file has changed (by checking the location and timestamp of the original file), it will call [set_mo_source()] to update the data file automatically if used in an interactive session. #' #' Reading an Excel file (`.xlsx`) with only one row has a size of 8-9 kB. The compressed file created with [set_mo_source()] will then have a size of 0.1 kB and can be read by [get_mo_source()] in only a couple of microseconds (millionths of a second). -#' +#' #' @section How to Setup: -#' +#' #' Imagine this data on a sheet of an Excel file. The first column contains the organisation specific codes, the second column contains valid taxonomic names: -#' +#' #' ``` #' | A | B | #' --|--------------------|-----------------------| @@ -57,18 +57,18 @@ #' ``` #' #' We save it as `"home/me/ourcodes.xlsx"`. Now we have to set it as a source: -#' +#' #' ``` #' set_mo_source("home/me/ourcodes.xlsx") #' #> NOTE: Created mo_source file '/Users/me/mo_source.rds' (0.3 kB) from -#' #> '/Users/me/Documents/ourcodes.xlsx' (9 kB), columns +#' #> '/Users/me/Documents/ourcodes.xlsx' (9 kB), columns #' #> "Organisation XYZ" and "mo" #' ``` #' #' It has now created a file `"~/mo_source.rds"` with the contents of our Excel file. Only the first column with foreign values and the 'mo' column will be kept when creating the RDS file. #' #' And now we can use it in our functions: -#' +#' #' ``` #' as.mo("lab_mo_ecoli") #' #> Class @@ -86,7 +86,7 @@ #' ``` #' #' If we edit the Excel file by, let's say, adding row 4 like this: -#' +#' #' ``` #' | A | B | #' --|--------------------|-----------------------| @@ -98,10 +98,10 @@ #' ``` #' #' ...any new usage of an MO function in this package will update your data file: -#' +#' #' ``` #' as.mo("lab_mo_ecoli") -#' #> NOTE: Updated mo_source file '/Users/me/mo_source.rds' (0.3 kB) from +#' #> NOTE: Updated mo_source file '/Users/me/mo_source.rds' (0.3 kB) from #' #> '/Users/me/Documents/ourcodes.xlsx' (9 kB), columns #' #> "Organisation XYZ" and "mo" #' #> Class @@ -112,21 +112,21 @@ #' ``` #' #' To delete the reference data file, just use `""`, `NULL` or `FALSE` as input for [set_mo_source()]: -#' +#' #' ``` #' set_mo_source(NULL) #' #> Removed mo_source file '/Users/me/mo_source.rds' #' ``` -#' +#' #' If the original file (in the previous case an Excel file) is moved or deleted, the `mo_source.rds` file will be removed upon the next use of [as.mo()] or any [`mo_*`][mo_property()] function. #' @export set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_source.rds")) { meet_criteria(path, allow_class = "character", has_length = 1, allow_NULL = TRUE) meet_criteria(destination, allow_class = "character", has_length = 1) stop_ifnot(destination %like% "[.]rds$", "the `destination` must be a file location with file extension .rds.") - + mo_source_destination <- path.expand(destination) - + stop_ifnot(interactive(), "this function can only be used in interactive mode, since it must ask for the user's permission to write a file to their home folder.") if (is.null(path) || path %in% c(FALSE, "")) { @@ -134,77 +134,81 @@ set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_s if (file.exists(mo_source_destination)) { unlink(mo_source_destination) message_("Removed mo_source file '", font_bold(mo_source_destination), "'", - add_fn = font_red, - as_note = FALSE) + add_fn = font_red, + as_note = FALSE + ) } return(invisible()) } - + stop_ifnot(file.exists(path), "file not found: ", path) - + df <- NULL if (path %like% "[.]rds$") { df <- readRDS(path) - } else if (path %like% "[.]xlsx?$") { # is Excel file (old or new) stop_ifnot_installed("readxl") df <- readxl::read_excel(path) - } else if (path %like% "[.]tsv$") { df <- utils::read.table(file = path, header = TRUE, sep = "\t", stringsAsFactors = FALSE) - } else if (path %like% "[.]csv$") { df <- utils::read.table(file = path, header = TRUE, sep = ",", stringsAsFactors = FALSE) - } else { # try comma first try( df <- utils::read.table(file = path, header = TRUE, sep = ",", stringsAsFactors = FALSE), - silent = TRUE) + silent = TRUE + ) if (!check_validity_mo_source(df, stop_on_error = FALSE)) { # try tab try( df <- utils::read.table(file = path, header = TRUE, sep = "\t", stringsAsFactors = FALSE), - silent = TRUE) + silent = TRUE + ) } if (!check_validity_mo_source(df, stop_on_error = FALSE)) { # try pipe try( df <- utils::read.table(file = path, header = TRUE, sep = "|", stringsAsFactors = FALSE), - silent = TRUE) + silent = TRUE + ) } } - + # check integrity if (is.null(df)) { stop_("the path '", path, "' could not be imported as a dataset.") } check_validity_mo_source(df) - + df <- subset(df, !is.na(mo)) - + # keep only first two columns, second must be mo if (colnames(df)[1] == "mo") { df <- df[, c(colnames(df)[2], "mo")] } else { df <- df[, c(colnames(df)[1], "mo")] } - + df <- as.data.frame(df, stringAsFactors = FALSE) df[, "mo"] <- as.mo(df[, "mo", drop = TRUE]) - + # success if (file.exists(mo_source_destination)) { action <- "Updated" } else { action <- "Created" # only ask when file is created, not when it is updated - txt <- paste0(word_wrap(paste0("This will write create the new file '", - mo_source_destination, - "', for which your permission is needed.")), - "\n\n", - word_wrap("Do you agree that this file will be created?")) + txt <- paste0( + word_wrap(paste0( + "This will write create the new file '", + mo_source_destination, + "', for which your permission is needed." + )), + "\n\n", + word_wrap("Do you agree that this file will be created?") + ) showQuestion <- import_fn("showQuestion", "rstudioapi", error_on_fail = FALSE) if (!is.null(showQuestion)) { q_continue <- showQuestion("Create new file in home directory", txt) @@ -220,11 +224,13 @@ set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_s attr(df, "mo_source_timestamp") <- file.mtime(path) saveRDS(df, mo_source_destination) pkg_env$mo_source <- df - message_(action, " mo_source file '", font_bold(mo_source_destination), - "' (", formatted_filesize(mo_source_destination), - ") from '", font_bold(path), - "' (", formatted_filesize(path), - '), columns "', colnames(df)[1], '" and "', colnames(df)[2], '"') + message_( + action, " mo_source file '", font_bold(mo_source_destination), + "' (", formatted_filesize(mo_source_destination), + ") from '", font_bold(path), + "' (", formatted_filesize(path), + '), columns "', colnames(df)[1], '" and "', colnames(df)[2], '"' + ) } #' @rdname mo_source @@ -240,7 +246,7 @@ get_mo_source <- function(destination = getOption("AMR_mo_source", "~/mo_source. if (is.null(pkg_env$mo_source)) { pkg_env$mo_source <- readRDS(path.expand(destination)) } - + old_time <- attributes(pkg_env$mo_source)$mo_source_timestamp new_time <- file.mtime(attributes(pkg_env$mo_source)$mo_source_location) if (interactive() && !identical(old_time, new_time)) { @@ -252,7 +258,7 @@ get_mo_source <- function(destination = getOption("AMR_mo_source", "~/mo_source. check_validity_mo_source <- function(x, refer_to_name = "`reference_df`", stop_on_error = TRUE) { check_dataset_integrity() - + if (paste(deparse(substitute(x)), collapse = "") == "get_mo_source()") { return(TRUE) } @@ -288,10 +294,11 @@ check_validity_mo_source <- function(x, refer_to_name = "`reference_df`", stop_o } else { plural <- "" } - stop_("Value", plural, " ", vector_and(invalid[, 1, drop = TRUE], quotes = TRUE), - " found in ", tolower(refer_to_name), - ", but with invalid microorganism code", plural, " ", vector_and(invalid$mo, quotes = TRUE), - call = FALSE) + stop_("Value", plural, " ", vector_and(invalid[, 1, drop = TRUE], quotes = TRUE), + " found in ", tolower(refer_to_name), + ", but with invalid microorganism code", plural, " ", vector_and(invalid$mo, quotes = TRUE), + call = FALSE + ) } else { return(FALSE) } diff --git a/R/pca.R b/R/pca.R index fc169d676..ca34b0418 100755 --- a/R/pca.R +++ b/R/pca.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,42 +24,44 @@ # ==================================================================== # #' Principal Component Analysis (for AMR) -#' +#' #' Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables. #' @param x a [data.frame] containing [numeric] columns #' @param ... columns of `x` to be selected for PCA, can be unquoted since it supports quasiquotation. #' @inheritParams stats::prcomp #' @details The [pca()] function takes a [data.frame] as input and performs the actual PCA with the \R function [prcomp()]. -#' +#' #' The result of the [pca()] function is a [prcomp] object, with an additional attribute `non_numeric_cols` which is a vector with the column names of all columns that do not contain [numeric] values. These are probably the groups and labels, and will be used by [ggplot_pca()]. #' @return An object of classes [pca] and [prcomp] #' @importFrom stats prcomp #' @export -#' @examples +#' @examples #' # `example_isolates` is a data set available in the AMR package. #' # See ?example_isolates. #' #' \donttest{ #' if (require("dplyr")) { -#' # calculate the resistance per group first -#' resistance_data <- example_isolates %>% -#' group_by(order = mo_order(mo), # group on anything, like order -#' genus = mo_genus(mo)) %>% # and genus as we do here; -#' filter(n() >= 30) %>% # filter on only 30 results per group -#' summarise_if(is.rsi, resistance) # then get resistance of all drugs -#' +#' # calculate the resistance per group first +#' resistance_data <- example_isolates %>% +#' group_by( +#' order = mo_order(mo), # group on anything, like order +#' genus = mo_genus(mo) +#' ) %>% # and genus as we do here; +#' filter(n() >= 30) %>% # filter on only 30 results per group +#' summarise_if(is.rsi, resistance) # then get resistance of all drugs +#' #' # now conduct PCA for certain antimicrobial agents -#' pca_result <- resistance_data %>% -#' pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT) -#' +#' pca_result <- resistance_data %>% +#' pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT) +#' #' pca_result #' summary(pca_result) -#' +#' #' # old base R plotting method: #' biplot(pca_result) #' # new ggplot2 plotting method using this package: #' ggplot_pca(pca_result) -#' +#' #' if (require("ggplot2")) { #' ggplot_pca(pca_result) + #' scale_colour_viridis_d() + @@ -70,7 +72,7 @@ pca <- function(x, ..., retx = TRUE, - center = TRUE, + center = TRUE, scale. = TRUE, tol = NULL, rank. = NULL) { @@ -80,19 +82,20 @@ pca <- function(x, meet_criteria(scale., allow_class = "logical", has_length = 1) meet_criteria(tol, allow_class = "numeric", has_length = 1, allow_NULL = TRUE) meet_criteria(rank., allow_class = "numeric", has_length = 1, allow_NULL = TRUE) - + # unset data.table, tibble, etc. # also removes groups made by dplyr::group_by x <- as.data.frame(x, stringsAsFactors = FALSE) x.bak <- x - + # defuse R expressions, this replaces rlang::enquos() dots <- substitute(list(...)) if (length(dots) > 1) { new_list <- list(0) for (i in seq_len(length(dots) - 1)) { new_list[[i]] <- tryCatch(eval(dots[[i + 1]], envir = x), - error = function(e) stop(e$message, call. = FALSE)) + error = function(e) stop(e$message, call. = FALSE) + ) if (length(new_list[[i]]) == 1) { if (is.character(new_list[[i]]) & new_list[[i]] %in% colnames(x)) { # this is to support quoted variables: df %pm>% pca("mycol1", "mycol2") @@ -103,30 +106,33 @@ pca <- function(x, } } } - + x <- as.data.frame(new_list, stringsAsFactors = FALSE) if (any(vapply(FUN.VALUE = logical(1), x, function(y) !is.numeric(y)))) { warning_("in `pca()`: be sure to first calculate the resistance (or susceptibility) of variables with antimicrobial test results, since PCA works with numeric variables only. See Examples in ?pca.", call = FALSE) } - + # set column names tryCatch(colnames(x) <- as.character(dots)[2:length(dots)], - error = function(e) warning("column names could not be set")) - + error = function(e) warning("column names could not be set") + ) + # keep only numeric columns x <- x[, vapply(FUN.VALUE = logical(1), x, function(y) is.numeric(y)), drop = FALSE] # bind the data set with the non-numeric columns x <- cbind(x.bak[, vapply(FUN.VALUE = logical(1), x.bak, function(y) !is.numeric(y) & !all(is.na(y))), drop = FALSE], x) } - - x <- pm_ungroup(x) # would otherwise select the grouping vars + + x <- pm_ungroup(x) # would otherwise select the grouping vars x <- x[rowSums(is.na(x)) == 0, ] # remove columns containing NAs - + pca_data <- x[, which(vapply(FUN.VALUE = logical(1), x, function(x) is.numeric(x))), drop = FALSE] - - message_("Columns selected for PCA: ", vector_and(font_bold(colnames(pca_data), collapse = NULL), quotes = TRUE), - ". Total observations available: ", nrow(pca_data), ".") - + + message_( + "Columns selected for PCA: ", vector_and(font_bold(colnames(pca_data), collapse = NULL), quotes = TRUE), + ". Total observations available: ", nrow(pca_data), "." + ) + if (getRversion() < "3.4.0") { # stats::prcomp prior to 3.4.0 does not have the 'rank.' argument pca_model <- prcomp(pca_data, retx = retx, center = center, scale. = scale., tol = tol) diff --git a/R/plot.R b/R/plot.R index 43920a0ed..cdad32e72 100644 --- a/R/plot.R +++ b/R/plot.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,7 +24,7 @@ # ==================================================================== # #' Plotting for Classes `rsi`, `mic` and `disk` -#' +#' #' Functions to plot classes `rsi`, `mic` and `disk`, with support for base \R and `ggplot2`. #' @param x,object values created with [as.mic()], [as.disk()] or [as.rsi()] (or their `random_*` variants, such as [random_mic()]) @@ -38,30 +38,30 @@ #' @param expand a [logical] to indicate whether the range on the x axis should be expanded between the lowest and highest value. For MIC values, intermediate values will be factors of 2 starting from the highest MIC value. For disk diameters, the whole diameter range will be filled. #' @details #' The interpretation of "I" will be named "Increased exposure" for all EUCAST guidelines since 2019, and will be named "Intermediate" in all other cases. -#' +#' #' For interpreting MIC values as well as disk diffusion diameters, supported guidelines to be used as input for the `guideline` argument are: `r vector_and(AMR::rsi_translation$guideline, quotes = TRUE, reverse = TRUE)`. -#' +#' #' Simply using `"CLSI"` or `"EUCAST"` as input will automatically select the latest version of that guideline. #' @name plot #' @rdname plot #' @return The `autoplot()` functions return a [`ggplot`][ggplot2::ggplot()] model that is extendible with any `ggplot2` function. -#' +#' #' The `fortify()` functions return a [data.frame] as an extension for usage in the [ggplot2::ggplot()] function. #' @param ... arguments passed on to methods -#' @examples +#' @examples #' some_mic_values <- random_mic(size = 100) #' some_disk_values <- random_disk(size = 100, mo = "Escherichia coli", ab = "cipro") #' some_rsi_values <- random_rsi(50, prob_RSI = c(0.30, 0.55, 0.05)) -#' +#' #' plot(some_mic_values) #' plot(some_disk_values) #' plot(some_rsi_values) -#' +#' #' # when providing the microorganism and antibiotic, colours will show interpretations: #' plot(some_mic_values, mo = "S. aureus", ab = "ampicillin") #' plot(some_disk_values, mo = "Escherichia coli", ab = "cipro") #' plot(some_disk_values, mo = "Escherichia coli", ab = "cipro", language = "uk") -#' +#' #' \donttest{ #' if (require("ggplot2")) { #' autoplot(some_mic_values) @@ -95,7 +95,7 @@ plot.mic <- function(x, meet_criteria(colours_RSI, allow_class = "character", has_length = c(1, 3)) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(expand, allow_class = "logical", has_length = 1) - + # translate if not specifically set if (missing(ylab)) { ylab <- translate_into_language(ylab, language = language) @@ -103,34 +103,37 @@ plot.mic <- function(x, if (missing(xlab)) { xlab <- translate_into_language(xlab, language = language) } - + if (length(colours_RSI) == 1) { colours_RSI <- rep(colours_RSI, 3) } main <- gsub(" +", " ", paste0(main, collapse = " ")) - + x <- plot_prepare_table(x, expand = expand) - - cols_sub <- plot_colours_subtitle_guideline(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.mic, - language = language, - ...) + + cols_sub <- plot_colours_subtitle_guideline( + x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.mic, + language = language, + ... + ) barplot(x, - col = cols_sub$cols, - main = main, - ylim = c(0, max(x) * ifelse(any(colours_RSI %in% cols_sub$cols), 1.1, 1)), - ylab = ylab, - xlab = xlab, - axes = FALSE) + col = cols_sub$cols, + main = main, + ylim = c(0, max(x) * ifelse(any(colours_RSI %in% cols_sub$cols), 1.1, 1)), + ylab = ylab, + xlab = xlab, + axes = FALSE + ) axis(2, seq(0, max(x))) if (!is.null(cols_sub$sub)) { mtext(side = 3, line = 0.5, adj = 0.5, cex = 0.75, cols_sub$sub) } - + if (any(colours_RSI %in% cols_sub$cols)) { legend_txt <- character(0) legend_col <- character(0) @@ -146,16 +149,17 @@ plot.mic <- function(x, legend_txt <- c(legend_txt, "Resistant") legend_col <- c(legend_col, colours_RSI[1]) } - + legend("top", - x.intersp = 0.5, - legend = translate_into_language(legend_txt, language = language), - fill = legend_col, - horiz = TRUE, - cex = 0.75, - box.lwd = 0, - box.col = "#FFFFFF55", - bg = "#FFFFFF55") + x.intersp = 0.5, + legend = translate_into_language(legend_txt, language = language), + fill = legend_col, + horiz = TRUE, + cex = 0.75, + box.lwd = 0, + box.col = "#FFFFFF55", + bg = "#FFFFFF55" + ) } } @@ -182,7 +186,7 @@ barplot.mic <- function(height, meet_criteria(colours_RSI, allow_class = "character", has_length = c(1, 3)) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(expand, allow_class = "logical", has_length = 1) - + # translate if not specifically set if (missing(ylab)) { ylab <- translate_into_language(ylab, language = language) @@ -190,18 +194,20 @@ barplot.mic <- function(height, if (missing(xlab)) { xlab <- translate_into_language(xlab, language = language) } - + main <- gsub(" +", " ", paste0(main, collapse = " ")) - - plot(x = height, - main = main, - ylab = ylab, - xlab = xlab, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - ...) + + plot( + x = height, + main = main, + ylab = ylab, + xlab = xlab, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + ... + ) } #' @method autoplot mic @@ -228,7 +234,7 @@ autoplot.mic <- function(object, meet_criteria(colours_RSI, allow_class = "character", has_length = c(1, 3)) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(expand, allow_class = "logical", has_length = 1) - + # translate if not specifically set if (missing(ylab)) { ylab <- translate_into_language(ylab, language = language) @@ -236,23 +242,25 @@ autoplot.mic <- function(object, if (missing(xlab)) { xlab <- translate_into_language(xlab, language = language) } - + if ("main" %in% names(list(...))) { title <- list(...)$main } if (!is.null(title)) { title <- gsub(" +", " ", paste0(title, collapse = " ")) } - + x <- plot_prepare_table(object, expand = expand) - cols_sub <- plot_colours_subtitle_guideline(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.mic, - language = language, - ...) + cols_sub <- plot_colours_subtitle_guideline( + x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.mic, + language = language, + ... + ) df <- as.data.frame(x, stringsAsFactors = TRUE) colnames(df) <- c("mic", "count") df$cols <- cols_sub$cols @@ -260,28 +268,34 @@ autoplot.mic <- function(object, df$cols[df$cols == colours_RSI[2]] <- "Susceptible" df$cols[df$cols == colours_RSI[3]] <- plot_name_of_I(cols_sub$guideline) df$cols <- factor(translate_into_language(df$cols, language = language), - levels = translate_into_language(c("Susceptible", plot_name_of_I(cols_sub$guideline), "Resistant"), - language = language), - ordered = TRUE) + levels = translate_into_language(c("Susceptible", plot_name_of_I(cols_sub$guideline), "Resistant"), + language = language + ), + ordered = TRUE + ) p <- ggplot2::ggplot(df) - + if (any(colours_RSI %in% cols_sub$cols)) { - vals <- c("Resistant" = colours_RSI[1], - "Susceptible" = colours_RSI[2], - "Susceptible, incr. exp." = colours_RSI[3], - "Intermediate" = colours_RSI[3]) + vals <- c( + "Resistant" = colours_RSI[1], + "Susceptible" = colours_RSI[2], + "Susceptible, incr. exp." = colours_RSI[3], + "Intermediate" = colours_RSI[3] + ) names(vals) <- translate_into_language(names(vals), language = language) p <- p + - ggplot2::geom_col(ggplot2::aes(x = mic, y = count, fill = cols)) + + ggplot2::geom_col(ggplot2::aes(x = mic, y = count, fill = cols)) + # limits = force is needed because of a ggplot2 >= 3.3.4 bug (#4511) - ggplot2::scale_fill_manual(values = vals, - name = NULL, - limits = force) + ggplot2::scale_fill_manual( + values = vals, + name = NULL, + limits = force + ) } else { p <- p + ggplot2::geom_col(ggplot2::aes(x = mic, y = count)) } - + p + ggplot2::labs(title = title, x = xlab, y = ylab, subtitle = cols_sub$sub) } @@ -290,8 +304,10 @@ autoplot.mic <- function(object, #' @rdname plot # will be exported using s3_register() in R/zzz.R fortify.mic <- function(object, ...) { - stats::setNames(as.data.frame(plot_prepare_table(object, expand = FALSE)), - c("x", "y")) + stats::setNames( + as.data.frame(plot_prepare_table(object, expand = FALSE)), + c("x", "y") + ) } #' @method plot disk @@ -318,7 +334,7 @@ plot.disk <- function(x, meet_criteria(colours_RSI, allow_class = "character", has_length = c(1, 3)) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(expand, allow_class = "logical", has_length = 1) - + # translate if not specifically set if (missing(ylab)) { ylab <- translate_into_language(ylab, language = language) @@ -326,35 +342,38 @@ plot.disk <- function(x, if (missing(xlab)) { xlab <- translate_into_language(xlab, language = language) } - + if (length(colours_RSI) == 1) { colours_RSI <- rep(colours_RSI, 3) } main <- gsub(" +", " ", paste0(main, collapse = " ")) - + x <- plot_prepare_table(x, expand = expand) - - cols_sub <- plot_colours_subtitle_guideline(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.disk, - language = language, - ...) - + + cols_sub <- plot_colours_subtitle_guideline( + x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.disk, + language = language, + ... + ) + barplot(x, - col = cols_sub$cols, - main = main, - ylim = c(0, max(x) * ifelse(any(colours_RSI %in% cols_sub$cols), 1.1, 1)), - ylab = ylab, - xlab = xlab, - axes = FALSE) + col = cols_sub$cols, + main = main, + ylim = c(0, max(x) * ifelse(any(colours_RSI %in% cols_sub$cols), 1.1, 1)), + ylab = ylab, + xlab = xlab, + axes = FALSE + ) axis(2, seq(0, max(x))) if (!is.null(cols_sub$sub)) { mtext(side = 3, line = 0.5, adj = 0.5, cex = 0.75, cols_sub$sub) } - + if (any(colours_RSI %in% cols_sub$cols)) { legend_txt <- character(0) legend_col <- character(0) @@ -370,15 +389,16 @@ plot.disk <- function(x, legend_txt <- c(legend_txt, "Susceptible") legend_col <- c(legend_col, colours_RSI[2]) } - legend("top", - x.intersp = 0.5, - legend = translate_into_language(legend_txt, language = language), - fill = legend_col, - horiz = TRUE, - cex = 0.75, - box.lwd = 0, - box.col = "#FFFFFF55", - bg = "#FFFFFF55") + legend("top", + x.intersp = 0.5, + legend = translate_into_language(legend_txt, language = language), + fill = legend_col, + horiz = TRUE, + cex = 0.75, + box.lwd = 0, + box.col = "#FFFFFF55", + bg = "#FFFFFF55" + ) } } @@ -405,7 +425,7 @@ barplot.disk <- function(height, meet_criteria(colours_RSI, allow_class = "character", has_length = c(1, 3)) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(expand, allow_class = "logical", has_length = 1) - + # translate if not specifically set if (missing(ylab)) { ylab <- translate_into_language(ylab, language = language) @@ -413,18 +433,20 @@ barplot.disk <- function(height, if (missing(xlab)) { xlab <- translate_into_language(xlab, language = language) } - + main <- gsub(" +", " ", paste0(main, collapse = " ")) - - plot(x = height, - main = main, - ylab = ylab, - xlab = xlab, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - ...) + + plot( + x = height, + main = main, + ylab = ylab, + xlab = xlab, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + ... + ) } #' @method autoplot disk @@ -451,7 +473,7 @@ autoplot.disk <- function(object, meet_criteria(colours_RSI, allow_class = "character", has_length = c(1, 3)) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(expand, allow_class = "logical", has_length = 1) - + # translate if not specifically set if (missing(ylab)) { ylab <- translate_into_language(ylab, language = language) @@ -459,53 +481,61 @@ autoplot.disk <- function(object, if (missing(xlab)) { xlab <- translate_into_language(xlab, language = language) } - + if ("main" %in% names(list(...))) { title <- list(...)$main } if (!is.null(title)) { title <- gsub(" +", " ", paste0(title, collapse = " ")) } - + x <- plot_prepare_table(object, expand = expand) - cols_sub <- plot_colours_subtitle_guideline(x = x, - mo = mo, - ab = ab, - guideline = guideline, - colours_RSI = colours_RSI, - fn = as.disk, - language = language, - ...) + cols_sub <- plot_colours_subtitle_guideline( + x = x, + mo = mo, + ab = ab, + guideline = guideline, + colours_RSI = colours_RSI, + fn = as.disk, + language = language, + ... + ) df <- as.data.frame(x, stringsAsFactors = TRUE) colnames(df) <- c("disk", "count") df$cols <- cols_sub$cols - + df$cols[df$cols == colours_RSI[1]] <- "Resistant" df$cols[df$cols == colours_RSI[2]] <- "Susceptible" df$cols[df$cols == colours_RSI[3]] <- plot_name_of_I(cols_sub$guideline) df$cols <- factor(translate_into_language(df$cols, language = language), - levels = translate_into_language(c("Susceptible", plot_name_of_I(cols_sub$guideline), "Resistant"), - language = language), - ordered = TRUE) + levels = translate_into_language(c("Susceptible", plot_name_of_I(cols_sub$guideline), "Resistant"), + language = language + ), + ordered = TRUE + ) p <- ggplot2::ggplot(df) - + if (any(colours_RSI %in% cols_sub$cols)) { - vals <- c("Resistant" = colours_RSI[1], - "Susceptible" = colours_RSI[2], - "Susceptible, incr. exp." = colours_RSI[3], - "Intermediate" = colours_RSI[3]) + vals <- c( + "Resistant" = colours_RSI[1], + "Susceptible" = colours_RSI[2], + "Susceptible, incr. exp." = colours_RSI[3], + "Intermediate" = colours_RSI[3] + ) names(vals) <- translate_into_language(names(vals), language = language) p <- p + - ggplot2::geom_col(ggplot2::aes(x = disk, y = count, fill = cols)) + + ggplot2::geom_col(ggplot2::aes(x = disk, y = count, fill = cols)) + # limits = force is needed because of a ggplot2 >= 3.3.4 bug (#4511) - ggplot2::scale_fill_manual(values = vals, - name = NULL, - limits = force) + ggplot2::scale_fill_manual( + values = vals, + name = NULL, + limits = force + ) } else { p <- p + ggplot2::geom_col(ggplot2::aes(x = disk, y = count)) } - + p + ggplot2::labs(title = title, x = xlab, y = ylab, subtitle = cols_sub$sub) } @@ -514,8 +544,10 @@ autoplot.disk <- function(object, #' @rdname plot # will be exported using s3_register() in R/zzz.R fortify.disk <- function(object, ...) { - stats::setNames(as.data.frame(plot_prepare_table(object, expand = FALSE)), - c("x", "y")) + stats::setNames( + as.data.frame(plot_prepare_table(object, expand = FALSE)), + c("x", "y") + ) } #' @method plot rsi @@ -531,7 +563,7 @@ plot.rsi <- function(x, meet_criteria(ylab, allow_class = "character", has_length = 1) meet_criteria(xlab, allow_class = "character", has_length = 1) meet_criteria(main, allow_class = "character", has_length = 1, allow_NULL = TRUE) - + # translate if not specifically set if (missing(ylab)) { ylab <- translate_into_language(ylab, language = language) @@ -539,44 +571,51 @@ plot.rsi <- function(x, if (missing(xlab)) { xlab <- translate_into_language(xlab, language = language) } - + data <- as.data.frame(table(x), stringsAsFactors = FALSE) colnames(data) <- c("x", "n") data$s <- round((data$n / sum(data$n)) * 100, 1) - + if (!"S" %in% data$x) { data <- rbind(data, data.frame(x = "S", n = 0, s = 0, stringsAsFactors = FALSE), - stringsAsFactors = FALSE) + stringsAsFactors = FALSE + ) } if (!"I" %in% data$x) { data <- rbind(data, data.frame(x = "I", n = 0, s = 0, stringsAsFactors = FALSE), - stringsAsFactors = FALSE) + stringsAsFactors = FALSE + ) } if (!"R" %in% data$x) { data <- rbind(data, data.frame(x = "R", n = 0, s = 0, stringsAsFactors = FALSE), - stringsAsFactors = FALSE) + stringsAsFactors = FALSE + ) } - + data$x <- factor(data$x, levels = c("S", "I", "R"), ordered = TRUE) - + ymax <- pm_if_else(max(data$s) > 95, 105, 100) - - plot(x = data$x, - y = data$s, - lwd = 2, - ylim = c(0, ymax), - ylab = ylab, - xlab = xlab, - main = main, - axes = FALSE) + + plot( + x = data$x, + y = data$s, + lwd = 2, + ylim = c(0, ymax), + ylab = ylab, + xlab = xlab, + main = main, + axes = FALSE + ) # x axis axis(side = 1, at = 1:pm_n_distinct(data$x), labels = levels(data$x), lwd = 0) # y axis, 0-100% axis(side = 2, at = seq(0, 100, 5)) - - text(x = data$x, - y = data$s + 4, - labels = paste0(data$s, "% (n = ", data$n, ")")) + + text( + x = data$x, + y = data$s + 4, + labels = paste0(data$s, "% (n = ", data$n, ")") + ) } @@ -598,7 +637,7 @@ barplot.rsi <- function(height, meet_criteria(colours_RSI, allow_class = "character", has_length = c(1, 3)) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) meet_criteria(expand, allow_class = "logical", has_length = 1) - + # translate if not specifically set if (missing(ylab)) { ylab <- translate_into_language(ylab, language = language) @@ -606,22 +645,23 @@ barplot.rsi <- function(height, if (missing(xlab)) { xlab <- translate_into_language(xlab, language = language) } - + if (length(colours_RSI) == 1) { colours_RSI <- rep(colours_RSI, 3) } else { colours_RSI <- c(colours_RSI[2], colours_RSI[3], colours_RSI[1]) } main <- gsub(" +", " ", paste0(main, collapse = " ")) - + x <- table(height) x <- x[c(1, 2, 3)] barplot(x, - col = colours_RSI, - xlab = xlab, - main = main, - ylab = ylab, - axes = FALSE) + col = colours_RSI, + xlab = xlab, + main = main, + ylab = ylab, + axes = FALSE + ) axis(2, seq(0, max(x))) } @@ -640,7 +680,7 @@ autoplot.rsi <- function(object, meet_criteria(ylab, allow_class = "character", has_length = 1) meet_criteria(xlab, allow_class = "character", has_length = 1) meet_criteria(colours_RSI, allow_class = "character", has_length = c(1, 3)) - + # translate if not specifically set if (missing(ylab)) { ylab <- translate_into_language(ylab, language = language) @@ -648,27 +688,31 @@ autoplot.rsi <- function(object, if (missing(xlab)) { xlab <- translate_into_language(xlab, language = language) } - + if ("main" %in% names(list(...))) { title <- list(...)$main } if (!is.null(title)) { title <- gsub(" +", " ", paste0(title, collapse = " ")) } - + if (length(colours_RSI) == 1) { colours_RSI <- rep(colours_RSI, 3) } - + df <- as.data.frame(table(object), stringsAsFactors = TRUE) colnames(df) <- c("rsi", "count") ggplot2::ggplot(df) + - ggplot2::geom_col(ggplot2::aes(x = rsi, y = count, fill = rsi)) + + ggplot2::geom_col(ggplot2::aes(x = rsi, y = count, fill = rsi)) + # limits = force is needed because of a ggplot2 >= 3.3.4 bug (#4511) - ggplot2::scale_fill_manual(values = c("R" = colours_RSI[1], - "S" = colours_RSI[2], - "I" = colours_RSI[3]), - limits = force) + + ggplot2::scale_fill_manual( + values = c( + "R" = colours_RSI[1], + "S" = colours_RSI[2], + "I" = colours_RSI[3] + ), + limits = force + ) + ggplot2::labs(title = title, x = xlab, y = ylab) + ggplot2::theme(legend.position = "none") } @@ -677,8 +721,10 @@ autoplot.rsi <- function(object, #' @rdname plot # will be exported using s3_register() in R/zzz.R fortify.rsi <- function(object, ...) { - stats::setNames(as.data.frame(table(object)), - c("x", "y")) + stats::setNames( + as.data.frame(table(object)), + c("x", "y") + ) } plot_prepare_table <- function(x, expand) { @@ -743,8 +789,10 @@ plot_colours_subtitle_guideline <- function(x, mo, ab, guideline, colours_RSI, f moname <- mo_name(mo, language = language) abname <- ab_name(ab, language = language) if (all(cols == "#BEBEBE")) { - message_("No ", guideline, " interpretations found for ", - ab_name(ab, language = NULL, tolower = TRUE), " in ", moname) + message_( + "No ", guideline, " interpretations found for ", + ab_name(ab, language = NULL, tolower = TRUE), " in ", moname + ) guideline_txt <- "" } else { guideline_txt <- guideline @@ -753,7 +801,7 @@ plot_colours_subtitle_guideline <- function(x, mo, ab, guideline, colours_RSI, f } guideline_txt <- paste0("(", guideline_txt, ")") } - sub <- bquote(.(abname)~"-"~italic(.(moname))~.(guideline_txt)) + sub <- bquote(.(abname) ~ "-" ~ italic(.(moname)) ~ .(guideline_txt)) } else { cols <- "#BEBEBE" sub <- NULL diff --git a/R/proportion.R b/R/proportion.R index 78f4a9795..ff2050973 100755 --- a/R/proportion.R +++ b/R/proportion.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -40,7 +40,7 @@ #' @inheritSection as.rsi Interpretation of R and S/I #' @details #' The function [resistance()] is equal to the function [proportion_R()]. The function [susceptibility()] is equal to the function [proportion_SI()]. -#' +#' #' **Remember that you should filter your data to let it contain only first isolates!** This is needed to exclude duplicates and to reduce selection bias. Use [first_isolate()] to determine them in your data set. #' #' These functions are not meant to count isolates, but to calculate the proportion of resistance/susceptibility. Use the [`count()`][AMR::count()] functions to count isolates. The function [susceptibility()] is essentially equal to `count_susceptible() / count_all()`. *Low counts can influence the outcome - the `proportion` functions may camouflage this, since they only return the proportion (albeit being dependent on the `minimum` argument).* @@ -90,9 +90,9 @@ #' @examples #' # example_isolates is a data set available in the AMR package. #' # run ?example_isolates for more info. -#' +#' #' # base R ------------------------------------------------------------ -#' resistance(example_isolates$AMX) # determines %R +#' resistance(example_isolates$AMX) # determines %R #' susceptibility(example_isolates$AMX) # determines %S+I #' #' # be more specific @@ -107,55 +107,65 @@ #' if (require("dplyr")) { #' example_isolates %>% #' group_by(ward) %>% -#' summarise(r = resistance(CIP), -#' n = n_rsi(CIP)) # n_rsi works like n_distinct in dplyr, see ?n_rsi -#' +#' summarise( +#' r = resistance(CIP), +#' n = n_rsi(CIP) +#' ) # n_rsi works like n_distinct in dplyr, see ?n_rsi +#' #' example_isolates %>% #' group_by(ward) %>% -#' summarise(R = resistance(CIP, as_percent = TRUE), -#' SI = susceptibility(CIP, as_percent = TRUE), -#' n1 = count_all(CIP), # the actual total; sum of all three -#' n2 = n_rsi(CIP), # same - analogous to n_distinct -#' total = n()) # NOT the number of tested isolates! -#' +#' summarise( +#' R = resistance(CIP, as_percent = TRUE), +#' SI = susceptibility(CIP, as_percent = TRUE), +#' n1 = count_all(CIP), # the actual total; sum of all three +#' n2 = n_rsi(CIP), # same - analogous to n_distinct +#' total = n() +#' ) # NOT the number of tested isolates! +#' #' # Calculate co-resistance between amoxicillin/clav acid and gentamicin, #' # so we can see that combination therapy does a lot more than mono therapy: -#' example_isolates %>% susceptibility(AMC) # %SI = 76.3% -#' example_isolates %>% count_all(AMC) # n = 1879 -#' -#' example_isolates %>% susceptibility(GEN) # %SI = 75.4% -#' example_isolates %>% count_all(GEN) # n = 1855 -#' +#' example_isolates %>% susceptibility(AMC) # %SI = 76.3% +#' example_isolates %>% count_all(AMC) # n = 1879 +#' +#' example_isolates %>% susceptibility(GEN) # %SI = 75.4% +#' example_isolates %>% count_all(GEN) # n = 1855 +#' #' example_isolates %>% susceptibility(AMC, GEN) # %SI = 94.1% -#' example_isolates %>% count_all(AMC, GEN) # n = 1939 -#' -#' +#' example_isolates %>% count_all(AMC, GEN) # n = 1939 +#' +#' #' # See Details on how `only_all_tested` works. Example: #' example_isolates %>% -#' summarise(numerator = count_susceptible(AMC, GEN), -#' denominator = count_all(AMC, GEN), -#' proportion = susceptibility(AMC, GEN)) -#' +#' summarise( +#' numerator = count_susceptible(AMC, GEN), +#' denominator = count_all(AMC, GEN), +#' proportion = susceptibility(AMC, GEN) +#' ) +#' #' example_isolates %>% -#' summarise(numerator = count_susceptible(AMC, GEN, only_all_tested = TRUE), -#' denominator = count_all(AMC, GEN, only_all_tested = TRUE), -#' proportion = susceptibility(AMC, GEN, only_all_tested = TRUE)) -#' -#' +#' summarise( +#' numerator = count_susceptible(AMC, GEN, only_all_tested = TRUE), +#' denominator = count_all(AMC, GEN, only_all_tested = TRUE), +#' proportion = susceptibility(AMC, GEN, only_all_tested = TRUE) +#' ) +#' +#' #' example_isolates %>% #' group_by(ward) %>% -#' summarise(cipro_p = susceptibility(CIP, as_percent = TRUE), -#' cipro_n = count_all(CIP), -#' genta_p = susceptibility(GEN, as_percent = TRUE), -#' genta_n = count_all(GEN), -#' combination_p = susceptibility(CIP, GEN, as_percent = TRUE), -#' combination_n = count_all(CIP, GEN)) -#' +#' summarise( +#' cipro_p = susceptibility(CIP, as_percent = TRUE), +#' cipro_n = count_all(CIP), +#' genta_p = susceptibility(GEN, as_percent = TRUE), +#' genta_n = count_all(GEN), +#' combination_p = susceptibility(CIP, GEN, as_percent = TRUE), +#' combination_n = count_all(CIP, GEN) +#' ) +#' #' # Get proportions S/I/R immediately of all rsi columns #' example_isolates %>% #' select(AMX, CIP) %>% #' proportion_df(translate = FALSE) -#' +#' #' # It also supports grouping variables #' # (use rsi_df to also include the count) #' example_isolates %>% @@ -170,12 +180,14 @@ resistance <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = "R", - minimum = minimum, - as_percent = as_percent, - only_all_tested = only_all_tested, - only_count = FALSE), - error = function(e) stop_(e$message, call = -5)) + ab_result = "R", + minimum = minimum, + as_percent = as_percent, + only_all_tested = only_all_tested, + only_count = FALSE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname proportion @@ -186,12 +198,14 @@ susceptibility <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = c("S", "I"), - minimum = minimum, - as_percent = as_percent, - only_all_tested = only_all_tested, - only_count = FALSE), - error = function(e) stop_(e$message, call = -5)) + ab_result = c("S", "I"), + minimum = minimum, + as_percent = as_percent, + only_all_tested = only_all_tested, + only_count = FALSE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname proportion @@ -202,12 +216,14 @@ proportion_R <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = "R", - minimum = minimum, - as_percent = as_percent, - only_all_tested = only_all_tested, - only_count = FALSE), - error = function(e) stop_(e$message, call = -5)) + ab_result = "R", + minimum = minimum, + as_percent = as_percent, + only_all_tested = only_all_tested, + only_count = FALSE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname proportion @@ -218,12 +234,14 @@ proportion_IR <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = c("I", "R"), - minimum = minimum, - as_percent = as_percent, - only_all_tested = only_all_tested, - only_count = FALSE), - error = function(e) stop_(e$message, call = -5)) + ab_result = c("I", "R"), + minimum = minimum, + as_percent = as_percent, + only_all_tested = only_all_tested, + only_count = FALSE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname proportion @@ -234,12 +252,14 @@ proportion_I <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = "I", - minimum = minimum, - as_percent = as_percent, - only_all_tested = only_all_tested, - only_count = FALSE), - error = function(e) stop_(e$message, call = -5)) + ab_result = "I", + minimum = minimum, + as_percent = as_percent, + only_all_tested = only_all_tested, + only_count = FALSE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname proportion @@ -250,12 +270,14 @@ proportion_SI <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = c("S", "I"), - minimum = minimum, - as_percent = as_percent, - only_all_tested = only_all_tested, - only_count = FALSE), - error = function(e) stop_(e$message, call = -5)) + ab_result = c("S", "I"), + minimum = minimum, + as_percent = as_percent, + only_all_tested = only_all_tested, + only_count = FALSE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname proportion @@ -266,12 +288,14 @@ proportion_S <- function(..., only_all_tested = FALSE) { tryCatch( rsi_calc(..., - ab_result = "S", - minimum = minimum, - as_percent = as_percent, - only_all_tested = only_all_tested, - only_count = FALSE), - error = function(e) stop_(e$message, call = -5)) + ab_result = "S", + minimum = minimum, + as_percent = as_percent, + only_all_tested = only_all_tested, + only_count = FALSE + ), + error = function(e) stop_(e$message, call = -5) + ) } #' @rdname proportion @@ -284,14 +308,17 @@ proportion_df <- function(data, combine_SI = TRUE, combine_IR = FALSE) { tryCatch( - rsi_calc_df(type = "proportion", - data = data, - translate_ab = translate_ab, - language = language, - minimum = minimum, - as_percent = as_percent, - combine_SI = combine_SI, - combine_IR = combine_IR, - combine_SI_missing = missing(combine_SI)), - error = function(e) stop_(e$message, call = -5)) + rsi_calc_df( + type = "proportion", + data = data, + translate_ab = translate_ab, + language = language, + minimum = minimum, + as_percent = as_percent, + combine_SI = combine_SI, + combine_IR = combine_IR, + combine_SI_missing = missing(combine_SI) + ), + error = function(e) stop_(e$message, call = -5) + ) } diff --git a/R/random.R b/R/random.R index 12f692635..dff3bc32e 100644 --- a/R/random.R +++ b/R/random.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -25,14 +25,14 @@ #' Random MIC Values/Disk Zones/RSI Generation #' -#' These functions can be used for generating random MIC values and disk diffusion diameters, for AMR data analysis practice. By providing a microorganism and antimicrobial agent, the generated results will reflect reality as much as possible. +#' These functions can be used for generating random MIC values and disk diffusion diameters, for AMR data analysis practice. By providing a microorganism and antimicrobial agent, the generated results will reflect reality as much as possible. #' @param size desired size of the returned vector. If used in a [data.frame] call or `dplyr` verb, will get the current (group) size if left blank. #' @param mo any [character] that can be coerced to a valid microorganism code with [as.mo()] #' @param ab any [character] that can be coerced to a valid antimicrobial agent code with [as.ab()] #' @param prob_RSI a vector of length 3: the probabilities for "R" (1st value), "S" (2nd value) and "I" (3rd value) #' @param ... ignored, only in place to allow future extensions #' @details The base \R function [sample()] is used for generating values. -#' +#' #' Generated values are based on the EUCAST `r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))` guideline as implemented in the [rsi_translation] data set. To create specific generated values per bug or drug, set the `mo` and/or `ab` argument. #' @return class `` for [random_mic()] (see [as.mic()]) and class `` for [random_disk()] (see [as.disk()]) #' @name random @@ -42,15 +42,15 @@ #' random_mic(25) #' random_disk(25) #' random_rsi(25) -#' +#' #' \donttest{ #' # make the random generation more realistic by setting a bug and/or drug: -#' random_mic(25, "Klebsiella pneumoniae") # range 0.0625-64 -#' random_mic(25, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16 +#' random_mic(25, "Klebsiella pneumoniae") # range 0.0625-64 +#' random_mic(25, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16 #' random_mic(25, "Streptococcus pneumoniae", "meropenem") # range 0.0625-4 -#' -#' random_disk(25, "Klebsiella pneumoniae") # range 8-50 -#' random_disk(25, "Klebsiella pneumoniae", "ampicillin") # range 11-17 +#' +#' random_disk(25, "Klebsiella pneumoniae") # range 8-50 +#' random_disk(25, "Klebsiella pneumoniae", "ampicillin") # range 11-17 #' random_disk(25, "Streptococcus pneumoniae", "ampicillin") # range 12-27 #' } random_mic <- function(size = NULL, mo = NULL, ab = NULL, ...) { @@ -92,14 +92,16 @@ random_exec <- function(type, size, mo = NULL, ab = NULL) { pm_filter(guideline %like% "EUCAST") %pm>% pm_arrange(pm_desc(guideline)) %pm>% subset(guideline == max(guideline) & - method == type) - + method == type) + if (!is.null(mo)) { mo_coerced <- as.mo(mo) - mo_include <- c(mo_coerced, - as.mo(mo_genus(mo_coerced)), - as.mo(mo_family(mo_coerced)), - as.mo(mo_order(mo_coerced))) + mo_include <- c( + mo_coerced, + as.mo(mo_genus(mo_coerced)), + as.mo(mo_family(mo_coerced)), + as.mo(mo_order(mo_coerced)) + ) df_new <- df %pm>% subset(mo %in% mo_include) if (nrow(df_new) > 0) { @@ -108,7 +110,7 @@ random_exec <- function(type, size, mo = NULL, ab = NULL) { warning_("in `random_", tolower(type), "()`: no rows found that match mo '", mo, "', ignoring argument `mo`") } } - + if (!is.null(ab)) { ab_coerced <- as.ab(ab) df_new <- df %pm>% @@ -119,16 +121,20 @@ random_exec <- function(type, size, mo = NULL, ab = NULL) { warning_("in `random_", tolower(type), "()`: no rows found that match ab '", ab, "', ignoring argument `ab`") } } - + if (type == "MIC") { # set range mic_range <- c(0.001, 0.002, 0.005, 0.010, 0.025, 0.0625, 0.125, 0.250, 0.5, 1, 2, 4, 8, 16, 32, 64, 128, 256) # get highest/lowest +/- random 1 to 3 higher factors of two - max_range <- mic_range[min(length(mic_range), - which(mic_range == max(df$breakpoint_R)) + sample(c(1:3), 1))] - min_range <- mic_range[max(1, - which(mic_range == min(df$breakpoint_S)) - sample(c(1:3), 1))] + max_range <- mic_range[min( + length(mic_range), + which(mic_range == max(df$breakpoint_R)) + sample(c(1:3), 1) + )] + min_range <- mic_range[max( + 1, + which(mic_range == min(df$breakpoint_S)) - sample(c(1:3), 1) + )] mic_range_new <- mic_range[mic_range <= max_range & mic_range >= min_range] if (length(mic_range_new) == 0) { @@ -144,9 +150,11 @@ random_exec <- function(type, size, mo = NULL, ab = NULL) { } return(out) } else if (type == "DISK") { - set_range <- seq(from = as.integer(min(df$breakpoint_R) / 1.25), - to = as.integer(max(df$breakpoint_S) * 1.25), - by = 1) + set_range <- seq( + from = as.integer(min(df$breakpoint_R) / 1.25), + to = as.integer(max(df$breakpoint_S) * 1.25), + by = 1 + ) out <- sample(set_range, size = size, replace = TRUE) out[out < 6] <- sample(c(6:10), length(out[out < 6]), replace = TRUE) out[out > 50] <- sample(c(40:50), length(out[out > 50]), replace = TRUE) diff --git a/R/resistance_predict.R b/R/resistance_predict.R index 3cde201c3..d08d6a2e0 100755 --- a/R/resistance_predict.R +++ b/R/resistance_predict.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -34,7 +34,7 @@ #' @param year_every unit of sequence between lowest year found in the data and `year_max` #' @param minimum minimal amount of available isolates per year to include. Years containing less observations will be estimated by the model. #' @param model the statistical model of choice. This could be a generalised linear regression model with binomial distribution (i.e. using `glm(..., family = binomial)`, assuming that a period of zero resistance was followed by a period of increasing resistance leading slowly to more and more resistance. See *Details* for all valid options. -#' @param I_as_S a [logical] to indicate whether values `"I"` should be treated as `"S"` (will otherwise be treated as `"R"`). The default, `TRUE`, follows the redefinition by EUCAST about the interpretation of I (increased exposure) in 2019, see section *Interpretation of S, I and R* below. +#' @param I_as_S a [logical] to indicate whether values `"I"` should be treated as `"S"` (will otherwise be treated as `"R"`). The default, `TRUE`, follows the redefinition by EUCAST about the interpretation of I (increased exposure) in 2019, see section *Interpretation of S, I and R* below. #' @param preserve_measurements a [logical] to indicate whether predictions of years that are actually available in the data should be overwritten by the original data. The standard errors of those years will be `NA`. #' @param info a [logical] to indicate whether textual analysis should be printed with the name and [summary()] of the statistical model. #' @param main title of the plot @@ -55,19 +55,20 @@ #' - `observations`, the total number of available observations in that year, i.e. \eqn{S + I + R} #' - `observed`, the original observed resistant percentages #' - `estimated`, the estimated resistant percentages, calculated by the model -#' +#' #' Furthermore, the model itself is available as an attribute: `attributes(x)$model`, see *Examples*. #' @seealso The [proportion()] functions to calculate resistance -#' +#' #' Models: [lm()] [glm()] #' @rdname resistance_predict #' @export #' @importFrom stats predict glm lm #' @examples -#' x <- resistance_predict(example_isolates, -#' col_ab = "AMX", -#' year_min = 2010, -#' model = "binomial") +#' x <- resistance_predict(example_isolates, +#' col_ab = "AMX", +#' year_min = 2010, +#' model = "binomial" +#' ) #' plot(x) #' \donttest{ #' if (require("ggplot2")) { @@ -89,14 +90,15 @@ #' #' # create nice plots with ggplot2 yourself #' if (require("dplyr") && require("ggplot2")) { -#' #' data <- example_isolates %>% #' filter(mo == as.mo("E. coli")) %>% -#' resistance_predict(col_ab = "AMX", -#' col_date = "date", -#' model = "binomial", -#' info = FALSE, -#' minimum = 15) +#' resistance_predict( +#' col_ab = "AMX", +#' col_date = "date", +#' model = "binomial", +#' info = FALSE, +#' minimum = 15 +#' ) #' head(data) #' autoplot(data) #' } @@ -124,12 +126,12 @@ resistance_predict <- function(x, meet_criteria(I_as_S, allow_class = "logical", has_length = 1) meet_criteria(preserve_measurements, allow_class = "logical", has_length = 1) meet_criteria(info, allow_class = "logical", has_length = 1) - + stop_if(is.null(model), 'choose a regression model with the `model` argument, e.g. resistance_predict(..., model = "binomial")') - + x.bak <- x x <- as.data.frame(x, stringsAsFactors = FALSE) - + dots <- unlist(list(...)) if (length(dots) != 0) { # backwards compatibility with old arguments @@ -141,15 +143,17 @@ resistance_predict <- function(x, warning_("in `resistance_predict()`: I_as_R is deprecated - use I_as_S instead.") } } - + # -- date if (is.null(col_date)) { col_date <- search_type_in_df(x = x, type = "date") stop_if(is.null(col_date), "`col_date` must be set") } - stop_ifnot(col_date %in% colnames(x), - "column '", col_date, "' not found") - + stop_ifnot( + col_date %in% colnames(x), + "column '", col_date, "' not found" + ) + year <- function(x) { # don't depend on lubridate or so, would be overkill for only this function if (all(grepl("^[0-9]{4}$", x))) { @@ -158,7 +162,7 @@ resistance_predict <- function(x, as.integer(format(as.Date(x), "%Y")) } } - + df <- x df[, col_ab] <- droplevels(as.rsi(df[, col_ab, drop = TRUE])) if (I_as_S == TRUE) { @@ -169,22 +173,23 @@ resistance_predict <- function(x, df[, col_ab] <- gsub("I", "R", df[, col_ab, drop = TRUE]) } df[, col_ab] <- ifelse(is.na(df[, col_ab, drop = TRUE]), 0, df[, col_ab, drop = TRUE]) - + # remove rows with NAs df <- subset(df, !is.na(df[, col_ab, drop = TRUE])) df$year <- year(df[, col_date, drop = TRUE]) df <- as.data.frame(rbind(table(df[, c("year", col_ab), drop = FALSE])), - stringsAsFactors = FALSE) + stringsAsFactors = FALSE + ) df$year <- as.integer(rownames(df)) rownames(df) <- NULL - + df <- subset(df, sum(df$R + df$S, na.rm = TRUE) >= minimum) # nolint start df_matrix <- as.matrix(df[, c("R", "S"), drop = FALSE]) # nolint end - + stop_if(NROW(df) == 0, "there are no observations") - + year_lowest <- min(df$year) if (is.null(year_min)) { year_min <- year_lowest @@ -194,9 +199,9 @@ resistance_predict <- function(x, if (is.null(year_max)) { year_max <- year(Sys.Date()) + 10 } - + years <- list(year = seq(from = year_min, to = year_max, by = year_every)) - + if (model %in% c("binomial", "binom", "logit")) { model <- "binomial" model_lm <- with(df, glm(df_matrix ~ year, family = binomial)) @@ -205,11 +210,10 @@ resistance_predict <- function(x, cat("\n------------------------------------------------------------\n") print(summary(model_lm)) } - + predictmodel <- predict(model_lm, newdata = years, type = "response", se.fit = TRUE) prediction <- predictmodel$fit se <- predictmodel$se.fit - } else if (model %in% c("loglin", "poisson")) { model <- "poisson" model_lm <- with(df, glm(R ~ year, family = poisson)) @@ -218,11 +222,10 @@ resistance_predict <- function(x, cat("\n--------------------------------------------------------------\n") print(summary(model_lm)) } - + predictmodel <- predict(model_lm, newdata = years, type = "response", se.fit = TRUE) prediction <- predictmodel$fit se <- predictmodel$se.fit - } else if (model %in% c("lin", "linear")) { model <- "linear" model_lm <- with(df, lm((R / (R + S)) ~ year)) @@ -231,59 +234,61 @@ resistance_predict <- function(x, cat("\n-----------------------\n") print(summary(model_lm)) } - + predictmodel <- predict(model_lm, newdata = years, se.fit = TRUE) prediction <- predictmodel$fit se <- predictmodel$se.fit - } else { stop("no valid model selected. See ?resistance_predict.") } - + # prepare the output dataframe - df_prediction <- data.frame(year = unlist(years), - value = prediction, - se_min = prediction - se, - se_max = prediction + se, - stringsAsFactors = FALSE) - + df_prediction <- data.frame( + year = unlist(years), + value = prediction, + se_min = prediction - se, + se_max = prediction + se, + stringsAsFactors = FALSE + ) + if (model == "poisson") { df_prediction$value <- as.integer(format(df_prediction$value, scientific = FALSE)) df_prediction$se_min <- as.integer(df_prediction$se_min) df_prediction$se_max <- as.integer(df_prediction$se_max) - } else { # se_max not above 1 df_prediction$se_max <- ifelse(df_prediction$se_max > 1, 1, df_prediction$se_max) } # se_min not below 0 df_prediction$se_min <- ifelse(df_prediction$se_min < 0, 0, df_prediction$se_min) - - df_observations <- data.frame(year = df$year, - observations = df$R + df$S, - observed = df$R / (df$R + df$S), - stringsAsFactors = FALSE) + + df_observations <- data.frame( + year = df$year, + observations = df$R + df$S, + observed = df$R / (df$R + df$S), + stringsAsFactors = FALSE + ) df_prediction <- df_prediction %pm>% pm_left_join(df_observations, by = "year") df_prediction$estimated <- df_prediction$value - + if (preserve_measurements == TRUE) { # replace estimated data by observed data df_prediction$value <- ifelse(!is.na(df_prediction$observed), df_prediction$observed, df_prediction$value) df_prediction$se_min <- ifelse(!is.na(df_prediction$observed), NA, df_prediction$se_min) df_prediction$se_max <- ifelse(!is.na(df_prediction$observed), NA, df_prediction$se_max) } - + df_prediction$value <- ifelse(df_prediction$value > 1, 1, ifelse(df_prediction$value < 0, 0, df_prediction$value)) df_prediction <- df_prediction[order(df_prediction$year), , drop = FALSE] - + out <- as_original_data_class(df_prediction, class(x.bak)) structure(out, - class = c("resistance_predict", class(out)), - I_as_S = I_as_S, - model_title = model, - model = model_lm, - ab = col_ab + class = c("resistance_predict", class(out)), + I_as_S = I_as_S, + model_title = model, + model = model_lm, + ab = col_ab ) } @@ -298,40 +303,48 @@ rsi_predict <- resistance_predict plot.resistance_predict <- function(x, main = paste("Resistance Prediction of", x_name), ...) { x_name <- paste0(ab_name(attributes(x)$ab), " (", attributes(x)$ab, ")") meet_criteria(main, allow_class = "character", has_length = 1) - + if (attributes(x)$I_as_S == TRUE) { ylab <- "%R" } else { ylab <- "%IR" } - - plot(x = x$year, - y = x$value, - ylim = c(0, 1), - yaxt = "n", # no y labels - pch = 19, # closed dots - ylab = paste0("Percentage (", ylab, ")"), - xlab = "Year", - main = main, - sub = paste0("(n = ", sum(x$observations, na.rm = TRUE), - ", model: ", attributes(x)$model_title, ")"), - cex.sub = 0.75) - - + + plot( + x = x$year, + y = x$value, + ylim = c(0, 1), + yaxt = "n", # no y labels + pch = 19, # closed dots + ylab = paste0("Percentage (", ylab, ")"), + xlab = "Year", + main = main, + sub = paste0( + "(n = ", sum(x$observations, na.rm = TRUE), + ", model: ", attributes(x)$model_title, ")" + ), + cex.sub = 0.75 + ) + + axis(side = 2, at = seq(0, 1, 0.1), labels = paste0(0:10 * 10, "%")) - + # hack for error bars: https://stackoverflow.com/a/22037078/4575331 - arrows(x0 = x$year, - y0 = x$se_min, - x1 = x$year, - y1 = x$se_max, - length = 0.05, angle = 90, code = 3, lwd = 1.5) - + arrows( + x0 = x$year, + y0 = x$se_min, + x1 = x$year, + y1 = x$se_max, + length = 0.05, angle = 90, code = 3, lwd = 1.5 + ) + # overlay grey points for prediction - points(x = subset(x, is.na(observations))$year, - y = subset(x, is.na(observations))$value, - pch = 19, - col = "grey40") + points( + x = subset(x, is.na(observations))$year, + y = subset(x, is.na(observations))$value, + pch = 19, + col = "grey40" + ) } #' @rdname resistance_predict @@ -343,27 +356,35 @@ ggplot_rsi_predict <- function(x, x_name <- paste0(ab_name(attributes(x)$ab), " (", attributes(x)$ab, ")") meet_criteria(main, allow_class = "character", has_length = 1) meet_criteria(ribbon, allow_class = "logical", has_length = 1) - + stop_ifnot_installed("ggplot2") stop_ifnot(inherits(x, "resistance_predict"), "`x` must be a resistance prediction model created with resistance_predict()") - + if (attributes(x)$I_as_S == TRUE) { ylab <- "%R" } else { ylab <- "%IR" } - - p <- ggplot2::ggplot(as.data.frame(x, stringsAsFactors = FALSE), - ggplot2::aes(x = year, y = value)) + - ggplot2::geom_point(data = subset(x, !is.na(observations)), - size = 2) + + + p <- ggplot2::ggplot( + as.data.frame(x, stringsAsFactors = FALSE), + ggplot2::aes(x = year, y = value) + ) + + ggplot2::geom_point( + data = subset(x, !is.na(observations)), + size = 2 + ) + scale_y_percent(limits = c(0, 1)) + - ggplot2::labs(title = main, - y = paste0("Percentage (", ylab, ")"), - x = "Year", - caption = paste0("(n = ", sum(x$observations, na.rm = TRUE), - ", model: ", attributes(x)$model_title, ")")) - + ggplot2::labs( + title = main, + y = paste0("Percentage (", ylab, ")"), + x = "Year", + caption = paste0( + "(n = ", sum(x$observations, na.rm = TRUE), + ", model: ", attributes(x)$model_title, ")" + ) + ) + if (ribbon == TRUE) { p <- p + ggplot2::geom_ribbon(ggplot2::aes(ymin = se_min, ymax = se_max), alpha = 0.25) } else { @@ -371,9 +392,11 @@ ggplot_rsi_predict <- function(x, } p <- p + # overlay grey points for prediction - ggplot2::geom_point(data = subset(x, is.na(observations)), - size = 2, - colour = "grey40") + ggplot2::geom_point( + data = subset(x, is.na(observations)), + size = 2, + colour = "grey40" + ) p } diff --git a/R/rsi.R b/R/rsi.R index 7f8112319..b797c39f7 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -38,46 +38,46 @@ #' @param reference_data a [data.frame] to be used for interpretation, which defaults to the [rsi_translation] data set. Changing this argument allows for using own interpretation guidelines. This argument must contain a data set that is equal in structure to the [rsi_translation] data set (same column names and column types). Please note that the `guideline` argument will be ignored when `reference_data` is manually set. #' @param threshold maximum fraction of invalid antimicrobial interpretations of `x`, see *Examples* #' @param ... for using on a [data.frame]: names of columns to apply [as.rsi()] on (supports tidy selection such as `column1:column4`). Otherwise: arguments passed on to methods. -#' @details +#' @details #' ## How it Works -#' +#' #' The [as.rsi()] function works in four ways: -#' +#' #' 1. For **cleaning raw / untransformed data**. The data will be cleaned to only contain values S, I and R and will try its best to determine this with some intelligence. For example, mixed values with R/SI interpretations and MIC values such as `"<0.25; S"` will be coerced to `"S"`. Combined interpretations for multiple test methods (as seen in laboratory records) such as `"S; S"` will be coerced to `"S"`, but a value like `"S; I"` will return `NA` with a warning that the input is unclear. -#' +#' #' 2. For **interpreting minimum inhibitory concentration (MIC) values** according to EUCAST or CLSI. You must clean your MIC values first using [as.mic()], that also gives your columns the new data class [`mic`]. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the `mo` argument. -#' * Using `dplyr`, R/SI interpretation can be done very easily with either: +#' * Using `dplyr`, R/SI interpretation can be done very easily with either: #' ``` #' your_data %>% mutate_if(is.mic, as.rsi) # until dplyr 1.0.0 #' your_data %>% mutate(across(where(is.mic), as.rsi)) # since dplyr 1.0.0 #' ``` #' * Operators like "<=" will be stripped before interpretation. When using `conserve_capped_values = TRUE`, an MIC value of e.g. ">2" will always return "R", even if the breakpoint according to the chosen guideline is ">=4". This is to prevent that capped values from raw laboratory data would not be treated conservatively. The default behaviour (`conserve_capped_values = FALSE`) considers ">2" to be lower than ">=4" and might in this case return "S" or "I". -#' +#' #' 3. For **interpreting disk diffusion diameters** according to EUCAST or CLSI. You must clean your disk zones first using [as.disk()], that also gives your columns the new data class [`disk`]. Also, be sure to have a column with microorganism names or codes. It will be found automatically, but can be set manually using the `mo` argument. -#' * Using `dplyr`, R/SI interpretation can be done very easily with either: +#' * Using `dplyr`, R/SI interpretation can be done very easily with either: #' ``` #' your_data %>% mutate_if(is.disk, as.rsi) # until dplyr 1.0.0 #' your_data %>% mutate(across(where(is.disk), as.rsi)) # since dplyr 1.0.0 #' ``` -#' +#' #' 4. For **interpreting a complete data set**, with automatic determination of MIC values, disk diffusion diameters, microorganism names or codes, and antimicrobial test results. This is done very simply by running `as.rsi(your_data)`. -#' +#' #' ## Supported Guidelines -#' -#' For interpreting MIC values as well as disk diffusion diameters, currently implemented guidelines are EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`) and CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`). -#' +#' +#' For interpreting MIC values as well as disk diffusion diameters, currently implemented guidelines are EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "EUCAST")$guideline)))`) and CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(rsi_translation, guideline %like% "CLSI")$guideline)))`). +#' #' Thus, the `guideline` argument must be set to e.g., ``r paste0('"', subset(rsi_translation, guideline %like% "EUCAST")$guideline[1], '"')`` or ``r paste0('"', subset(rsi_translation, guideline %like% "CLSI")$guideline[1], '"')``. By simply using `"EUCAST"` (the default) or `"CLSI"` as input, the latest included version of that guideline will automatically be selected. You can set your own data set using the `reference_data` argument. The `guideline` argument will then be ignored. -#' +#' #' ## After Interpretation -#' +#' #' After using [as.rsi()], you can use the [eucast_rules()] defined by EUCAST to (1) apply inferred susceptibility and resistance based on results of other antimicrobials and (2) apply intrinsic resistance based on taxonomic properties of a microorganism. -#' +#' #' ## Machine-Readable Interpretation Guidelines -#' +#' #' The repository of this package [contains a machine-readable version](https://github.com/msberends/AMR/blob/main/data-raw/rsi_translation.txt) of all guidelines. This is a CSV file consisting of `r format(nrow(AMR::rsi_translation), big.mark = ",")` rows and `r ncol(AMR::rsi_translation)` columns. This file is machine-readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial agent and the microorganism. **This allows for easy implementation of these rules in laboratory information systems (LIS)**. Note that it only contains interpretation guidelines for humans - interpretation guidelines from CLSI for animals were removed. #' #' ## Other -#' +#' #' The function [is.rsi()] detects if the input contains class ``. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector. #' #' The function [is.rsi.eligible()] returns `TRUE` when a columns contains at most 5% invalid antimicrobial interpretations (not S and/or I and/or R), and `FALSE` otherwise. The threshold of 5% can be set with the `threshold` argument. If the input is a [data.frame], it iterates over all columns and returns a [logical] vector. @@ -100,29 +100,35 @@ #' @examples #' example_isolates #' summary(example_isolates) # see all R/SI results at a glance -#' -#' # For INTERPRETING disk diffusion and MIC values ----------------------- -#' -#' # a whole data set, even with combined MIC values and disk zones -#' df <- data.frame(microorganism = "Escherichia coli", -#' AMP = as.mic(8), -#' CIP = as.mic(0.256), -#' GEN = as.disk(18), -#' TOB = as.disk(16), -#' NIT = as.mic(32), -#' ERY = "R") -#' as.rsi(df) -#' -#' # for single values -#' as.rsi(x = as.mic(2), -#' mo = as.mo("S. pneumoniae"), -#' ab = "AMP", -#' guideline = "EUCAST") #' -#' as.rsi(x = as.disk(18), -#' mo = "Strep pneu", # `mo` will be coerced with as.mo() -#' ab = "ampicillin", # and `ab` with as.ab() -#' guideline = "EUCAST") +#' # For INTERPRETING disk diffusion and MIC values ----------------------- +#' +#' # a whole data set, even with combined MIC values and disk zones +#' df <- data.frame( +#' microorganism = "Escherichia coli", +#' AMP = as.mic(8), +#' CIP = as.mic(0.256), +#' GEN = as.disk(18), +#' TOB = as.disk(16), +#' NIT = as.mic(32), +#' ERY = "R" +#' ) +#' as.rsi(df) +#' +#' # for single values +#' as.rsi( +#' x = as.mic(2), +#' mo = as.mo("S. pneumoniae"), +#' ab = "AMP", +#' guideline = "EUCAST" +#' ) +#' +#' as.rsi( +#' x = as.disk(18), +#' mo = "Strep pneu", # `mo` will be coerced with as.mo() +#' ab = "ampicillin", # and `ab` with as.ab() +#' guideline = "EUCAST" +#' ) #' #' \donttest{ #' # the dplyr way @@ -132,48 +138,51 @@ #' df %>% mutate(across(where(is.mic), as.rsi)) #' df %>% mutate_at(vars(AMP:TOB), as.rsi) #' df %>% mutate(across(AMP:TOB, as.rsi)) -#' +#' #' df %>% #' mutate_at(vars(AMP:TOB), as.rsi, mo = .$microorganism) -#' +#' #' # to include information about urinary tract infections (UTI) -#' data.frame(mo = "E. coli", -#' NIT = c("<= 2", 32), -#' from_the_bladder = c(TRUE, FALSE)) %>% +#' data.frame( +#' mo = "E. coli", +#' NIT = c("<= 2", 32), +#' from_the_bladder = c(TRUE, FALSE) +#' ) %>% #' as.rsi(uti = "from_the_bladder") -#' -#' data.frame(mo = "E. coli", -#' NIT = c("<= 2", 32), -#' specimen = c("urine", "blood")) %>% +#' +#' data.frame( +#' mo = "E. coli", +#' NIT = c("<= 2", 32), +#' specimen = c("urine", "blood") +#' ) %>% #' as.rsi() # automatically determines urine isolates -#' +#' #' df %>% -#' mutate_at(vars(AMP:NIT), as.rsi, mo = "E. coli", uti = TRUE) +#' mutate_at(vars(AMP:NIT), as.rsi, mo = "E. coli", uti = TRUE) #' } #' #' # For CLEANING existing R/SI values ------------------------------------ -#' +#' #' as.rsi(c("S", "I", "R", "A", "B", "C")) #' as.rsi("<= 0.002; S") # will return "S" - #' rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370))) #' is.rsi(rsi_data) -#' plot(rsi_data) # for percentages +#' plot(rsi_data) # for percentages #' barplot(rsi_data) # for frequencies #' #' # the dplyr way #' if (require("dplyr")) { #' example_isolates %>% #' mutate_at(vars(PEN:RIF), as.rsi) -#' # same: +#' # same: #' example_isolates %>% #' as.rsi(PEN:RIF) -#' +#' #' # fastest way to transform all columns with already valid AMR results to class `rsi`: #' example_isolates %>% #' mutate_if(is.rsi.eligible, as.rsi) -#' -#' # since dplyr 1.0.0, this can also be: +#' +#' # since dplyr 1.0.0, this can also be: #' # example_isolates %>% #' # mutate(across(where(is.rsi.eligible), as.rsi)) #' } @@ -186,7 +195,8 @@ as.rsi <- function(x, ...) { #' @details `NA_rsi_` is a missing value of the new `` class, analogous to e.g. base \R's [`NA_character_`][base::NA]. #' @export NA_rsi_ <- set_clean_class(factor(NA, levels = c("S", "I", "R"), ordered = TRUE), - new_class = c("rsi", "ordered", "factor")) + new_class = c("rsi", "ordered", "factor") +) #' @rdname as.rsi #' @export @@ -202,24 +212,26 @@ is.rsi <- function(x) { #' @export is.rsi.eligible <- function(x, threshold = 0.05) { meet_criteria(threshold, allow_class = "numeric", has_length = 1) - + if (inherits(x, "data.frame")) { # iterate this function over all columns return(unname(vapply(FUN.VALUE = logical(1), x, is.rsi.eligible))) } - + stop_if(NCOL(x) > 1, "`x` must be a one-dimensional vector.") - if (any(c("numeric", - "integer", - "mo", - "ab", - "Date", - "POSIXt", - "raw", - "hms", - "mic", - "disk") - %in% class(x))) { + if (any(c( + "numeric", + "integer", + "mo", + "ab", + "Date", + "POSIXt", + "raw", + "hms", + "mic", + "disk" + ) + %in% class(x))) { # no transformation needed return(FALSE) } else if (all(x %in% c("R", "S", "I", NA)) & !all(is.na(x))) { @@ -235,8 +247,10 @@ is.rsi.eligible <- function(x, threshold = 0.05) { ab <- suppressWarnings(as.ab(cur_col, fast_mode = TRUE, info = FALSE)) if (!is.na(ab)) { # this is a valid antibiotic code - message_("Column '", font_bold(cur_col), "' is as.rsi()-eligible (despite only having empty values), since it seems to be ", - ab_name(ab, language = NULL, tolower = TRUE), " (", ab, ")") + message_( + "Column '", font_bold(cur_col), "' is as.rsi()-eligible (despite only having empty values), since it seems to be ", + ab_name(ab, language = NULL, tolower = TRUE), " (", ab, ")" + ) return(TRUE) } } @@ -256,10 +270,10 @@ as.rsi.default <- function(x, ...) { if (is.rsi(x)) { return(x) } - + x.bak <- x x <- as.character(x) # this is needed to prevent the vctrs pkg from throwing an error - + if (inherits(x.bak, c("integer", "numeric", "double")) && all(x %in% c(1:3, NA))) { # support haven package for importing e.g., from SPSS - it adds the 'labels' attribute lbls <- attributes(x.bak)$labels @@ -270,11 +284,9 @@ as.rsi.default <- function(x, ...) { } else { x[x.bak == 1] <- "S" x[x.bak == 2] <- "I" - x[x.bak == 3] <- "R" + x[x.bak == 3] <- "R" } - } else if (!all(is.na(x)) && !identical(levels(x), c("R", "S", "I")) && !all(x %in% c("R", "S", "I", NA))) { - if (all(x %unlike% "(R|S|I)", na.rm = TRUE)) { # check if they are actually MICs or disks if (all_valid_mics(x)) { @@ -283,20 +295,26 @@ as.rsi.default <- function(x, ...) { warning_("in `as.rsi()`: the input seems to contain disk diffusion values. You can transform them with `as.disk()` before running `as.rsi()` to interpret them.") } } - + # trim leading and trailing spaces, new lines, etc. x <- trimws2(as.character(unlist(x))) x[x %in% c(NA, "", "-", "NULL")] <- NA_character_ x.bak <- x na_before <- length(x[is.na(x)]) - + # correct for translations - trans_R <- unlist(TRANSLATIONS[which(TRANSLATIONS$pattern == "Resistant"), - LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]]) - trans_S <- unlist(TRANSLATIONS[which(TRANSLATIONS$pattern == "Susceptible"), - LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]]) - trans_I <- unlist(TRANSLATIONS[which(TRANSLATIONS$pattern %in% c("Incr. exposure", "Susceptible, incr. exp.", "Intermediate")), - LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)]]) + trans_R <- unlist(TRANSLATIONS[ + which(TRANSLATIONS$pattern == "Resistant"), + LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)] + ]) + trans_S <- unlist(TRANSLATIONS[ + which(TRANSLATIONS$pattern == "Susceptible"), + LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)] + ]) + trans_I <- unlist(TRANSLATIONS[ + which(TRANSLATIONS$pattern %in% c("Incr. exposure", "Susceptible, incr. exp.", "Intermediate")), + LANGUAGES_SUPPORTED[LANGUAGES_SUPPORTED %in% colnames(TRANSLATIONS)] + ]) x <- gsub(paste0(unique(trans_R[!is.na(trans_R)]), collapse = "|"), "R", x, ignore.case = TRUE) x <- gsub(paste0(unique(trans_S[!is.na(trans_S)]), collapse = "|"), "S", x, ignore.case = TRUE) x <- gsub(paste0(unique(trans_I[!is.na(trans_I)]), collapse = "|"), "I", x, ignore.case = TRUE) @@ -320,7 +338,7 @@ as.rsi.default <- function(x, ...) { x <- gsub("^R+$", "R", x) x[!x %in% c("S", "I", "R")] <- NA_character_ na_after <- length(x[is.na(x) | x == ""]) - + if (!isFALSE(list(...)$warn)) { # so as.rsi(..., warn = FALSE) will never throw a warning if (na_before != na_after) { list_missing <- x.bak[is.na(x) & !is.na(x.bak) & x.bak != ""] %pm>% @@ -328,9 +346,11 @@ as.rsi.default <- function(x, ...) { sort() %pm>% vector_and(quotes = TRUE) warning_("in `as.rsi()`: ", na_after - na_before, " results truncated (", - round(((na_after - na_before) / length(x)) * 100), - "%) that were invalid antimicrobial interpretations: ", - list_missing, call = FALSE) + round(((na_after - na_before) / length(x)) * 100), + "%) that were invalid antimicrobial interpretations: ", + list_missing, + call = FALSE + ) } if (any(toupper(x.bak[!is.na(x.bak)]) == "U") && message_not_thrown_before("as.rsi", "U")) { warning_("in `as.rsi()`: 'U' was interpreted as 'S', following some laboratory systems") @@ -343,63 +363,68 @@ as.rsi.default <- function(x, ...) { } } } - + set_clean_class(factor(x, levels = c("S", "I", "R"), ordered = TRUE), - new_class = c("rsi", "ordered", "factor")) + new_class = c("rsi", "ordered", "factor") + ) } #' @rdname as.rsi #' @export as.rsi.mic <- function(x, - mo = NULL, - ab = deparse(substitute(x)), - guideline = "EUCAST", + mo = NULL, + ab = deparse(substitute(x)), + guideline = "EUCAST", uti = FALSE, conserve_capped_values = FALSE, add_intrinsic_resistance = FALSE, reference_data = AMR::rsi_translation, ...) { - as_rsi_method(method_short = "mic", - method_long = "MIC values", - x = x, - mo = mo, - ab = ab, - guideline = guideline, - uti = uti, - conserve_capped_values = conserve_capped_values, - add_intrinsic_resistance = add_intrinsic_resistance, - reference_data = reference_data, - ...) + as_rsi_method( + method_short = "mic", + method_long = "MIC values", + x = x, + mo = mo, + ab = ab, + guideline = guideline, + uti = uti, + conserve_capped_values = conserve_capped_values, + add_intrinsic_resistance = add_intrinsic_resistance, + reference_data = reference_data, + ... + ) } #' @rdname as.rsi #' @export as.rsi.disk <- function(x, - mo = NULL, - ab = deparse(substitute(x)), - guideline = "EUCAST", + mo = NULL, + ab = deparse(substitute(x)), + guideline = "EUCAST", uti = FALSE, add_intrinsic_resistance = FALSE, reference_data = AMR::rsi_translation, ...) { - as_rsi_method(method_short = "disk", - method_long = "disk diffusion zones", - x = x, - mo = mo, - ab = ab, - guideline = guideline, - uti = uti, - conserve_capped_values = FALSE, - add_intrinsic_resistance = add_intrinsic_resistance, - reference_data = reference_data, - ...) + as_rsi_method( + method_short = "disk", + method_long = "disk diffusion zones", + x = x, + mo = mo, + ab = ab, + guideline = guideline, + uti = uti, + conserve_capped_values = FALSE, + add_intrinsic_resistance = add_intrinsic_resistance, + reference_data = reference_data, + ... + ) } #' @rdname as.rsi #' @export as.rsi.data.frame <- function(x, - ..., - col_mo = NULL, + ..., + col_mo = NULL, guideline = "EUCAST", uti = NULL, conserve_capped_values = FALSE, @@ -412,7 +437,7 @@ as.rsi.data.frame <- function(x, meet_criteria(conserve_capped_values, allow_class = "logical", has_length = 1) meet_criteria(add_intrinsic_resistance, allow_class = "logical", has_length = 1) meet_criteria(reference_data, allow_class = "data.frame") - + x.bak <- x for (i in seq_len(ncol(x))) { # don't keep factors, overwriting them is hard @@ -420,13 +445,13 @@ as.rsi.data.frame <- function(x, x[, i] <- as.character(x[, i, drop = TRUE]) } } - + # -- MO col_mo.bak <- col_mo if (is.null(col_mo)) { col_mo <- search_type_in_df(x = x, type = "mo", info = FALSE) } - + # -- UTIs col_uti <- uti if (is.null(col_uti)) { @@ -442,8 +467,10 @@ as.rsi.data.frame <- function(x, } } else { # column found, transform to logical - stop_if(length(col_uti) != 1 | !col_uti %in% colnames(x), - "argument `uti` must be a [logical] vector, of must be a single column name of `x`") + stop_if( + length(col_uti) != 1 | !col_uti %in% colnames(x), + "argument `uti` must be a [logical] vector, of must be a single column name of `x`" + ) uti <- as.logical(x[, col_uti, drop = TRUE]) } } else { @@ -457,17 +484,19 @@ as.rsi.data.frame <- function(x, } else { plural <- c("", "s", "a ") } - message_("Assuming value", plural[1], " ", - vector_and(values, quotes = TRUE), - " in column '", font_bold(col_specimen), - "' reflect", plural[2], " ", plural[3], "urinary tract infection", plural[1], - ".\n Use `as.rsi(uti = FALSE)` to prevent this.") + message_( + "Assuming value", plural[1], " ", + vector_and(values, quotes = TRUE), + " in column '", font_bold(col_specimen), + "' reflect", plural[2], " ", plural[3], "urinary tract infection", plural[1], + ".\n Use `as.rsi(uti = FALSE)` to prevent this." + ) } else { # no data about UTI's found uti <- FALSE } } - + i <- 0 if (tryCatch(length(list(...)) > 0, error = function(e) TRUE)) { sel <- colnames(pm_select(x, ...)) @@ -477,7 +506,7 @@ as.rsi.data.frame <- function(x, if (!is.null(col_mo)) { sel <- sel[sel != col_mo] } - + ab_cols <- colnames(x)[vapply(FUN.VALUE = logical(1), x, function(y) { i <<- i + 1 check <- is.mic(y) | is.disk(y) @@ -500,9 +529,11 @@ as.rsi.data.frame <- function(x, return(FALSE) } })] - - stop_if(length(ab_cols) == 0, - "no columns with MIC values, disk zones or antibiotic column names found in this data set. Use as.mic() or as.disk() to transform antimicrobial columns.") + + stop_if( + length(ab_cols) == 0, + "no columns with MIC values, disk zones or antibiotic column names found in this data set. Use as.mic() or as.disk() to transform antimicrobial columns." + ) # set type per column types <- character(length(ab_cols)) types[vapply(FUN.VALUE = logical(1), x.bak[, ab_cols, drop = FALSE], is.disk)] <- "disk" @@ -519,33 +550,37 @@ as.rsi.data.frame <- function(x, } x_mo <- as.mo(x[, col_mo, drop = TRUE]) } - + for (i in seq_len(length(ab_cols))) { if (types[i] == "mic") { - x[, ab_cols[i]] <- as.rsi(x = x %pm>% - pm_pull(ab_cols[i]) %pm>% - as.character() %pm>% - as.mic(), - mo = x_mo, - ab = ab_cols[i], - guideline = guideline, - uti = uti, - conserve_capped_values = conserve_capped_values, - add_intrinsic_resistance = add_intrinsic_resistance, - reference_data = reference_data, - is_data.frame = TRUE) + x[, ab_cols[i]] <- as.rsi( + x = x %pm>% + pm_pull(ab_cols[i]) %pm>% + as.character() %pm>% + as.mic(), + mo = x_mo, + ab = ab_cols[i], + guideline = guideline, + uti = uti, + conserve_capped_values = conserve_capped_values, + add_intrinsic_resistance = add_intrinsic_resistance, + reference_data = reference_data, + is_data.frame = TRUE + ) } else if (types[i] == "disk") { - x[, ab_cols[i]] <- as.rsi(x = x %pm>% - pm_pull(ab_cols[i]) %pm>% - as.character() %pm>% - as.disk(), - mo = x_mo, - ab = ab_cols[i], - guideline = guideline, - uti = uti, - add_intrinsic_resistance = add_intrinsic_resistance, - reference_data = reference_data, - is_data.frame = TRUE) + x[, ab_cols[i]] <- as.rsi( + x = x %pm>% + pm_pull(ab_cols[i]) %pm>% + as.character() %pm>% + as.disk(), + mo = x_mo, + ab = ab_cols[i], + guideline = guideline, + uti = uti, + add_intrinsic_resistance = add_intrinsic_resistance, + reference_data = reference_data, + is_data.frame = TRUE + ) } else if (types[i] == "rsi") { show_message <- FALSE ab <- ab_cols[i] @@ -554,18 +589,20 @@ as.rsi.data.frame <- function(x, show_message <- TRUE # only print message if values are not already clean message_("=> Cleaning values in column '", font_bold(ab), "' (", - ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""), - ab_name(ab_coerced, tolower = TRUE), ")... ", - appendLF = FALSE, - as_note = FALSE) + ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""), + ab_name(ab_coerced, tolower = TRUE), ")... ", + appendLF = FALSE, + as_note = FALSE + ) } else if (!is.rsi(x.bak[, ab_cols[i], drop = TRUE])) { show_message <- TRUE # only print message if class not already set message_("=> Assigning class to already clean column '", font_bold(ab), "' (", - ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""), - ab_name(ab_coerced, tolower = TRUE), ")... ", - appendLF = FALSE, - as_note = FALSE) + ifelse(ab_coerced != toupper(ab), paste0(ab_coerced, ", "), ""), + ab_name(ab_coerced, tolower = TRUE), ")... ", + appendLF = FALSE, + as_note = FALSE + ) } x[, ab_cols[i]] <- as.rsi.default(x = as.character(x[, ab_cols[i], drop = TRUE])) if (show_message == TRUE) { @@ -573,7 +610,7 @@ as.rsi.data.frame <- function(x, } } } - + x } @@ -589,11 +626,13 @@ get_guideline <- function(guideline, reference_data) { # like 'EUCAST2020', should be 'EUCAST 2020' guideline_param <- gsub("([a-z]+)([0-9]+)", "\\1 \\2", guideline_param, ignore.case = TRUE) } - + stop_ifnot(guideline_param %in% reference_data$guideline, - "invalid guideline: '", guideline, - "'.\nValid guidelines are: ", vector_and(reference_data$guideline, quotes = TRUE, reverse = TRUE), call = FALSE) - + "invalid guideline: '", guideline, + "'.\nValid guidelines are: ", vector_and(reference_data$guideline, quotes = TRUE, reverse = TRUE), + call = FALSE + ) + guideline_param } @@ -617,50 +656,60 @@ as_rsi_method <- function(method_short, meet_criteria(add_intrinsic_resistance, allow_class = "logical", has_length = 1) meet_criteria(reference_data, allow_class = "data.frame") check_reference_data(reference_data) - + # for dplyr's across() cur_column_dplyr <- import_fn("cur_column", "dplyr", error_on_fail = FALSE) if (!is.null(cur_column_dplyr) && tryCatch(is.data.frame(get_current_data("ab", call = 0)), error = function(e) FALSE)) { # try to get current column, which will only be available when in across() ab <- tryCatch(cur_column_dplyr(), - error = function(e) ab) + error = function(e) ab + ) } - + # for auto-determining mo mo_var_found <- "" if (is.null(mo)) { - tryCatch({ - df <- get_current_data(arg_name = "mo", call = -3) # will return an error if not found - mo <- NULL - try({ - mo <- suppressMessages(search_type_in_df(df, "mo")) - }, silent = TRUE) - if (!is.null(df) && !is.null(mo) && is.data.frame(df)) { - mo_var_found <- paste0(" based on column '", font_bold(mo), "'") - mo <- df[, mo, drop = TRUE] + tryCatch( + { + df <- get_current_data(arg_name = "mo", call = -3) # will return an error if not found + mo <- NULL + try( + { + mo <- suppressMessages(search_type_in_df(df, "mo")) + }, + silent = TRUE + ) + if (!is.null(df) && !is.null(mo) && is.data.frame(df)) { + mo_var_found <- paste0(" based on column '", font_bold(mo), "'") + mo <- df[, mo, drop = TRUE] + } + }, + error = function(e) { + mo <- NULL } - }, error = function(e) { - mo <- NULL - }) + ) } if (is.null(mo)) { stop_("No information was supplied about the microorganisms (missing argument `mo` and no column of class found). See ?as.rsi.\n\n", - "To transform certain columns with e.g. mutate(), use `data %>% mutate(across(..., as.rsi, mo = x))`, where x is your column with microorganisms.\n", - "To tranform all ", method_long, " in a data set, use `data %>% as.rsi()` or `data %>% mutate(across(where(is.", method_short, "), as.rsi))`.", call = FALSE) + "To transform certain columns with e.g. mutate(), use `data %>% mutate(across(..., as.rsi, mo = x))`, where x is your column with microorganisms.\n", + "To tranform all ", method_long, " in a data set, use `data %>% as.rsi()` or `data %>% mutate(across(where(is.", method_short, "), as.rsi))`.", + call = FALSE + ) } - + if (length(ab) == 1 && ab %like% paste0("as.", method_short)) { - stop_('No unambiguous name was supplied about the antibiotic (argument `ab`). See ?as.rsi.', call = FALSE) + stop_("No unambiguous name was supplied about the antibiotic (argument `ab`). See ?as.rsi.", call = FALSE) } - + ab_coerced <- suppressWarnings(as.ab(ab)) mo_coerced <- suppressWarnings(as.mo(mo)) guideline_coerced <- get_guideline(guideline, reference_data) if (is.na(ab_coerced)) { message_("Returning NAs for unknown drug: '", font_bold(ab), - "'. Rename this column to a drug name or code, and check the output with `as.ab()`.", - add_fn = font_red, - as_note = FALSE) + "'. Rename this column to a drug name or code, and check the output with `as.ab()`.", + add_fn = font_red, + as_note = FALSE + ) return(as.rsi(rep(NA, length(x)))) } if (length(mo_coerced) == 1) { @@ -669,30 +718,34 @@ as_rsi_method <- function(method_short, if (length(uti) == 1) { uti <- rep(uti, length(x)) } - + agent_formatted <- paste0("'", font_bold(ab), "'") agent_name <- ab_name(ab_coerced, tolower = TRUE, language = NULL) if (generalise_antibiotic_name(ab) != generalise_antibiotic_name(agent_name)) { agent_formatted <- paste0(agent_formatted, " (", ab_coerced, ", ", agent_name, ")") } message_("=> Interpreting ", method_long, " of ", ifelse(isTRUE(list(...)$is_data.frame), "column ", ""), - agent_formatted, - mo_var_found, - " according to ", ifelse(identical(reference_data, AMR::rsi_translation), - font_bold(guideline_coerced), - "manually defined 'reference_data'"), - "... ", - appendLF = FALSE, - as_note = FALSE) - result <- exec_as.rsi(method = method_short, - x = x, - mo = mo_coerced, - ab = ab_coerced, - guideline = guideline_coerced, - uti = uti, - conserve_capped_values = conserve_capped_values, - add_intrinsic_resistance = add_intrinsic_resistance, - reference_data = reference_data) # exec_as.rsi will return message 'OK' + agent_formatted, + mo_var_found, + " according to ", ifelse(identical(reference_data, AMR::rsi_translation), + font_bold(guideline_coerced), + "manually defined 'reference_data'" + ), + "... ", + appendLF = FALSE, + as_note = FALSE + ) + result <- exec_as.rsi( + method = method_short, + x = x, + mo = mo_coerced, + ab = ab_coerced, + guideline = guideline_coerced, + uti = uti, + conserve_capped_values = conserve_capped_values, + add_intrinsic_resistance = add_intrinsic_resistance, + reference_data = reference_data + ) # exec_as.rsi will return message 'OK' result } @@ -702,25 +755,25 @@ exec_as.rsi <- function(method, ab, guideline, uti, - conserve_capped_values, + conserve_capped_values, add_intrinsic_resistance, reference_data) { metadata_mo <- get_mo_failures_uncertainties_renamed() - + x_bak <- data.frame(x_mo = paste0(x, mo), stringsAsFactors = FALSE) df <- unique(data.frame(x, mo, x_mo = paste0(x, mo), stringsAsFactors = FALSE)) x <- df$x mo <- df$mo - + if (method == "mic") { x <- as.mic(x) # when as.rsi.mic is called directly } else if (method == "disk") { x <- as.disk(x) # when as.rsi.disk is called directly } - + rise_warning <- FALSE method_param <- toupper(method) - + genera <- mo_genus(mo, language = NULL) mo_genus <- as.mo(genera, language = NULL) mo_family <- as.mo(mo_family(mo, language = NULL)) @@ -736,16 +789,22 @@ exec_as.rsi <- function(method, mo_lancefield <- mo } mo_other <- as.mo(rep("UNKNOWN", length(mo))) - + guideline_coerced <- get_guideline(guideline, reference_data) if (guideline_coerced != guideline) { - if (message_not_thrown_before("as.rsi", "msg1")) { + if (message_not_thrown_before("as.rsi", "guideline")) { message_("Using guideline ", font_bold(guideline_coerced), " as input for `guideline`.") } } - + new_rsi <- rep(NA_character_, length(x)) ab_param <- ab + if (ab_param == "AMX") { + ab_param <- "AMP" + if (message_not_thrown_before("as.rsi", "AMP_for_AMX")) { + message_("(using ampicillin rules)", appendLF = FALSE, as_note = FALSE) + } + } if (identical(reference_data, AMR::rsi_translation)) { trans <- reference_data %pm>% subset(guideline == guideline_coerced & method == method_param & ab == ab_param) @@ -754,24 +813,24 @@ exec_as.rsi <- function(method, subset(method == method_param & ab == ab_param) } trans$lookup <- paste(trans$mo, trans$ab) - - lookup_mo <- paste(mo, ab) - lookup_genus <- paste(mo_genus, ab) - lookup_family <- paste(mo_family, ab) - lookup_order <- paste(mo_order, ab) - lookup_becker <- paste(mo_becker, ab) - lookup_lancefield <- paste(mo_lancefield, ab) - lookup_other <- paste(mo_other, ab) - + + lookup_mo <- paste(mo, ab_param) + lookup_genus <- paste(mo_genus, ab_param) + lookup_family <- paste(mo_family, ab_param) + lookup_order <- paste(mo_order, ab_param) + lookup_becker <- paste(mo_becker, ab_param) + lookup_lancefield <- paste(mo_lancefield, ab_param) + lookup_other <- paste(mo_other, ab_param) + any_is_intrinsic_resistant <- FALSE - + for (i in seq_len(length(x))) { - is_intrinsic_r <- paste(mo[i], ab) %in% INTRINSIC_R + is_intrinsic_r <- paste(mo[i], ab_param) %in% INTRINSIC_R any_is_intrinsic_resistant <- any_is_intrinsic_resistant | is_intrinsic_r - + if (isTRUE(add_intrinsic_resistance) & is_intrinsic_r) { if (guideline_coerced %unlike% "EUCAST") { - if (message_not_thrown_before("as.rsi", "msg2")) { + if (message_not_thrown_before("as.rsi", "intrinsic")) { warning_("in `as.rsi()`: using 'add_intrinsic_resistance' is only useful when using EUCAST guidelines, since the rules for intrinsic resistance are based on EUCAST.") } } else { @@ -779,85 +838,95 @@ exec_as.rsi <- function(method, next } } - + get_record <- trans %pm>% # no subsetting to UTI here - subset(lookup %in% c(lookup_mo[i], - lookup_genus[i], - lookup_family[i], - lookup_order[i], - lookup_becker[i], - lookup_lancefield[i], - lookup_other[i])) - - if (any(get_record$uti == TRUE, na.rm = TRUE) && !any(uti == TRUE, na.rm = TRUE) && message_not_thrown_before("as.rsi", "msg3", ab)) { - warning_("in `as.rsi()`: interpretation of ", font_bold(ab_name(ab, tolower = TRUE)), " is only available for (uncomplicated) urinary tract infections (UTI) for some microorganisms. Use argument `uti` to set which isolates are from urine. See ?as.rsi.") + subset(lookup %in% c( + lookup_mo[i], + lookup_genus[i], + lookup_family[i], + lookup_order[i], + lookup_becker[i], + lookup_lancefield[i], + lookup_other[i] + )) + + if (any(get_record$uti == TRUE, na.rm = TRUE) && !any(uti == TRUE, na.rm = TRUE) && message_not_thrown_before("as.rsi", "uti", ab_param)) { + warning_("in `as.rsi()`: interpretation of ", font_bold(ab_name(ab_param, tolower = TRUE)), " is only available for (uncomplicated) urinary tract infections (UTI) for some microorganisms. Use argument `uti` to set which isolates are from urine. See ?as.rsi.") rise_warning <- TRUE } - + if (isTRUE(uti[i])) { - get_record <- get_record %pm>% + get_record <- get_record %pm>% # be as specific as possible (i.e. prefer species over genus): # pm_desc(uti) = TRUE on top and FALSE on bottom pm_arrange(pm_desc(uti), rank_index) # 'uti' is a column in data set 'rsi_translation' } else { - get_record <- get_record %pm>% + get_record <- get_record %pm>% pm_filter(uti == FALSE) %pm>% # 'uti' is a column in rsi_translation pm_arrange(rank_index) } - + get_record <- get_record[1L, , drop = FALSE] - + if (NROW(get_record) > 0) { if (is.na(x[i]) | (is.na(get_record$breakpoint_S) & is.na(get_record$breakpoint_R))) { new_rsi[i] <- NA_character_ } else if (method == "mic") { - new_rsi[i] <- quick_case_when(isTRUE(conserve_capped_values) & isTRUE(x[i] %like% "^<[0-9]") ~ "S", - isTRUE(conserve_capped_values) & isTRUE(x[i] %like% "^>[0-9]") ~ "R", - # these basically call `<=.mic()` and `>=.mic()`: - isTRUE(x[i] <= get_record$breakpoint_S) ~ "S", - guideline_coerced %like% "EUCAST" & isTRUE(x[i] > get_record$breakpoint_R) ~ "R", - guideline_coerced %like% "CLSI" & isTRUE(x[i] >= get_record$breakpoint_R) ~ "R", - # return "I" when not match the bottom or top - !is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I", - # and NA otherwise - TRUE ~ NA_character_) + new_rsi[i] <- quick_case_when( + isTRUE(conserve_capped_values) & isTRUE(x[i] %like% "^<[0-9]") ~ "S", + isTRUE(conserve_capped_values) & isTRUE(x[i] %like% "^>[0-9]") ~ "R", + # these basically call `<=.mic()` and `>=.mic()`: + isTRUE(x[i] <= get_record$breakpoint_S) ~ "S", + guideline_coerced %like% "EUCAST" & isTRUE(x[i] > get_record$breakpoint_R) ~ "R", + guideline_coerced %like% "CLSI" & isTRUE(x[i] >= get_record$breakpoint_R) ~ "R", + # return "I" when not match the bottom or top + !is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I", + # and NA otherwise + TRUE ~ NA_character_ + ) } else if (method == "disk") { - new_rsi[i] <- quick_case_when(isTRUE(as.double(x[i]) >= as.double(get_record$breakpoint_S)) ~ "S", - guideline_coerced %like% "EUCAST" & isTRUE(as.double(x[i]) < as.double(get_record$breakpoint_R)) ~ "R", - guideline_coerced %like% "CLSI" & isTRUE(as.double(x[i]) <= as.double(get_record$breakpoint_R)) ~ "R", - # return "I" when not match the bottom or top - !is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I", - # and NA otherwise - TRUE ~ NA_character_) + new_rsi[i] <- quick_case_when( + isTRUE(as.double(x[i]) >= as.double(get_record$breakpoint_S)) ~ "S", + guideline_coerced %like% "EUCAST" & isTRUE(as.double(x[i]) < as.double(get_record$breakpoint_R)) ~ "R", + guideline_coerced %like% "CLSI" & isTRUE(as.double(x[i]) <= as.double(get_record$breakpoint_R)) ~ "R", + # return "I" when not match the bottom or top + !is.na(get_record$breakpoint_S) & !is.na(get_record$breakpoint_R) ~ "I", + # and NA otherwise + TRUE ~ NA_character_ + ) } } } - + if (any_is_intrinsic_resistant & guideline_coerced %like% "EUCAST" & !isTRUE(add_intrinsic_resistance)) { # found some intrinsic resistance, but was not applied - if (message_not_thrown_before("as.rsi", "msg4")) { + if (message_not_thrown_before("as.rsi", "unapplied_instrinsic")) { warning_("in `as.rsi()`: found intrinsic resistance in some bug/drug combinations, although it was not applied.\nUse `as.rsi(..., add_intrinsic_resistance = TRUE)` to apply it.") } rise_warning <- TRUE } - + new_rsi <- x_bak %pm>% - pm_left_join(data.frame(x_mo = paste0(x, mo), new_rsi, - stringsAsFactors = FALSE), - by = "x_mo") %pm>% + pm_left_join(data.frame( + x_mo = paste0(x, mo), new_rsi, + stringsAsFactors = FALSE + ), + by = "x_mo" + ) %pm>% pm_pull(new_rsi) - + if (isTRUE(rise_warning)) { message_("WARNING.", add_fn = list(font_yellow, font_bold), as_note = FALSE) } else { message_(" OK.", add_fn = list(font_green, font_bold), as_note = FALSE) } - + load_mo_failures_uncertainties_renamed(metadata_mo) - + set_clean_class(factor(new_rsi, levels = c("S", "I", "R"), ordered = TRUE), - new_class = c("rsi", "ordered", "factor")) + new_class = c("rsi", "ordered", "factor") + ) } # will be exported using s3_register() in R/zzz.R @@ -885,12 +954,14 @@ freq.rsi <- function(x, ...) { x_name <- gsub(".*[$]", "", x_name) if (x_name %in% c("x", ".")) { # try again going through system calls - x_name <- stats::na.omit(vapply(FUN.VALUE = character(1), - sys.calls(), - function(call) { - call_txt <- as.character(call) - ifelse(call_txt[1] %like% "freq$", call_txt[length(call_txt)], character(0)) - }))[1L] + x_name <- stats::na.omit(vapply( + FUN.VALUE = character(1), + sys.calls(), + function(call) { + call_txt <- as.character(call) + ifelse(call_txt[1] %like% "freq$", call_txt[length(call_txt)], character(0)) + } + ))[1L] } ab <- suppressMessages(suppressWarnings(as.ab(x_name))) digits <- list(...)$digits @@ -898,17 +969,25 @@ freq.rsi <- function(x, ...) { digits <- 2 } if (!is.na(ab)) { - cleaner::freq.default(x = x, ..., - .add_header = list( - Drug = paste0(ab_name(ab, language = NULL), " (", ab, ", ", paste(ab_atc(ab), collapse = "/"), ")"), - `Drug group` = ab_group(ab, language = NULL), - `%SI` = trimws(percentage(susceptibility(x, minimum = 0, as_percent = FALSE), - digits = digits)))) + cleaner::freq.default( + x = x, ..., + .add_header = list( + Drug = paste0(ab_name(ab, language = NULL), " (", ab, ", ", paste(ab_atc(ab), collapse = "/"), ")"), + `Drug group` = ab_group(ab, language = NULL), + `%SI` = trimws(percentage(susceptibility(x, minimum = 0, as_percent = FALSE), + digits = digits + )) + ) + ) } else { - cleaner::freq.default(x = x, ..., - .add_header = list( - `%SI` = trimws(percentage(susceptibility(x, minimum = 0, as_percent = FALSE), - digits = digits)))) + cleaner::freq.default( + x = x, ..., + .add_header = list( + `%SI` = trimws(percentage(susceptibility(x, minimum = 0, as_percent = FALSE), + digits = digits + )) + ) + ) } } @@ -922,24 +1001,26 @@ get_skimmers.rsi <- function(column) { calls_txt <- vapply(calls, function(x) paste(deparse(x), collapse = ""), FUN.VALUE = character(1)) if (any(calls_txt %like% "skim_variable", na.rm = TRUE)) { ind <- which(calls_txt %like% "skim_variable")[1L] - vars <- tryCatch(eval(parse(text = ".data$skim_variable$rsi"), envir = frms[[ind]]), - error = function(e) NULL) + vars <- tryCatch(eval(parse(text = ".data$skim_variable$rsi"), envir = frms[[ind]]), + error = function(e) NULL + ) tryCatch(ab_name(as.character(calls[[length(calls)]][[2]]), language = NULL), - error = function(e) NA_character_) + error = function(e) NA_character_ + ) } else { NA_character_ } } - + skimr::sfl( skim_type = "rsi", ab_name = name_call, count_R = count_R, count_S = count_susceptible, count_I = count_I, - prop_R = ~proportion_R(., minimum = 0), - prop_S = ~susceptibility(., minimum = 0), - prop_I = ~proportion_I(., minimum = 0) + prop_R = ~ proportion_R(., minimum = 0), + prop_S = ~ susceptibility(., minimum = 0), + prop_I = ~ proportion_I(., minimum = 0) ) } diff --git a/R/rsi_calc.R b/R/rsi_calc.R index ed80c9c54..4569b956c 100755 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,7 +24,7 @@ # ==================================================================== # dots2vars <- function(...) { - # this function is to give more informative output about + # this function is to give more informative output about # variable names in count_* and proportion_* functions dots <- substitute(list(...)) as.character(dots)[2:length(dots)] @@ -41,26 +41,30 @@ rsi_calc <- function(..., meet_criteria(as_percent, allow_class = "logical", has_length = 1, .call_depth = 1) meet_criteria(only_all_tested, allow_class = "logical", has_length = 1, .call_depth = 1) meet_criteria(only_count, allow_class = "logical", has_length = 1, .call_depth = 1) - + data_vars <- dots2vars(...) - - dots_df <- switch(1, ...) + + dots_df <- switch(1, + ... + ) if (is.data.frame(dots_df)) { # make sure to remove all other classes like tibbles, data.tables, etc dots_df <- as.data.frame(dots_df, stringsAsFactors = FALSE) } - + dots <- eval(substitute(alist(...))) stop_if(length(dots) == 0, "no variables selected", call = -2) - + stop_if("also_single_tested" %in% names(dots), - "`also_single_tested` was replaced by `only_all_tested`.\n", - "Please read Details in the help page (`?proportion`) as this may have a considerable impact on your analysis.", call = -2) + "`also_single_tested` was replaced by `only_all_tested`.\n", + "Please read Details in the help page (`?proportion`) as this may have a considerable impact on your analysis.", + call = -2 + ) ndots <- length(dots) - + if (is.data.frame(dots_df)) { # data.frame passed with other columns, like: example_isolates %pm>% proportion_S(AMC, GEN) - + dots <- as.character(dots) # remove first element, it's the data.frame if (length(dots) == 1) { @@ -69,14 +73,16 @@ rsi_calc <- function(..., dots <- dots[2:length(dots)] } if (length(dots) == 0 | all(dots == "df")) { - # for complete data.frames, like example_isolates %pm>% select(AMC, GEN) %pm>% proportion_S() + # for complete data.frames, like example_isolates %pm>% select(AMC, GEN) %pm>% proportion_S() # and the old rsi function, which has "df" as name of the first argument x <- dots_df } else { # get dots that are in column names already, and the ones that will be once evaluated using dots_df or global env # this is to support susceptibility(example_isolates, AMC, any_of(some_vector_with_AB_names)) - dots <- c(dots[dots %in% colnames(dots_df)], - eval(parse(text = dots[!dots %in% colnames(dots_df)]), envir = dots_df, enclos = globalenv())) + dots <- c( + dots[dots %in% colnames(dots_df)], + eval(parse(text = dots[!dots %in% colnames(dots_df)]), envir = dots_df, enclos = globalenv()) + ) dots_not_exist <- dots[!dots %in% colnames(dots_df)] stop_if(length(dots_not_exist) > 0, "column(s) not found: ", vector_and(dots_not_exist, quotes = TRUE), call = -2) x <- dots_df[, dots, drop = FALSE] @@ -93,7 +99,7 @@ rsi_calc <- function(..., x <- as.data.frame(list(...), stringsAsFactors = FALSE) } } - + if (is.null(x)) { warning_("argument is NULL (check if columns exist): returning NA") if (as_percent == TRUE) { @@ -102,11 +108,11 @@ rsi_calc <- function(..., return(NA_real_) } } - + print_warning <- FALSE - + ab_result <- as.rsi(ab_result) - + if (is.data.frame(x)) { rsi_integrity_check <- character(0) for (i in seq_len(ncol(x))) { @@ -121,13 +127,15 @@ rsi_calc <- function(..., # this will give a warning for invalid results, of all input columns (so only 1 warning) rsi_integrity_check <- as.rsi(rsi_integrity_check) } - + x_transposed <- as.list(as.data.frame(t(x), stringsAsFactors = FALSE)) if (only_all_tested == TRUE) { # no NAs in any column - y <- apply(X = as.data.frame(lapply(x, as.integer), stringsAsFactors = FALSE), - MARGIN = 1, - FUN = min) + y <- apply( + X = as.data.frame(lapply(x, as.integer), stringsAsFactors = FALSE), + MARGIN = 1, + FUN = min + ) numerator <- sum(as.integer(y) %in% as.integer(ab_result), na.rm = TRUE) denominator <- sum(vapply(FUN.VALUE = logical(1), x_transposed, function(y) !(any(is.na(y))))) } else { @@ -145,20 +153,21 @@ rsi_calc <- function(..., numerator <- sum(x %in% ab_result, na.rm = TRUE) denominator <- sum(x %in% levels(ab_result), na.rm = TRUE) } - + if (print_warning == TRUE) { if (message_not_thrown_before("rsi_calc")) { warning_("Increase speed by transforming to class on beforehand:\n", - " your_data %>% mutate_if(is.rsi.eligible, as.rsi)\n", - " your_data %>% mutate(across(where(is.rsi.eligible), as.rsi))", - call = FALSE) + " your_data %>% mutate_if(is.rsi.eligible, as.rsi)\n", + " your_data %>% mutate(across(where(is.rsi.eligible), as.rsi))", + call = FALSE + ) } } - + if (only_count == TRUE) { return(numerator) } - + if (denominator < minimum) { if (data_vars != "") { data_vars <- paste(" for", data_vars) @@ -182,16 +191,18 @@ rsi_calc <- function(..., } } warning_("Introducing NA: ", - ifelse(denominator == 0, "no", paste("only", denominator)), - " results available", - data_vars, - " (`minimum` = ", minimum, ").", call = FALSE) + ifelse(denominator == 0, "no", paste("only", denominator)), + " results available", + data_vars, + " (`minimum` = ", minimum, ").", + call = FALSE + ) fraction <- NA_real_ } else { fraction <- numerator / denominator fraction[is.nan(fraction)] <- NA_real_ } - + if (as_percent == TRUE) { percentage(fraction, digits = 1) } else { @@ -216,16 +227,16 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" meet_criteria(as_percent, allow_class = "logical", has_length = 1, .call_depth = 1) meet_criteria(combine_SI, allow_class = "logical", has_length = 1, .call_depth = 1) meet_criteria(combine_SI_missing, allow_class = "logical", has_length = 1, .call_depth = 1) - + check_dataset_integrity() - + if (isTRUE(combine_IR) & isTRUE(combine_SI_missing)) { combine_SI <- FALSE } stop_if(isTRUE(combine_SI) & isTRUE(combine_IR), "either `combine_SI` or `combine_IR` can be TRUE, not both", call = -2) - + translate_ab <- get_translate_ab(translate_ab) - + data.bak <- data # select only groups and antibiotics if (is_null_or_grouped_tbl(data)) { @@ -236,7 +247,7 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" data_has_groups <- FALSE data <- data[, colnames(data)[vapply(FUN.VALUE = logical(1), data, is.rsi)], drop = FALSE] } - + data <- as.data.frame(data, stringsAsFactors = FALSE) if (isTRUE(combine_SI) | isTRUE(combine_IR)) { for (i in seq_len(ncol(data))) { @@ -250,13 +261,15 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" } } } - + sum_it <- function(.data) { - out <- data.frame(antibiotic = character(0), - interpretation = character(0), - value = double(0), - isolates = integer(0), - stringsAsFactors = FALSE) + out <- data.frame( + antibiotic = character(0), + interpretation = character(0), + value = double(0), + isolates = integer(0), + stringsAsFactors = FALSE + ) if (data_has_groups) { group_values <- unique(.data[, which(colnames(.data) %in% groups), drop = FALSE]) rownames(group_values) <- NULL @@ -280,18 +293,22 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" } else { col_results$value <- rep(NA_real_, NROW(col_results)) } - out_new <- data.frame(antibiotic = ifelse(isFALSE(translate_ab), - colnames(.data)[i], - ab_property(colnames(.data)[i], property = translate_ab, language = language)), - interpretation = col_results$interpretation, - value = col_results$value, - isolates = col_results$isolates, - stringsAsFactors = FALSE) + out_new <- data.frame( + antibiotic = ifelse(isFALSE(translate_ab), + colnames(.data)[i], + ab_property(colnames(.data)[i], property = translate_ab, language = language) + ), + interpretation = col_results$interpretation, + value = col_results$value, + isolates = col_results$isolates, + stringsAsFactors = FALSE + ) if (data_has_groups) { if (nrow(group_values) < nrow(out_new)) { # repeat group_values for the number of rows in out_new repeated <- rep(seq_len(nrow(group_values)), - each = nrow(out_new) / nrow(group_values)) + each = nrow(out_new) / nrow(group_values) + ) group_values <- group_values[repeated, , drop = FALSE] } out_new <- cbind(group_values, out_new) @@ -301,7 +318,7 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" } out } - + # based on pm_apply_grouped_function apply_group <- function(.data, fn, groups, drop = FALSE, ...) { grouped <- pm_split_into_groups(.data, groups, drop) @@ -312,13 +329,13 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" } res } - + if (data_has_groups) { out <- apply_group(data, "sum_it", groups) } else { out <- sum_it(data) } - + # apply factors for right sorting in interpretation if (isTRUE(combine_SI)) { out$interpretation <- factor(out$interpretation, levels = c("SI", "R"), ordered = TRUE) @@ -329,21 +346,21 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both" # the same data structure as output, regardless of input out$interpretation <- factor(out$interpretation, levels = c("S", "I", "R"), ordered = TRUE) } - + if (data_has_groups) { # ordering by the groups and two more: "antibiotic" and "interpretation" - out <- pm_ungroup(out[do.call("order", out[, seq_len(length(groups) + 2), drop = FALSE]), , drop = FALSE]) + out <- pm_ungroup(out[do.call("order", out[, seq_len(length(groups) + 2), drop = FALSE]), , drop = FALSE]) } else { out <- out[order(out$antibiotic, out$interpretation), , drop = FALSE] } - + if (type == "proportion") { out <- subset(out, select = -c(isolates)) } else if (type == "count") { out$value <- out$isolates out <- subset(out, select = -c(isolates)) - } - + } + rownames(out) <- NULL out <- as_original_data_class(out, class(data.bak)) structure(out, class = c("rsi_df", class(out))) @@ -358,9 +375,10 @@ get_translate_ab <- function(translate_ab) { } else { translate_ab <- tolower(translate_ab) stop_ifnot(translate_ab %in% colnames(AMR::antibiotics), - "invalid value for 'translate_ab', this must be a column name of the antibiotics data set\n", - "or TRUE (equals 'name') or FALSE to not translate at all.", - call = FALSE) + "invalid value for 'translate_ab', this must be a column name of the antibiotics data set\n", + "or TRUE (equals 'name') or FALSE to not translate at all.", + call = FALSE + ) translate_ab } } diff --git a/R/rsi_df.R b/R/rsi_df.R index 2f7b912d4..d7b6164ee 100644 --- a/R/rsi_df.R +++ b/R/rsi_df.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -23,7 +23,7 @@ # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -#' @rdname proportion +#' @rdname proportion #' @export rsi_df <- function(data, translate_ab = "name", @@ -32,14 +32,15 @@ rsi_df <- function(data, as_percent = FALSE, combine_SI = TRUE, combine_IR = FALSE) { - rsi_calc_df(type = "both", - data = data, - translate_ab = translate_ab, - language = language, - minimum = minimum, - as_percent = as_percent, - combine_SI = combine_SI, - combine_IR = combine_IR, - combine_SI_missing = missing(combine_SI)) - + rsi_calc_df( + type = "both", + data = data, + translate_ab = translate_ab, + language = language, + minimum = minimum, + as_percent = as_percent, + combine_SI = combine_SI, + combine_IR = combine_IR, + combine_SI_missing = missing(combine_SI) + ) } diff --git a/R/skewness.R b/R/skewness.R index 0e5050911..682423111 100755 --- a/R/skewness.R +++ b/R/skewness.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -33,7 +33,7 @@ #' @seealso [kurtosis()] #' @rdname skewness #' @export -#' @examples +#' @examples #' skewness(runif(1000)) skewness <- function(x, na.rm = FALSE) { meet_criteria(na.rm, allow_class = "logical", has_length = 1) @@ -50,7 +50,7 @@ skewness.default <- function(x, na.rm = FALSE) { x <- x[!is.na(x)] } n <- length(x) - (sum((x - mean(x))^3) / n) / (sum((x - mean(x)) ^ 2) / n) ^ (3 / 2) + (sum((x - mean(x))^3) / n) / (sum((x - mean(x))^2) / n)^(3 / 2) } #' @method skewness matrix diff --git a/R/translate.R b/R/translate.R index 9b8bbeffe..0a13288c5 100755 --- a/R/translate.R +++ b/R/translate.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -34,12 +34,12 @@ #' #' ## Changing the Default Language #' The system language will be used at default (as returned by `Sys.getenv("LANG")` or, if `LANG` is not set, [`Sys.getlocale("LC_COLLATE")`][Sys.getlocale()]), if that language is supported. But the language to be used can be overwritten in two ways and will be checked in this order: -#' +#' #' 1. Setting the R option `AMR_locale`, either by using `set_AMR_locale()` or by running e.g. `options(AMR_locale = "de")`. -#' +#' #' Note that setting an \R option only works in the same session. Save the command `options(AMR_locale = "(your language)")` to your `.Rprofile` file to apply it for every session. #' 2. Setting the system variable `LANGUAGE` or `LANG`, e.g. by adding `LANGUAGE="de_DE.utf8"` to your `.Renviron` file in your home directory. -#' +#' #' Thus, if the R option `AMR_locale` is set, the system variables `LANGUAGE` and `LANG` will be ignored. #' @rdname translate #' @name translate @@ -53,11 +53,11 @@ #' set_AMR_locale("Greek") #' ab_name("Ciprofloxacin") #' mo_name("Coagulase-negative Staphylococcus") -#' +#' #' set_AMR_locale("Spanish") #' ab_name("Ciprofloxacin") #' mo_name("Coagulase-negative Staphylococcus") -#' +#' #' # set_AMR_locale() understands endonyms, English exonyms, and ISO-639-1: #' set_AMR_locale("Deutsch") #' set_AMR_locale("German") @@ -69,7 +69,7 @@ get_AMR_locale <- function() { if (!is.null(getOption("AMR_locale", default = NULL))) { return(validate_language(getOption("AMR_locale"), extra_txt = "set with `options(AMR_locale = ...)`")) } - + lang <- "" # now check the LANGUAGE system variable - return it if set if (!identical("", Sys.getenv("LANGUAGE"))) { @@ -81,12 +81,14 @@ get_AMR_locale <- function() { if (lang == "") { lang <- Sys.getlocale("LC_COLLATE") } - + lang <- find_language(lang) if (lang != "en" && interactive() && message_not_thrown_before("get_AMR_locale", entire_session = TRUE)) { - message_("Assuming the ", LANGUAGES_SUPPORTED_NAMES[[lang]]$exonym, " language (", - LANGUAGES_SUPPORTED_NAMES[[lang]]$endonym, ") for the AMR package. Change this with `set_AMR_locale()`. ", - "This note will be shown once per session.") + message_( + "Assuming the ", LANGUAGES_SUPPORTED_NAMES[[lang]]$exonym, " language (", + LANGUAGES_SUPPORTED_NAMES[[lang]]$endonym, ") for the AMR package. Change this with `set_AMR_locale()`. ", + "This note will be shown once per session." + ) } lang } @@ -96,7 +98,7 @@ get_AMR_locale <- function() { set_AMR_locale <- function(language) { language <- validate_language(language) options(AMR_locale = language) - message_("Using the ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym, " language (", LANGUAGES_SUPPORTED_NAMES[[language]]$endonym, ") for the AMR package for this session.") + message_("Using the ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym, " language (", LANGUAGES_SUPPORTED_NAMES[[language]]$endonym, ") for the AMR package for this session.") } #' @rdname translate @@ -118,25 +120,32 @@ validate_language <- function(language, extra_txt = character(0)) { } lang <- find_language(language, fallback = FALSE) stop_ifnot(length(lang) > 0 && lang %in% LANGUAGES_SUPPORTED, - "unsupported language for AMR package", extra_txt, ": \"", language, "\". Use one of these language names or ISO-639-1 codes: ", - paste0('"', vapply(FUN.VALUE = character(1), LANGUAGES_SUPPORTED_NAMES, function(x) x[[1]]), - '" ("' , LANGUAGES_SUPPORTED, '")', collapse = ", "), - call = FALSE) + "unsupported language for AMR package", extra_txt, ": \"", language, "\". Use one of these language names or ISO-639-1 codes: ", + paste0('"', vapply(FUN.VALUE = character(1), LANGUAGES_SUPPORTED_NAMES, function(x) x[[1]]), + '" ("', LANGUAGES_SUPPORTED, '")', + collapse = ", " + ), + call = FALSE + ) lang } find_language <- function(language, fallback = TRUE) { language <- Map(function(l, n, check = language) { - grepl(paste0("^(", l[1], "|", l[2], "|", - n, "(_|$)|", toupper(n), "(_|$))"), - check, - ignore.case = FALSE, - perl = TRUE, - useBytes = FALSE) + grepl(paste0( + "^(", l[1], "|", l[2], "|", + n, "(_|$)|", toupper(n), "(_|$))" + ), + check, + ignore.case = FALSE, + perl = TRUE, + useBytes = FALSE + ) }, LANGUAGES_SUPPORTED_NAMES, LANGUAGES_SUPPORTED, - USE.NAMES = TRUE) + USE.NAMES = TRUE + ) language <- names(which(language == TRUE)) if (isTRUE(fallback) && length(language) == 0) { # other language -> set to English @@ -147,23 +156,22 @@ find_language <- function(language, fallback = TRUE) { # translate strings based on inst/translations.tsv translate_into_language <- function(from, - language = get_AMR_locale(), + language = get_AMR_locale(), only_unknown = FALSE, only_affect_ab_names = FALSE, only_affect_mo_names = FALSE) { - if (is.null(language)) { return(from) } if (language %in% c("en", "", NA)) { return(from) } - + df_trans <- TRANSLATIONS # internal data file from.bak <- from from_unique <- unique(from) from_unique_translated <- from_unique - + # get ISO-639-1 of language lang <- validate_language(language) # only keep lines where translation is available for this language @@ -182,35 +190,42 @@ translate_into_language <- function(from, if (NROW(df_trans) == 0) { return(from) } - + # default: case sensitive if value if 'case_sensitive' is missing: df_trans$case_sensitive[is.na(df_trans$case_sensitive)] <- TRUE # default: not using regular expressions if 'regular_expr' is missing: df_trans$regular_expr[is.na(df_trans$regular_expr)] <- FALSE - + # check if text to look for is in one of the patterns any_form_in_patterns <- tryCatch( any(from_unique %like% paste0("(", paste(gsub(" +\\(.*", "", df_trans$pattern), collapse = "|"), ")")), error = function(e) { warning_("Translation not possible. Please open an issue on GitHub (https://github.com/msberends/AMR/issues).") return(FALSE) - }) - + } + ) + if (NROW(df_trans) == 0 | !any_form_in_patterns) { return(from) } - - lapply(seq_len(nrow(df_trans)), - function(i) from_unique_translated <<- gsub(pattern = df_trans$pattern[i], - replacement = df_trans[i, lang, drop = TRUE], - x = from_unique_translated, - ignore.case = !df_trans$case_sensitive[i] & df_trans$regular_expr[i], - fixed = !df_trans$regular_expr[i], - perl = df_trans$regular_expr[i])) - + + lapply( + seq_len(nrow(df_trans)), + function(i) { + from_unique_translated <<- gsub( + pattern = df_trans$pattern[i], + replacement = df_trans[i, lang, drop = TRUE], + x = from_unique_translated, + ignore.case = !df_trans$case_sensitive[i] & df_trans$regular_expr[i], + fixed = !df_trans$regular_expr[i], + perl = df_trans$regular_expr[i] + ) + } + ) + # force UTF-8 for diacritics from_unique_translated <- enc2utf8(from_unique_translated) - + # a kind of left join to get all results back from_unique_translated[match(from.bak, from_unique)] } diff --git a/R/vctrs.R b/R/vctrs.R index 3241c502e..070792087 100644 --- a/R/vctrs.R +++ b/R/vctrs.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -23,7 +23,7 @@ # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -# These are all S3 implementations for the vctrs package, +# These are all S3 implementations for the vctrs package, # that is used internally by tidyverse packages such as dplyr. # They are to convert AMR-specific classes to bare characters and integers. # All of them will be exported using s3_register() in R/zzz.R when loading the package. diff --git a/R/whocc.R b/R/whocc.R index e99bd3663..ca3a5f68b 100755 --- a/R/whocc.R +++ b/R/whocc.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -28,12 +28,12 @@ #' All antimicrobial drugs and their official names, ATC codes, ATC groups and defined daily dose (DDD) are included in this package, using the WHO Collaborating Centre for Drug Statistics Methodology. #' @section WHOCC: #' \if{html}{\figure{logo_who.png}{options: height="60" style=margin-bottom:"5"} \cr} -#' This package contains **all ~550 antibiotic, antimycotic and antiviral drugs** and their Anatomical Therapeutic Chemical (ATC) codes, ATC groups and Defined Daily Dose (DDD) from the World Health Organization Collaborating Centre for Drug Statistics Methodology (WHOCC, ) and the Pharmaceuticals Community Register of the European Commission (). +#' This package contains **all ~550 antibiotic, antimycotic and antiviral drugs** and their Anatomical Therapeutic Chemical (ATC) codes, ATC groups and Defined Daily Dose (DDD) from the World Health Organization Collaborating Centre for Drug Statistics Methodology (WHOCC, ) and the Pharmaceuticals Community Register of the European Commission (). #' #' These have become the gold standard for international drug utilisation monitoring and research. #' #' The WHOCC is located in Oslo at the Norwegian Institute of Public Health and funded by the Norwegian government. The European Commission is the executive of the European Union and promotes its general interest. -#' +#' #' **NOTE: The WHOCC copyright does not allow use for commercial purposes, unlike any other info from this package.** See #' @name WHOCC diff --git a/R/zzz.R b/R/zzz.R index d3951597f..8dadb750e 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -26,16 +26,19 @@ # set up package environment, used by numerous AMR functions pkg_env <- new.env(hash = FALSE) pkg_env$mo_failed <- character(0) -pkg_env$mo_field_abbreviations <- c("AIEC", "ATEC", "BORSA", "CRSM", "DAEC", "EAEC", - "EHEC", "EIEC", "EPEC", "ETEC", "GISA", "MRPA", - "MRSA", "MRSE", "MSSA", "MSSE", "NMEC", "PISP", - "PRSP", "STEC", "UPEC", "VISA", "VISP", "VRE", - "VRSA", "VRSP") +pkg_env$mo_field_abbreviations <- c( + "AIEC", "ATEC", "BORSA", "CRSM", "DAEC", "EAEC", + "EHEC", "EIEC", "EPEC", "ETEC", "GISA", "MRPA", + "MRSA", "MRSE", "MSSA", "MSSE", "NMEC", "PISP", + "PRSP", "STEC", "UPEC", "VISA", "VISP", "VRE", + "VRSA", "VRSP" +) # determine info icon for messages utf8_supported <- isTRUE(base::l10n_info()$`UTF-8`) is_latex <- tryCatch(import_fn("is_latex_output", "knitr", error_on_fail = FALSE)(), - error = function(e) FALSE) + error = function(e) FALSE +) if (utf8_supported && !is_latex) { # \u2139 is a symbol officially named 'information source' pkg_env$info_icon <- "\u2139" @@ -45,8 +48,8 @@ if (utf8_supported && !is_latex) { .onLoad <- function(...) { # Support for tibble headers (type_sum) and tibble columns content (pillar_shaft) - # without the need to depend on other packages. This was suggested by the - # developers of the vctrs package: + # without the need to depend on other packages. This was suggested by the + # developers of the vctrs package: # https://github.com/r-lib/vctrs/blob/05968ce8e669f73213e3e894b5f4424af4f46316/R/register-s3.R s3_register("pillar::pillar_shaft", "ab") s3_register("pillar::pillar_shaft", "mo") @@ -93,19 +96,22 @@ if (utf8_supported && !is_latex) { s3_register("vctrs::vec_ptype2", "disk.integer") s3_register("vctrs::vec_ptype2", "integer.disk") s3_register("vctrs::vec_cast", "integer.disk") - + # if mo source exists, fire it up (see mo_source()) - try({ - if (file.exists(getOption("AMR_mo_source", "~/mo_source.rds"))) { - invisible(get_mo_source()) - } - }, silent = TRUE) - + try( + { + if (file.exists(getOption("AMR_mo_source", "~/mo_source.rds"))) { + invisible(get_mo_source()) + } + }, + silent = TRUE + ) + # be sure to print tibbles as tibbles if (pkg_is_available("tibble", also_load = FALSE)) { loadNamespace("tibble") } - + # reference data - they have additional columns compared to `antibiotics` and `microorganisms` to improve speed # they cannot be part of R/sysdata.rda since CRAN thinks it would make the package too large (+3 MB) assign(x = "AB_lookup", value = create_AB_lookup(), envir = asNamespace("AMR")) @@ -123,7 +129,7 @@ create_AB_lookup <- function() { create_MO_lookup <- function() { MO_lookup <- AMR::microorganisms - + MO_lookup$kingdom_index <- NA_real_ MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1 MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2 @@ -131,18 +137,18 @@ create_MO_lookup <- function() { MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4 # all the rest MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5 - + # use this paste instead of `fullname` to work with Viridans Group Streptococci, etc. if (length(MO_FULLNAME_LOWER) == nrow(MO_lookup)) { MO_lookup$fullname_lower <- MO_FULLNAME_LOWER } else { MO_lookup$fullname_lower <- "" - warning("MO table updated - Run: source(\"data-raw/pre-commit-hook.R\")", call. = FALSE) + warning("MO table updated - Run: source(\"data-raw/_pre_commit_hook.R\")", call. = FALSE) } - + # add a column with only "e coli" like combinations MO_lookup$g_species <- gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO_lookup$fullname_lower, perl = TRUE) - + # so arrange data on prevalence first, then kingdom, then full name MO_lookup[order(MO_lookup$prevalence, MO_lookup$kingdom_index, MO_lookup$fullname_lower), , drop = FALSE] } @@ -150,10 +156,10 @@ create_MO_lookup <- function() { create_MO.old_lookup <- function() { MO.old_lookup <- AMR::microorganisms.old MO.old_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", tolower(trimws(MO.old_lookup$fullname)))) - + # add a column with only "e coli"-like combinations MO.old_lookup$g_species <- trimws(gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO.old_lookup$fullname_lower)) - + # so arrange data on prevalence first, then full name MO.old_lookup[order(MO.old_lookup$prevalence, MO.old_lookup$fullname_lower), , drop = FALSE] } diff --git a/data-raw/_install_deps.R b/data-raw/_install_deps.R index 3ba86f3f7..f22b7aea6 100644 --- a/data-raw/_install_deps.R +++ b/data-raw/_install_deps.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -28,10 +28,14 @@ install.packages("data-raw/tinytest_1.3.1.tar.gz", dependencies = c("Depends", " install.packages(getwd(), repos = NULL, type = "source") # install.packages("data-raw/AMR_latest.tar.gz", dependencies = FALSE) -pkg_suggests <- gsub("[^a-zA-Z0-9]+", "", - unlist(strsplit(unlist(packageDescription("AMR", - fields = c("Suggests", "Enhances", "LinkingTo"))), - split = ", ?"))) +pkg_suggests <- gsub( + "[^a-zA-Z0-9]+", "", + unlist(strsplit(unlist(packageDescription("AMR", + fields = c("Suggests", "Enhances", "LinkingTo") + )), + split = ", ?" + )) +) pkg_suggests <- unname(pkg_suggests[!is.na(pkg_suggests)]) cat("################################################\n") cat("Packages listed in Suggests/Enhances:", paste(pkg_suggests, collapse = ", "), "\n") @@ -49,22 +53,26 @@ if (length(to_install) == 0) { for (i in seq_len(length(to_install))) { cat("Installing package", to_install[i], "\n") tryCatch(install.packages(to_install[i], - type = "source", - repos = "https://cran.rstudio.com/", - dependencies = c("Depends", "Imports", "LinkingTo"), - quiet = FALSE), - # message = function(m) invisible(), - warning = function(w) message(w$message), - error = function(e) message(e$message)) + type = "source", + repos = "https://cran.rstudio.com/", + dependencies = c("Depends", "Imports", "LinkingTo"), + quiet = FALSE + ), + # message = function(m) invisible(), + warning = function(w) message(w$message), + error = function(e) message(e$message) + ) if (.Platform$OS.type != "unix" && !to_install[i] %in% rownames(utils::installed.packages())) { tryCatch(install.packages(to_install[i], - type = "binary", - repos = "https://cran.rstudio.com/", - dependencies = c("Depends", "Imports", "LinkingTo"), - quiet = FALSE), - # message = function(m) invisible(), - warning = function(w) message(w$message), - error = function(e) message(e$message)) + type = "binary", + repos = "https://cran.rstudio.com/", + dependencies = c("Depends", "Imports", "LinkingTo"), + quiet = FALSE + ), + # message = function(m) invisible(), + warning = function(w) message(w$message), + error = function(e) message(e$message) + ) } } @@ -76,8 +84,9 @@ if (length(to_update) == 0) { for (i in seq_len(length(to_update))) { cat("Updating package '", to_update[i], "' v", as.character(packageVersion(to_update[i])), "\n", sep = "") tryCatch(update.packages(to_update[i], repos = "https://cran.rstudio.com/", ask = FALSE), - # message = function(m) invisible(), - warning = function(w) message(w$message), - error = function(e) message(e$message)) + # message = function(m) invisible(), + warning = function(w) message(w$message), + error = function(e) message(e$message) + ) cat("Updated to '", to_update[i], "' v", as.character(packageVersion(to_update[i])), "\n", sep = "") } diff --git a/data-raw/_language_update.R b/data-raw/_language_update.R index b53407243..0547a0f77 100644 --- a/data-raw/_language_update.R +++ b/data-raw/_language_update.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -28,9 +28,10 @@ if (!file.exists("DESCRIPTION") || !"Package: AMR" %in% readLines("DESCRIPTION")) { stop("Be sure to run this script in the root location of the AMR package folder.\n", - "Working directory expected to contain the DESCRIPTION file of the AMR package.\n", - "Current working directory: ", getwd(), - call. = FALSE) + "Working directory expected to contain the DESCRIPTION file of the AMR package.\n", + "Current working directory: ", getwd(), + call. = FALSE + ) } # save old global env to restore later @@ -41,34 +42,42 @@ load("R/sysdata.rda", envir = lang_env) # replace language objects with updates message("Reading translation file...") -lang_env$TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv", - sep = "\t", - stringsAsFactors = FALSE, - header = TRUE, - blank.lines.skip = TRUE, - fill = TRUE, - strip.white = TRUE, - encoding = "UTF-8", - fileEncoding = "UTF-8", - na.strings = c(NA, "", NULL), - allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1" - quote = "") +lang_env$TRANSLATIONS <- utils::read.delim( + file = "data-raw/translations.tsv", + sep = "\t", + stringsAsFactors = FALSE, + header = TRUE, + blank.lines.skip = TRUE, + fill = TRUE, + strip.white = TRUE, + encoding = "UTF-8", + fileEncoding = "UTF-8", + na.strings = c(NA, "", NULL), + allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1" + quote = "" +) -lang_env$LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")), - lapply(lang_env$TRANSLATIONS[, which(nchar(colnames(lang_env$TRANSLATIONS)) == 2), drop = FALSE], - function(x) list(exonym = x[1], endonym = x[2]))) +lang_env$LANGUAGES_SUPPORTED_NAMES <- c( + list(en = list(exonym = "English", endonym = "English")), + lapply( + lang_env$TRANSLATIONS[, which(nchar(colnames(lang_env$TRANSLATIONS)) == 2), drop = FALSE], + function(x) list(exonym = x[1], endonym = x[2]) + ) +) lang_env$LANGUAGES_SUPPORTED <- names(lang_env$LANGUAGES_SUPPORTED_NAMES) # save env to internal package data # usethis::use_data() does not allow to save a list :( message("Saving to internal data...") -save(list = names(lang_env), - file = "R/sysdata.rda", - ascii = FALSE, - version = 2, - compress = "xz", - envir = lang_env) +save( + list = names(lang_env), + file = "R/sysdata.rda", + ascii = FALSE, + version = 2, + compress = "xz", + envir = lang_env +) rm(lang_env) diff --git a/data-raw/pre-commit-hook.R b/data-raw/_pre_commit_hook.R similarity index 50% rename from data-raw/pre-commit-hook.R rename to data-raw/_pre_commit_hook.R index 1c3d8bf06..4b1d43a2c 100644 --- a/data-raw/pre-commit-hook.R +++ b/data-raw/_pre_commit_hook.R @@ -24,7 +24,7 @@ # ==================================================================== # # Run this file to update the package using: -# source("data-raw/pre-commit-hook.R") +# source("data-raw/_pre_commit_hook.R") library(dplyr, warn.conflicts = FALSE) devtools::load_all(quiet = TRUE) @@ -36,41 +36,54 @@ old_globalenv <- ls(envir = globalenv()) # Save internal data to R/sysdata.rda ------------------------------------- # See 'data-raw/eucast_rules.tsv' for the EUCAST reference file -EUCAST_RULES_DF <- utils::read.delim(file = "data-raw/eucast_rules.tsv", - skip = 10, - sep = "\t", - stringsAsFactors = FALSE, - header = TRUE, - strip.white = TRUE, - na = c(NA, "", NULL)) %>% +EUCAST_RULES_DF <- utils::read.delim( + file = "data-raw/eucast_rules.tsv", + skip = 10, + sep = "\t", + stringsAsFactors = FALSE, + header = TRUE, + strip.white = TRUE, + na = c(NA, "", NULL) +) %>% # take the order of the reference.rule_group column in the original data file - mutate(reference.rule_group = factor(reference.rule_group, - levels = unique(reference.rule_group), - ordered = TRUE), - sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>% - arrange(reference.rule_group, - reference.version, - sorting_rule, - reference.rule) %>% + mutate( + reference.rule_group = factor(reference.rule_group, + levels = unique(reference.rule_group), + ordered = TRUE + ), + sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2) + ) %>% + arrange( + reference.rule_group, + reference.version, + sorting_rule, + reference.rule + ) %>% mutate(reference.rule_group = as.character(reference.rule_group)) %>% select(-sorting_rule) -TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv", - sep = "\t", - stringsAsFactors = FALSE, - header = TRUE, - blank.lines.skip = TRUE, - fill = TRUE, - strip.white = TRUE, - encoding = "UTF-8", - fileEncoding = "UTF-8", - na.strings = c(NA, "", NULL), - allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1" - quote = "") +TRANSLATIONS <- utils::read.delim( + file = "data-raw/translations.tsv", + sep = "\t", + stringsAsFactors = FALSE, + header = TRUE, + blank.lines.skip = TRUE, + fill = TRUE, + strip.white = TRUE, + encoding = "UTF-8", + fileEncoding = "UTF-8", + na.strings = c(NA, "", NULL), + allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1" + quote = "" +) -LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")), - lapply(TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2), drop = FALSE], - function(x) list(exonym = x[1], endonym = x[2]))) +LANGUAGES_SUPPORTED_NAMES <- c( + list(en = list(exonym = "English", endonym = "English")), + lapply( + TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2), drop = FALSE], + function(x) list(exonym = x[1], endonym = x[2]) + ) +) LANGUAGES_SUPPORTED <- names(LANGUAGES_SUPPORTED_NAMES) @@ -84,43 +97,53 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) { MO_staph <- AMR::microorganisms MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE] if (type == "CoNS") { - MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae", - "auricularis", "borealis", "caeli", "capitis", "caprae", - "carnosus", "casei", "chromogenes", "cohnii", "condimenti", - "croceilyticus", - "debuckii", "devriesei", "edaphicus", "epidermidis", - "equorum", "felis", "fleurettii", "gallinarum", - "haemolyticus", "hominis", "jettensis", "kloosii", - "lentus", "lugdunensis", "massiliensis", "microti", - "muscae", "nepalensis", "pasteuri", "petrasii", - "pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus", - "pulvereri", "rostri", "saccharolyticus", "saprophyticus", - "sciuri", "simulans", "stepanovicii", "succinus", - "ureilyticus", - "vitulinus", "vitulus", "warneri", "xylosus", - "caledonicus", "canis", - "durrellii", "lloydii") - | (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))), - "mo", drop = TRUE] + MO_staph[which(MO_staph$species %in% c( + "coagulase-negative", "argensis", "arlettae", + "auricularis", "borealis", "caeli", "capitis", "caprae", + "carnosus", "casei", "chromogenes", "cohnii", "condimenti", + "croceilyticus", + "debuckii", "devriesei", "edaphicus", "epidermidis", + "equorum", "felis", "fleurettii", "gallinarum", + "haemolyticus", "hominis", "jettensis", "kloosii", + "lentus", "lugdunensis", "massiliensis", "microti", + "muscae", "nepalensis", "pasteuri", "petrasii", + "pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus", + "pulvereri", "rostri", "saccharolyticus", "saprophyticus", + "sciuri", "simulans", "stepanovicii", "succinus", + "ureilyticus", + "vitulinus", "vitulus", "warneri", "xylosus", + "caledonicus", "canis", + "durrellii", "lloydii" + ) | + (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))), + "mo", + drop = TRUE + ] } else if (type == "CoPS") { - MO_staph[which(MO_staph$species %in% c("coagulase-positive", "coagulans", - "agnetis", "argenteus", - "cornubiensis", - "delphini", "lutrae", - "hyicus", "intermedius", - "pseudintermedius", "pseudointermedius", - "schweitzeri", "simiae", - "roterodami") - | (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")), - "mo", drop = TRUE] + MO_staph[which(MO_staph$species %in% c( + "coagulase-positive", "coagulans", + "agnetis", "argenteus", + "cornubiensis", + "delphini", "lutrae", + "hyicus", "intermedius", + "pseudintermedius", "pseudointermedius", + "schweitzeri", "simiae", + "roterodami" + ) | + (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")), + "mo", + drop = TRUE + ] } } create_MO_fullname_lower <- function() { MO_lookup <- AMR::microorganisms # use this paste instead of `fullname` to work with Viridans Group Streptococci, etc. - MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus, - MO_lookup$species, - MO_lookup$subspecies))) + MO_lookup$fullname_lower <- tolower(trimws(paste( + MO_lookup$genus, + MO_lookup$species, + MO_lookup$subspecies + ))) ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname, perl = TRUE) MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname", drop = TRUE]) MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE)) @@ -130,59 +153,103 @@ MO_CONS <- create_species_cons_cops("CoNS") MO_COPS <- create_species_cons_cops("CoPS") MO_STREP_ABCG <- as.mo(MO_lookup[which(MO_lookup$genus == "Streptococcus"), "mo", drop = TRUE], Lancefield = TRUE) %in% c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_GRPC", "B_STRPT_GRPG") MO_FULLNAME_LOWER <- create_MO_fullname_lower() -MO_PREVALENT_GENERA <- c("Absidia", "Acholeplasma", "Acremonium", "Actinotignum", "Aedes", "Alistipes", "Alloprevotella", - "Alternaria", "Anaerosalibacter", "Ancylostoma", "Angiostrongylus", "Anisakis", "Anopheles", - "Apophysomyces", "Arachnia", "Aspergillus", "Aureobasidium", "Bacteroides", "Basidiobolus", - "Beauveria", "Bergeyella", "Blastocystis", "Blastomyces", "Borrelia", "Brachyspira", "Branhamella", - "Butyricimonas", "Candida", "Capillaria", "Capnocytophaga", "Catabacter", "Cetobacterium", "Chaetomium", - "Chlamydia", "Chlamydophila", "Chryseobacterium", "Chrysonilia", "Cladophialophora", "Cladosporium", - "Conidiobolus", "Contracaecum", "Cordylobia", "Cryptococcus", "Curvularia", "Deinococcus", "Demodex", - "Dermatobia", "Diphyllobothrium", "Dirofilaria", "Dysgonomonas", "Echinostoma", "Elizabethkingia", - "Empedobacter", "Enterobius", "Exophiala", "Exserohilum", "Fasciola", "Flavobacterium", "Fonsecaea", - "Fusarium", "Fusobacterium", "Giardia", "Haloarcula", "Halobacterium", "Halococcus", "Hendersonula", - "Heterophyes", "Histoplasma", "Hymenolepis", "Hypomyces", "Hysterothylacium", "Lelliottia", - "Leptosphaeria", "Leptotrichia", "Lucilia", "Lumbricus", "Malassezia", "Malbranchea", "Metagonimus", - "Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Mycoplasma", "Myroides", "Necator", - "Nectria", "Ochroconis", "Odoribacter", "Oesophagostomum", "Oidiodendron", "Opisthorchis", - "Ornithobacterium", "Parabacteroides", "Pediculus", "Pedobacter", "Phlebotomus", "Phocaeicola", - "Phocanema", "Phoma", "Piedraia", "Pithomyces", "Pityrosporum", "Porphyromonas", "Prevotella", - "Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula", "Riemerella", - "Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Sphingobacterium", - "Spirometra", "Spiroplasma", "Sporobolomyces", "Stachybotrys", "Streptobacillus", "Strongyloides", - "Syngamus", "Taenia", "Tannerella", "Tenacibaculum", "Terrimonas", "Toxocara", "Treponema", "Trichinella", - "Trichobilharzia", "Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus", - "Trichuris", "Tritirachium", "Trombicula", "Tunga", "Ureaplasma", "Victivallis", "Wautersiella", - "Weeksella", "Wuchereria") +MO_PREVALENT_GENERA <- c( + "Absidia", "Acholeplasma", "Acremonium", "Actinotignum", "Aedes", "Alistipes", "Alloprevotella", + "Alternaria", "Anaerosalibacter", "Ancylostoma", "Angiostrongylus", "Anisakis", "Anopheles", + "Apophysomyces", "Arachnia", "Aspergillus", "Aureobasidium", "Bacteroides", "Basidiobolus", + "Beauveria", "Bergeyella", "Blastocystis", "Blastomyces", "Borrelia", "Brachyspira", "Branhamella", + "Butyricimonas", "Candida", "Capillaria", "Capnocytophaga", "Catabacter", "Cetobacterium", "Chaetomium", + "Chlamydia", "Chlamydophila", "Chryseobacterium", "Chrysonilia", "Cladophialophora", "Cladosporium", + "Conidiobolus", "Contracaecum", "Cordylobia", "Cryptococcus", "Curvularia", "Deinococcus", "Demodex", + "Dermatobia", "Diphyllobothrium", "Dirofilaria", "Dysgonomonas", "Echinostoma", "Elizabethkingia", + "Empedobacter", "Enterobius", "Exophiala", "Exserohilum", "Fasciola", "Flavobacterium", "Fonsecaea", + "Fusarium", "Fusobacterium", "Giardia", "Haloarcula", "Halobacterium", "Halococcus", "Hendersonula", + "Heterophyes", "Histoplasma", "Hymenolepis", "Hypomyces", "Hysterothylacium", "Lelliottia", + "Leptosphaeria", "Leptotrichia", "Lucilia", "Lumbricus", "Malassezia", "Malbranchea", "Metagonimus", + "Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Mycoplasma", "Myroides", "Necator", + "Nectria", "Ochroconis", "Odoribacter", "Oesophagostomum", "Oidiodendron", "Opisthorchis", + "Ornithobacterium", "Parabacteroides", "Pediculus", "Pedobacter", "Phlebotomus", "Phocaeicola", + "Phocanema", "Phoma", "Piedraia", "Pithomyces", "Pityrosporum", "Porphyromonas", "Prevotella", + "Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula", "Riemerella", + "Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Sphingobacterium", + "Spirometra", "Spiroplasma", "Sporobolomyces", "Stachybotrys", "Streptobacillus", "Strongyloides", + "Syngamus", "Taenia", "Tannerella", "Tenacibaculum", "Terrimonas", "Toxocara", "Treponema", "Trichinella", + "Trichobilharzia", "Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus", + "Trichuris", "Tritirachium", "Trombicula", "Tunga", "Ureaplasma", "Victivallis", "Wautersiella", + "Weeksella", "Wuchereria" +) # antibiotic groups # (these will also be used for eucast_rules() and understanding data-raw/eucast_rules.tsv) globalenv_before_ab <- c(ls(envir = globalenv()), "globalenv_before_ab") -AB_AMINOGLYCOSIDES <- antibiotics %>% filter(group %like% "aminoglycoside") %>% pull(ab) +AB_AMINOGLYCOSIDES <- antibiotics %>% + filter(group %like% "aminoglycoside") %>% + pull(ab) AB_AMINOPENICILLINS <- as.ab(c("AMP", "AMX")) -AB_ANTIFUNGALS <- AB_lookup %>% filter(group %like% "antifungal") %>% pull(ab) -AB_ANTIMYCOBACTERIALS <- AB_lookup %>% filter(group %like% "antimycobacterial") %>% pull(ab) -AB_CARBAPENEMS <- antibiotics %>% filter(group %like% "carbapenem") %>% pull(ab) -AB_CEPHALOSPORINS <- antibiotics %>% filter(group %like% "cephalosporin") %>% pull(ab) -AB_CEPHALOSPORINS_1ST <- antibiotics %>% filter(group %like% "cephalosporin.*1") %>% pull(ab) -AB_CEPHALOSPORINS_2ND <- antibiotics %>% filter(group %like% "cephalosporin.*2") %>% pull(ab) -AB_CEPHALOSPORINS_3RD <- antibiotics %>% filter(group %like% "cephalosporin.*3") %>% pull(ab) -AB_CEPHALOSPORINS_4TH <- antibiotics %>% filter(group %like% "cephalosporin.*4") %>% pull(ab) -AB_CEPHALOSPORINS_5TH <- antibiotics %>% filter(group %like% "cephalosporin.*5") %>% pull(ab) +AB_ANTIFUNGALS <- AB_lookup %>% + filter(group %like% "antifungal") %>% + pull(ab) +AB_ANTIMYCOBACTERIALS <- AB_lookup %>% + filter(group %like% "antimycobacterial") %>% + pull(ab) +AB_CARBAPENEMS <- antibiotics %>% + filter(group %like% "carbapenem") %>% + pull(ab) +AB_CEPHALOSPORINS <- antibiotics %>% + filter(group %like% "cephalosporin") %>% + pull(ab) +AB_CEPHALOSPORINS_1ST <- antibiotics %>% + filter(group %like% "cephalosporin.*1") %>% + pull(ab) +AB_CEPHALOSPORINS_2ND <- antibiotics %>% + filter(group %like% "cephalosporin.*2") %>% + pull(ab) +AB_CEPHALOSPORINS_3RD <- antibiotics %>% + filter(group %like% "cephalosporin.*3") %>% + pull(ab) +AB_CEPHALOSPORINS_4TH <- antibiotics %>% + filter(group %like% "cephalosporin.*4") %>% + pull(ab) +AB_CEPHALOSPORINS_5TH <- antibiotics %>% + filter(group %like% "cephalosporin.*5") %>% + pull(ab) AB_CEPHALOSPORINS_EXCEPT_CAZ <- AB_CEPHALOSPORINS[AB_CEPHALOSPORINS != "CAZ"] -AB_FLUOROQUINOLONES <- antibiotics %>% filter(atc_group2 %like% "fluoroquinolone" | (group %like% "quinolone" & is.na(atc_group2))) %>% pull(ab) -AB_GLYCOPEPTIDES <- antibiotics %>% filter(group %like% "glycopeptide") %>% pull(ab) +AB_FLUOROQUINOLONES <- antibiotics %>% + filter(atc_group2 %like% "fluoroquinolone" | (group %like% "quinolone" & is.na(atc_group2))) %>% + pull(ab) +AB_GLYCOPEPTIDES <- antibiotics %>% + filter(group %like% "glycopeptide") %>% + pull(ab) AB_LIPOGLYCOPEPTIDES <- as.ab(c("DAL", "ORI", "TLV")) # dalba/orita/tela AB_GLYCOPEPTIDES_EXCEPT_LIPO <- AB_GLYCOPEPTIDES[!AB_GLYCOPEPTIDES %in% AB_LIPOGLYCOPEPTIDES] -AB_LINCOSAMIDES <- antibiotics %>% filter(atc_group2 %like% "lincosamide" | (group %like% "lincosamide" & is.na(atc_group2))) %>% pull(ab) -AB_MACROLIDES <- antibiotics %>% filter(atc_group2 %like% "macrolide" | (group %like% "macrolide" & is.na(atc_group2))) %>% pull(ab) -AB_OXAZOLIDINONES <- antibiotics %>% filter(group %like% "oxazolidinone") %>% pull(ab) -AB_PENICILLINS <- antibiotics %>% filter(group %like% "penicillin") %>% pull(ab) -AB_POLYMYXINS <- antibiotics %>% filter(group %like% "polymyxin") %>% pull(ab) -AB_QUINOLONES <- antibiotics %>% filter(group %like% "quinolone") %>% pull(ab) -AB_STREPTOGRAMINS <- antibiotics %>% filter(atc_group2 %like% "streptogramin") %>% pull(ab) -AB_TETRACYCLINES <- antibiotics %>% filter(group %like% "tetracycline") %>% pull(ab) +AB_LINCOSAMIDES <- antibiotics %>% + filter(atc_group2 %like% "lincosamide" | (group %like% "lincosamide" & is.na(atc_group2))) %>% + pull(ab) +AB_MACROLIDES <- antibiotics %>% + filter(atc_group2 %like% "macrolide" | (group %like% "macrolide" & is.na(atc_group2))) %>% + pull(ab) +AB_OXAZOLIDINONES <- antibiotics %>% + filter(group %like% "oxazolidinone") %>% + pull(ab) +AB_PENICILLINS <- antibiotics %>% + filter(group %like% "penicillin") %>% + pull(ab) +AB_POLYMYXINS <- antibiotics %>% + filter(group %like% "polymyxin") %>% + pull(ab) +AB_QUINOLONES <- antibiotics %>% + filter(group %like% "quinolone") %>% + pull(ab) +AB_STREPTOGRAMINS <- antibiotics %>% + filter(atc_group2 %like% "streptogramin") %>% + pull(ab) +AB_TETRACYCLINES <- antibiotics %>% + filter(group %like% "tetracycline") %>% + pull(ab) AB_TETRACYCLINES_EXCEPT_TGC <- AB_TETRACYCLINES[AB_TETRACYCLINES != "TGC"] -AB_TRIMETHOPRIMS <- antibiotics %>% filter(group %like% "trimethoprim") %>% pull(ab) +AB_TRIMETHOPRIMS <- antibiotics %>% + filter(group %like% "trimethoprim") %>% + pull(ab) AB_UREIDOPENICILLINS <- as.ab(c("PIP", "TZP", "AZL", "MEZ")) AB_BETALACTAMS <- c(AB_PENICILLINS, AB_CEPHALOSPORINS, AB_CARBAPENEMS) # this will be used for documentation: @@ -194,15 +261,21 @@ create_AB_lookup <- function() { AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name) AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name) AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name) - AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[, - c("ab", "atc", "cid", "name", - colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]), - drop = FALSE]), - stringsAsFactors = FALSE)), - function(x) { - x <- generalise_antibiotic_name(unname(unlist(x))) - x[x != ""] - })) + AB_lookup$generalised_all <- unname(lapply( + as.list(as.data.frame(t(AB_lookup[, + c( + "ab", "atc", "cid", "name", + colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"] + ), + drop = FALSE + ]), + stringsAsFactors = FALSE + )), + function(x) { + x <- generalise_antibiotic_name(unname(unlist(x))) + x[x != ""] + } + )) AB_lookup[, colnames(AB_lookup)[colnames(AB_lookup) %like% "^generalised"]] } AB_LOOKUP <- create_AB_lookup() @@ -210,48 +283,49 @@ AB_LOOKUP <- create_AB_lookup() # Export to package as internal data ---- usethis::ui_info(paste0("Saving {usethis::ui_value('sysdata.rda')} to {usethis::ui_value('R/')}")) suppressMessages(usethis::use_data(EUCAST_RULES_DF, - TRANSLATIONS, - LANGUAGES_SUPPORTED_NAMES, - LANGUAGES_SUPPORTED, - MO_CONS, - MO_COPS, - MO_STREP_ABCG, - MO_FULLNAME_LOWER, - MO_PREVALENT_GENERA, - AB_LOOKUP, - AB_AMINOGLYCOSIDES, - AB_AMINOPENICILLINS, - AB_ANTIFUNGALS, - AB_ANTIMYCOBACTERIALS, - AB_CARBAPENEMS, - AB_CEPHALOSPORINS, - AB_CEPHALOSPORINS_1ST, - AB_CEPHALOSPORINS_2ND, - AB_CEPHALOSPORINS_3RD, - AB_CEPHALOSPORINS_4TH, - AB_CEPHALOSPORINS_5TH, - AB_CEPHALOSPORINS_EXCEPT_CAZ, - AB_FLUOROQUINOLONES, - AB_LIPOGLYCOPEPTIDES, - AB_GLYCOPEPTIDES, - AB_GLYCOPEPTIDES_EXCEPT_LIPO, - AB_LINCOSAMIDES, - AB_MACROLIDES, - AB_OXAZOLIDINONES, - AB_PENICILLINS, - AB_POLYMYXINS, - AB_QUINOLONES, - AB_STREPTOGRAMINS, - AB_TETRACYCLINES, - AB_TETRACYCLINES_EXCEPT_TGC, - AB_TRIMETHOPRIMS, - AB_UREIDOPENICILLINS, - AB_BETALACTAMS, - DEFINED_AB_GROUPS, - internal = TRUE, - overwrite = TRUE, - version = 2, - compress = "xz")) + TRANSLATIONS, + LANGUAGES_SUPPORTED_NAMES, + LANGUAGES_SUPPORTED, + MO_CONS, + MO_COPS, + MO_STREP_ABCG, + MO_FULLNAME_LOWER, + MO_PREVALENT_GENERA, + AB_LOOKUP, + AB_AMINOGLYCOSIDES, + AB_AMINOPENICILLINS, + AB_ANTIFUNGALS, + AB_ANTIMYCOBACTERIALS, + AB_CARBAPENEMS, + AB_CEPHALOSPORINS, + AB_CEPHALOSPORINS_1ST, + AB_CEPHALOSPORINS_2ND, + AB_CEPHALOSPORINS_3RD, + AB_CEPHALOSPORINS_4TH, + AB_CEPHALOSPORINS_5TH, + AB_CEPHALOSPORINS_EXCEPT_CAZ, + AB_FLUOROQUINOLONES, + AB_LIPOGLYCOPEPTIDES, + AB_GLYCOPEPTIDES, + AB_GLYCOPEPTIDES_EXCEPT_LIPO, + AB_LINCOSAMIDES, + AB_MACROLIDES, + AB_OXAZOLIDINONES, + AB_PENICILLINS, + AB_POLYMYXINS, + AB_QUINOLONES, + AB_STREPTOGRAMINS, + AB_TETRACYCLINES, + AB_TETRACYCLINES_EXCEPT_TGC, + AB_TRIMETHOPRIMS, + AB_UREIDOPENICILLINS, + AB_BETALACTAMS, + DEFINED_AB_GROUPS, + internal = TRUE, + overwrite = TRUE, + version = 2, + compress = "xz" +)) # Export data sets to the repository in different formats ----------------- @@ -273,12 +347,15 @@ write_md5 <- function(object) { close(conn) } changed_md5 <- function(object) { - tryCatch({ - conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5")) - compared <- md5(object) != readLines(con = conn) - close(conn) - compared - }, error = function(e) TRUE) + tryCatch( + { + conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5")) + compared <- md5(object) != readLines(con = conn) + close(conn) + compared + }, + error = function(e) TRUE + ) } # give official names to ABs and MOs @@ -306,7 +383,7 @@ if (changed_md5(microorganisms)) { max_50_snomed <- sapply(microorganisms$snomed, function(x) paste(x[seq_len(min(50, length(x), na.rm = TRUE))], collapse = " ")) mo <- microorganisms mo$snomed <- max_50_snomed - mo <- dplyr::mutate_if(mo, ~!is.numeric(.), as.character) + mo <- dplyr::mutate_if(mo, ~ !is.numeric(.), as.character) try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE) try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE) try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE) @@ -328,7 +405,7 @@ if (changed_md5(microorganisms.old)) { try(arrow::write_parquet(microorganisms.old, "data-raw/microorganisms.old.parquet"), silent = TRUE) } -ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character) +ab <- dplyr::mutate_if(antibiotics, ~ !is.numeric(.), as.character) if (changed_md5(ab)) { usethis::ui_info(paste0("Saving {usethis::ui_value('antibiotics')} to {usethis::ui_value('data-raw/')}")) write_md5(ab) @@ -342,7 +419,7 @@ if (changed_md5(ab)) { try(arrow::write_parquet(antibiotics, "data-raw/antibiotics.parquet"), silent = TRUE) } -av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character) +av <- dplyr::mutate_if(antivirals, ~ !is.numeric(.), as.character) if (changed_md5(av)) { usethis::ui_info(paste0("Saving {usethis::ui_value('antivirals')} to {usethis::ui_value('data-raw/')}")) write_md5(av) @@ -357,9 +434,11 @@ if (changed_md5(av)) { } # give official names to ABs and MOs -intrinsicR <- data.frame(microorganism = mo_name(intrinsic_resistant$mo, language = NULL), - antibiotic = ab_name(intrinsic_resistant$ab, language = NULL), - stringsAsFactors = FALSE) +intrinsicR <- data.frame( + microorganism = mo_name(intrinsic_resistant$mo, language = NULL), + antibiotic = ab_name(intrinsic_resistant$ab, language = NULL), + stringsAsFactors = FALSE +) if (changed_md5(intrinsicR)) { usethis::ui_info(paste0("Saving {usethis::ui_value('intrinsic_resistant')} to {usethis::ui_value('data-raw/')}")) write_md5(intrinsicR) @@ -394,4 +473,25 @@ rm(list = current_globalenv[!current_globalenv %in% old_globalenv]) rm(current_globalenv) devtools::load_all(quiet = TRUE) -devtools::document() + + +# Document pkg ------------------------------------------------------------ +usethis::ui_info("Documenting package") +suppressMessages(devtools::document(quiet = TRUE)) + + +# Style pkg --------------------------------------------------------------- +usethis::ui_info("Styling package") +invisible(capture.output(styler::style_pkg( + style = styler::tidyverse_style, + filetype = c("R", "Rmd") +))) +invisible(capture.output(styler::style_dir( + path = "inst", # unit tests + style = styler::tidyverse_style, + filetype = c("R", "Rmd") +))) + + +# Finished ---------------------------------------------------------------- +usethis::ui_info("All done") diff --git a/data-raw/loinc.R b/data-raw/loinc.R index 37fd5ab38..a700ac410 100644 --- a/data-raw/loinc.R +++ b/data-raw/loinc.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -30,8 +30,9 @@ # 2. Download the CSV from https://loinc.org/download/loinc-table-file-csv/ (Loinc_2.67_Text_2.67.zip) # 3. Read Loinc.csv that's in this zip file loinc_df <- read.csv("data-raw/Loinc.csv", - row.names = NULL, - stringsAsFactors = FALSE) + row.names = NULL, + stringsAsFactors = FALSE +) # 4. Clean and add library(dplyr) @@ -39,7 +40,10 @@ library(cleaner) library(AMR) loinc_df %>% freq(CLASS) # to find the drugs loinc_df <- loinc_df %>% filter(CLASS == "DRUG/TOX") -ab_names <- antibiotics %>% pull(name) %>% paste0(collapse = "|") %>% paste0("(", ., ")") +ab_names <- antibiotics %>% + pull(name) %>% + paste0(collapse = "|") %>% + paste0("(", ., ")") antibiotics$loinc <- as.list(rep(NA_character_, nrow(antibiotics))) for (i in seq_len(nrow(antibiotics))) { diff --git a/data-raw/poorman_prepend.R b/data-raw/poorman_prepend.R index a4fc772cd..cc78347eb 100644 --- a/data-raw/poorman_prepend.R +++ b/data-raw/poorman_prepend.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -34,10 +34,10 @@ # # All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes. # -# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a +# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a # copy of the software and associated documentation files (the "Software"), to deal in the Software # without restriction, including without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software # is furnished to do so', given that a copyright notice is given in the software. # # Copyright notice on {date}, the day this code was downloaded, as found on diff --git a/data-raw/read_EUCAST.R b/data-raw/read_EUCAST.R index 7769243a8..6fbcc605e 100644 --- a/data-raw/read_EUCAST.R +++ b/data-raw/read_EUCAST.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -31,11 +31,10 @@ library(AMR) # USE THIS FUNCTION TO READ THE EUCAST EXCEL FILE THAT CONTAINS THE BREAKPOINT TABLES -read_EUCAST <- function(sheet, file, guideline_name) { - +read_EUCAST <- function(sheet, file, guideline_name) { message("\nGetting sheet: ", sheet) sheet.bak <- sheet - + uncertainties <- NULL add_uncertainties <- function(old, new) { if (is.null(old)) { @@ -44,55 +43,64 @@ read_EUCAST <- function(sheet, file, guideline_name) { bind_rows(old, new) } } - - raw_data <- read.xlsx(xlsxFile = file, - sheet = sheet, - colNames = FALSE, - skipEmptyRows = FALSE, - skipEmptyCols = FALSE, - fillMergedCells = TRUE, - na.strings = c("", "-", "NA", "IE", "IP")) - probable_rows <- suppressWarnings(raw_data %>% mutate_all(as.double) %>% summarise_all(~sum(!is.na(.))) %>% unlist() %>% max()) + + raw_data <- read.xlsx( + xlsxFile = file, + sheet = sheet, + colNames = FALSE, + skipEmptyRows = FALSE, + skipEmptyCols = FALSE, + fillMergedCells = TRUE, + na.strings = c("", "-", "NA", "IE", "IP") + ) + probable_rows <- suppressWarnings(raw_data %>% mutate_all(as.double) %>% summarise_all(~ sum(!is.na(.))) %>% unlist() %>% max()) if (probable_rows == 0) { message("NO ROWS FOUND") message("------------------------") return(NULL) } - + # in the info header in the Excel file, EUCAST mentions which genera are targeted if (sheet %like% "anaerob.*Gram.*posi") { - sheet <- paste0(c("Actinomyces", "Bifidobacterium", "Clostridioides", - "Clostridium", "Cutibacterium", "Eggerthella", - "Eubacterium", "Lactobacillus", "Propionibacterium", - "Staphylococcus saccharolyticus"), - collapse = "_") + sheet <- paste0(c( + "Actinomyces", "Bifidobacterium", "Clostridioides", + "Clostridium", "Cutibacterium", "Eggerthella", + "Eubacterium", "Lactobacillus", "Propionibacterium", + "Staphylococcus saccharolyticus" + ), + collapse = "_" + ) } else if (sheet %like% "anaerob.*Gram.*nega") { - sheet <- paste0(c("Bacteroides", - "Bilophila", - "Fusobacterium", - "Mobiluncus", - "Parabacteroides", - "Porphyromonas", - "Prevotella"), - collapse = "_") + sheet <- paste0(c( + "Bacteroides", + "Bilophila", + "Fusobacterium", + "Mobiluncus", + "Parabacteroides", + "Porphyromonas", + "Prevotella" + ), + collapse = "_" + ) } else if (sheet == "Streptococcus A,B,C,G") { sheet <- paste0(microorganisms %>% - filter(genus == "Streptococcus") %>% - mutate(lancefield = mo_name(mo, Lancefield = TRUE)) %>% - filter(lancefield %like% "^Streptococcus group") %>% - pull(fullname), - collapse = "_") + filter(genus == "Streptococcus") %>% + mutate(lancefield = mo_name(mo, Lancefield = TRUE)) %>% + filter(lancefield %like% "^Streptococcus group") %>% + pull(fullname), + collapse = "_" + ) } else if (sheet %like% "PK.*PD") { sheet <- "UNKNOWN" } mo_sheet <- paste0(suppressMessages(as.mo(unlist(strsplit(sheet, "_")))), collapse = "|") if (!is.null(mo_uncertainties())) uncertainties <- add_uncertainties(uncertainties, mo_uncertainties()) - + set_columns_names <- function(x, cols) { colnames(x) <- cols[1:length(colnames(x))] x } - + get_mo <- function(x) { for (i in seq_len(length(x))) { y <- trimws(unlist(strsplit(x[i], "(,|and)"))) @@ -104,76 +112,91 @@ read_EUCAST <- function(sheet, file, guideline_name) { } x } - - MICs_with_trailing_superscript <- c(seq(from = 0.0011, to = 0.0019, by = 0.0001), - seq(from = 0.031, to = 0.039, by = 0.001), - seq(from = 0.061, to = 0.069, by = 0.001), - seq(from = 0.1251, to = 0.1259, by = 0.0001), - seq(from = 0.251, to = 0.259, by = 0.001), - seq(from = 0.51, to = 0.59, by = 0.01), - seq(from = 11, to = 19, by = 1), - seq(from = 161, to = 169, by = 01), - seq(from = 21, to = 29, by = 1), - seq(from = 321, to = 329, by = 1), - seq(from = 41, to = 49, by = 1), - seq(from = 81, to = 89, by = 1)) + + MICs_with_trailing_superscript <- c( + seq(from = 0.0011, to = 0.0019, by = 0.0001), + seq(from = 0.031, to = 0.039, by = 0.001), + seq(from = 0.061, to = 0.069, by = 0.001), + seq(from = 0.1251, to = 0.1259, by = 0.0001), + seq(from = 0.251, to = 0.259, by = 0.001), + seq(from = 0.51, to = 0.59, by = 0.01), + seq(from = 11, to = 19, by = 1), + seq(from = 161, to = 169, by = 01), + seq(from = 21, to = 29, by = 1), + seq(from = 321, to = 329, by = 1), + seq(from = 41, to = 49, by = 1), + seq(from = 81, to = 89, by = 1) + ) has_superscript <- function(x) { - # because due to floating point error, 0.1252 is not in: + # because due to floating point error, 0.1252 is not in: # seq(from = 0.1251, to = 0.1259, by = 0.0001) sapply(x, function(x) any(near(x, MICs_with_trailing_superscript))) } - + has_zone_diameters <- rep(any(unlist(raw_data) %like% "zone diameter"), nrow(raw_data)) - cleaned <- raw_data %>% - as_tibble() %>% - set_columns_names(LETTERS) %>% - transmute(drug = A, - MIC_S = B, - MIC_R = C, - disk_dose = ifelse(has_zone_diameters, E, NA_character_), - disk_S = ifelse(has_zone_diameters, `F`, NA_character_), - disk_R = ifelse(has_zone_diameters, G, NA_character_)) %>% - filter(!is.na(drug), - !(is.na(MIC_S) & is.na(MIC_R) & is.na(disk_S) & is.na(disk_R)), - MIC_S %unlike% "(MIC|S ≤|note)", - MIC_S %unlike% "^[-]", - drug != MIC_S,) %>% - mutate(administration = case_when(drug %like% "[( ]oral" ~ "oral", - drug %like% "[( ]iv" ~ "iv", - TRUE ~ NA_character_), - uti = ifelse(drug %like% "(UTI|urinary|urine)", TRUE, FALSE), - systemic = ifelse(drug %like% "(systemic|septic)", TRUE, FALSE), - mo = ifelse(drug %like% "([.]|spp)", get_mo(drug), mo_sheet)) %>% + cleaned <- raw_data %>% + as_tibble() %>% + set_columns_names(LETTERS) %>% + transmute( + drug = A, + MIC_S = B, + MIC_R = C, + disk_dose = ifelse(has_zone_diameters, E, NA_character_), + disk_S = ifelse(has_zone_diameters, `F`, NA_character_), + disk_R = ifelse(has_zone_diameters, G, NA_character_) + ) %>% + filter( + !is.na(drug), + !(is.na(MIC_S) & is.na(MIC_R) & is.na(disk_S) & is.na(disk_R)), + MIC_S %unlike% "(MIC|S ≤|note)", + MIC_S %unlike% "^[-]", + drug != MIC_S, + ) %>% + mutate( + administration = case_when( + drug %like% "[( ]oral" ~ "oral", + drug %like% "[( ]iv" ~ "iv", + TRUE ~ NA_character_ + ), + uti = ifelse(drug %like% "(UTI|urinary|urine)", TRUE, FALSE), + systemic = ifelse(drug %like% "(systemic|septic)", TRUE, FALSE), + mo = ifelse(drug %like% "([.]|spp)", get_mo(drug), mo_sheet) + ) %>% # clean disk doses - mutate(disk_dose = clean_character(disk_dose, remove = "[^0-9.-]")) %>% + mutate(disk_dose = clean_character(disk_dose, remove = "[^0-9.-]")) %>% # clean MIC and disk values - mutate(MIC_S = gsub(".,.", "", MIC_S), # remove superscript notes with comma, like 0.5^2,3 - MIC_R = gsub(".,.", "", MIC_R), - disk_S = gsub(".,.", "", disk_S), - disk_R = gsub(".,.", "", disk_R), - MIC_S = clean_double(MIC_S), # make them valid numeric values - MIC_R = clean_double(MIC_R), - disk_S = clean_integer(disk_S), - disk_R = clean_integer(disk_R), - # invalid MIC values have a superscript text, delete those - MIC_S = ifelse(has_superscript(MIC_S), - substr(MIC_S, 1, nchar(MIC_S) - 1), - MIC_S), - MIC_R = ifelse(has_superscript(MIC_R), - substr(MIC_R, 1, nchar(MIC_R) - 1), - MIC_R), - # and some are just awful - MIC_S = ifelse(MIC_S == 43.4, 4, MIC_S), - MIC_R = ifelse(MIC_R == 43.4, 4, MIC_R), - ) %>% + mutate( + MIC_S = gsub(".,.", "", MIC_S), # remove superscript notes with comma, like 0.5^2,3 + MIC_R = gsub(".,.", "", MIC_R), + disk_S = gsub(".,.", "", disk_S), + disk_R = gsub(".,.", "", disk_R), + MIC_S = clean_double(MIC_S), # make them valid numeric values + MIC_R = clean_double(MIC_R), + disk_S = clean_integer(disk_S), + disk_R = clean_integer(disk_R), + # invalid MIC values have a superscript text, delete those + MIC_S = ifelse(has_superscript(MIC_S), + substr(MIC_S, 1, nchar(MIC_S) - 1), + MIC_S + ), + MIC_R = ifelse(has_superscript(MIC_R), + substr(MIC_R, 1, nchar(MIC_R) - 1), + MIC_R + ), + # and some are just awful + MIC_S = ifelse(MIC_S == 43.4, 4, MIC_S), + MIC_R = ifelse(MIC_R == 43.4, 4, MIC_R), + ) %>% # clean drug names - mutate(drug = gsub(" ?[(, ].*$", "", drug), - drug = gsub("[1-9]+$", "", drug), - ab = as.ab(drug)) %>% - select(ab, mo, everything(), -drug) %>% + mutate( + drug = gsub(" ?[(, ].*$", "", drug), + drug = gsub("[1-9]+$", "", drug), + ab = as.ab(drug) + ) %>% + select(ab, mo, everything(), -drug) %>% as.data.frame(stringsAsFactors = FALSE) - + # new row for every different MO mentioned for (i in 1:nrow(cleaned)) { mo <- cleaned[i, "mo", drop = TRUE] @@ -181,37 +204,44 @@ read_EUCAST <- function(sheet, file, guideline_name) { mo_vect <- unlist(strsplit(mo, "|", fixed = TRUE)) cleaned[i, "mo"] <- mo_vect[1] for (j in seq_len(length(mo_vect))) { - cleaned <- bind_rows(cleaned, cleaned[i , , drop = FALSE]) + cleaned <- bind_rows(cleaned, cleaned[i, , drop = FALSE]) cleaned[nrow(cleaned), "mo"] <- mo_vect[j] } } } - - cleaned <- cleaned %>% - distinct(ab, mo, administration, uti, systemic, .keep_all = TRUE) %>% - arrange(ab, mo) %>% + + cleaned <- cleaned %>% + distinct(ab, mo, administration, uti, systemic, .keep_all = TRUE) %>% + arrange(ab, mo) %>% mutate_at(c("MIC_S", "MIC_R", "disk_S", "disk_R"), as.double) %>% - pivot_longer(c("MIC_S", "MIC_R", "disk_S", "disk_R"), "type") %>% - mutate(method = ifelse(type %like% "MIC", "MIC", "DISK"), - type = gsub("^.*_", "breakpoint_", type)) %>% - pivot_wider(names_from = type, values_from = value) %>% - mutate(guideline = guideline_name, - disk_dose = ifelse(method == "DISK", disk_dose, NA_character_), - mo = ifelse(mo == "", mo_sheet, mo)) %>% - filter(!(is.na(breakpoint_S) & is.na(breakpoint_R))) %>% + pivot_longer(c("MIC_S", "MIC_R", "disk_S", "disk_R"), "type") %>% + mutate( + method = ifelse(type %like% "MIC", "MIC", "DISK"), + type = gsub("^.*_", "breakpoint_", type) + ) %>% + pivot_wider(names_from = type, values_from = value) %>% + mutate( + guideline = guideline_name, + disk_dose = ifelse(method == "DISK", disk_dose, NA_character_), + mo = ifelse(mo == "", mo_sheet, mo) + ) %>% + filter(!(is.na(breakpoint_S) & is.na(breakpoint_R))) %>% # comply with rsi_translation for now transmute(guideline, - method, - site = case_when(uti ~ "UTI", - systemic ~ "Systemic", - TRUE ~ administration), - mo, ab, - ref_tbl = sheet.bak, - disk_dose = ifelse(!is.na(disk_dose), paste0(disk_dose, "ug"), NA_character_), - breakpoint_S, - breakpoint_R) %>% + method, + site = case_when( + uti ~ "UTI", + systemic ~ "Systemic", + TRUE ~ administration + ), + mo, ab, + ref_tbl = sheet.bak, + disk_dose = ifelse(!is.na(disk_dose), paste0(disk_dose, "ug"), NA_character_), + breakpoint_S, + breakpoint_R + ) %>% as.data.frame(stringsAsFactors = FALSE) - + if (!is.null(uncertainties)) { print(uncertainties %>% distinct(input, mo, .keep_all = TRUE)) } @@ -231,24 +261,33 @@ guideline_name <- "EUCAST 2021" sheets_to_analyse <- sheets[!sheets %in% c("Content", "Changes", "Notes", "Guidance", "Dosages", "Technical uncertainty", "Topical agents")] # takes the longest time: -new_EUCAST <- read_EUCAST(sheet = sheets_to_analyse[1], - file = file, - guideline_name = guideline_name) +new_EUCAST <- read_EUCAST( + sheet = sheets_to_analyse[1], + file = file, + guideline_name = guideline_name +) for (i in 2:length(sheets_to_analyse)) { tryCatch( - new_EUCAST <<- bind_rows(new_EUCAST, - read_EUCAST(sheet = sheets_to_analyse[i], - file = file, - guideline_name = guideline_name)) - , error = function(e) message(e$message)) + new_EUCAST <<- bind_rows( + new_EUCAST, + read_EUCAST( + sheet = sheets_to_analyse[i], + file = file, + guideline_name = guideline_name + ) + ), + error = function(e) message(e$message) + ) } # 2021-07-12 fix for Morganellaceae (check other lines too next time) morg <- rsi_translation %>% as_tibble() %>% - filter(ab == "IPM", - guideline == "EUCAST 2021", - mo == as.mo("Enterobacterales")) %>% + filter( + ab == "IPM", + guideline == "EUCAST 2021", + mo == as.mo("Enterobacterales") + ) %>% mutate(mo = as.mo("Morganellaceae")) morg[which(morg$method == "MIC"), "breakpoint_S"] <- 0.001 morg[which(morg$method == "MIC"), "breakpoint_R"] <- 4 @@ -258,5 +297,5 @@ morg[which(morg$method == "DISK"), "breakpoint_R"] <- 19 rsi_translation <- rsi_translation %>% bind_rows(morg) %>% bind_rows(morg %>% - mutate(guideline = "EUCAST 2020")) %>% + mutate(guideline = "EUCAST 2020")) %>% arrange(desc(guideline), ab, mo, method) diff --git a/data-raw/reproduction_of_antibiotics.R b/data-raw/reproduction_of_antibiotics.R index 3d711a8a4..c9c914b29 100644 --- a/data-raw/reproduction_of_antibiotics.R +++ b/data-raw/reproduction_of_antibiotics.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -33,34 +33,43 @@ library(dplyr) library(readxl) DRGLST <- read_excel("DRGLST.xlsx") abx <- DRGLST %>% - select(ab = WHON5_CODE, - name = ANTIBIOTIC) %>% + select( + ab = WHON5_CODE, + name = ANTIBIOTIC + ) %>% # remove the ones without WHONET code filter(!is.na(ab)) %>% distinct(name, .keep_all = TRUE) %>% # add the ones without WHONET code bind_rows( DRGLST %>% - select(ab = WHON5_CODE, - name = ANTIBIOTIC) %>% + select( + ab = WHON5_CODE, + name = ANTIBIOTIC + ) %>% filter(is.na(ab)) %>% distinct(name, .keep_all = TRUE) - # add new ab code later + # add new ab code later ) %>% arrange(name) # add old ATC codes ab_old <- AMR::antibiotics %>% - mutate(official = gsub("( and |, )", "/", official), - abbr = tolower(paste(ifelse(is.na(abbr), "", abbr), - ifelse(is.na(certe), "", certe), - ifelse(is.na(umcg), "", umcg), - sep = "|"))) + mutate( + official = gsub("( and |, )", "/", official), + abbr = tolower(paste(ifelse(is.na(abbr), "", abbr), + ifelse(is.na(certe), "", certe), + ifelse(is.na(umcg), "", umcg), + sep = "|" + )) + ) for (i in 1:nrow(ab_old)) { abbr <- ab_old[i, "abbr"] - abbr <- strsplit(abbr, "|", fixed = TRUE) %>% unlist() %>% unique() + abbr <- strsplit(abbr, "|", fixed = TRUE) %>% + unlist() %>% + unique() abbr <- abbr[abbr != ""] - #print(abbr) + # print(abbr) if (length(abbr) == 0) { ab_old[i, "abbr"] <- NA_character_ } else { @@ -72,50 +81,54 @@ for (i in 1:nrow(ab_old)) { abx_atc1 <- abx %>% mutate(name_lower = tolower(name)) %>% left_join(ab_old %>% - select(ears_net, atc), by = c(ab = "ears_net")) %>% + select(ears_net, atc), by = c(ab = "ears_net")) %>% rename(atc1 = atc) %>% left_join(ab_old %>% - mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>% - transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>% + mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>% + transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>% rename(atc2 = atc) %>% left_join(ab_old %>% - mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>% - mutate(official = gsub("f", "ph", official)) %>% - transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>% + mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>% + mutate(official = gsub("f", "ph", official)) %>% + transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>% rename(atc3 = atc) %>% left_join(ab_old %>% - mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>% - mutate(official = gsub("t", "th", official)) %>% - transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>% + mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>% + mutate(official = gsub("t", "th", official)) %>% + transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>% rename(atc4 = atc) %>% left_join(ab_old %>% - mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>% - mutate(official = gsub("f", "ph", official)) %>% - mutate(official = gsub("t", "th", official)) %>% - transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>% + mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>% + mutate(official = gsub("f", "ph", official)) %>% + mutate(official = gsub("t", "th", official)) %>% + transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>% rename(atc5 = atc) %>% left_join(ab_old %>% - mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>% - mutate(official = gsub("f", "ph", official)) %>% - mutate(official = gsub("t", "th", official)) %>% - mutate(official = gsub("ine$", "in", official)) %>% - transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>% + mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>% + mutate(official = gsub("f", "ph", official)) %>% + mutate(official = gsub("t", "th", official)) %>% + mutate(official = gsub("ine$", "in", official)) %>% + transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>% rename(atc6 = atc) %>% - mutate(atc = case_when(!is.na(atc1) ~ atc1, - !is.na(atc2) ~ atc2, - !is.na(atc3) ~ atc3, - !is.na(atc4) ~ atc4, - !is.na(atc4) ~ atc5, - TRUE ~ atc6)) %>% + mutate(atc = case_when( + !is.na(atc1) ~ atc1, + !is.na(atc2) ~ atc2, + !is.na(atc3) ~ atc3, + !is.na(atc4) ~ atc4, + !is.na(atc4) ~ atc5, + TRUE ~ atc6 + )) %>% distinct(ab, name, .keep_all = TRUE) %>% select(ab, atc, name) abx_atc2 <- ab_old %>% - filter(!atc %in% abx_atc1$atc, - is.na(ears_net), - !is.na(atc_group1), - atc_group1 %unlike% ("virus|vaccin|viral|immun"), - official %unlike% "(combinations| with )") %>% + filter( + !atc %in% abx_atc1$atc, + is.na(ears_net), + !is.na(atc_group1), + atc_group1 %unlike% ("virus|vaccin|viral|immun"), + official %unlike% "(combinations| with )" + ) %>% mutate(ab = NA_character_) %>% as.data.frame(stringsAsFactors = FALSE) %>% select(ab, atc, name = official) @@ -125,12 +138,15 @@ abx2 <- bind_rows(abx_atc1, abx_atc2) rm(abx_atc1) rm(abx_atc2) -abx2$ab[is.na(abx2$ab)] <- toupper(abbreviate(gsub("[/0-9-]", - " ", - abx2$name[is.na(abx2$ab)]), - minlength = 3, - method = "left.kept", - strict = TRUE)) +abx2$ab[is.na(abx2$ab)] <- toupper(abbreviate(gsub( + "[/0-9-]", + " ", + abx2$name[is.na(abx2$ab)] +), +minlength = 3, +method = "left.kept", +strict = TRUE +)) n_distinct(abx2$ab) @@ -150,7 +166,9 @@ for (i in 2:nrow(abx2)) { abx2[i, "ab"] <- paste0(abx2[i, "ab", drop = TRUE], abx2[i, "seqnr", drop = TRUE]) } } -abx2 <- abx2 %>% select(-seqnr) %>% arrange(name) +abx2 <- abx2 %>% + select(-seqnr) %>% + arrange(name) # everything unique?? nrow(abx2) == n_distinct(abx2$ab) @@ -158,8 +176,10 @@ nrow(abx2) == n_distinct(abx2$ab) # get ATC properties abx2 <- abx2 %>% left_join(ab_old %>% - select(atc, abbr, atc_group1, atc_group2, - oral_ddd, oral_units, iv_ddd, iv_units)) + select( + atc, abbr, atc_group1, atc_group2, + oral_ddd, oral_units, iv_ddd, iv_units + )) abx2$abbr <- lapply(as.list(abx2$abbr), function(x) unlist(strsplit(x, "|", fixed = TRUE))) @@ -171,29 +191,41 @@ get_CID <- function(ab) { p$tick()$print() CID[i] <- tryCatch( - data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/", - URLencode(ab[i], reserved = TRUE), - "/cids/TXT?name_type=complete"), - showProgress = FALSE)[[1]][1], - error = function(e) NA_integer_) + data.table::fread(paste0( + "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/", + URLencode(ab[i], reserved = TRUE), + "/cids/TXT?name_type=complete" + ), + showProgress = FALSE + )[[1]][1], + error = function(e) NA_integer_ + ) if (is.na(CID[i])) { # try with removing the text in brackets CID[i] <- tryCatch( - data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/", - URLencode(trimws(gsub("[(].*[)]", "", ab[i])), reserved = TRUE), - "/cids/TXT?name_type=complete"), - showProgress = FALSE)[[1]][1], - error = function(e) NA_integer_) + data.table::fread(paste0( + "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/", + URLencode(trimws(gsub("[(].*[)]", "", ab[i])), reserved = TRUE), + "/cids/TXT?name_type=complete" + ), + showProgress = FALSE + )[[1]][1], + error = function(e) NA_integer_ + ) } if (is.na(CID[i])) { # try match on word and take the lowest CID value (sorted) ab[i] <- gsub("[^a-z0-9]+", " ", ab[i], ignore.case = TRUE) CID[i] <- tryCatch( - data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/", - URLencode(ab[i], reserved = TRUE), - "/cids/TXT?name_type=word"), - showProgress = FALSE)[[1]][1], - error = function(e) NA_integer_) + data.table::fread(paste0( + "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/", + URLencode(ab[i], reserved = TRUE), + "/cids/TXT?name_type=word" + ), + showProgress = FALSE + )[[1]][1], + error = function(e) NA_integer_ + ) } Sys.sleep(0.1) } @@ -203,15 +235,15 @@ get_CID <- function(ab) { # get CIDs (2-3 min) CIDs <- get_CID(abx2$name) # These could not be found: -abx2[is.na(CIDs),] %>% View() +abx2[is.na(CIDs), ] %>% View() # returns list with synonyms (brand names), with CIDs as names get_synonyms <- function(CID, clean = TRUE) { synonyms <- rep(NA_character_, length(CID)) - #p <- progress_ticker(n = length(CID), min_time = 0) + # p <- progress_ticker(n = length(CID), min_time = 0) for (i in 1:length(CID)) { - #p$tick()$print() + # p$tick()$print() synonyms_txt <- "" @@ -220,27 +252,37 @@ get_synonyms <- function(CID, clean = TRUE) { } synonyms_txt <- tryCatch( - data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/", - CID[i], - "/synonyms/TXT"), - sep = "\n", - showProgress = FALSE)[[1]], - error = function(e) NA_character_) + data.table::fread(paste0( + "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/", + CID[i], + "/synonyms/TXT" + ), + sep = "\n", + showProgress = FALSE + )[[1]], + error = function(e) NA_character_ + ) Sys.sleep(0.1) if (clean == TRUE) { # remove text between brackets - synonyms_txt <- trimws(gsub("[(].*[)]", "", - gsub("[[].*[]]", "", - gsub("[(].*[]]", "", - gsub("[[].*[)]", "", synonyms_txt))))) + synonyms_txt <- trimws(gsub( + "[(].*[)]", "", + gsub( + "[[].*[]]", "", + gsub( + "[(].*[]]", "", + gsub("[[].*[)]", "", synonyms_txt) + ) + ) + )) synonyms_txt <- gsub("Co-", "Co", synonyms_txt, fixed = TRUE) # only length 6 to 20 and no txt with reading marks or numbers and must start with capital letter (= brand) - synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20) - & !grepl("[-&{},_0-9/]", synonyms_txt) - & grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)] - synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE)) + synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20) & + !grepl("[-&{},_0-9/]", synonyms_txt) & + grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)] + synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE)) } synonyms_txt <- unique(trimws(synonyms_txt[tolower(synonyms_txt) %in% unique(tolower(synonyms_txt))])) synonyms[i] <- list(sort(synonyms_txt)) @@ -251,52 +293,56 @@ get_synonyms <- function(CID, clean = TRUE) { # get brand names from PubChem (2-3 min) synonyms <- get_synonyms(CIDs) -synonyms <- lapply(synonyms, - function(x) { - if (length(x) == 0 | all(is.na(x))) { - "" - } else { - x - }}) +synonyms <- lapply( + synonyms, + function(x) { + if (length(x) == 0 | all(is.na(x))) { + "" + } else { + x + } + } +) # add them to data set antibiotics <- abx2 %>% left_join(DRGLST %>% - select(ab = WHON5_CODE, CLASS, SUBCLASS) %>% - distinct(ab, .keep_all = TRUE), by = "ab") %>% + select(ab = WHON5_CODE, CLASS, SUBCLASS) %>% + distinct(ab, .keep_all = TRUE), by = "ab") %>% transmute(ab, - atc, - cid = CIDs, - # no capital after a slash: Ampicillin/Sulbactam -> Ampicillin/sulbactam - name = name %>% - gsub("([/-])([A-Z])", "\\1\\L\\2", ., perl = TRUE) %>% - gsub("edta", "EDTA", ., ignore.case = TRUE), - group = case_when( - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "am(ph|f)enicol" ~ "Amphenicols", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "aminoglycoside" ~ "Aminoglycosides", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "carbapenem" | name %like% "(imipenem|meropenem)" ~ "Carbapenems", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "First-generation cephalosporin" ~ "Cephalosporins (1st gen.)", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Second-generation cephalosporin" ~ "Cephalosporins (2nd gen.)", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Third-generation cephalosporin" ~ "Cephalosporins (3rd gen.)", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Fourth-generation cephalosporin" ~ "Cephalosporins (4th gen.)", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(tuberculosis|mycobacter)" ~ "Antimycobacterials", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "cephalosporin" ~ "Cephalosporins", - name %like% "^Ce" & is.na(atc_group1) & paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "beta-?lactam" ~ "Cephalosporins", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(beta-?lactam|penicillin)" ~ "Beta-lactams/penicillins", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "quinolone" ~ "Quinolones", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "glycopeptide" ~ "Glycopeptides", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "macrolide" ~ "Macrolides/lincosamides", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "tetracycline" ~ "Tetracyclines", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "trimethoprim" ~ "Trimethoprims", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "polymyxin" ~ "Polymyxins", - paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(fungal|mycot)" ~ "Antifungals/antimycotics", - TRUE ~ "Other antibacterials" - ), - atc_group1, atc_group2, - abbreviations = unname(abbr), - synonyms = unname(synonyms), - oral_ddd, oral_units, - iv_ddd, iv_units) %>% + atc, + cid = CIDs, + # no capital after a slash: Ampicillin/Sulbactam -> Ampicillin/sulbactam + name = name %>% + gsub("([/-])([A-Z])", "\\1\\L\\2", ., perl = TRUE) %>% + gsub("edta", "EDTA", ., ignore.case = TRUE), + group = case_when( + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "am(ph|f)enicol" ~ "Amphenicols", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "aminoglycoside" ~ "Aminoglycosides", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "carbapenem" | name %like% "(imipenem|meropenem)" ~ "Carbapenems", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "First-generation cephalosporin" ~ "Cephalosporins (1st gen.)", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Second-generation cephalosporin" ~ "Cephalosporins (2nd gen.)", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Third-generation cephalosporin" ~ "Cephalosporins (3rd gen.)", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Fourth-generation cephalosporin" ~ "Cephalosporins (4th gen.)", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(tuberculosis|mycobacter)" ~ "Antimycobacterials", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "cephalosporin" ~ "Cephalosporins", + name %like% "^Ce" & is.na(atc_group1) & paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "beta-?lactam" ~ "Cephalosporins", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(beta-?lactam|penicillin)" ~ "Beta-lactams/penicillins", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "quinolone" ~ "Quinolones", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "glycopeptide" ~ "Glycopeptides", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "macrolide" ~ "Macrolides/lincosamides", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "tetracycline" ~ "Tetracyclines", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "trimethoprim" ~ "Trimethoprims", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "polymyxin" ~ "Polymyxins", + paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(fungal|mycot)" ~ "Antifungals/antimycotics", + TRUE ~ "Other antibacterials" + ), + atc_group1, atc_group2, + abbreviations = unname(abbr), + synonyms = unname(synonyms), + oral_ddd, oral_units, + iv_ddd, iv_units + ) %>% as.data.frame(stringsAsFactors = FALSE) # some exceptions @@ -329,13 +375,15 @@ antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]] <- antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]], "cfxt")) # Add cefoxitin screening class(antibiotics$ab) <- "character" -antibiotics <- rbind(antibiotics,data.frame(ab = "FOX1", atc = NA, cid = NA, - name = "Cefoxitin screening", - group = "Cephalosporins (2nd gen.)", atc_group1 = NA, atc_group2 = NA, - abbreviations = "cfsc", synonyms = NA, - oral_ddd = NA, oral_units = NA, iv_ddd = NA, iv_units = NA, - loinc = NA, - stringsAsFactors = FALSE)) +antibiotics <- rbind(antibiotics, data.frame( + ab = "FOX1", atc = NA, cid = NA, + name = "Cefoxitin screening", + group = "Cephalosporins (2nd gen.)", atc_group1 = NA, atc_group2 = NA, + abbreviations = "cfsc", synonyms = NA, + oral_ddd = NA, oral_units = NA, iv_ddd = NA, iv_units = NA, + loinc = NA, + stringsAsFactors = FALSE +)) # More GLIMS codes antibiotics[which(antibiotics$ab == "AMB"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "AMB"), "abbreviations"][[1]], "amf")) antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz")) @@ -520,27 +568,33 @@ antibiotics[which(antibiotics$ab == "RFP"), "abbreviations"][[1]] <- list(sort(c antibiotics[which(antibiotics$ab == "RTP"), "abbreviations"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "RTP"), "abbreviations"][[1]], "RET"))) antibiotics[which(antibiotics$ab == "TYL1"), "abbreviations"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "TYL1"), "abbreviations"][[1]], "TVN"))) -antibiotics <- antibiotics %>% - mutate(ab = as.character(ab)) %>% - rbind(antibiotics %>% - filter(ab == "GEH") %>% - mutate(ab = "AMH", - name = "Amphotericin B-high", - abbreviations = list(c("amhl", "amfo b high", "ampho b high", "amphotericin high")))) %>% - rbind(antibiotics %>% - filter(ab == "GEH") %>% - mutate(ab = "TOH", - name = "Tobramycin-high", - abbreviations = list(c("tohl", "tobra high", "tobramycin high")))) %>% - rbind(antibiotics %>% - filter(ab == "BUT") %>% - mutate(ab = "CIX", - atc = "D01AE14", - name = "Ciclopirox", - group = "Antifungals/antimycotics", - atc_group1 = "Antifungals for topical use", - atc_group2 = "Other antifungals for topical use", - abbreviations = list(c("cipx")))) +antibiotics <- antibiotics %>% + mutate(ab = as.character(ab)) %>% + rbind(antibiotics %>% + filter(ab == "GEH") %>% + mutate( + ab = "AMH", + name = "Amphotericin B-high", + abbreviations = list(c("amhl", "amfo b high", "ampho b high", "amphotericin high")) + )) %>% + rbind(antibiotics %>% + filter(ab == "GEH") %>% + mutate( + ab = "TOH", + name = "Tobramycin-high", + abbreviations = list(c("tohl", "tobra high", "tobramycin high")) + )) %>% + rbind(antibiotics %>% + filter(ab == "BUT") %>% + mutate( + ab = "CIX", + atc = "D01AE14", + name = "Ciclopirox", + group = "Antifungals/antimycotics", + atc_group1 = "Antifungals for topical use", + atc_group2 = "Other antifungals for topical use", + abbreviations = list(c("cipx")) + )) antibiotics[which(antibiotics$ab == "SSS"), "name"] <- "Sulfonamide" # ESBL E-test codes: antibiotics[which(antibiotics$ab == "CCV"), "abbreviations"][[1]] <- list(c("xtzl")) @@ -600,13 +654,13 @@ antibiotics[which(antibiotics$ab == "RXT"), "name"] <- "Roxithromycin" antibiotics[which(antibiotics$ab == "PEN"), "atc"] <- "J01CE01" # WHONET cleanup -antibiotics[which(antibiotics$ab == "BCZ"), "name"] <- "Bicyclomycin" -antibiotics[which(antibiotics$ab == "CCL"), "name"] <- "Cefetecol" -antibiotics[which(antibiotics$ab == "ENV"), "name"] <- "Enviomycin" -antibiotics[which(antibiotics$ab == "KIT"), "name"] <- "Kitasamycin" -antibiotics[which(antibiotics$ab == "LSP"), "name"] <- "Linco-spectin" -antibiotics[which(antibiotics$ab == "MEC"), "name"] <- "Mecillinam" -antibiotics[which(antibiotics$ab == "PMR"), "name"] <- "Pimaricin" +antibiotics[which(antibiotics$ab == "BCZ"), "name"] <- "Bicyclomycin" +antibiotics[which(antibiotics$ab == "CCL"), "name"] <- "Cefetecol" +antibiotics[which(antibiotics$ab == "ENV"), "name"] <- "Enviomycin" +antibiotics[which(antibiotics$ab == "KIT"), "name"] <- "Kitasamycin" +antibiotics[which(antibiotics$ab == "LSP"), "name"] <- "Linco-spectin" +antibiotics[which(antibiotics$ab == "MEC"), "name"] <- "Mecillinam" +antibiotics[which(antibiotics$ab == "PMR"), "name"] <- "Pimaricin" antibiotics[which(antibiotics$ab == "BCZ"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "BCZ"), "abbreviations"][[1]], "Bicozamycin")))) antibiotics[which(antibiotics$ab == "CCL"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "CCL"), "abbreviations"][[1]], "Cefcatacol")))) antibiotics[which(antibiotics$ab == "ENV"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "ENV"), "abbreviations"][[1]], "Tuberactinomycin")))) @@ -617,7 +671,7 @@ antibiotics[which(antibiotics$ab == "PMR"), "abbreviations"][[1]] <- list(sort(u # set cephalosporins groups for the ones that could not be determined automatically: -antibiotics <- antibiotics %>% +antibiotics <- antibiotics %>% mutate(group = case_when( name == "Cefcapene" ~ "Cephalosporins (3rd gen.)", name == "Cefcapene pivoxil" ~ "Cephalosporins (3rd gen.)", @@ -650,21 +704,24 @@ antibiotics <- antibiotics %>% name == "Ceftolozane/enzyme inhibitor" ~ "Cephalosporins (5th gen.)", name == "Ceftolozane/tazobactam" ~ "Cephalosporins (5th gen.)", name == "Cefuroxime axetil" ~ "Cephalosporins (2nd gen.)", - TRUE ~ group)) + TRUE ~ group + )) antibiotics[which(antibiotics$ab %in% c("CYC", "LNZ", "THA", "TZD")), "group"] <- "Oxazolidinones" # add pretomanid antibiotics <- antibiotics %>% - mutate(ab = as.character(ab)) %>% + mutate(ab = as.character(ab)) %>% bind_rows(antibiotics %>% - mutate(ab = as.character(ab)) %>% - filter(ab == "SMF") %>% - mutate(ab = "PMD", - atc = "J04AK08", - cid = 456199, - name = "Pretomanid", - abbreviations = list(""), - oral_ddd = NA_real_)) + mutate(ab = as.character(ab)) %>% + filter(ab == "SMF") %>% + mutate( + ab = "PMD", + atc = "J04AK08", + cid = 456199, + name = "Pretomanid", + abbreviations = list(""), + oral_ddd = NA_real_ + )) @@ -675,25 +732,24 @@ antibiotics <- antibiotics %>% updated_atc <- as.list(antibiotics$atc) get_atcs <- function(ab_name, url = "https://www.whocc.no/atc_ddd_index/") { - ab_name <- gsub("/", " and ", tolower(ab_name), fixed = TRUE) - + # we will do a search on their website, which means: - + # go to the url - atc_tbl <- read_html(url) %>% + atc_tbl <- read_html(url) %>% # get all forms html_form() %>% # get the second form (the first form is a global website form) - .[[2]] %>% + .[[2]] %>% # set the name input box to our search parameter - html_form_set(name = ab_name) %>% + html_form_set(name = ab_name) %>% # hit Submit - html_form_submit() %>% + html_form_submit() %>% # read the resulting page - read_html() %>% + read_html() %>% # retrieve the table on it - html_node("table") %>% + html_node("table") %>% # transform it to an R data set html_table(header = FALSE) # and get the ATCs (first column) of only exact hits @@ -702,9 +758,10 @@ get_atcs <- function(ab_name, url = "https://www.whocc.no/atc_ddd_index/") { # this takes around 4 minutes (some are skipped and go faster) for (i in seq_len(nrow(antibiotics))) { - message(percentage(i / nrow(antibiotics), digits = 1), - " - Downloading ", antibiotics$name[i], - appendLF = FALSE) + message(percentage(i / nrow(antibiotics), digits = 1), + " - Downloading ", antibiotics$name[i], + appendLF = FALSE + ) atcs <- get_atcs(antibiotics$name[i]) if (length(atcs) > 0) { updated_atc[[i]] <- atcs diff --git a/data-raw/reproduction_of_antivirals.R b/data-raw/reproduction_of_antivirals.R index fb4af03de..96303a095 100644 --- a/data-raw/reproduction_of_antivirals.R +++ b/data-raw/reproduction_of_antivirals.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -28,8 +28,10 @@ get_atc_table <- function(atc_group) { # give as input J0XXX, like atc_group = "J05AB" downloaded <- read_html(paste0("https://www.whocc.no/atc_ddd_index/?code=", atc_group, "&showdescription=no")) - table_title <- downloaded %>% html_nodes(paste0('a[href="./?code=', atc_group, '"]')) %>% html_text() - table_content <- downloaded %>% + table_title <- downloaded %>% + html_nodes(paste0('a[href="./?code=', atc_group, '"]')) %>% + html_text() + table_content <- downloaded %>% html_nodes("table") %>% html_table(header = TRUE) %>% # returns list, so make data.frame out of it @@ -37,8 +39,8 @@ get_atc_table <- function(atc_group) { # select right columns select(atc = ATC.code, name = Name, ddd = DDD, unit = U, ddd_type = Adm.R) %>% # fill empty rows - mutate(atc = ifelse(atc == "", lag(atc), atc), name = ifelse(name == "", lag(name), name)) %>% - pivot_wider(names_from = ddd_type, values_from = c(ddd, unit)) %>% + mutate(atc = ifelse(atc == "", lag(atc), atc), name = ifelse(name == "", lag(name), name)) %>% + pivot_wider(names_from = ddd_type, values_from = c(ddd, unit)) %>% mutate(atc_group = table_title) if (!"ddd_O" %in% colnames(table_content)) { table_content <- table_content %>% mutate(ddd_O = NA_real_, unit_O = NA_character_) @@ -46,9 +48,10 @@ get_atc_table <- function(atc_group) { if (!"ddd_P" %in% colnames(table_content)) { table_content <- table_content %>% mutate(ddd_P = NA_real_, unit_P = NA_character_) } - table_content %>% select(atc, name, atc_group, - oral_ddd = ddd_O, oral_units = unit_O, - iv_ddd = ddd_P, iv_units = unit_P) + table_content %>% select(atc, name, atc_group, + oral_ddd = ddd_O, oral_units = unit_O, + iv_ddd = ddd_P, iv_units = unit_P + ) } # these are the relevant groups for input: https://www.whocc.no/atc_ddd_index/?code=J05A (J05 only contains J05A) @@ -62,32 +65,38 @@ for (i in 2:length(atc_groups)) { } # arrange on name, untibble it -antivirals <- antivirals %>% arrange(name) %>% as.data.frame(stringsAsFactors = FALSE) +antivirals <- antivirals %>% + arrange(name) %>% + as.data.frame(stringsAsFactors = FALSE) # add PubChem Compound ID (cid) and their trade names - functions are in file to create `antibiotics` data set CIDs <- get_CID(antivirals$name) # these could not be found: -antivirals[is.na(CIDs),] %>% View() -# get brand names from PubChem +antivirals[is.na(CIDs), ] %>% View() +# get brand names from PubChem synonyms <- get_synonyms(CIDs) -synonyms <- lapply(synonyms, - function(x) { - if (length(x) == 0 | all(is.na(x))) { - "" - } else { - x - }}) +synonyms <- lapply( + synonyms, + function(x) { + if (length(x) == 0 | all(is.na(x))) { + "" + } else { + x + } + } +) antivirals <- antivirals %>% transmute(atc, - cid = CIDs, - name, - atc_group, - synonyms = unname(synonyms), - oral_ddd, - oral_units, - iv_ddd, - iv_units) + cid = CIDs, + name, + atc_group, + synonyms = unname(synonyms), + oral_ddd, + oral_units, + iv_ddd, + iv_units + ) # save it usethis::use_data(antivirals, overwrite = TRUE) diff --git a/data-raw/reproduction_of_dosage.R b/data-raw/reproduction_of_dosage.R index 54d63575f..0073e6a2a 100644 --- a/data-raw/reproduction_of_dosage.R +++ b/data-raw/reproduction_of_dosage.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -32,37 +32,45 @@ library(cleaner) # download the PDF file, open in Acrobat Pro and export as Excel workbook breakpoints_version <- 11 -dosage_source <- read_excel("data-raw/Dosages_v_11.0_Breakpoint_Tables.xlsx", skip = 5, na = "None") %>% - format_names(snake_case = TRUE, penicillins = "drug") %>% - filter(!tolower(standard_dosage) %in% c("standard dosage_source", "under review")) %>% - filter(!is.na(standard_dosage)) %>% +dosage_source <- read_excel("data-raw/Dosages_v_11.0_Breakpoint_Tables.xlsx", skip = 5, na = "None") %>% + format_names(snake_case = TRUE, penicillins = "drug") %>% + filter(!tolower(standard_dosage) %in% c("standard dosage_source", "under review")) %>% + filter(!is.na(standard_dosage)) %>% # keep only one drug in the table - arrange(desc(drug)) %>% - mutate(drug = gsub("(.*) ([(]|iv|oral).*", "\\1", drug)) %>% - #distinct(drug, .keep_all = TRUE) %>% - arrange(drug) %>% - mutate(ab = as.ab(drug), - ab_name = ab_name(ab, language = NULL)) + arrange(desc(drug)) %>% + mutate(drug = gsub("(.*) ([(]|iv|oral).*", "\\1", drug)) %>% + # distinct(drug, .keep_all = TRUE) %>% + arrange(drug) %>% + mutate( + ab = as.ab(drug), + ab_name = ab_name(ab, language = NULL) + ) dosage_source <- bind_rows( # oral - dosage_source %>% + dosage_source %>% filter(standard_dosage %like% " oral") %>% - mutate(standard_dosage = gsub("oral.*", "oral", standard_dosage), - high_dosage = if_else(high_dosage %like% "oral", - gsub("oral.*", "oral", high_dosage), - NA_character_)), + mutate( + standard_dosage = gsub("oral.*", "oral", standard_dosage), + high_dosage = if_else(high_dosage %like% "oral", + gsub("oral.*", "oral", high_dosage), + NA_character_ + ) + ), # iv - dosage_source %>% + dosage_source %>% filter(standard_dosage %like% " iv") %>% - mutate(standard_dosage = gsub(".* or ", "", standard_dosage), - high_dosage = if_else(high_dosage %like% "( or | iv)", - gsub(".* or ", "", high_dosage), - NA_character_)), + mutate( + standard_dosage = gsub(".* or ", "", standard_dosage), + high_dosage = if_else(high_dosage %like% "( or | iv)", + gsub(".* or ", "", high_dosage), + NA_character_ + ) + ), # im - dosage_source %>% + dosage_source %>% filter(standard_dosage %like% " im") -) %>% +) %>% arrange(drug) @@ -71,34 +79,36 @@ get_dosage_lst <- function(col_data) { # remove new lines gsub(" ?(\n|\t)+ ?", " ", .) %>% # keep only the first suggestion, replace all after 'or' and more informative texts - gsub("(.*?) (or|with|loading|depending|over|by) .*", "\\1", .) %>% + gsub("(.*?) (or|with|loading|depending|over|by) .*", "\\1", .) %>% # remove (1 MU) - gsub(" [(][0-9] [A-Z]+[)]", "", .) %>% + gsub(" [(][0-9] [A-Z]+[)]", "", .) %>% # remove parentheses - gsub("[)(]", "", .) %>% + gsub("[)(]", "", .) %>% # remove drug names - gsub(" [a-z]{5,99}( |$)", " ", .) %>% - gsub(" [a-z]{5,99}( |$)", " ", .) %>% - gsub(" (acid|dose)", "", .)# %>% - # keep lowest value only (25-30 mg -> 25 mg) - # gsub("[-].*? ", " ", .) - - dosage_lst <- lapply(strsplit(standard, " x "), - function(x) { - dose <- x[1] - if (dose %like% "under") { - dose <- NA_character_ - } - admin <- x[2] - - list( - dose = trimws(dose), - dose_times = gsub("^([0-9.]+).*", "\\1", admin), - administration = clean_character(admin), - notes = "", - original_txt = "" - ) - }) + gsub(" [a-z]{5,99}( |$)", " ", .) %>% + gsub(" [a-z]{5,99}( |$)", " ", .) %>% + gsub(" (acid|dose)", "", .) # %>% + # keep lowest value only (25-30 mg -> 25 mg) + # gsub("[-].*? ", " ", .) + + dosage_lst <- lapply( + strsplit(standard, " x "), + function(x) { + dose <- x[1] + if (dose %like% "under") { + dose <- NA_character_ + } + admin <- x[2] + + list( + dose = trimws(dose), + dose_times = gsub("^([0-9.]+).*", "\\1", admin), + administration = clean_character(admin), + notes = "", + original_txt = "" + ) + } + ) for (i in seq_len(length(col_data))) { dosage_lst[[i]]$original_txt <- gsub("\n", " ", col_data[i]) if (col_data[i] %like% " (or|with|loading|depending|over) ") { @@ -147,12 +157,15 @@ dosage <- bind_rows( notes = sapply(uti, function(x) x$notes), original_txt = sapply(uti, function(x) x$original_txt), stringsAsFactors = FALSE - )) %>% - mutate(eucast_version = breakpoints_version, - dose_times = as.integer(dose_times), - administration = gsub("([a-z]+) .*", "\\1", administration)) %>% - arrange(name, administration, type) %>% - filter(!is.na(dose), dose != ".") %>% + ) +) %>% + mutate( + eucast_version = breakpoints_version, + dose_times = as.integer(dose_times), + administration = gsub("([a-z]+) .*", "\\1", administration) + ) %>% + arrange(name, administration, type) %>% + filter(!is.na(dose), dose != ".") %>% as.data.frame(stringsAsFactors = FALSE) rownames(dosage) <- NULL diff --git a/data-raw/reproduction_of_example_isolates_unclean.R b/data-raw/reproduction_of_example_isolates_unclean.R index fa24456c2..762141930 100644 --- a/data-raw/reproduction_of_example_isolates_unclean.R +++ b/data-raw/reproduction_of_example_isolates_unclean.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -25,74 +25,118 @@ patients <- unlist(lapply(LETTERS, paste0, 1:10)) -patients_table <- data.frame(patient_id = patients, - gender = c(rep("M", 135), - rep("F", 125))) +patients_table <- data.frame( + patient_id = patients, + gender = c( + rep("M", 135), + rep("F", 125) + ) +) dates <- seq(as.Date("2011-01-01"), as.Date("2020-01-01"), by = "day") -bacteria_a <- c("E. coli", "S. aureus", - "S. pneumoniae", "K. pneumoniae") +bacteria_a <- c( + "E. coli", "S. aureus", + "S. pneumoniae", "K. pneumoniae" +) bacteria_b <- c("esccol", "staaur", "strpne", "klepne") -bacteria_c <- c("Escherichia coli", "Staphylococcus aureus", - "Streptococcus pneumoniae", "Klebsiella pneumoniae") +bacteria_c <- c( + "Escherichia coli", "Staphylococcus aureus", + "Streptococcus pneumoniae", "Klebsiella pneumoniae" +) ab_interpretations <- c("S", "I", "R") -ab_interpretations_messy = c("R", "< 0.5 S", "I") +ab_interpretations_messy <- c("R", "< 0.5 S", "I") sample_size <- 1000 -data_a <- data.frame(date = sample(dates, size = sample_size, replace = TRUE), - hospital = "A", - bacteria = sample(bacteria_a, size = sample_size, replace = TRUE, - prob = c(0.50, 0.25, 0.15, 0.10)), - AMX = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.60, 0.05, 0.35)), - AMC = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.75, 0.10, 0.15)), - CIP = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.80, 0.00, 0.20)), - GEN = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.92, 0.00, 0.08))) +data_a <- data.frame( + date = sample(dates, size = sample_size, replace = TRUE), + hospital = "A", + bacteria = sample(bacteria_a, + size = sample_size, replace = TRUE, + prob = c(0.50, 0.25, 0.15, 0.10) + ), + AMX = sample(ab_interpretations, + size = sample_size, replace = TRUE, + prob = c(0.60, 0.05, 0.35) + ), + AMC = sample(ab_interpretations, + size = sample_size, replace = TRUE, + prob = c(0.75, 0.10, 0.15) + ), + CIP = sample(ab_interpretations, + size = sample_size, replace = TRUE, + prob = c(0.80, 0.00, 0.20) + ), + GEN = sample(ab_interpretations, + size = sample_size, replace = TRUE, + prob = c(0.92, 0.00, 0.08) + ) +) -data_b <- data.frame(date = sample(dates, size = sample_size, replace = TRUE), - hospital = "B", - bacteria = sample(bacteria_b, size = sample_size, replace = TRUE, - prob = c(0.50, 0.25, 0.15, 0.10)), - AMX = sample(ab_interpretations_messy, size = sample_size, replace = TRUE, - prob = c(0.60, 0.05, 0.35)), - AMC = sample(ab_interpretations_messy, size = sample_size, replace = TRUE, - prob = c(0.75, 0.10, 0.15)), - CIP = sample(ab_interpretations_messy, size = sample_size, replace = TRUE, - prob = c(0.80, 0.00, 0.20)), - GEN = sample(ab_interpretations_messy, size = sample_size, replace = TRUE, - prob = c(0.92, 0.00, 0.08))) +data_b <- data.frame( + date = sample(dates, size = sample_size, replace = TRUE), + hospital = "B", + bacteria = sample(bacteria_b, + size = sample_size, replace = TRUE, + prob = c(0.50, 0.25, 0.15, 0.10) + ), + AMX = sample(ab_interpretations_messy, + size = sample_size, replace = TRUE, + prob = c(0.60, 0.05, 0.35) + ), + AMC = sample(ab_interpretations_messy, + size = sample_size, replace = TRUE, + prob = c(0.75, 0.10, 0.15) + ), + CIP = sample(ab_interpretations_messy, + size = sample_size, replace = TRUE, + prob = c(0.80, 0.00, 0.20) + ), + GEN = sample(ab_interpretations_messy, + size = sample_size, replace = TRUE, + prob = c(0.92, 0.00, 0.08) + ) +) -data_c <- data.frame(date = sample(dates, size = sample_size, replace = TRUE), - hospital = "C", - bacteria = sample(bacteria_c, size = sample_size, replace = TRUE, - prob = c(0.50, 0.25, 0.15, 0.10)), - AMX = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.60, 0.05, 0.35)), - AMC = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.75, 0.10, 0.15)), - CIP = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.80, 0.00, 0.20)), - GEN = sample(ab_interpretations, size = sample_size, replace = TRUE, - prob = c(0.92, 0.00, 0.08))) +data_c <- data.frame( + date = sample(dates, size = sample_size, replace = TRUE), + hospital = "C", + bacteria = sample(bacteria_c, + size = sample_size, replace = TRUE, + prob = c(0.50, 0.25, 0.15, 0.10) + ), + AMX = sample(ab_interpretations, + size = sample_size, replace = TRUE, + prob = c(0.60, 0.05, 0.35) + ), + AMC = sample(ab_interpretations, + size = sample_size, replace = TRUE, + prob = c(0.75, 0.10, 0.15) + ), + CIP = sample(ab_interpretations, + size = sample_size, replace = TRUE, + prob = c(0.80, 0.00, 0.20) + ), + GEN = sample(ab_interpretations, + size = sample_size, replace = TRUE, + prob = c(0.92, 0.00, 0.08) + ) +) -example_isolates_unclean <- data_a %>% +example_isolates_unclean <- data_a %>% bind_rows(data_b, data_c) example_isolates_unclean$patient_id <- sample(patients, size = nrow(example_isolates_unclean), replace = TRUE) -example_isolates_unclean <- example_isolates_unclean %>% - select(patient_id, hospital, date, bacteria, everything()) %>% +example_isolates_unclean <- example_isolates_unclean %>% + select(patient_id, hospital, date, bacteria, everything()) %>% dataset_UTF8_to_ASCII() usethis::use_data(example_isolates_unclean, overwrite = TRUE, internal = FALSE, version = 2, compress = "xz") diff --git a/data-raw/reproduction_of_intrinsic_resistant.R b/data-raw/reproduction_of_intrinsic_resistant.R index 5f964a853..f97c7ac27 100644 --- a/data-raw/reproduction_of_intrinsic_resistant.R +++ b/data-raw/reproduction_of_intrinsic_resistant.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -32,19 +32,22 @@ for (i in seq_len(nrow(antibiotics))) { } int_resis <- eucast_rules(int_resis, - eucast_rules_df = subset(AMR:::EUCAST_RULES_DF, - is.na(have_these_values) & reference.version == 3.3), - info = FALSE) + eucast_rules_df = subset( + AMR:::EUCAST_RULES_DF, + is.na(have_these_values) & reference.version == 3.3 + ), + info = FALSE +) -int_resis2 <- int_resis[, sapply(int_resis, function(x) any(!is.rsi(x) | x == "R")), drop = FALSE] %>% +int_resis2 <- int_resis[, sapply(int_resis, function(x) any(!is.rsi(x) | x == "R")), drop = FALSE] %>% tidyr::pivot_longer(-mo) %>% - filter(value == "R") %>% + filter(value == "R") %>% select(mo, ab = name) # remove lab drugs untreatable <- antibiotics[which(antibiotics$name %like% "-high|EDTA|polysorbate|macromethod|screening|/nacubactam"), "ab", drop = TRUE] -int_resis2 <- int_resis2 %>% - filter(!ab %in% untreatable) %>% +int_resis2 <- int_resis2 %>% + filter(!ab %in% untreatable) %>% arrange(mo, ab) intrinsic_resistant <- as.data.frame(int_resis2, stringsAsFactors = FALSE) diff --git a/data-raw/reproduction_of_microorganisms.R b/data-raw/reproduction_of_microorganisms.R index 6b21f2648..5d514f307 100644 --- a/data-raw/reproduction_of_microorganisms.R +++ b/data-raw/reproduction_of_microorganisms.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -28,7 +28,7 @@ # Data retrieved from the Catalogue of Life (CoL): # https://download.catalogueoflife.org/col/monthly/ # (download latest dwca, such as https://download.catalogueoflife.org/col/monthly/2022-01-14_dwca.zip) -# Data retrieved from the Global Biodiversity Information Facility (GBIF): +# Data retrieved from the Global Biodiversity Information Facility (GBIF): # https://doi.org/10.15468/rffz4x # # And from the List of Prokaryotic names with Standing in Nomenclature (LPSN) @@ -44,9 +44,9 @@ data_col_raw <- data.table::fread("data-raw/taxon.tsv", quote = "") data_gbif <- data.table::fread("data-raw/taxa.txt", quote = "") # merge the two -data_col <- data_gbif %>% - rename(referenceID = identifier) %>% - bind_rows(data_col_raw) %>% +data_col <- data_gbif %>% + rename(referenceID = identifier) %>% + bind_rows(data_col_raw) %>% distinct(scientificName, kingdom, genus, specificEpithet, infraspecificEpithet, .keep_all = TRUE) rm(data_col_raw) rm(data_gbif) @@ -66,8 +66,10 @@ dsmz_first <- GET_df("https://bacdive.dsmz.de/api/pnu/species?page=1&format=json data_dsmz <- dsmz_first$results # this next process will take appr. `dsmz_first$count / 100 * 5 / 60` minutes for (i in 2:round((dsmz_first$count / 100) + 0.5)) { - data_dsmz <<- rbind(data_dsmz, - GET_df(paste0("https://bacdive.dsmz.de/api/pnu/species/?page=", i, "&format=json"))$results) + data_dsmz <<- rbind( + data_dsmz, + GET_df(paste0("https://bacdive.dsmz.de/api/pnu/species/?page=", i, "&format=json"))$results + ) cat(i, "-", AMR:::percentage(i / round((dsmz_first$count / 100) + 0.5)), "\n") } rm(dsmz_first) @@ -89,70 +91,83 @@ data_col %>% cleaner::freq(kingdom) data_col.bak <- data_col data_col_old <- data_col %>% # filter: has new accepted name - filter(!is.na(acceptedNameUsageID)) %>% + filter(!is.na(acceptedNameUsageID)) %>% as_tibble() %>% - transmute(fullname = trimws(stringr::str_replace(scientificName, - pattern = stringr::fixed(scientificNameAuthorship), - replacement = "")), - fullname_new = trimws(paste(ifelse(is.na(genus), "", genus), - ifelse(is.na(specificEpithet), "", specificEpithet), - ifelse(is.na(infraspecificEpithet), "", infraspecificEpithet))), - ref = scientificNameAuthorship, - prevalence = NA_integer_) + transmute( + fullname = trimws(stringr::str_replace(scientificName, + pattern = stringr::fixed(scientificNameAuthorship), + replacement = "" + )), + fullname_new = trimws(paste( + ifelse(is.na(genus), "", genus), + ifelse(is.na(specificEpithet), "", specificEpithet), + ifelse(is.na(infraspecificEpithet), "", infraspecificEpithet) + )), + ref = scientificNameAuthorship, + prevalence = NA_integer_ + ) data_col <- data_col %>% # filter: has no new accepted name - filter(is.na(acceptedNameUsageID)) %>% + filter(is.na(acceptedNameUsageID)) %>% as_tibble() %>% - transmute(fullname = "", - kingdom, - phylum, - class, - order, - family, - genus, - species = specificEpithet, - subspecies = infraspecificEpithet, - rank = taxonRank, - ref = scientificNameAuthorship, - species_id = referenceID, - source = "CoL") + transmute( + fullname = "", + kingdom, + phylum, + class, + order, + family, + genus, + species = specificEpithet, + subspecies = infraspecificEpithet, + rank = taxonRank, + ref = scientificNameAuthorship, + species_id = referenceID, + source = "CoL" + ) # clean data_dsmz data_dsmz.bak <- data_dsmz data_dsmz_old <- data_dsmz %>% # filter: correct name is not NULL - filter(!sapply(correct_name, is.null)) %>% + filter(!sapply(correct_name, is.null)) %>% as_tibble() %>% - transmute(fullname = trimws(paste(ifelse(is.na(genus), "", genus), - ifelse(is.na(species_epithet), "", species_epithet), - ifelse(is.na(subspecies_epithet), "", subspecies_epithet))), - fullname_new = sapply(correct_name, function(x) x[2L]), - ref = authors, - prevalence = NA_integer_) + transmute( + fullname = trimws(paste( + ifelse(is.na(genus), "", genus), + ifelse(is.na(species_epithet), "", species_epithet), + ifelse(is.na(subspecies_epithet), "", subspecies_epithet) + )), + fullname_new = sapply(correct_name, function(x) x[2L]), + ref = authors, + prevalence = NA_integer_ + ) data_dsmz <- data_dsmz %>% # filter: correct name is NULL - filter(sapply(correct_name, is.null)) %>% + filter(sapply(correct_name, is.null)) %>% as_tibble() %>% - transmute(fullname = "", - kingdom = regio, - phylum, - class = classis, - # order = "", # does not contain order, will add later based on CoL - family = familia, - genus = ifelse(is.na(genus), "", genus), - species = ifelse(is.na(species_epithet), "", species_epithet), - subspecies = ifelse(is.na(subspecies_epithet), "", subspecies_epithet), - rank = ifelse(species == "", "genus", "species"), - ref = authors, - species_id = as.character(pnu_no), - source = "DSMZ") + transmute( + fullname = "", + kingdom = regio, + phylum, + class = classis, + # order = "", # does not contain order, will add later based on CoL + family = familia, + genus = ifelse(is.na(genus), "", genus), + species = ifelse(is.na(species_epithet), "", species_epithet), + subspecies = ifelse(is.na(subspecies_epithet), "", subspecies_epithet), + rank = ifelse(species == "", "genus", "species"), + ref = authors, + species_id = as.character(pnu_no), + source = "DSMZ" + ) # DSMZ only contains genus/(sub)species, try to find taxonomic properties based on genus and data_col ref_taxonomy <- data_col %>% - filter(family %in% data_dsmz$family & family != "") %>% - arrange(kingdom) %>% - distinct(family, .keep_all = TRUE) %>% + filter(family %in% data_dsmz$family & family != "") %>% + arrange(kingdom) %>% + distinct(family, .keep_all = TRUE) %>% select(family, order) data_dsmz <- data_dsmz %>% @@ -175,41 +190,43 @@ MOs <- data_total %>% !kingdom %in% c("Animalia", "Plantae", "Viruses") # and not all fungi: Aspergillus, Candida, Trichphyton and Pneumocystis are the most important, # so only keep these orders from the fungi: - & !(kingdom == "Fungi" - & !order %in% c("Eurotiales", "Microascales", "Mucorales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales", "Onygenales", "Pneumocystales")) + & !(kingdom == "Fungi" & + !order %in% c("Eurotiales", "Microascales", "Mucorales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales", "Onygenales", "Pneumocystales")) ) # or the genus has to be one of the genera we found in our hospitals last decades (Northern Netherlands, 2002-2018) | genus %in% MO_PREVALENT_GENERA ) %>% # really no Plantae (e.g. Dracunculus exist both as worm and as plant) - filter(kingdom != "Plantae") %>% + filter(kingdom != "Plantae") %>% filter(!rank %in% c("kingdom", "phylum", "class", "order", "family", "genus")) # include all ranks other than species for the included species -MOs <- MOs %>% bind_rows(data_total %>% - filter((kingdom %in% MOs$kingdom & rank == "kingdom") - | (phylum %in% MOs$phylum & rank == "phylum") - | (class %in% MOs$class & rank == "class") - | (order %in% MOs$order & rank == "order") - | (family %in% MOs$family & rank == "family") - | (genus %in% MOs$genus & rank == "genus"))) +MOs <- MOs %>% bind_rows(data_total %>% + filter((kingdom %in% MOs$kingdom & rank == "kingdom") | + (phylum %in% MOs$phylum & rank == "phylum") | + (class %in% MOs$class & rank == "class") | + (order %in% MOs$order & rank == "order") | + (family %in% MOs$family & rank == "family") | + (genus %in% MOs$genus & rank == "genus"))) get_author_year <- function(ref) { # Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011' - + authors2 <- iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT") # remove leading and trailing brackets authors2 <- gsub("^[(](.*)[)]$", "\\1", authors2) # only take part after brackets if there's a name authors2 <- ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2), - gsub(".*[)] (.*)", "\\1", authors2), - authors2) + gsub(".*[)] (.*)", "\\1", authors2), + authors2 + ) # get year from last 4 digits - lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2)) + lastyear <- as.integer(gsub(".*([0-9]{4})$", "\\1", authors2)) # can never be later than now - lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")), - NA, - lastyear) + lastyear <- ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")), + NA, + lastyear + ) # get authors without last year authors <- gsub("(.*)[0-9]{4}$", "\\1", authors2) # remove nonsense characters from names @@ -227,17 +244,18 @@ get_author_year <- function(ref) { authors <- gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE) # combine author and year if year is available ref <- ifelse(!is.na(lastyear), - paste0(authors, ", ", lastyear), - authors) + paste0(authors, ", ", lastyear), + authors + ) # fix beginning and ending ref <- gsub(", $", "", ref) ref <- gsub("^, ", "", ref) ref <- gsub("^(emend|et al.,?)", "", ref) ref <- trimws(ref) - + # a lot start with a lowercase character - fix that ref[!grepl("^d[A-Z]", ref)] <- gsub("^([a-z])", "\\U\\1", ref[!grepl("^d[A-Z]", ref)], perl = TRUE) - # specific one for the French that are named dOrbigny + # specific one for the French that are named dOrbigny ref[grepl("^d[A-Z]", ref)] <- gsub("^d", "d'", ref[grepl("^d[A-Z]", ref)]) ref <- gsub(" +", " ", ref) ref @@ -247,116 +265,134 @@ MOs <- MOs %>% mutate(ref = get_author_year(ref)) # Remove non-ASCII characters (these are not allowed by CRAN) MOs <- MOs %>% - lapply(iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>% - as_tibble(stringsAsFactors = FALSE) %>% + lapply(iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>% + as_tibble(stringsAsFactors = FALSE) %>% # remove invalid characters - mutate_all(~gsub("[\"'`]+", "", .)) + mutate_all(~ gsub("[\"'`]+", "", .)) # set new fullnames MOs <- MOs %>% - mutate(fullname = trimws(case_when(rank == "family" ~ family, - rank == "order" ~ order, - rank == "class" ~ class, - rank == "phylum" ~ phylum, - rank == "kingdom" ~ kingdom, - TRUE ~ paste(genus, species, subspecies))), - fullname = gsub(" (var|f|subsp)[.]", "", fullname)) %>% + mutate( + fullname = trimws(case_when( + rank == "family" ~ family, + rank == "order" ~ order, + rank == "class" ~ class, + rank == "phylum" ~ phylum, + rank == "kingdom" ~ kingdom, + TRUE ~ paste(genus, species, subspecies) + )), + fullname = gsub(" (var|f|subsp)[.]", "", fullname) + ) %>% # remove text if it contains 'Not assigned', etc. - mutate_all(function(x) ifelse(x %like% "(not assigned|homonym|mistake)", NA, x)) %>% + mutate_all(function(x) ifelse(x %like% "(not assigned|homonym|mistake)", NA, x)) %>% # clean taxonomy - mutate(kingdom = ifelse(is.na(kingdom) | trimws(kingdom) == "", "(unknown kingdom)", trimws(kingdom)), - phylum = ifelse(is.na(phylum) | trimws(phylum) == "", "(unknown phylum)", trimws(phylum)), - class = ifelse(is.na(class) | trimws(class) == "", "(unknown class)", trimws(class)), - order = ifelse(is.na(order) | trimws(order) == "", "(unknown order)", trimws(order)), - family = ifelse(is.na(family) | trimws(family) == "", "(unknown family)", trimws(family))) + mutate( + kingdom = ifelse(is.na(kingdom) | trimws(kingdom) == "", "(unknown kingdom)", trimws(kingdom)), + phylum = ifelse(is.na(phylum) | trimws(phylum) == "", "(unknown phylum)", trimws(phylum)), + class = ifelse(is.na(class) | trimws(class) == "", "(unknown class)", trimws(class)), + order = ifelse(is.na(order) | trimws(order) == "", "(unknown order)", trimws(order)), + family = ifelse(is.na(family) | trimws(family) == "", "(unknown family)", trimws(family)) + ) # Split old taxonomic names -MOs.old <- data_col_old %>% - filter(!gsub(" (var|f|subsp)[.]", "", fullname_new) %in% data_dsmz_old$fullname) %>% +MOs.old <- data_col_old %>% + filter(!gsub(" (var|f|subsp)[.]", "", fullname_new) %in% data_dsmz_old$fullname) %>% bind_rows(data_dsmz_old) %>% - mutate(fullname_new = gsub(" (var|f|subsp)[.]", "", fullname_new), - fullname = gsub(" (var|f|subsp)[.]", "", fullname)) %>% + mutate( + fullname_new = gsub(" (var|f|subsp)[.]", "", fullname_new), + fullname = gsub(" (var|f|subsp)[.]", "", fullname) + ) %>% # for cases like Chlamydia pneumoniae -> Chlamydophila pneumoniae -> Chlamydia pneumoniae: filter(!fullname %in% fullname_new & - fullname_new %in% MOs$fullname & - !is.na(fullname) & - fullname != fullname_new) %>% + fullname_new %in% MOs$fullname & + !is.na(fullname) & + fullname != fullname_new) %>% distinct(fullname, .keep_all = TRUE) %>% - arrange(fullname) %>% + arrange(fullname) %>% mutate(ref = get_author_year(ref)) MOs <- MOs %>% # remove entries that are old and in MOs.old - filter(!fullname %in% MOs.old$fullname) %>% + filter(!fullname %in% MOs.old$fullname) %>% # mark up transmute(fullname, - kingdom, - phylum, - class, - order, - family, - genus, - species, - subspecies, - rank, - ref, - species_id = gsub("[^a-zA-Z0-9].*", "", species_id), - source) %>% + kingdom, + phylum, + class, + order, + family, + genus, + species, + subspecies, + rank, + ref, + species_id = gsub("[^a-zA-Z0-9].*", "", species_id), + source + ) %>% # prefer known taxonomy over unknown taxonomy, then DSMZ over CoL (= desc) arrange(desc(kingdom, genus, species, source)) %>% distinct(kingdom, fullname, .keep_all = TRUE) # remove all genera that have no species - they are irrelevant for microbiology and almost all from the kingdom of Animalia to_remove <- MOs %>% - filter(!kingdom %in% c("Bacteria", "Protozoa")) %>% + filter(!kingdom %in% c("Bacteria", "Protozoa")) %>% group_by(kingdom, genus) %>% count() %>% filter(n == 1) %>% ungroup() %>% - mutate(kingdom_genus = paste(kingdom, genus)) %>% + mutate(kingdom_genus = paste(kingdom, genus)) %>% pull(kingdom_genus) MOs <- MOs %>% filter(!(paste(kingdom, genus) %in% to_remove)) rm(to_remove) # add all mssing genera, families and orders -MOs <- MOs %>% - bind_rows(MOs %>% - arrange(genus, species) %>% - distinct(genus, .keep_all = TRUE) %>% - filter(rank == "species") %>% - mutate(fullname = genus, - species = "", - rank = "genus", - species_id = "", - ref = NA_character_)) %>% - bind_rows(MOs %>% - arrange(family, genus) %>% - distinct(family, .keep_all = TRUE) %>% - filter(rank == "genus") %>% - mutate(fullname = family, - genus = "", - rank = "family", - species_id = "", - ref = NA_character_)) %>% - bind_rows(MOs %>% - arrange(order, family) %>% - distinct(family, .keep_all = TRUE) %>% - filter(rank == "family") %>% - mutate(fullname = order, - family = "", - rank = "order", - species_id = "", - ref = NA_character_)) +MOs <- MOs %>% + bind_rows(MOs %>% + arrange(genus, species) %>% + distinct(genus, .keep_all = TRUE) %>% + filter(rank == "species") %>% + mutate( + fullname = genus, + species = "", + rank = "genus", + species_id = "", + ref = NA_character_ + )) %>% + bind_rows(MOs %>% + arrange(family, genus) %>% + distinct(family, .keep_all = TRUE) %>% + filter(rank == "genus") %>% + mutate( + fullname = family, + genus = "", + rank = "family", + species_id = "", + ref = NA_character_ + )) %>% + bind_rows(MOs %>% + arrange(order, family) %>% + distinct(family, .keep_all = TRUE) %>% + filter(rank == "family") %>% + mutate( + fullname = order, + family = "", + rank = "order", + species_id = "", + ref = NA_character_ + )) # remove the empty ones MOs <- MOs %>% - mutate(fullname = gsub(",.*", "", fullname)) %>% - distinct(kingdom, fullname, .keep_all = TRUE) %>% + mutate(fullname = gsub(",.*", "", fullname)) %>% + distinct(kingdom, fullname, .keep_all = TRUE) %>% filter(fullname != "") # what characters are in the fullnames? table(sort(unlist(strsplit(x = paste(MOs$fullname, collapse = ""), split = "")))) -MOs %>% filter(fullname %unlike% "^[a-z ]+$") %>% arrange(fullname) %>% View() +MOs %>% + filter(fullname %unlike% "^[a-z ]+$") %>% + arrange(fullname) %>% + View() table(MOs$kingdom, MOs$rank) table(AMR::microorganisms$kingdom, AMR::microorganisms$rank) @@ -364,16 +400,18 @@ table(AMR::microorganisms$kingdom, AMR::microorganisms$rank) # set prevalence per species MOs <- MOs %>% mutate(prevalence = case_when( - class == "Gammaproteobacteria" - | genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus") + class == "Gammaproteobacteria" | + genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus") ~ 1, - kingdom %in% c("Archaea", "Bacteria", "Chromista", "Fungi") - & (phylum %in% c("Proteobacteria", - "Firmicutes", - "Actinobacteria", - "Sarcomastigophora") - | genus %in% MO_PREVALENT_GENERA - | rank %in% c("kingdom", "phylum", "class", "order", "family")) + kingdom %in% c("Archaea", "Bacteria", "Chromista", "Fungi") & + (phylum %in% c( + "Proteobacteria", + "Firmicutes", + "Actinobacteria", + "Sarcomastigophora" + ) | + genus %in% MO_PREVALENT_GENERA | + rank %in% c("kingdom", "phylum", "class", "order", "family")) ~ 2, TRUE ~ 3 )) @@ -381,77 +419,104 @@ MOs <- MOs %>% # Add abbreviations so we can easily know which ones are which ones. # These will become valid and unique microbial IDs for the AMR package. MOs <- MOs %>% - arrange(prevalence, genus, species, subspecies) %>% + arrange(prevalence, genus, species, subspecies) %>% group_by(kingdom) %>% mutate(abbr_other = case_when( - rank == "family" ~ paste0("[FAM]_", - abbreviate(family, - minlength = 8, - use.classes = TRUE, - method = "both.sides", - strict = FALSE)), - rank == "order" ~ paste0("[ORD]_", - abbreviate(order, - minlength = 8, - use.classes = TRUE, - method = "both.sides", - strict = FALSE)), - rank == "class" ~ paste0("[CLS]_", - abbreviate(class, - minlength = 8, - use.classes = TRUE, - method = "both.sides", - strict = FALSE)), - rank == "phylum" ~ paste0("[PHL]_", - abbreviate(phylum, - minlength = 8, - use.classes = TRUE, - method = "both.sides", - strict = FALSE)), + rank == "family" ~ paste0( + "[FAM]_", + abbreviate(family, + minlength = 8, + use.classes = TRUE, + method = "both.sides", + strict = FALSE + ) + ), + rank == "order" ~ paste0( + "[ORD]_", + abbreviate(order, + minlength = 8, + use.classes = TRUE, + method = "both.sides", + strict = FALSE + ) + ), + rank == "class" ~ paste0( + "[CLS]_", + abbreviate(class, + minlength = 8, + use.classes = TRUE, + method = "both.sides", + strict = FALSE + ) + ), + rank == "phylum" ~ paste0( + "[PHL]_", + abbreviate(phylum, + minlength = 8, + use.classes = TRUE, + method = "both.sides", + strict = FALSE + ) + ), rank == "kingdom" ~ paste0("[KNG]_", kingdom), TRUE ~ NA_character_ )) %>% # abbreviations may be same for genera between kingdoms, # because each abbreviation starts with the the first character(s) of the kingdom mutate(abbr_genus = abbreviate(gsub("^ae", "\u00E6\u00E6", genus, ignore.case = TRUE), # keep a starting Latin ae - minlength = 5, - use.classes = TRUE, - method = "both.sides")) %>% + minlength = 5, + use.classes = TRUE, + method = "both.sides" + )) %>% ungroup() %>% group_by(genus) %>% # species abbreviations may be the same between genera # because the genus abbreviation is part of the abbreviation mutate(abbr_species = abbreviate(gsub("^ae", "\u00E6\u00E6", species), - minlength = 4, - use.classes = TRUE, - method = "both.sides")) %>% + minlength = 4, + use.classes = TRUE, + method = "both.sides" + )) %>% ungroup() %>% group_by(genus, species) %>% mutate(abbr_subspecies = abbreviate(gsub("^ae", "\u00E6\u00E6", subspecies), - minlength = 4, - use.classes = TRUE, - method = "both.sides")) %>% + minlength = 4, + use.classes = TRUE, + method = "both.sides" + )) %>% ungroup() %>% # remove trailing underscores - mutate(mo = gsub("_+$", "", - toupper(paste(ifelse(kingdom %in% c("Animalia", "Plantae"), - substr(kingdom, 1, 2), - substr(kingdom, 1, 1)), - ifelse(is.na(abbr_other), - paste(abbr_genus, - abbr_species, - abbr_subspecies, - sep = "_"), - abbr_other), - sep = "_"))), - mo = gsub("(\u00C6|\u00E6)+", "AE", mo)) %>% - mutate(mo = ifelse(duplicated(.$mo), - # these one or two must be unique too - paste0(mo, "1"), - mo), - fullname = ifelse(fullname == "", - trimws(paste(genus, species, subspecies)), - fullname)) %>% + mutate( + mo = gsub( + "_+$", "", + toupper(paste(ifelse(kingdom %in% c("Animalia", "Plantae"), + substr(kingdom, 1, 2), + substr(kingdom, 1, 1) + ), + ifelse(is.na(abbr_other), + paste(abbr_genus, + abbr_species, + abbr_subspecies, + sep = "_" + ), + abbr_other + ), + sep = "_" + )) + ), + mo = gsub("(\u00C6|\u00E6)+", "AE", mo) + ) %>% + mutate( + mo = ifelse(duplicated(.$mo), + # these one or two must be unique too + paste0(mo, "1"), + mo + ), + fullname = ifelse(fullname == "", + trimws(paste(genus, species, subspecies)), + fullname + ) + ) %>% # put `mo` in front, followed by the rest select(mo, everything(), -abbr_other, -abbr_genus, -abbr_species, -abbr_subspecies) @@ -459,350 +524,424 @@ MOs <- MOs %>% MOs <- MOs %>% bind_rows( # Unknowns - data.frame(mo = "UNKNOWN", - fullname = "(unknown name)", - kingdom = "(unknown kingdom)", - phylum = "(unknown phylum)", - class = "(unknown class)", - order = "(unknown order)", - family = "(unknown family)", - genus = "(unknown genus)", - species = "(unknown species)", - subspecies = "(unknown subspecies)", - rank = "(unknown rank)", - ref = NA_character_, - species_id = "", - source = "manually added", - prevalence = 1, - stringsAsFactors = FALSE), - data.frame(mo = "B_GRAMN", - fullname = "(unknown Gram-negatives)", - kingdom = "Bacteria", - phylum = "(unknown phylum)", - class = "(unknown class)", - order = "(unknown order)", - family = "(unknown family)", - genus = "(unknown Gram-negatives)", - species = "(unknown species)", - subspecies = "(unknown subspecies)", - rank = "species", - ref = NA_character_, - species_id = "", - source = "manually added", - prevalence = 1, - stringsAsFactors = FALSE), - data.frame(mo = "B_GRAMP", - fullname = "(unknown Gram-positives)", - kingdom = "Bacteria", - phylum = "(unknown phylum)", - class = "(unknown class)", - order = "(unknown order)", - family = "(unknown family)", - genus = "(unknown Gram-positives)", - species = "(unknown species)", - subspecies = "(unknown subspecies)", - rank = "species", - ref = NA_character_, - species_id = "", - source = "manually added", - prevalence = 1, - stringsAsFactors = FALSE), - data.frame(mo = "F_YEAST", - fullname = "(unknown yeast)", - kingdom = "Fungi", - phylum = "(unknown phylum)", - class = "(unknown class)", - order = "(unknown order)", - family = "(unknown family)", - genus = "(unknown genus)", - species = "(unknown species)", - subspecies = "(unknown subspecies)", - rank = "species", - ref = NA_character_, - species_id = "", - source = "manually added", - prevalence = 2, - stringsAsFactors = FALSE), - data.frame(mo = "F_FUNGUS", - fullname = "(unknown fungus)", - kingdom = "Fungi", - phylum = "(unknown phylum)", - class = "(unknown class)", - order = "(unknown order)", - family = "(unknown family)", - genus = "(unknown genus)", - species = "(unknown species)", - subspecies = "(unknown subspecies)", - rank = "species", - ref = NA_character_, - species_id = "", - source = "manually added", - prevalence = 2, - stringsAsFactors = FALSE), + data.frame( + mo = "UNKNOWN", + fullname = "(unknown name)", + kingdom = "(unknown kingdom)", + phylum = "(unknown phylum)", + class = "(unknown class)", + order = "(unknown order)", + family = "(unknown family)", + genus = "(unknown genus)", + species = "(unknown species)", + subspecies = "(unknown subspecies)", + rank = "(unknown rank)", + ref = NA_character_, + species_id = "", + source = "manually added", + prevalence = 1, + stringsAsFactors = FALSE + ), + data.frame( + mo = "B_GRAMN", + fullname = "(unknown Gram-negatives)", + kingdom = "Bacteria", + phylum = "(unknown phylum)", + class = "(unknown class)", + order = "(unknown order)", + family = "(unknown family)", + genus = "(unknown Gram-negatives)", + species = "(unknown species)", + subspecies = "(unknown subspecies)", + rank = "species", + ref = NA_character_, + species_id = "", + source = "manually added", + prevalence = 1, + stringsAsFactors = FALSE + ), + data.frame( + mo = "B_GRAMP", + fullname = "(unknown Gram-positives)", + kingdom = "Bacteria", + phylum = "(unknown phylum)", + class = "(unknown class)", + order = "(unknown order)", + family = "(unknown family)", + genus = "(unknown Gram-positives)", + species = "(unknown species)", + subspecies = "(unknown subspecies)", + rank = "species", + ref = NA_character_, + species_id = "", + source = "manually added", + prevalence = 1, + stringsAsFactors = FALSE + ), + data.frame( + mo = "F_YEAST", + fullname = "(unknown yeast)", + kingdom = "Fungi", + phylum = "(unknown phylum)", + class = "(unknown class)", + order = "(unknown order)", + family = "(unknown family)", + genus = "(unknown genus)", + species = "(unknown species)", + subspecies = "(unknown subspecies)", + rank = "species", + ref = NA_character_, + species_id = "", + source = "manually added", + prevalence = 2, + stringsAsFactors = FALSE + ), + data.frame( + mo = "F_FUNGUS", + fullname = "(unknown fungus)", + kingdom = "Fungi", + phylum = "(unknown phylum)", + class = "(unknown class)", + order = "(unknown order)", + family = "(unknown family)", + genus = "(unknown genus)", + species = "(unknown species)", + subspecies = "(unknown subspecies)", + rank = "species", + ref = NA_character_, + species_id = "", + source = "manually added", + prevalence = 2, + stringsAsFactors = FALSE + ), # CoNS MOs %>% - filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_CONS", mo), - species = "coagulase-negative", - fullname = "Coagulase-negative Staphylococcus (CoNS)", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Staphylococcus", species == "epidermidis") %>% + .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_CONS", mo), + species = "coagulase-negative", + fullname = "Coagulase-negative Staphylococcus (CoNS)", + ref = NA_character_, + species_id = "", + source = "manually added" + ), # CoPS MOs %>% - filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_COPS", mo), - species = "coagulase-positive", - fullname = "Coagulase-positive Staphylococcus (CoPS)", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Staphylococcus", species == "epidermidis") %>% + .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_COPS", mo), + species = "coagulase-positive", + fullname = "Coagulase-positive Staphylococcus (CoPS)", + ref = NA_character_, + species_id = "", + source = "manually added" + ), # Streptococci groups A, B, C, F, H, K MOs %>% - filter(genus == "Streptococcus", species == "pyogenes") %>% .[1,] %>% + filter(genus == "Streptococcus", species == "pyogenes") %>% + .[1, ] %>% # we can keep all other details, since S. pyogenes is the only member of group A - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPA", mo), - species = "group A" , - fullname = "Streptococcus group A", - source = "manually added"), + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPA", mo), + species = "group A", + fullname = "Streptococcus group A", + source = "manually added" + ), MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% + filter(genus == "Streptococcus", species == "agalactiae") %>% .[1, ] %>% # we can keep all other details, since S. agalactiae is the only member of group B - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPB", mo), - species = "group B" , - fullname = "Streptococcus group B", - source = "manually added"), + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPB", mo), + species = "group B", + fullname = "Streptococcus group B", + source = "manually added" + ), MOs %>% - filter(genus == "Streptococcus", species == "dysgalactiae") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPC", mo), - species = "group C" , - fullname = "Streptococcus group C", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Streptococcus", species == "dysgalactiae") %>% .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPC", mo), + species = "group C", + fullname = "Streptococcus group C", + ref = NA_character_, + species_id = "", + source = "manually added" + ), MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPD", mo), - species = "group D" , - fullname = "Streptococcus group D", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Streptococcus", species == "agalactiae") %>% .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPD", mo), + species = "group D", + fullname = "Streptococcus group D", + ref = NA_character_, + species_id = "", + source = "manually added" + ), MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPF", mo), - species = "group F" , - fullname = "Streptococcus group F", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Streptococcus", species == "agalactiae") %>% .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPF", mo), + species = "group F", + fullname = "Streptococcus group F", + ref = NA_character_, + species_id = "", + source = "manually added" + ), MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPG", mo), - species = "group G" , - fullname = "Streptococcus group G", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Streptococcus", species == "agalactiae") %>% .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPG", mo), + species = "group G", + fullname = "Streptococcus group G", + ref = NA_character_, + species_id = "", + source = "manually added" + ), MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPH", mo), - species = "group H" , - fullname = "Streptococcus group H", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Streptococcus", species == "agalactiae") %>% .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPH", mo), + species = "group H", + fullname = "Streptococcus group H", + ref = NA_character_, + species_id = "", + source = "manually added" + ), MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPK", mo), - species = "group K" , - fullname = "Streptococcus group K", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Streptococcus", species == "agalactiae") %>% .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPK", mo), + species = "group K", + fullname = "Streptococcus group K", + ref = NA_character_, + species_id = "", + source = "manually added" + ), # Beta haemolytic Streptococci MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_HAEM", mo), - species = "beta-haemolytic" , - fullname = "Beta-haemolytic Streptococcus", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Streptococcus", species == "agalactiae") %>% + .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_HAEM", mo), + species = "beta-haemolytic", + fullname = "Beta-haemolytic Streptococcus", + ref = NA_character_, + species_id = "", + source = "manually added" + ), # Viridans Streptococci MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_VIRI", mo), - species = "viridans" , - fullname = "Viridans Group Streptococcus (VGS)", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Streptococcus", species == "agalactiae") %>% + .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_VIRI", mo), + species = "viridans", + fullname = "Viridans Group Streptococcus (VGS)", + ref = NA_character_, + species_id = "", + source = "manually added" + ), # Milleri Streptococci MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_MILL", mo), - species = "milleri" , - fullname = "Milleri Group Streptococcus (MGS)", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Streptococcus", species == "agalactiae") %>% + .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_MILL", mo), + species = "milleri", + fullname = "Milleri Group Streptococcus (MGS)", + ref = NA_character_, + species_id = "", + source = "manually added" + ), # Candida krusei MOs %>% - filter(genus == "Candida", species == "glabrata") %>% .[1,] %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_KRUS", mo), - species = "krusei" , - fullname = "Candida krusei", - ref = NA_character_, - species_id = "", - source = "manually added"), + filter(genus == "Candida", species == "glabrata") %>% + .[1, ] %>% + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_KRUS", mo), + species = "krusei", + fullname = "Candida krusei", + ref = NA_character_, + species_id = "", + source = "manually added" + ), # Blastocystis hominis does not exist (it means 'got a Blastocystis from humans', PMID 15634993) # but let's be nice to the clinical people in microbiology MOs %>% filter(fullname == "Blastocystis") %>% - mutate(mo = paste0(mo, "_HMNS"), - fullname = paste(fullname, "hominis"), - species = "hominis", - source = "manually added", - ref = NA_character_, - species_id = ""), + mutate( + mo = paste0(mo, "_HMNS"), + fullname = paste(fullname, "hominis"), + species = "hominis", + source = "manually added", + ref = NA_character_, + species_id = "" + ), # Trichomonas vaginalis is missing, same order as Dientamoeba MOs %>% filter(fullname == "Dientamoeba") %>% - mutate(mo = gsub("(.*?)_.*", "\\1_THMNS", mo), - fullname = "Trichomonas", - family = "Trichomonadidae", - genus = "Trichomonas", - source = "manually added", - ref = "Donne, 1836", - species_id = ""), + mutate( + mo = gsub("(.*?)_.*", "\\1_THMNS", mo), + fullname = "Trichomonas", + family = "Trichomonadidae", + genus = "Trichomonas", + source = "manually added", + ref = "Donne, 1836", + species_id = "" + ), MOs %>% filter(fullname == "Dientamoeba fragilis") %>% - mutate(mo = gsub("(.*?)_.*", "\\1_THMNS_VAG", mo), - fullname = "Trichomonas vaginalis", - family = "Trichomonadidae", - genus = "Trichomonas", - species = "vaginalis", - source = "manually added", - ref = "Donne, 1836", - species_id = ""), + mutate( + mo = gsub("(.*?)_.*", "\\1_THMNS_VAG", mo), + fullname = "Trichomonas vaginalis", + family = "Trichomonadidae", + genus = "Trichomonas", + species = "vaginalis", + source = "manually added", + ref = "Donne, 1836", + species_id = "" + ), MOs %>% # add family as such too filter(fullname == "Monocercomonadidae") %>% - mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_TRCHMNDD", mo), - fullname = "Trichomonadidae", - family = "Trichomonadidae", - rank = "family", - genus = "", - species = "", - source = "manually added", - ref = "", - species_id = ""), + mutate( + mo = gsub("(.*)_(.*)_.*", "\\1_\\2_TRCHMNDD", mo), + fullname = "Trichomonadidae", + family = "Trichomonadidae", + rank = "family", + genus = "", + species = "", + source = "manually added", + ref = "", + species_id = "" + ), ) # Incorporate new microbial order for Gammaproteobacteria - Adeolu et al. (2016), PMID 27620848 MOs[which(MOs$family == "Enterobacteriaceae"), "family"] <- "" -MOs[which(MOs$genus %in% c("Escherichia", - "Atlantibacter", - "Biostraticola", - "Buttiauxella", - "Cedecea", - "Citrobacter", - "Cronobacter", - "Enterobacillus", - "Enterobacter", - "Franconibacter", - "Gibbsiella", - "Izhakiella", - "Klebsiella", - "Kluyvera", - "Kosakonia", - "Leclercia", - "Lelliottia", - "Mangrovibacter", - "Pluralibacter", - "Pseudocitrobacter", - "Raoultella", - "Rosenbergiella", - "Saccharobacter", - "Salmonella", - "Shigella", - "Shimwellia", - "Siccibacter", - "Trabulsiella", - "Yokenella")), "family"] <- "Enterobacteriaceae" -MOs[which(MOs$genus %in% c("Erwinia", - "Buchnera", - "Pantoea", - "Phaseolibacter", - "Tatumella", - "Wigglesworthia")), "family"] <- "Erwiniaceae" -MOs[which(MOs$genus %in% c("Pectobacterium", - "Brenneria", - "Dickeya", - "Lonsdalea", - "Sodalis")), "family"] <- "Pectobacteriaceae" -MOs[which(MOs$genus %in% c("Yersinia", - "Chania", - "Ewingella", - "Rahnella", - "Rouxiella", - "Samsonia", - "Serratia")), "family"] <- "Yersiniaceae" -MOs[which(MOs$genus %in% c("Hafnia", - "Edwardsiella", - "Obesumbacterium")), "family"] <- "Hafniaceae" -MOs[which(MOs$genus %in% c("Morganella", - "Arsenophonus", - "Cosenzaea", - "Moellerella", - "Photorhabdus", - "Proteus", - "Providencia", - "Xenorhabdus")), "family"] <- "Morganellaceae" -MOs[which(MOs$genus %in% c("Budvicia", - "Leminorella", - "Pragia")), "family"] <- "Budviciaceae" -MOs[which(MOs$family %in% c("Enterobacteriaceae", - "Erwiniaceae", - "Pectobacteriaceae", - "Yersiniaceae", - "Hafniaceae", - "Morganellaceae", - "Budviciaceae")), "order"] <- "Enterobacterales" +MOs[which(MOs$genus %in% c( + "Escherichia", + "Atlantibacter", + "Biostraticola", + "Buttiauxella", + "Cedecea", + "Citrobacter", + "Cronobacter", + "Enterobacillus", + "Enterobacter", + "Franconibacter", + "Gibbsiella", + "Izhakiella", + "Klebsiella", + "Kluyvera", + "Kosakonia", + "Leclercia", + "Lelliottia", + "Mangrovibacter", + "Pluralibacter", + "Pseudocitrobacter", + "Raoultella", + "Rosenbergiella", + "Saccharobacter", + "Salmonella", + "Shigella", + "Shimwellia", + "Siccibacter", + "Trabulsiella", + "Yokenella" +)), "family"] <- "Enterobacteriaceae" +MOs[which(MOs$genus %in% c( + "Erwinia", + "Buchnera", + "Pantoea", + "Phaseolibacter", + "Tatumella", + "Wigglesworthia" +)), "family"] <- "Erwiniaceae" +MOs[which(MOs$genus %in% c( + "Pectobacterium", + "Brenneria", + "Dickeya", + "Lonsdalea", + "Sodalis" +)), "family"] <- "Pectobacteriaceae" +MOs[which(MOs$genus %in% c( + "Yersinia", + "Chania", + "Ewingella", + "Rahnella", + "Rouxiella", + "Samsonia", + "Serratia" +)), "family"] <- "Yersiniaceae" +MOs[which(MOs$genus %in% c( + "Hafnia", + "Edwardsiella", + "Obesumbacterium" +)), "family"] <- "Hafniaceae" +MOs[which(MOs$genus %in% c( + "Morganella", + "Arsenophonus", + "Cosenzaea", + "Moellerella", + "Photorhabdus", + "Proteus", + "Providencia", + "Xenorhabdus" +)), "family"] <- "Morganellaceae" +MOs[which(MOs$genus %in% c( + "Budvicia", + "Leminorella", + "Pragia" +)), "family"] <- "Budviciaceae" +MOs[which(MOs$family %in% c( + "Enterobacteriaceae", + "Erwiniaceae", + "Pectobacteriaceae", + "Yersiniaceae", + "Hafniaceae", + "Morganellaceae", + "Budviciaceae" +)), "order"] <- "Enterobacterales" new_families <- MOs %>% filter(order == "Enterobacterales") %>% pull(family) %>% unique() -MOs <- MOs %>% - filter(!(rank == "family" & fullname %in% new_families)) %>% - bind_rows(tibble(mo = paste0("B_[FAM]_", - toupper(abbreviate(new_families, - minlength = 8, - use.classes = TRUE, - method = "both.sides", - strict = FALSE))), - fullname = new_families, - kingdom = "Bacteria", - phylum = "Proteobacteria", - class = "Gammaproteobacteria", - order = "Enterobacterales", - family = new_families, - genus = "", - species = "", - subspecies = "", - rank = "family", - ref = "Adeolu et al., 2016", - species_id = NA_character_, - source = "manually added", - prevalence = 1)) +MOs <- MOs %>% + filter(!(rank == "family" & fullname %in% new_families)) %>% + bind_rows(tibble( + mo = paste0( + "B_[FAM]_", + toupper(abbreviate(new_families, + minlength = 8, + use.classes = TRUE, + method = "both.sides", + strict = FALSE + )) + ), + fullname = new_families, + kingdom = "Bacteria", + phylum = "Proteobacteria", + class = "Gammaproteobacteria", + order = "Enterobacterales", + family = new_families, + genus = "", + species = "", + subspecies = "", + rank = "family", + ref = "Adeolu et al., 2016", + species_id = NA_character_, + source = "manually added", + prevalence = 1 + )) MOs[which(MOs$order == "Enterobacteriales"), "order"] <- "Enterobacterales" MOs[which(MOs$fullname == "Enterobacteriales"), "fullname"] <- "Enterobacterales" # add prevalence to old taxonomic names -MOs.old <- MOs.old %>% - select(-prevalence) %>% +MOs.old <- MOs.old %>% + select(-prevalence) %>% left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname")) # everything distinct? @@ -811,35 +950,43 @@ sum(duplicated(MOs$fullname)) colnames(MOs) # add the ones we would delete now, that have unexisting codes and names (also in the old names) -MOs <- MOs %>% - mutate(mo = as.character(mo)) %>% +MOs <- MOs %>% + mutate(mo = as.character(mo)) %>% bind_rows( AMR::microorganisms %>% - mutate(mo = as.character(mo)) %>% - filter(genus %in% gen & !fullname %in% AMR::microorganisms$fullname & - !fullname %in% AMR::microorganisms.old$fullname & - !mo %in% microorganisms$mo) %>% + mutate(mo = as.character(mo)) %>% + filter(genus %in% gen & !fullname %in% AMR::microorganisms$fullname & + !fullname %in% AMR::microorganisms.old$fullname & + !mo %in% microorganisms$mo) %>% select(all_of(colnames(AMR::microorganisms))) ) # here we welcome the new ones: -MOs %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms$fullname) %>% View() -MOs.old %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms.old$fullname) %>% View() +MOs %>% + arrange(fullname) %>% + filter(!fullname %in% AMR::microorganisms$fullname) %>% + View() +MOs.old %>% + arrange(fullname) %>% + filter(!fullname %in% AMR::microorganisms.old$fullname) %>% + View() # and the ones we lost: # AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View() # based on fullname -AMR::microorganisms %>% filter(!fullname %in% c(MOs$fullname, MOs.old$fullname)) %>% View() # excluding renamed ones +AMR::microorganisms %>% + filter(!fullname %in% c(MOs$fullname, MOs.old$fullname)) %>% + View() # excluding renamed ones # AMR::microorganisms %>% filter(!mo %in% MOs$mo) %>% View() # based on mo -# AMR::microorganisms %>% filter(!mo %in% MOs$mo & !fullname %in% MOs$fullname) %>% View() +# AMR::microorganisms %>% filter(!mo %in% MOs$mo & !fullname %in% MOs$fullname) %>% View() # and these IDs have changed: old_new <- MOs %>% - mutate(kingdom_fullname = paste(kingdom, fullname)) %>% - filter(kingdom_fullname %in% (AMR::microorganisms %>% - mutate(kingdom_fullname = paste(kingdom, fullname)) %>% - pull(kingdom_fullname))) %>% - left_join(AMR::microorganisms %>% - mutate(kingdom_fullname = paste(kingdom, fullname)) %>% - select(mo, kingdom_fullname), by = "kingdom_fullname", suffix = c("_new", "_old")) %>% - filter(mo_new != mo_old) %>% + mutate(kingdom_fullname = paste(kingdom, fullname)) %>% + filter(kingdom_fullname %in% (AMR::microorganisms %>% + mutate(kingdom_fullname = paste(kingdom, fullname)) %>% + pull(kingdom_fullname))) %>% + left_join(AMR::microorganisms %>% + mutate(kingdom_fullname = paste(kingdom, fullname)) %>% + select(mo, kingdom_fullname), by = "kingdom_fullname", suffix = c("_new", "_old")) %>% + filter(mo_new != mo_old) %>% select(mo_old, mo_new, everything()) View(old_new) @@ -848,9 +995,9 @@ rsi_translation$mo <- mo_name(rsi_translation$mo, language = NULL) microorganisms.codes$mo <- mo_name(microorganisms.codes$mo, language = NULL) # microorganisms.translation <- AMR:::microorganisms.translation %>% # bind_rows(tibble(mo_old = AMR:::microorganisms.translation$mo_new, mo_new = mo_old)) %>% -# filter(!mo_old %in% MOs$mo) %>% -# mutate(mo_new = mo_name(mo_new, language = NULL)) %>% -# bind_rows(old_new %>% select(mo_old, mo_new)) %>% +# filter(!mo_old %in% MOs$mo) %>% +# mutate(mo_new = mo_name(mo_new, language = NULL)) %>% +# bind_rows(old_new %>% select(mo_old, mo_new)) %>% # distinct(mo_old, .keep_all = TRUE) # arrange the data sets to save @@ -889,10 +1036,10 @@ class(microorganisms.codes$mo) <- c("mo", "character") # # (to do: add last package version to column pkg_version) # left_join(microorganisms.old[, c("fullname", "fullname_new")], # microorganisms.old is now new and loaded # by = c("mo_new" = "fullname")) %>% -# mutate(name = ifelse(!is.na(fullname_new), fullname_new, mo_new)) %>% +# mutate(name = ifelse(!is.na(fullname_new), fullname_new, mo_new)) %>% # left_join(microorganisms[, c("fullname", "mo")], # as is microorganisms -# by = c("name" = "fullname")) %>% -# select(mo_old, mo_new = mo) %>% +# by = c("name" = "fullname")) %>% +# select(mo_old, mo_new = mo) %>% # filter(!is.na(mo_old), !is.na(mo_new)) # class(microorganisms.translation$mo_old) <- "character" # no class since those aren't valid MO codes # class(microorganisms.translation$mo_new) <- c("mo", "character") @@ -901,7 +1048,7 @@ usethis::use_data(rsi_translation, overwrite = TRUE, version = 2) usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2) # saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2) # to save microorganisms.translation internally to the package -# source("data-raw/pre-commit-hook.R") +# source("data-raw/_pre_commit_hook.R") # load new data sets again devtools::load_all(".") @@ -924,13 +1071,15 @@ testthat::test_file("tests/testthat/test-mo_property.R") # edit 2020-05-28 # Not sure why it now says M. tuberculosis was renamed to M. africanum (B_MYCBC_AFRC), but that's not true -microorganisms <- microorganisms %>% - bind_rows(microorganisms %>% - filter(mo == "B_MYCBC_AFRC") %>% - mutate(mo = "B_MYCBC_TBRC", snomed = list(c("113861009", "113858008")), - ref = "Lehmann et al., 2018",species_id = "778540", - source = "DSMZ", species = "tuberculosis", - fullname = "Mycobacterium tuberculosis")) %>% +microorganisms <- microorganisms %>% + bind_rows(microorganisms %>% + filter(mo == "B_MYCBC_AFRC") %>% + mutate( + mo = "B_MYCBC_TBRC", snomed = list(c("113861009", "113858008")), + ref = "Lehmann et al., 2018", species_id = "778540", + source = "DSMZ", species = "tuberculosis", + fullname = "Mycobacterium tuberculosis" + )) %>% arrange(fullname) class(microorganisms$mo) <- c("mo", "character") microorganisms.old <- microorganisms.old %>% filter(fullname != "Mycobacterium tuberculosis") @@ -949,31 +1098,31 @@ usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2) # left_join(MOs %>% # select(-mo), by = "fullname")) # this is how to fix it -# microorganisms.codes <- AMR::microorganisms.codes %>% +# microorganisms.codes <- AMR::microorganisms.codes %>% # left_join(MOs %>% -# mutate(kingdom_fullname = paste(kingdom, fullname)) %>% +# mutate(kingdom_fullname = paste(kingdom, fullname)) %>% # left_join(AMR::microorganisms %>% # transmute(mo, kingdom_fullname = paste(kingdom, fullname)), # by = "kingdom_fullname", suffix = c("_new", "_old")) %>% # select(mo_old, mo_new), -# by = c("mo" = "mo_old")) %>% -# select(code, mo = mo_new) %>% +# by = c("mo" = "mo_old")) %>% +# select(code, mo = mo_new) %>% # filter(!is.na(mo)) # microorganisms.codes %>% filter(!mo %in% MOs$mo) # # and for microorganisms.translation: -# microorganisms.translation <- AMR:::microorganisms.translation %>% -# select(mo = mo_new) %>% +# microorganisms.translation <- AMR:::microorganisms.translation %>% +# select(mo = mo_new) %>% # left_join(AMR::microorganisms %>% # transmute(mo, kingdom_fullname = paste(kingdom, fullname)), # by = "kingdom_fullname", suffix = c("_new", "_old")) %>% # select(mo_old, mo_new) # left_join(MOs %>% -# mutate(kingdom_fullname = paste(kingdom, fullname)) %>% +# mutate(kingdom_fullname = paste(kingdom, fullname)) %>% # left_join(AMR::microorganisms %>% # transmute(mo, kingdom_fullname = paste(kingdom, fullname)), # by = "kingdom_fullname", suffix = c("_new", "_old")) %>% # select(mo_old, mo_new), -# by = c("mo" = "mo_old")) %>% -# select(code, mo = mo_new) %>% +# by = c("mo" = "mo_old")) %>% +# select(code, mo = mo_new) %>% # filter(!is.na(mo)) # microorganisms.codes %>% filter(!mo %in% MOs$mo) diff --git a/data-raw/reproduction_of_microorganisms_update.R b/data-raw/reproduction_of_microorganisms_update.R index 0d7c69696..0e7c2b0db 100644 --- a/data-raw/reproduction_of_microorganisms_update.R +++ b/data-raw/reproduction_of_microorganisms_update.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -39,7 +39,7 @@ test_mo <- microorganisms$mo get_author_year <- function(ref) { # Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011' - + authors2 <- iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT") authors2 <- gsub(" ?\\(Approved Lists [0-9]+\\) ?", " () ", authors2) authors2 <- gsub(" [)(]+ $", "", authors2) @@ -47,14 +47,16 @@ get_author_year <- function(ref) { authors2 <- trimws(gsub("^[(](.*)[)]$", "\\1", authors2)) # only take part after brackets if there's a name authors2 <- ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2), - gsub(".*[)] (.*)", "\\1", authors2), - authors2) + gsub(".*[)] (.*)", "\\1", authors2), + authors2 + ) # get year from last 4 digits - lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2)) + lastyear <- as.integer(gsub(".*([0-9]{4})$", "\\1", authors2)) # can never be later than now - lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")), - NA, - lastyear) + lastyear <- ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")), + NA, + lastyear + ) # get authors without last year authors <- gsub("(.*)[0-9]{4}$", "\\1", authors2) # remove nonsense characters from names @@ -72,18 +74,19 @@ get_author_year <- function(ref) { authors <- gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE) # combine author and year if year is available ref <- ifelse(!is.na(lastyear), - paste0(authors, ", ", lastyear), - authors) + paste0(authors, ", ", lastyear), + authors + ) # fix beginning and ending ref <- gsub(", $", "", ref) ref <- gsub("^, ", "", ref) ref <- gsub("^(emend|et al.,?)", "", ref) ref <- trimws(ref) ref <- gsub("'", "", ref) - + # a lot start with a lowercase character - fix that ref[!grepl("^d[A-Z]", ref)] <- gsub("^([a-z])", "\\U\\1", ref[!grepl("^d[A-Z]", ref)], perl = TRUE) - # specific one for the French that are named dOrbigny + # specific one for the French that are named dOrbigny ref[grepl("^d[A-Z]", ref)] <- gsub("^d", "d'", ref[grepl("^d[A-Z]", ref)]) ref <- gsub(" +", " ", ref) ref @@ -92,21 +95,23 @@ get_author_year <- function(ref) { df_remove_nonASCII <- function(df) { # Remove non-ASCII characters (these are not allowed by CRAN) df %>% - mutate_if(is.character, iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>% + mutate_if(is.character, iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>% # also remove invalid characters - mutate_if(is.character, ~gsub("[\"'`]+", "", .)) %>% + mutate_if(is.character, ~ gsub("[\"'`]+", "", .)) %>% AMR:::dataset_UTF8_to_ASCII() } abbreviate_mo <- function(x, minlength = 5, prefix = "", ...) { # keep a starting Latin ae suppressWarnings( - gsub("^ae", "\u00E6\u00E6", x, ignore.case = TRUE) %>% - abbreviate(minlength = minlength, - use.classes = TRUE, - method = "both.sides", ...) %>% - paste0(prefix, .) %>% - toupper() %>% + gsub("^ae", "\u00E6\u00E6", x, ignore.case = TRUE) %>% + abbreviate( + minlength = minlength, + use.classes = TRUE, + method = "both.sides", ... + ) %>% + paste0(prefix, .) %>% + toupper() %>% gsub("(\u00C6|\u00E6)+", "AE", .) ) } @@ -119,63 +124,74 @@ taxonomy <- read_csv(file_location) new_synonyms <- taxonomy %>% left_join(taxonomy, - by = c("record_lnk" = "record_no"), - suffix = c("", ".new")) %>% - filter(!is.na(record_lnk)) %>% - mutate_all(~ifelse(is.na(.), "", .)) %>% - transmute(fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)), - fullname_new = trimws(paste(genus_name.new, sp_epithet.new, subsp_epithet.new)), - ref = get_author_year(authors), - prevalence = 0) %>% - distinct(fullname, .keep_all = TRUE) %>% - filter(fullname != fullname_new) %>% - # this part joins this table to itself to correct for entries that had >1 renames, + by = c("record_lnk" = "record_no"), + suffix = c("", ".new") + ) %>% + filter(!is.na(record_lnk)) %>% + mutate_all(~ ifelse(is.na(.), "", .)) %>% + transmute( + fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)), + fullname_new = trimws(paste(genus_name.new, sp_epithet.new, subsp_epithet.new)), + ref = get_author_year(authors), + prevalence = 0 + ) %>% + distinct(fullname, .keep_all = TRUE) %>% + filter(fullname != fullname_new) %>% + # this part joins this table to itself to correct for entries that had >1 renames, # such as: # Bacteroides tectum -> Bacteroides tectus -> Bacteroides pyogenes - left_join(., ., - by = c("fullname_new" = "fullname"), - suffix = c("", ".2")) %>% - mutate(fullname_new = ifelse(!is.na(fullname_new.2), fullname_new.2, fullname_new), - ref = ifelse(!is.na(ref.2), ref.2, ref)) %>% - select(-ends_with(".2")) + left_join(., ., + by = c("fullname_new" = "fullname"), + suffix = c("", ".2") + ) %>% + mutate( + fullname_new = ifelse(!is.na(fullname_new.2), fullname_new.2, fullname_new), + ref = ifelse(!is.na(ref.2), ref.2, ref) + ) %>% + select(-ends_with(".2")) -mo_became_synonym <- microorganisms %>% +mo_became_synonym <- microorganisms %>% filter(fullname %in% new_synonyms$fullname) updated_microorganisms <- taxonomy %>% - filter(is.na(record_lnk)) %>% - mutate_all(~ifelse(is.na(.), "", .)) %>% - transmute(mo = "", - fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)), - kingdom = "Bacteria", - phylum = "", - class = "", - order = "", - family = "", - genus = trimws(genus_name), - species = trimws(replace_na(sp_epithet, "")), - subspecies = trimws(replace_na(subsp_epithet, "")), - rank = case_when(subspecies == "" & species == "" ~ "genus", - subspecies == "" ~ "species", - TRUE ~ "subsp."), - ref = get_author_year(authors), - species_id = as.character(record_no), - source = "LPSN", - prevalence = 0, - snomed = NA) + filter(is.na(record_lnk)) %>% + mutate_all(~ ifelse(is.na(.), "", .)) %>% + transmute( + mo = "", + fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)), + kingdom = "Bacteria", + phylum = "", + class = "", + order = "", + family = "", + genus = trimws(genus_name), + species = trimws(replace_na(sp_epithet, "")), + subspecies = trimws(replace_na(subsp_epithet, "")), + rank = case_when( + subspecies == "" & species == "" ~ "genus", + subspecies == "" ~ "species", + TRUE ~ "subsp." + ), + ref = get_author_year(authors), + species_id = as.character(record_no), + source = "LPSN", + prevalence = 0, + snomed = NA + ) -new_microorganisms <- updated_microorganisms %>% +new_microorganisms <- updated_microorganisms %>% filter(!fullname %in% microorganisms$fullname) genera_with_mo_code <- updated_microorganisms %>% filter(genus %in% (microorganisms %>% filter(kingdom == "Bacteria", rank == "genus") %>% pull(genus))) %>% - distinct(genus) %>% + distinct(genus) %>% left_join(microorganisms %>% filter(kingdom == "Bacteria", rank == "genus") %>% select(mo, genus), - by = "genus") - -genera_without_mo_code <- updated_microorganisms %>% - filter(!genus %in% genera_with_mo_code$genus) %>% - pull(genus) %>% + by = "genus" + ) + +genera_without_mo_code <- updated_microorganisms %>% + filter(!genus %in% genera_with_mo_code$genus) %>% + pull(genus) %>% unique() genera_without_mo_code_abbr <- genera_without_mo_code %>% abbreviate_mo(5, prefix = "B_") @@ -184,65 +200,73 @@ genera_without_mo_code_abbr[genera_without_mo_code_abbr %in% microorganisms$mo] # all unique?? sum(genera_without_mo_code_abbr %in% microorganisms$mo) == 0 -genus_abb <- tibble(genus = genera_without_mo_code, - abbr = genera_without_mo_code_abbr) %>% +genus_abb <- tibble( + genus = genera_without_mo_code, + abbr = genera_without_mo_code_abbr +) %>% bind_rows(microorganisms %>% - filter(kingdom == "Bacteria", rank == "genus", !genus %in% genera_without_mo_code) %>% - transmute(genus, abbr = as.character(mo))) %>% + filter(kingdom == "Bacteria", rank == "genus", !genus %in% genera_without_mo_code) %>% + transmute(genus, abbr = as.character(mo))) %>% arrange(genus) # Update taxonomy --------------------------------------------------------- # fill in the taxonomy of new genera -updated_taxonomy <- tibble(phylum = character(0), - class = character(0), - order = character(0), - family = character(0), - genus = character(0)) +updated_taxonomy <- tibble( + phylum = character(0), + class = character(0), + order = character(0), + family = character(0), + genus = character(0) +) for (page in LETTERS) { message("Downloading page ", page, "... ", appendLF = FALSE) url <- paste0("https://lpsn.dsmz.de/genus?page=", page) - - x <- xml2::read_html(url) %>% - rvest::html_node(".main-list") %>% + + x <- xml2::read_html(url) %>% + rvest::html_node(".main-list") %>% # evety list element with a set attribute rvest::html_nodes("li[id]") for (i in seq_len(length(x))) { - txt <- x %>% - magrittr::extract2(i) %>% + txt <- x %>% + magrittr::extract2(i) %>% rvest::html_text() %>% - gsub("\\[[A-Za-z]+, no [a-z]+\\]", "NA", .) %>% - gsub("Candidatus ", "", ., fixed = TRUE) %>% + gsub("\\[[A-Za-z]+, no [a-z]+\\]", "NA", .) %>% + gsub("Candidatus ", "", ., fixed = TRUE) %>% gsub("[ \t\r\n\"]+", "|", .) %>% - gsub("\\|ShowHide.*", "", .) %>% - gsub("[\\[\\]]", "", ., fixed = TRUE) %>% - gsub("^\\|", "", .) %>% - strsplit("|", fixed = TRUE) %>% + gsub("\\|ShowHide.*", "", .) %>% + gsub("[\\[\\]]", "", ., fixed = TRUE) %>% + gsub("^\\|", "", .) %>% + strsplit("|", fixed = TRUE) %>% unlist() txt[txt == "NA"] <- "" txt <- gsub("[^A-Za-z]+", "", txt) - updated_taxonomy <- updated_taxonomy %>% - bind_rows(tibble(phylum = txt[2], - class = txt[3], - order = txt[4], - family = txt[5], - genus = txt[6])) + updated_taxonomy <- updated_taxonomy %>% + bind_rows(tibble( + phylum = txt[2], + class = txt[3], + order = txt[4], + family = txt[5], + genus = txt[6] + )) } message(length(x), " entries (total ", nrow(updated_taxonomy), ")") } # Create new microorganisms ----------------------------------------------- -new_microorganisms <- new_microorganisms %>% - left_join(genus_abb, by = "genus") %>% - group_by(genus) %>% - mutate(species_abb = abbreviate_mo(species, 4)) %>% - group_by(genus, species) %>% - mutate(subspecies_abb = abbreviate_mo(subspecies, 4)) %>% - ungroup() %>% - mutate(mo = paste(abbr, species_abb, subspecies_abb, sep = "_"), - mo = gsub("_+$", "", mo)) %>% +new_microorganisms <- new_microorganisms %>% + left_join(genus_abb, by = "genus") %>% + group_by(genus) %>% + mutate(species_abb = abbreviate_mo(species, 4)) %>% + group_by(genus, species) %>% + mutate(subspecies_abb = abbreviate_mo(subspecies, 4)) %>% + ungroup() %>% + mutate( + mo = paste(abbr, species_abb, subspecies_abb, sep = "_"), + mo = gsub("_+$", "", mo) + ) %>% select(-matches("abb")) # add taxonomy new microorganisms @@ -256,128 +280,150 @@ MOs$mo[which(duplicated(MOs$mo))] <- paste0(MOs$mo[which(duplicated(MOs$mo))], 1 # all unique? !any(duplicated(MOs$mo)) -MOs <- MOs %>% +MOs <- MOs %>% # remove entries that are now a synonym - filter(!fullname %in% new_synonyms$fullname) %>% + filter(!fullname %in% new_synonyms$fullname) %>% # update the taxonomy left_join(updated_taxonomy, by = "genus", suffix = c("", ".new")) %>% - mutate(phylum = ifelse(!is.na(phylum.new), phylum.new, phylum), - class = ifelse(!is.na(class.new), class.new, class), - order = ifelse(!is.na(order.new), order.new, order), - family = ifelse(!is.na(family.new), family.new, family)) %>% + mutate( + phylum = ifelse(!is.na(phylum.new), phylum.new, phylum), + class = ifelse(!is.na(class.new), class.new, class), + order = ifelse(!is.na(order.new), order.new, order), + family = ifelse(!is.na(family.new), family.new, family) + ) %>% select(-ends_with(".new")) %>% # update prevalence based on taxonomy (Berends et al., 2021) mutate(prevalence = case_when( - class == "Gammaproteobacteria" - | genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus") + class == "Gammaproteobacteria" | + genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus") ~ 1, - kingdom %in% c("Archaea", "Bacteria", "Chromista", "Fungi") - & (phylum %in% c("Proteobacteria", - "Firmicutes", - "Actinobacteria", - "Sarcomastigophora") - | genus %in% MO_PREVALENT_GENERA - | rank %in% c("kingdom", "phylum", "class", "order", "family")) + kingdom %in% c("Archaea", "Bacteria", "Chromista", "Fungi") & + (phylum %in% c( + "Proteobacteria", + "Firmicutes", + "Actinobacteria", + "Sarcomastigophora" + ) | + genus %in% MO_PREVALENT_GENERA | + rank %in% c("kingdom", "phylum", "class", "order", "family")) ~ 2, TRUE ~ 3 )) # add all mssing genera, families and orders -MOs <- MOs %>% +MOs <- MOs %>% bind_rows( - MOs %>% + MOs %>% arrange(genus, species) %>% distinct(genus, .keep_all = TRUE) %>% filter(rank == "species", source != "manually added") %>% - mutate(mo = gsub("^([A-Z]_[A-Z]+)_.*", "\\1", mo), - fullname = genus, - species = "", - subspecies = "", - rank = "genus", - species_id = "", - snomed = NA, - ref = NA_character_), - MOs %>% + mutate( + mo = gsub("^([A-Z]_[A-Z]+)_.*", "\\1", mo), + fullname = genus, + species = "", + subspecies = "", + rank = "genus", + species_id = "", + snomed = NA, + ref = NA_character_ + ), + MOs %>% group_by(family) %>% filter(!any(rank == "family") & n() > 1) %>% - ungroup() %>% + ungroup() %>% arrange(family) %>% - distinct(family, .keep_all = TRUE) %>% + distinct(family, .keep_all = TRUE) %>% filter(!family %in% c("", NA), source != "manually added") %>% - mutate(mo = paste0(substr(kingdom, 1, 1), "_[FAM]_", - abbreviate(family, - minlength = 8, - use.classes = TRUE, - method = "both.sides", - strict = FALSE)), - mo = toupper(mo), - fullname = family, - genus = "", - species = "", - subspecies = "", - rank = "family", - species_id = "", - snomed = NA, - ref = NA_character_), - MOs %>% + mutate( + mo = paste0( + substr(kingdom, 1, 1), "_[FAM]_", + abbreviate(family, + minlength = 8, + use.classes = TRUE, + method = "both.sides", + strict = FALSE + ) + ), + mo = toupper(mo), + fullname = family, + genus = "", + species = "", + subspecies = "", + rank = "family", + species_id = "", + snomed = NA, + ref = NA_character_ + ), + MOs %>% group_by(order) %>% filter(!any(rank == "order") & n() > 1) %>% - ungroup() %>% + ungroup() %>% arrange(order) %>% - distinct(order, .keep_all = TRUE) %>% + distinct(order, .keep_all = TRUE) %>% filter(!order %in% c("", NA), source != "manually added") %>% - mutate(mo = paste0(substr(kingdom, 1, 1), "_[ORD]_", - abbreviate(order, - minlength = 8, - use.classes = TRUE, - method = "both.sides", - strict = FALSE)), - mo = toupper(mo), - fullname = order, - family = "", - genus = "", - species = "", - subspecies = "", - rank = "order", - species_id = "", - snomed = NA, - ref = NA_character_) + mutate( + mo = paste0( + substr(kingdom, 1, 1), "_[ORD]_", + abbreviate(order, + minlength = 8, + use.classes = TRUE, + method = "both.sides", + strict = FALSE + ) + ), + mo = toupper(mo), + fullname = order, + family = "", + genus = "", + species = "", + subspecies = "", + rank = "order", + species_id = "", + snomed = NA, + ref = NA_character_ + ) ) %>% arrange(fullname) # clean up -MOs <- MOs %>% +MOs <- MOs %>% df_remove_nonASCII() # Add LPSN record IDs ----------------------------------------------------- records_ids <- taxonomy %>% - mutate(across(1:3, function(x) { x[is.na(x)] <- ""; x}), - fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet))) %>% - transmute(fullname, species_id = as.numeric(record_no)) %>% - arrange(fullname, species_id) %>% + mutate(across(1:3, function(x) { + x[is.na(x)] <- "" + x + }), + fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)) + ) %>% + transmute(fullname, species_id = as.numeric(record_no)) %>% + arrange(fullname, species_id) %>% distinct(fullname, .keep_all = TRUE) message("Adding ", sum(records_ids$fullname %in% microorganisms$fullname), " LPSN record IDs") MOs <- MOs %>% - select(-species_id) %>% + select(-species_id) %>% left_join(records_ids, by = "fullname") %>% - relocate(species_id, .after = ref) %>% - mutate(source = case_when(!is.na(species_id) ~ "LPSN", - source %unlike% "manual" ~ "CoL", - TRUE ~ source)) + relocate(species_id, .after = ref) %>% + mutate(source = case_when( + !is.na(species_id) ~ "LPSN", + source %unlike% "manual" ~ "CoL", + TRUE ~ source + )) # Merge synonyms ---------------------------------------------------------- # remove synonyms that are now valid names -MOs.old <- microorganisms.old %>% +MOs.old <- microorganisms.old %>% # add new synonyms - bind_rows(new_synonyms) %>% - filter(!fullname %in% MOs$fullname) %>% - arrange(fullname) %>% - distinct(fullname, fullname_new, .keep_all = TRUE) %>% + bind_rows(new_synonyms) %>% + filter(!fullname %in% MOs$fullname) %>% + arrange(fullname) %>% + distinct(fullname, fullname_new, .keep_all = TRUE) %>% # add prevalence to old taxonomic names - select(-prevalence) %>% - left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname")) %>% + select(-prevalence) %>% + left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname")) %>% # clean up df_remove_nonASCII() @@ -397,16 +443,18 @@ microorganisms.old <- MOs.old # we keep them both microorganisms <- microorganisms %>% bind_rows(microorganisms %>% - filter(fullname == "Branhamella catarrhalis") %>% - mutate(mo = "B_MRXLL_CTRR", - fullname = "Moraxella catarrhalis", - genus = "Moraxella", - ref = "Henriksen et al., 1968", - species_id = "a374f6f0868e05f9c0f5077b60ee0a6c", - snomed = as.list(24226003))) %>% - arrange(fullname) %>% + filter(fullname == "Branhamella catarrhalis") %>% + mutate( + mo = "B_MRXLL_CTRR", + fullname = "Moraxella catarrhalis", + genus = "Moraxella", + ref = "Henriksen et al., 1968", + species_id = "a374f6f0868e05f9c0f5077b60ee0a6c", + snomed = as.list(24226003) + )) %>% + arrange(fullname) %>% df_remove_nonASCII() -microorganisms.old <- microorganisms.old %>% +microorganisms.old <- microorganisms.old %>% filter(fullname != "Moraxella catarrhalis") # --- @@ -444,7 +492,7 @@ rm(intrinsic_resistant) # load new data sets again devtools::load_all(".") -source("data-raw/pre-commit-hook.R") +source("data-raw/_pre_commit_hook.R") devtools::load_all(".") diff --git a/data-raw/reproduction_of_poorman.R b/data-raw/reproduction_of_poorman.R index ed32061e1..7ff3a3ab9 100644 --- a/data-raw/reproduction_of_poorman.R +++ b/data-raw/reproduction_of_poorman.R @@ -25,26 +25,28 @@ sapply(files, function(file) { contents <<- c(contents, readLines(file)) invisible() }) -contents <- c(intro, - copyright, - "", - contents) +contents <- c( + intro, + copyright, + "", + contents +) # remove lines starting with "#'" and NULL and write to file contents <- contents[!grepl("^(#'|NULL|\"_PACKAGE)", contents)] # now make it independent on UseMethod, since we will not export these functions - contents <- gsub('UseMethod[(]"(.*?)"[)]', - 'if ("grouped_data" %in% class(.data)) {||| \\1.grouped_data(.data, ...)||| } else {||| \\1.default(.data, ...)||| }', - paste(contents, collapse = "|||"), - perl = TRUE) %>% + 'if ("grouped_data" %in% class(.data)) {||| \\1.grouped_data(.data, ...)||| } else {||| \\1.default(.data, ...)||| }', + paste(contents, collapse = "|||"), + perl = TRUE +) %>% # add commit to intro part gsub("{commit}", commit, ., fixed = TRUE) %>% # add date to intro part gsub("{date}", format(Sys.Date(), "%e %B %Y"), ., fixed = TRUE) %>% strsplit(split = "|||", fixed = TRUE) %>% - unlist() %>% + unlist() %>% # add "pm_" as prefix to all functions gsub("^([a-z_.]+) <- function", "pm_\\1 <- function", .) @@ -56,7 +58,7 @@ for (i in seq_len(length(new_pm_names))) { contents <- gsub(paste0("( |\\[|\\()", new_pm_names[i], "($|[^a-z]|\\))"), paste0("\\1pm_", new_pm_names[i], "\\2"), contents) } -# replace %>% with %pm>% +# replace %>% with %pm>% contents <- gsub("%>%", "%pm>%", contents, fixed = TRUE) # fix for new lines, since n() also existed contents <- gsub("\\pm_n", "\\n", contents, fixed = TRUE) @@ -70,6 +72,8 @@ contents <- gsub("context", "pm_context", contents, fixed = TRUE) contents <- gsub("(pm_)+", "pm_", contents) # special case for pm_distinct(), we need '.keep_all' to work contents <- gsub("pm_distinct <- function(.data, ..., .keep_all = FALSE)", "pm_distinct <- function(.data, ...)", contents, fixed = TRUE) +# pm_pull does not correct for tibbles, misses the drop argument +contents[contents == ".data[, var]"] <- ".data[, var, drop = TRUE]" # who needs US spelling? contents <- contents[!grepl("summarize", contents)] diff --git a/data-raw/reproduction_of_rsi_translation.R b/data-raw/reproduction_of_rsi_translation.R index 3ad92c0d5..68f32e8bc 100644 --- a/data-raw/reproduction_of_rsi_translation.R +++ b/data-raw/reproduction_of_rsi_translation.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -40,88 +40,104 @@ ORGLIST <- read_tsv("data-raw/WHONET/Codes/ORGLIST.txt", na = c("", "NA", "-"), # create data set for generic rules (i.e., AB-specific but not MO-specific) rsi_generic <- DRGLST %>% filter(CLSI == "X" | EUCST == "X") %>% - select(ab = ANTIBIOTIC, disk_dose = POTENCY, matches("^(CLSI|EUCST)[0-9]")) %>% - mutate(ab = as.ab(ab), - across(matches("(CLSI|EUCST)"), as.double)) %>% - pivot_longer(-c(ab, disk_dose), names_to = "method") %>% - separate(method, into = c("guideline", "method"), sep = "_") %>% + select(ab = ANTIBIOTIC, disk_dose = POTENCY, matches("^(CLSI|EUCST)[0-9]")) %>% + mutate( + ab = as.ab(ab), + across(matches("(CLSI|EUCST)"), as.double) + ) %>% + pivot_longer(-c(ab, disk_dose), names_to = "method") %>% + separate(method, into = c("guideline", "method"), sep = "_") %>% mutate(method = ifelse(method %like% "D", - gsub("D", "DISK_", method, fixed = TRUE), - gsub("M", "MIC_", method, fixed = TRUE))) %>% - separate(method, into = c("method", "rsi"), sep = "_") %>% + gsub("D", "DISK_", method, fixed = TRUE), + gsub("M", "MIC_", method, fixed = TRUE) + )) %>% + separate(method, into = c("method", "rsi"), sep = "_") %>% # I is in the middle, so we only need R and S (saves data) - filter(rsi %in% c("R", "S")) %>% + filter(rsi %in% c("R", "S")) %>% pivot_wider(names_from = rsi, values_from = value) %>% - transmute(guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", guideline)), - method, - site = NA_character_, - mo = as.mo("UNKNOWN"), - ab, - ref_tbl = "Generic rules", - disk_dose, - breakpoint_S = S, - breakpoint_R = R, - uti = FALSE) %>% + transmute( + guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", guideline)), + method, + site = NA_character_, + mo = as.mo("UNKNOWN"), + ab, + ref_tbl = "Generic rules", + disk_dose, + breakpoint_S = S, + breakpoint_R = R, + uti = FALSE + ) %>% filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)), !is.na(mo), !is.na(ab)) rsi_generic # create data set for AB-specific and MO-specific rules -rsi_specific <- DRGLST1 %>% +rsi_specific <- DRGLST1 %>% # only support guidelines for humans (for now) - filter(HOST == "Human" & SITE_INF %unlike% "canine|feline", - # only CLSI and EUCAST - GUIDELINES %like% "(CLSI|EUCST)") %>% + filter( + HOST == "Human" & SITE_INF %unlike% "canine|feline", + # only CLSI and EUCAST + GUIDELINES %like% "(CLSI|EUCST)" + ) %>% # get microorganism names from another WHONET table - mutate(ORG_CODE = tolower(ORG_CODE)) %>% + mutate(ORG_CODE = tolower(ORG_CODE)) %>% left_join(ORGLIST %>% - transmute(ORG_CODE = tolower(ORG), - SCT_TEXT = case_when(is.na(SCT_TEXT) & is.na(ORGANISM) ~ ORG_CODE, - is.na(SCT_TEXT) ~ ORGANISM, - TRUE ~ SCT_TEXT)) %>% - # WHO for 'Generic' - bind_rows(tibble(ORG_CODE = "gen", SCT_TEXT = "Unknown")) %>% - # WHO for 'Enterobacterales' - bind_rows(tibble(ORG_CODE = "ebc", SCT_TEXT = "Enterobacterales")) - ) %>% + transmute( + ORG_CODE = tolower(ORG), + SCT_TEXT = case_when( + is.na(SCT_TEXT) & is.na(ORGANISM) ~ ORG_CODE, + is.na(SCT_TEXT) ~ ORGANISM, + TRUE ~ SCT_TEXT + ) + ) %>% + # WHO for 'Generic' + bind_rows(tibble(ORG_CODE = "gen", SCT_TEXT = "Unknown")) %>% + # WHO for 'Enterobacterales' + bind_rows(tibble(ORG_CODE = "ebc", SCT_TEXT = "Enterobacterales"))) %>% # still some manual cleaning required - filter(!SCT_TEXT %in% c("Anaerobic Actinomycetes")) %>% - transmute(guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", GUIDELINES)), - method = toupper(TESTMETHOD), - site = SITE_INF, - mo = as.mo(SCT_TEXT), - ab = as.ab(WHON5_CODE), - ref_tbl = REF_TABLE, - disk_dose = POTENCY, - breakpoint_S = as.double(ifelse(method == "DISK", DISK_S, MIC_S)), - breakpoint_R = as.double(ifelse(method == "DISK", DISK_R, MIC_R)), - uti = site %like% "(UTI|urinary|urine)") %>% + filter(!SCT_TEXT %in% c("Anaerobic Actinomycetes")) %>% + transmute( + guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", GUIDELINES)), + method = toupper(TESTMETHOD), + site = SITE_INF, + mo = as.mo(SCT_TEXT), + ab = as.ab(WHON5_CODE), + ref_tbl = REF_TABLE, + disk_dose = POTENCY, + breakpoint_S = as.double(ifelse(method == "DISK", DISK_S, MIC_S)), + breakpoint_R = as.double(ifelse(method == "DISK", DISK_R, MIC_R)), + uti = site %like% "(UTI|urinary|urine)" + ) %>% filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)), !is.na(mo), !is.na(ab)) rsi_specific -rsi_translation <- rsi_generic %>% - bind_rows(rsi_specific) %>% +rsi_translation <- rsi_generic %>% + bind_rows(rsi_specific) %>% # add the taxonomic rank index, used for sorting (so subspecies match first, order matches last) - mutate(rank_index = case_when(mo_rank(mo) %like% "(infra|sub)" ~ 1, - mo_rank(mo) == "species" ~ 2, - mo_rank(mo) == "genus" ~ 3, - mo_rank(mo) == "family" ~ 4, - mo_rank(mo) == "order" ~ 5, - TRUE ~ 6), - .after = mo) %>% - arrange(desc(guideline), ab, mo, method) %>% - distinct(guideline, ab, mo, method, site, .keep_all = TRUE) %>% + mutate( + rank_index = case_when( + mo_rank(mo) %like% "(infra|sub)" ~ 1, + mo_rank(mo) == "species" ~ 2, + mo_rank(mo) == "genus" ~ 3, + mo_rank(mo) == "family" ~ 4, + mo_rank(mo) == "order" ~ 5, + TRUE ~ 6 + ), + .after = mo + ) %>% + arrange(desc(guideline), ab, mo, method) %>% + distinct(guideline, ab, mo, method, site, .keep_all = TRUE) %>% as.data.frame(stringsAsFactors = FALSE) # disks MUST be 6-50 mm, so correct where that is wrong: rsi_translation[which(rsi_translation$method == "DISK" & - (is.na(rsi_translation$breakpoint_S) | rsi_translation$breakpoint_S > 50)), "breakpoint_S"] <- 50 + (is.na(rsi_translation$breakpoint_S) | rsi_translation$breakpoint_S > 50)), "breakpoint_S"] <- 50 rsi_translation[which(rsi_translation$method == "DISK" & - (is.na(rsi_translation$breakpoint_R) | rsi_translation$breakpoint_R < 6)), "breakpoint_R"] <- 6 + (is.na(rsi_translation$breakpoint_R) | rsi_translation$breakpoint_R < 6)), "breakpoint_R"] <- 6 m <- unique(as.double(as.mic(levels(as.mic(1))))) rsi_translation[which(rsi_translation$method == "MIC" & - is.na(rsi_translation$breakpoint_S)), "breakpoint_S"] <- min(m) + is.na(rsi_translation$breakpoint_S)), "breakpoint_S"] <- min(m) rsi_translation[which(rsi_translation$method == "MIC" & - is.na(rsi_translation$breakpoint_R)), "breakpoint_R"] <- max(m) + is.na(rsi_translation$breakpoint_R)), "breakpoint_R"] <- max(m) # WHONET has no >1024 but instead uses 1025, 513, etc, so raise these one higher valid MIC factor level: rsi_translation[which(rsi_translation$breakpoint_R == 129), "breakpoint_R"] <- m[which(m == 128) + 1] @@ -134,18 +150,18 @@ rsi_translation[which(rsi_translation$breakpoint_R == 1025), "breakpoint_R"] <- # WHONET file: S <= 8 and R >= 16 # this will make an MIC of 12 I, which should be R, so: eucast_mics <- which(rsi_translation$guideline %like% "EUCAST" & - rsi_translation$method == "MIC" & - log2(as.double(rsi_translation$breakpoint_R)) - log2(as.double(rsi_translation$breakpoint_S)) != 0 & - !is.na(rsi_translation$breakpoint_R)) + rsi_translation$method == "MIC" & + log2(as.double(rsi_translation$breakpoint_R)) - log2(as.double(rsi_translation$breakpoint_S)) != 0 & + !is.na(rsi_translation$breakpoint_R)) old_R <- rsi_translation[eucast_mics, "breakpoint_R", drop = TRUE] old_S <- rsi_translation[eucast_mics, "breakpoint_S", drop = TRUE] -new_R <- 2 ^ (log2(old_R) - 1) +new_R <- 2^(log2(old_R) - 1) new_R[new_R < old_S | is.na(as.mic(new_R))] <- old_S[new_R < old_S | is.na(as.mic(new_R))] rsi_translation[eucast_mics, "breakpoint_R"] <- new_R eucast_disks <- which(rsi_translation$guideline %like% "EUCAST" & - rsi_translation$method == "DISK" & - rsi_translation$breakpoint_S - rsi_translation$breakpoint_R != 0 & - !is.na(rsi_translation$breakpoint_R)) + rsi_translation$method == "DISK" & + rsi_translation$breakpoint_S - rsi_translation$breakpoint_R != 0 & + !is.na(rsi_translation$breakpoint_R)) rsi_translation[eucast_disks, "breakpoint_R"] <- rsi_translation[eucast_disks, "breakpoint_R", drop = TRUE] + 1 # Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII: diff --git a/data-raw/snomed.R b/data-raw/snomed.R index be4488fe0..d95af8464 100644 --- a/data-raw/snomed.R +++ b/data-raw/snomed.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -31,8 +31,8 @@ library(tidyverse) # - go to https://phinvads.cdc.gov/vads/ViewValueSet.action?oid=2.16.840.1.114222.4.11.1009 # - check that current online version is higher than SNOMED_VERSION$current_version # - if so, click on 'Download Value Set', choose 'TXT' -snomed <- read_tsv("data-raw/SNOMED_PHVS_Microorganism_CDC_V12.txt", skip = 3) %>% - select(1:2) %>% +snomed <- read_tsv("data-raw/SNOMED_PHVS_Microorganism_CDC_V12.txt", skip = 3) %>% + select(1:2) %>% set_names(c("snomed", "mo")) # save all valid genera, species and subspecies @@ -41,17 +41,21 @@ vctr <- tolower(vctr[vctr %like% "^[a-z]+$"]) # remove all parts of the name that are no valid values in genera, species or subspecies # this takes ~20 seconds -snomed <- snomed %>% - mutate(fullname = vapply(FUN.VALUE = character(1), - # split on space and/or comma - strsplit(tolower(mo), "[ ,]"), - function(x) trimws(paste0(x[x %in% vctr], collapse = " "))), - # remove " group" - fullname = gsub(" group", "", fullname, fixed = TRUE)) +snomed <- snomed %>% + mutate( + fullname = vapply( + FUN.VALUE = character(1), + # split on space and/or comma + strsplit(tolower(mo), "[ ,]"), + function(x) trimws(paste0(x[x %in% vctr], collapse = " ")) + ), + # remove " group" + fullname = gsub(" group", "", fullname, fixed = TRUE) + ) -snomed_keep <- snomed %>% - filter(fullname %in% tolower(c(microorganisms$fullname, microorganisms.old$fullname))) %>% - group_by(fullname_lower = fullname) %>% +snomed_keep <- snomed %>% + filter(fullname %in% tolower(c(microorganisms$fullname, microorganisms.old$fullname))) %>% + group_by(fullname_lower = fullname) %>% summarise(snomed = list(snomed)) message(nrow(snomed_keep), " MO's will get a SNOMED code.") @@ -65,10 +69,9 @@ microorganisms <- microorganisms %>% # join new snomed left_join(snomed_keep) %>% # remove dummy var - select(-fullname_lower) %>% + select(-fullname_lower) %>% AMR:::dataset_UTF8_to_ASCII() # don't forget to update the version number in SNOMED_VERSION in ./R/globals.R! # usethis::use_data(microorganisms, overwrite = TRUE, version = 2, compress = "xz") - diff --git a/data-raw/toxoplasma.R b/data-raw/toxoplasma.R index 91b4f3aef..330ac665c 100644 --- a/data-raw/toxoplasma.R +++ b/data-raw/toxoplasma.R @@ -1,68 +1,77 @@ -microorganisms <- microorganisms |> bind_rows( - # Toxoplasma - data.frame(mo = "P_TXPL_GOND", # species - fullname = "Toxoplasma gondii", - kingdom = "(unknown kingdom)", - phylum = "Apicomplexa", - class = "Conoidasida", - order = "Eucoccidiorida", - family = "Sarcocystidae", - genus = "Toxoplasma", - species = "gondii", - subspecies = "", - rank = "species", - ref = "Nicolle et al., 1908", - species_id = NA_real_, - source = "manually added", - prevalence = 2, - stringsAsFactors = FALSE), - data.frame(mo = "P_TXPL", # genus - fullname = "Toxoplasma", - kingdom = "(unknown kingdom)", - phylum = "Apicomplexa", - class = "Conoidasida", - order = "Eucoccidiorida", - family = "Sarcocystidae", - genus = "Toxoplasma", - species = "", - subspecies = "", - rank = "genus", - ref = "Nicolle et al., 1909", - species_id = NA_real_, - source = "manually added", - prevalence = 2, - stringsAsFactors = FALSE), - data.frame(mo = "[FAM]_SRCCYSTD", # family - fullname = "Sarcocystidae", - kingdom = "(unknown kingdom)", - phylum = "Apicomplexa", - class = "Conoidasida", - order = "Eucoccidiorida", - family = "Sarcocystidae", - genus = "", - species = "", - subspecies = "", - rank = "family", - ref = "Poche, 1913", - species_id = NA_real_, - source = "manually added", - prevalence = 2, - stringsAsFactors = FALSE), - data.frame(mo = "[ORD]_EUCCCDRD", # order - fullname = "Eucoccidiorida", - kingdom = "(unknown kingdom)", - phylum = "Apicomplexa", - class = "Conoidasida", - order = "Eucoccidiorida", - family = "", - genus = "", - species = "", - subspecies = "", - rank = "order", - ref = "Leger et al., 1910", - species_id = NA_real_, - source = "manually added", - prevalence = 2, - stringsAsFactors = FALSE), - ) |> +microorganisms <- microorganisms |> + bind_rows( + # Toxoplasma + data.frame( + mo = "P_TXPL_GOND", # species + fullname = "Toxoplasma gondii", + kingdom = "(unknown kingdom)", + phylum = "Apicomplexa", + class = "Conoidasida", + order = "Eucoccidiorida", + family = "Sarcocystidae", + genus = "Toxoplasma", + species = "gondii", + subspecies = "", + rank = "species", + ref = "Nicolle et al., 1908", + species_id = NA_real_, + source = "manually added", + prevalence = 2, + stringsAsFactors = FALSE + ), + data.frame( + mo = "P_TXPL", # genus + fullname = "Toxoplasma", + kingdom = "(unknown kingdom)", + phylum = "Apicomplexa", + class = "Conoidasida", + order = "Eucoccidiorida", + family = "Sarcocystidae", + genus = "Toxoplasma", + species = "", + subspecies = "", + rank = "genus", + ref = "Nicolle et al., 1909", + species_id = NA_real_, + source = "manually added", + prevalence = 2, + stringsAsFactors = FALSE + ), + data.frame( + mo = "[FAM]_SRCCYSTD", # family + fullname = "Sarcocystidae", + kingdom = "(unknown kingdom)", + phylum = "Apicomplexa", + class = "Conoidasida", + order = "Eucoccidiorida", + family = "Sarcocystidae", + genus = "", + species = "", + subspecies = "", + rank = "family", + ref = "Poche, 1913", + species_id = NA_real_, + source = "manually added", + prevalence = 2, + stringsAsFactors = FALSE + ), + data.frame( + mo = "[ORD]_EUCCCDRD", # order + fullname = "Eucoccidiorida", + kingdom = "(unknown kingdom)", + phylum = "Apicomplexa", + class = "Conoidasida", + order = "Eucoccidiorida", + family = "", + genus = "", + species = "", + subspecies = "", + rank = "order", + ref = "Leger et al., 1910", + species_id = NA_real_, + source = "manually added", + prevalence = 2, + stringsAsFactors = FALSE + ), + ) |> arrange(fullname) diff --git a/inst/tinytest/test-_deprecated.R b/inst/tinytest/test-_deprecated.R index d87856698..6ae233380 100644 --- a/inst/tinytest/test-_deprecated.R +++ b/inst/tinytest/test-_deprecated.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -22,4 +22,3 @@ # Visit our website for the full manual and a complete tutorial about # # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # - diff --git a/inst/tinytest/test-_misc.R b/inst/tinytest/test-_misc.R index c08b8ffd1..6871292cb 100755 --- a/inst/tinytest/test-_misc.R +++ b/inst/tinytest/test-_misc.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # diff --git a/inst/tinytest/test-ab.R b/inst/tinytest/test-ab.R index 47718f0be..0a44cc6fc 100755 --- a/inst/tinytest/test-ab.R +++ b/inst/tinytest/test-ab.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -23,17 +23,21 @@ # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -expect_equal(as.character(as.ab(c("J01FA01", - "J 01 FA 01", - "Erythromycin", - "eryt", - " eryt 123", - "ERYT", - "ERY", - "erytromicine", - "Erythrocin", - "Romycin"))), - rep("ERY", 10)) +expect_equal( + as.character(as.ab(c( + "J01FA01", + "J 01 FA 01", + "Erythromycin", + "eryt", + " eryt 123", + "ERYT", + "ERY", + "erytromicine", + "Erythrocin", + "Romycin" + ))), + rep("ERY", 10) +) expect_identical(class(as.ab("amox")), c("ab", "character")) expect_identical(class(antibiotics$ab), c("ab", "character")) @@ -47,17 +51,25 @@ expect_warning(as.ab("")) expect_stdout(print(as.ab("amox"))) -expect_equal(as.character(as.ab("Phloxapen")), - "FLC") +expect_equal( + as.character(as.ab("Phloxapen")), + "FLC" +) -expect_equal(suppressWarnings(as.character(as.ab(c("Bacteria", "Bacterial")))), - c(NA, "TMP")) +expect_equal( + suppressWarnings(as.character(as.ab(c("Bacteria", "Bacterial")))), + c(NA, "TMP") +) -expect_equal(as.character(as.ab("Amoxy + clavulaanzuur")), - "AMC") +expect_equal( + as.character(as.ab("Amoxy + clavulaanzuur")), + "AMC" +) -expect_equal(as.character(as.ab(c("mreopenem", "co-maoxiclav"))), - c("MEM", "AMC")) +expect_equal( + as.character(as.ab(c("mreopenem", "co-maoxiclav"))), + c("MEM", "AMC") +) expect_message(as.ab("cipro mero")) diff --git a/inst/tinytest/test-ab_from_text.R b/inst/tinytest/test-ab_from_text.R index 1cf7316b4..5787cb904 100644 --- a/inst/tinytest/test-ab_from_text.R +++ b/inst/tinytest/test-ab_from_text.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -23,18 +23,32 @@ # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds")[[1]], - as.ab("Amoxicillin")) -expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", thorough_search = TRUE)[[1]], - as.ab("Amoxicillin")) -expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", thorough_search = FALSE)[[1]], - as.ab("Amoxicillin")) -expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", translate_ab = TRUE)[[1]], - "Amoxicillin") -expect_identical(ab_from_text("administered amoxi/clav and cipro", collapse = ", ")[[1]], - "AMC, CIP") +expect_identical( + ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds")[[1]], + as.ab("Amoxicillin") +) +expect_identical( + ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", thorough_search = TRUE)[[1]], + as.ab("Amoxicillin") +) +expect_identical( + ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", thorough_search = FALSE)[[1]], + as.ab("Amoxicillin") +) +expect_identical( + ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", translate_ab = TRUE)[[1]], + "Amoxicillin" +) +expect_identical( + ab_from_text("administered amoxi/clav and cipro", collapse = ", ")[[1]], + "AMC, CIP" +) -expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "dose")[[1]], - 500) -expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "admin")[[1]], - "oral") +expect_identical( + ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "dose")[[1]], + 500 +) +expect_identical( + ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "admin")[[1]], + "oral" +) diff --git a/inst/tinytest/test-ab_property.R b/inst/tinytest/test-ab_property.R index 6364e7405..b80e4db29 100644 --- a/inst/tinytest/test-ab_property.R +++ b/inst/tinytest/test-ab_property.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -47,8 +47,10 @@ expect_identical(ab_ddd("AMX", "iv"), 3) expect_identical(ab_ddd_units("AMX", "iv"), "g") expect_identical(ab_name(x = c("AMC", "PLB"), language = NULL), c("Amoxicillin/clavulanic acid", "Polymyxin B")) -expect_identical(ab_name(x = c("AMC", "PLB"), tolower = TRUE, language = NULL), - c("amoxicillin/clavulanic acid", "polymyxin B")) +expect_identical( + ab_name(x = c("AMC", "PLB"), tolower = TRUE, language = NULL), + c("amoxicillin/clavulanic acid", "polymyxin B") +) expect_inherits(ab_info("AMX"), "list") @@ -57,25 +59,37 @@ expect_error(ab_name("amox", language = "INVALID")) expect_stdout(print(ab_name("amox", language = NULL))) expect_equal(ab_name("21066-6", language = NULL), "Ampicillin") -expect_equal(ab_loinc("ampicillin"), - c("21066-6", "3355-5", "33562-0", "33919-2", "43883-8", "43884-6", "87604-5")) +expect_equal( + ab_loinc("ampicillin"), + c("21066-6", "3355-5", "33562-0", "33919-2", "43883-8", "43884-6", "87604-5") +) expect_true(ab_url("AMX") %like% "whocc.no") expect_warning(ab_url("ASP")) -expect_identical(colnames(set_ab_names(example_isolates[, 20:25])), - c("cefoxitin", "cefotaxime", "ceftazidime", "ceftriaxone", "gentamicin", "tobramycin")) -expect_identical(colnames(set_ab_names(example_isolates[, 20:25], language = "nl", snake_case = FALSE)), - c("Cefoxitine", "Cefotaxim", "Ceftazidim", "Ceftriaxon", "Gentamicine", "Tobramycine")) -expect_identical(colnames(set_ab_names(example_isolates[, 20:25], property = "atc")), - c("J01DC01", "J01DD01", "J01DD02", "J01DD04", "J01GB03", "J01GB01")) +expect_identical( + colnames(set_ab_names(example_isolates[, 20:25])), + c("cefoxitin", "cefotaxime", "ceftazidime", "ceftriaxone", "gentamicin", "tobramycin") +) +expect_identical( + colnames(set_ab_names(example_isolates[, 20:25], language = "nl", snake_case = FALSE)), + c("Cefoxitine", "Cefotaxim", "Ceftazidim", "Ceftriaxon", "Gentamicine", "Tobramycine") +) +expect_identical( + colnames(set_ab_names(example_isolates[, 20:25], property = "atc")), + c("J01DC01", "J01DD01", "J01DD02", "J01DD04", "J01GB03", "J01GB01") +) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { - expect_identical(example_isolates %>% set_ab_names(), - example_isolates %>% rename_with(set_ab_names)) - expect_true(all(c("SXT", "nitrofurantoin", "fosfomycin", "linezolid", "ciprofloxacin", - "moxifloxacin", "vancomycin", "TEC") %in% - (example_isolates %>% - set_ab_names(NIT:VAN) %>% - colnames()))) + expect_identical( + example_isolates %>% set_ab_names(), + example_isolates %>% rename_with(set_ab_names) + ) + expect_true(all(c( + "SXT", "nitrofurantoin", "fosfomycin", "linezolid", "ciprofloxacin", + "moxifloxacin", "vancomycin", "TEC" + ) %in% + (example_isolates %>% + set_ab_names(NIT:VAN) %>% + colnames()))) } diff --git a/inst/tinytest/test-ab_selectors.R b/inst/tinytest/test-ab_selectors.R index 586cc5d4d..c8d48238f 100644 --- a/inst/tinytest/test-ab_selectors.R +++ b/inst/tinytest/test-ab_selectors.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -76,16 +76,20 @@ expect_equal(nrow(example_isolates[all(c(carbapenems(), aminoglycosides()) == "R expect_equal(nrow(example_isolates[any(carbapenems() == "R"), penicillins()]), 55, tolerance = 0.5) expect_equal(ncol(example_isolates[any(carbapenems() == "R"), penicillins()]), 7, tolerance = 0.5) -x <- data.frame(x = 0, - mo = 0, - gen = "S", - genta = "S", - J01GB03 = "S", - tobra = "S", - Tobracin = "S") +x <- data.frame( + x = 0, + mo = 0, + gen = "S", + genta = "S", + J01GB03 = "S", + tobra = "S", + Tobracin = "S" +) # should have the first hits -expect_identical(colnames(x[, aminoglycosides()]), - c("gen", "tobra")) +expect_identical( + colnames(x[, aminoglycosides()]), + c("gen", "tobra") +) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { expect_equal(example_isolates %>% select(administrable_per_os() & penicillins()) %>% ncol(), 5, tolerance = 0.5) diff --git a/inst/tinytest/test-age.R b/inst/tinytest/test-age.R index 83ad24fbf..934ac107d 100644 --- a/inst/tinytest/test-age.R +++ b/inst/tinytest/test-age.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -23,46 +23,75 @@ # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -expect_equal(age(x = c("1980-01-01", "1985-01-01", "1990-01-01"), - reference = "2019-01-01"), - c(39, 34, 29)) +expect_equal( + age( + x = c("1980-01-01", "1985-01-01", "1990-01-01"), + reference = "2019-01-01" + ), + c(39, 34, 29) +) -expect_equal(age(x = c("2019-01-01", "2019-04-01", "2019-07-01"), - reference = "2019-09-01", - exact = TRUE), - c(0.6656393, 0.4191781, 0.1698630), - tolerance = 0.001) +expect_equal(age( + x = c("2019-01-01", "2019-04-01", "2019-07-01"), + reference = "2019-09-01", + exact = TRUE +), +c(0.6656393, 0.4191781, 0.1698630), +tolerance = 0.001 +) -expect_error(age(x = c("1980-01-01", "1985-01-01", "1990-01-01"), - reference = c("2019-01-01", "2019-01-01"))) +expect_error(age( + x = c("1980-01-01", "1985-01-01", "1990-01-01"), + reference = c("2019-01-01", "2019-01-01") +)) -expect_warning(age(x = c("1980-01-01", "1985-01-01", "1990-01-01"), - reference = "1975-01-01")) +expect_warning(age( + x = c("1980-01-01", "1985-01-01", "1990-01-01"), + reference = "1975-01-01" +)) -expect_warning(age(x = c("1800-01-01", "1805-01-01", "1810-01-01"), - reference = "2019-01-01")) +expect_warning(age( + x = c("1800-01-01", "1805-01-01", "1810-01-01"), + reference = "2019-01-01" +)) -expect_equal(length(age(x = c("2019-01-01", NA), na.rm = TRUE)), - 1) +expect_equal( + length(age(x = c("2019-01-01", NA), na.rm = TRUE)), + 1 +) ages <- c(3, 8, 16, 54, 31, 76, 101, 43, 21) -expect_equal(length(unique(age_groups(ages, 50))), - 2) -expect_equal(length(unique(age_groups(ages, c(50, 60)))), - 3) -expect_identical(class(age_groups(ages, "child")), - c("ordered", "factor")) +expect_equal( + length(unique(age_groups(ages, 50))), + 2 +) +expect_equal( + length(unique(age_groups(ages, c(50, 60)))), + 3 +) +expect_identical( + class(age_groups(ages, "child")), + c("ordered", "factor") +) -expect_identical(class(age_groups(ages, "elderly")), - c("ordered", "factor")) +expect_identical( + class(age_groups(ages, "elderly")), + c("ordered", "factor") +) -expect_identical(class(age_groups(ages, "tens")), - c("ordered", "factor")) +expect_identical( + class(age_groups(ages, "tens")), + c("ordered", "factor") +) -expect_identical(class(age_groups(ages, "fives")), - c("ordered", "factor")) +expect_identical( + class(age_groups(ages, "fives")), + c("ordered", "factor") +) -expect_equal(length(age_groups(c(10, 20, 30, NA), na.rm = TRUE)), - 3) +expect_equal( + length(age_groups(c(10, 20, 30, NA), na.rm = TRUE)), + 3 +) diff --git a/inst/tinytest/test-atc_online.R b/inst/tinytest/test-atc_online.R index 59b154b12..48e55c4f7 100644 --- a/inst/tinytest/test-atc_online.R +++ b/inst/tinytest/test-atc_online.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,12 +24,11 @@ # ==================================================================== # if (AMR:::pkg_is_available("curl", also_load = FALSE) && - AMR:::pkg_is_available("rvest", also_load = FALSE) && - AMR:::pkg_is_available("xml2", also_load = FALSE) && - tryCatch(curl::has_internet(), error = function(e) FALSE)) { + AMR:::pkg_is_available("rvest", also_load = FALSE) && + AMR:::pkg_is_available("xml2", also_load = FALSE) && + tryCatch(curl::has_internet(), error = function(e) FALSE)) { expect_true(length(atc_online_groups(ab_atc("AMX"))) >= 1) expect_equal(atc_online_ddd(ab_atc("AMX"), administration = "O"), 1.5) expect_equal(atc_online_ddd(ab_atc("AMX"), administration = "P"), 3) expect_equal(atc_online_ddd_units("AMX", administration = "P"), "g") - } diff --git a/inst/tinytest/test-availability.R b/inst/tinytest/test-availability.R index 34d0f4b5d..3db9ab656 100644 --- a/inst/tinytest/test-availability.R +++ b/inst/tinytest/test-availability.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -22,5 +22,5 @@ # Visit our website for the full manual and a complete tutorial about # # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # - + expect_inherits(availability(example_isolates), "data.frame") diff --git a/inst/tinytest/test-bug_drug_combinations.R b/inst/tinytest/test-bug_drug_combinations.R index ed7c74449..dbafb8817 100644 --- a/inst/tinytest/test-bug_drug_combinations.R +++ b/inst/tinytest/test-bug_drug_combinations.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -29,8 +29,8 @@ expect_stdout(suppressMessages(print(b))) expect_true(is.data.frame(format(b))) expect_true(is.data.frame(format(b, combine_IR = TRUE, add_ab_group = FALSE))) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { - expect_true(example_isolates %>% - group_by(ward) %>% - bug_drug_combinations(FUN = mo_gramstain) %>% - is.data.frame()) + expect_true(example_isolates %>% + group_by(ward) %>% + bug_drug_combinations(FUN = mo_gramstain) %>% + is.data.frame()) } diff --git a/inst/tinytest/test-count.R b/inst/tinytest/test-count.R index b4db17cb2..262018bfa 100644 --- a/inst/tinytest/test-count.R +++ b/inst/tinytest/test-count.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -31,16 +31,22 @@ expect_equal(count_all(example_isolates$AMX), n_rsi(example_isolates$AMX)) expect_equal(count_R(example_isolates$AMX), 804) expect_equal(count_I(example_isolates$AMX), 3) expect_equal(suppressWarnings(count_S(example_isolates$AMX)), 543) -expect_equal(count_R(example_isolates$AMX) + count_I(example_isolates$AMX), - suppressWarnings(count_IR(example_isolates$AMX))) -expect_equal(suppressWarnings(count_S(example_isolates$AMX)) + count_I(example_isolates$AMX), - count_SI(example_isolates$AMX)) +expect_equal( + count_R(example_isolates$AMX) + count_I(example_isolates$AMX), + suppressWarnings(count_IR(example_isolates$AMX)) +) +expect_equal( + suppressWarnings(count_S(example_isolates$AMX)) + count_I(example_isolates$AMX), + count_SI(example_isolates$AMX) +) # warning for speed loss expect_warning(count_resistant(as.character(example_isolates$AMC))) -expect_warning(count_resistant(example_isolates$AMC, - as.character(example_isolates$GEN))) +expect_warning(count_resistant( + example_isolates$AMC, + as.character(example_isolates$GEN) +)) # check for errors expect_error(count_resistant("test", minimum = "test")) @@ -57,41 +63,53 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { expect_equal(example_isolates %>% count_susceptible(AMC, GEN, only_all_tested = FALSE), 1764) expect_equal(example_isolates %>% count_all(AMC, GEN, only_all_tested = TRUE), 1798) expect_equal(example_isolates %>% count_all(AMC, GEN, only_all_tested = FALSE), 1936) - expect_identical(example_isolates %>% count_all(AMC, GEN, only_all_tested = TRUE), - example_isolates %>% count_susceptible(AMC, GEN, only_all_tested = TRUE) + - example_isolates %>% count_resistant(AMC, GEN, only_all_tested = TRUE)) - + expect_identical( + example_isolates %>% count_all(AMC, GEN, only_all_tested = TRUE), + example_isolates %>% count_susceptible(AMC, GEN, only_all_tested = TRUE) + + example_isolates %>% count_resistant(AMC, GEN, only_all_tested = TRUE) + ) + # count of cases - expect_equal(example_isolates %>% - group_by(ward) %>% - summarise(cipro = count_susceptible(CIP), - genta = count_susceptible(GEN), - combination = count_susceptible(CIP, GEN)) %>% - pull(combination), - c(253, 465, 192, 558)) - + expect_equal( + example_isolates %>% + group_by(ward) %>% + summarise( + cipro = count_susceptible(CIP), + genta = count_susceptible(GEN), + combination = count_susceptible(CIP, GEN) + ) %>% + pull(combination), + c(253, 465, 192, 558) + ) + # count_df expect_equal( example_isolates %>% select(AMX) %>% count_df() %>% pull(value), - c(example_isolates$AMX %>% count_susceptible(), - example_isolates$AMX %>% count_resistant()) + c( + example_isolates$AMX %>% count_susceptible(), + example_isolates$AMX %>% count_resistant() + ) ) expect_equal( example_isolates %>% select(AMX) %>% count_df(combine_IR = TRUE) %>% pull(value), - c(suppressWarnings(example_isolates$AMX %>% count_S()), - suppressWarnings(example_isolates$AMX %>% count_IR())) + c( + suppressWarnings(example_isolates$AMX %>% count_S()), + suppressWarnings(example_isolates$AMX %>% count_IR()) + ) ) expect_equal( example_isolates %>% select(AMX) %>% count_df(combine_SI = FALSE) %>% pull(value), - c(suppressWarnings(example_isolates$AMX %>% count_S()), + c( + suppressWarnings(example_isolates$AMX %>% count_S()), example_isolates$AMX %>% count_I(), - example_isolates$AMX %>% count_R()) + example_isolates$AMX %>% count_R() + ) ) - + # grouping in rsi_calc_df() (= backbone of rsi_df()) - expect_true("ward" %in% (example_isolates %>% - group_by(ward) %>% - select(ward, AMX, CIP, gender) %>% - rsi_df() %>% - colnames())) + expect_true("ward" %in% (example_isolates %>% + group_by(ward) %>% + select(ward, AMX, CIP, gender) %>% + rsi_df() %>% + colnames())) } diff --git a/inst/tinytest/test-data.R b/inst/tinytest/test-data.R index aa36aa35e..4086f6d2e 100644 --- a/inst/tinytest/test-data.R +++ b/inst/tinytest/test-data.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -64,30 +64,42 @@ for (i in seq_len(length(datasets))) { df <- AMR:::MO_lookup expect_true(nrow(df[which(df$prevalence == 1), , drop = FALSE]) < nrow(df[which(df$prevalence == 2), , drop = FALSE])) expect_true(nrow(df[which(df$prevalence == 2), , drop = FALSE]) < nrow(df[which(df$prevalence == 3), , drop = FALSE])) -expect_true(all(c("mo", "fullname", - "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", - "rank", "ref", "species_id", "source", "prevalence", "snomed", - "kingdom_index", "fullname_lower", "g_species") %in% colnames(df))) +expect_true(all(c( + "mo", "fullname", + "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", + "rank", "ref", "species_id", "source", "prevalence", "snomed", + "kingdom_index", "fullname_lower", "g_species" +) %in% colnames(df))) -expect_true(all(c("fullname", "fullname_new", "ref", "prevalence", - "fullname_lower", "g_species") %in% colnames(AMR:::MO.old_lookup))) +expect_true(all(c( + "fullname", "fullname_new", "ref", "prevalence", + "fullname_lower", "g_species" +) %in% colnames(AMR:::MO.old_lookup))) expect_inherits(AMR:::MO_CONS, "mo") -expect_identical(class(catalogue_of_life_version()), - c("catalogue_of_life_version", "list")) +expect_identical( + class(catalogue_of_life_version()), + c("catalogue_of_life_version", "list") +) expect_stdout(print(catalogue_of_life_version())) -uncategorised <- subset(microorganisms, - genus == "Staphylococcus" & - !species %in% c("", "aureus") & - !mo %in% c(AMR:::MO_CONS, AMR:::MO_COPS)) -expect_true(NROW(uncategorised) == 0, - info = ifelse(NROW(uncategorised) == 0, - "All staphylococcal species categorised as CoNS/CoPS.", - paste0("Staphylococcal species not categorised as CoNS/CoPS: S. ", - uncategorised$species, " (", uncategorised$mo, ")"))) +uncategorised <- subset( + microorganisms, + genus == "Staphylococcus" & + !species %in% c("", "aureus") & + !mo %in% c(AMR:::MO_CONS, AMR:::MO_COPS) +) +expect_true(NROW(uncategorised) == 0, + info = ifelse(NROW(uncategorised) == 0, + "All staphylococcal species categorised as CoNS/CoPS.", + paste0( + "Staphylococcal species not categorised as CoNS/CoPS: S. ", + uncategorised$species, " (", uncategorised$mo, ")" + ) + ) +) # THIS WILL CHECK NON-ASCII STRINGS IN ALL FILES: @@ -119,5 +131,5 @@ expect_true(NROW(uncategorised) == 0, # } # ) # } -# x <- check_non_ascii() %>% +# x <- check_non_ascii() %>% # filter(file %unlike% "^(data-raw|docs|git_)") diff --git a/inst/tinytest/test-disk.R b/inst/tinytest/test-disk.R index d840626f6..70fb6b9ef 100755 --- a/inst/tinytest/test-disk.R +++ b/inst/tinytest/test-disk.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # diff --git a/inst/tinytest/test-episode.R b/inst/tinytest/test-episode.R index 330ab0e47..cd4a63a15 100644 --- a/inst/tinytest/test-episode.R +++ b/inst/tinytest/test-episode.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -31,19 +31,28 @@ test_df <- rbind( data.frame( date = as.Date(c("2015-01-01", "2016-02-01", "2016-12-31", "2017-01-01", "2017-02-03")), patient_id = "B" - )) + ) +) -expect_equal(get_episode(test_df$date, 365), - c(1, 1, 2, 2, 2, 3, 3, 4, 1, 2, 2, 2, 3)) -expect_equal(get_episode(test_df$date[which(test_df$patient_id == "A")], 365), - c(1, 1, 2, 2, 2, 2, 3, 4)) -expect_equal(get_episode(test_df$date[which(test_df$patient_id == "B")], 365), - c(1, 2, 2, 2, 3)) +expect_equal( + get_episode(test_df$date, 365), + c(1, 1, 2, 2, 2, 3, 3, 4, 1, 2, 2, 2, 3) +) +expect_equal( + get_episode(test_df$date[which(test_df$patient_id == "A")], 365), + c(1, 1, 2, 2, 2, 2, 3, 4) +) +expect_equal( + get_episode(test_df$date[which(test_df$patient_id == "B")], 365), + c(1, 2, 2, 2, 3) +) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { - expect_identical(test_df %>% group_by(patient_id) %>% mutate(f = is_new_episode(date, 365)) %>% pull(f), - c(TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE)) - + expect_identical( + test_df %>% group_by(patient_id) %>% mutate(f = is_new_episode(date, 365)) %>% pull(f), + c(TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE) + ) + suppressMessages( x <- example_isolates %>% mutate(out = first_isolate(., include_unknown = TRUE, method = "episode-based", info = FALSE)) @@ -51,6 +60,6 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { y <- example_isolates %>% group_by(patient, mo) %>% mutate(out = is_new_episode(date, 365)) - + expect_identical(which(x$out), which(y$out)) } diff --git a/inst/tinytest/test-eucast_rules.R b/inst/tinytest/test-eucast_rules.R index c87641bbd..36692152b 100755 --- a/inst/tinytest/test-eucast_rules.R +++ b/inst/tinytest/test-eucast_rules.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,13 +24,17 @@ # ==================================================================== # # thoroughly check input table -expect_equal(colnames(AMR:::EUCAST_RULES_DF), - c("if_mo_property", "like.is.one_of", "this_value", - "and_these_antibiotics", "have_these_values", - "then_change_these_antibiotics", "to_value", - "reference.rule", "reference.rule_group", - "reference.version", - "note")) +expect_equal( + colnames(AMR:::EUCAST_RULES_DF), + c( + "if_mo_property", "like.is.one_of", "this_value", + "and_these_antibiotics", "have_these_values", + "then_change_these_antibiotics", "to_value", + "reference.rule", "reference.rule_group", + "reference.version", + "note" + ) +) MOs_mentioned <- unique(AMR:::EUCAST_RULES_DF$this_value) MOs_mentioned <- sort(AMR:::trimws(unlist(strsplit(MOs_mentioned[!AMR:::is_valid_regex(MOs_mentioned)], ",", fixed = TRUE)))) MOs_test <- suppressWarnings(suppressMessages(mo_name(MOs_mentioned))) @@ -43,71 +47,103 @@ expect_error(eucast_rules(data.frame(mo = "test"), rules = "invalid rules set")) expect_warning(eucast_rules(data.frame(mo = "Escherichia coli", vancomycin = "S", stringsAsFactors = TRUE))) -expect_identical(colnames(example_isolates), - colnames(suppressWarnings(eucast_rules(example_isolates, info = FALSE)))) +expect_identical( + colnames(example_isolates), + colnames(suppressWarnings(eucast_rules(example_isolates, info = FALSE))) +) expect_stdout(suppressMessages(eucast_rules(example_isolates, info = TRUE))) -a <- data.frame(mo = c("Klebsiella pneumoniae", - "Pseudomonas aeruginosa", - "Enterobacter cloacae"), - amox = "-", # Amoxicillin - stringsAsFactors = FALSE) -b <- data.frame(mo = c("Klebsiella pneumoniae", - "Pseudomonas aeruginosa", - "Enterobacter cloacae"), - amox = "R", # Amoxicillin - stringsAsFactors = FALSE) +a <- data.frame( + mo = c( + "Klebsiella pneumoniae", + "Pseudomonas aeruginosa", + "Enterobacter cloacae" + ), + amox = "-", # Amoxicillin + stringsAsFactors = FALSE +) +b <- data.frame( + mo = c( + "Klebsiella pneumoniae", + "Pseudomonas aeruginosa", + "Enterobacter cloacae" + ), + amox = "R", # Amoxicillin + stringsAsFactors = FALSE +) expect_identical(suppressWarnings(eucast_rules(a, "mo", info = FALSE)), b) expect_stdout(suppressMessages(suppressWarnings(eucast_rules(a, "mo", info = TRUE)))) -a <- data.frame(mo = c("Staphylococcus aureus", - "Streptococcus group A"), - COL = "-", # Colistin - stringsAsFactors = FALSE) -b <- data.frame(mo = c("Staphylococcus aureus", - "Streptococcus group A"), - COL = "R", # Colistin - stringsAsFactors = FALSE) +a <- data.frame( + mo = c( + "Staphylococcus aureus", + "Streptococcus group A" + ), + COL = "-", # Colistin + stringsAsFactors = FALSE +) +b <- data.frame( + mo = c( + "Staphylococcus aureus", + "Streptococcus group A" + ), + COL = "R", # Colistin + stringsAsFactors = FALSE +) expect_equal(suppressWarnings(eucast_rules(a, "mo", info = FALSE)), b) # piperacillin must be R in Enterobacteriaceae when tica is R if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { - expect_equal(suppressWarnings( - example_isolates %>% - filter(mo_family(mo) == "Enterobacteriaceae") %>% - mutate(TIC = as.rsi("R"), - PIP = as.rsi("S")) %>% - eucast_rules(col_mo = "mo", version_expertrules = 3.1, info = FALSE) %>% - pull(PIP) %>% - unique() %>% - as.character()), - "R") + expect_equal( + suppressWarnings( + example_isolates %>% + filter(mo_family(mo) == "Enterobacteriaceae") %>% + mutate( + TIC = as.rsi("R"), + PIP = as.rsi("S") + ) %>% + eucast_rules(col_mo = "mo", version_expertrules = 3.1, info = FALSE) %>% + pull(PIP) %>% + unique() %>% + as.character() + ), + "R" + ) } # azithromycin and clarythromycin must be equal to Erythromycin -a <- suppressWarnings(as.rsi(eucast_rules(data.frame(mo = example_isolates$mo, - ERY = example_isolates$ERY, - AZM = as.rsi("R"), - CLR = factor("R"), - stringsAsFactors = FALSE), - version_expertrules = 3.1, - only_rsi_columns = FALSE)$CLR)) +a <- suppressWarnings(as.rsi(eucast_rules(data.frame( + mo = example_isolates$mo, + ERY = example_isolates$ERY, + AZM = as.rsi("R"), + CLR = factor("R"), + stringsAsFactors = FALSE +), +version_expertrules = 3.1, +only_rsi_columns = FALSE +)$CLR)) b <- example_isolates$ERY -expect_identical(a[!is.na(b)], - b[!is.na(b)]) +expect_identical( + a[!is.na(b)], + b[!is.na(b)] +) # amox is inferred by benzylpenicillin in Kingella kingae expect_equal( suppressWarnings( as.list(eucast_rules( - data.frame(mo = as.mo("Kingella kingae"), - PEN = "S", - AMX = "-", - stringsAsFactors = FALSE) - , info = FALSE))$AMX + data.frame( + mo = as.mo("Kingella kingae"), + PEN = "S", + AMX = "-", + stringsAsFactors = FALSE + ), + info = FALSE + ))$AMX ), - "S") + "S" +) # also test norf if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { @@ -120,25 +156,37 @@ expect_stdout(suppressWarnings(eucast_rules(example_isolates, verbose = TRUE, ru # AmpC de-repressed cephalo mutants expect_identical( - eucast_rules(data.frame(mo = c("Escherichia coli", "Enterobacter cloacae"), - cefotax = as.rsi(c("S", "S"))), - ampc_cephalosporin_resistance = TRUE, - info = FALSE)$cefotax, - as.rsi(c("S", "R"))) + eucast_rules(data.frame( + mo = c("Escherichia coli", "Enterobacter cloacae"), + cefotax = as.rsi(c("S", "S")) + ), + ampc_cephalosporin_resistance = TRUE, + info = FALSE + )$cefotax, + as.rsi(c("S", "R")) +) expect_identical( - eucast_rules(data.frame(mo = c("Escherichia coli", "Enterobacter cloacae"), - cefotax = as.rsi(c("S", "S"))), - ampc_cephalosporin_resistance = NA, - info = FALSE)$cefotax, - as.rsi(c("S", NA))) + eucast_rules(data.frame( + mo = c("Escherichia coli", "Enterobacter cloacae"), + cefotax = as.rsi(c("S", "S")) + ), + ampc_cephalosporin_resistance = NA, + info = FALSE + )$cefotax, + as.rsi(c("S", NA)) +) expect_identical( - eucast_rules(data.frame(mo = c("Escherichia coli", "Enterobacter cloacae"), - cefotax = as.rsi(c("S", "S"))), - ampc_cephalosporin_resistance = NULL, - info = FALSE)$cefotax, - as.rsi(c("S", "S"))) + eucast_rules(data.frame( + mo = c("Escherichia coli", "Enterobacter cloacae"), + cefotax = as.rsi(c("S", "S")) + ), + ampc_cephalosporin_resistance = NULL, + info = FALSE + )$cefotax, + as.rsi(c("S", "S")) +) # EUCAST dosage ----------------------------------------------------------- expect_equal(nrow(eucast_dosage(c("tobra", "genta", "cipro"))), 3) @@ -146,17 +194,22 @@ expect_inherits(eucast_dosage(c("tobra", "genta", "cipro")), "data.frame") -x <- custom_eucast_rules(AMC == "R" & genus == "Klebsiella" ~ aminopenicillins == "R", - AMC == "I" & genus == "Klebsiella" ~ aminopenicillins == "I", - AMX == "S" ~ AMC == "S") +x <- custom_eucast_rules( + AMC == "R" & genus == "Klebsiella" ~ aminopenicillins == "R", + AMC == "I" & genus == "Klebsiella" ~ aminopenicillins == "I", + AMX == "S" ~ AMC == "S" +) expect_stdout(print(x)) expect_stdout(print(c(x, x))) expect_stdout(print(as.list(x, x))) # this custom rules makes 8 changes expect_equal(nrow(eucast_rules(example_isolates, - rules = "custom", - custom_rules = x, - info = FALSE, - verbose = TRUE)), - 8, tolerance = 0.5) + rules = "custom", + custom_rules = x, + info = FALSE, + verbose = TRUE +)), +8, +tolerance = 0.5 +) diff --git a/inst/tinytest/test-first_isolate.R b/inst/tinytest/test-first_isolate.R index 3dadd7f87..2ec4fe530 100755 --- a/inst/tinytest/test-first_isolate.R +++ b/inst/tinytest/test-first_isolate.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,40 +24,59 @@ # ==================================================================== # # all four methods -expect_equal(sum(first_isolate(x = example_isolates, method = "isolate-based", info = TRUE), na.rm = TRUE), - 1984) -expect_equal(sum(first_isolate(x = example_isolates, method = "patient-based", info = TRUE), na.rm = TRUE), - 1265) -expect_equal(sum(first_isolate(x = example_isolates, method = "episode-based", info = TRUE), na.rm = TRUE), - 1300) -expect_equal(sum(first_isolate(x = example_isolates, method = "phenotype-based", info = TRUE), na.rm = TRUE), - 1379) +expect_equal( + sum(first_isolate(x = example_isolates, method = "isolate-based", info = TRUE), na.rm = TRUE), + 1984 +) +expect_equal( + sum(first_isolate(x = example_isolates, method = "patient-based", info = TRUE), na.rm = TRUE), + 1265 +) +expect_equal( + sum(first_isolate(x = example_isolates, method = "episode-based", info = TRUE), na.rm = TRUE), + 1300 +) +expect_equal( + sum(first_isolate(x = example_isolates, method = "phenotype-based", info = TRUE), na.rm = TRUE), + 1379 +) # Phenotype-based, using key antimicrobials -expect_equal(sum(first_isolate(x = example_isolates, - method = "phenotype-based", - type = "keyantimicrobials", - antifungal = NULL, info = TRUE), na.rm = TRUE), - 1395) -expect_equal(sum(first_isolate(x = example_isolates, - method = "phenotype-based", - type = "keyantimicrobials", - antifungal = NULL, info = TRUE, ignore_I = FALSE), na.rm = TRUE), - 1418) +expect_equal( + sum(first_isolate( + x = example_isolates, + method = "phenotype-based", + type = "keyantimicrobials", + antifungal = NULL, info = TRUE + ), na.rm = TRUE), + 1395 +) +expect_equal( + sum(first_isolate( + x = example_isolates, + method = "phenotype-based", + type = "keyantimicrobials", + antifungal = NULL, info = TRUE, ignore_I = FALSE + ), na.rm = TRUE), + 1418 +) # first non-ICU isolates expect_equal( sum( first_isolate(example_isolates, - col_mo = "mo", - col_date = "date", - col_patient_id = "patient", - col_icu = example_isolates$ward == "ICU", - info = TRUE, - icu_exclude = TRUE), - na.rm = TRUE), - 941) + col_mo = "mo", + col_date = "date", + col_patient_id = "patient", + col_icu = example_isolates$ward == "ICU", + info = TRUE, + icu_exclude = TRUE + ), + na.rm = TRUE + ), + 941 +) # set 1500 random observations to be of specimen type 'Urine' random_rows <- sample(x = 1:2000, size = 1500, replace = FALSE) @@ -65,78 +84,98 @@ x <- example_isolates x$specimen <- "Other" x[random_rows, "specimen"] <- "Urine" expect_true( - sum(first_isolate(x = x, - col_date = "date", - col_patient_id = "patient", - col_mo = "mo", - col_specimen = "specimen", - filter_specimen = "Urine", - info = TRUE), na.rm = TRUE) < 1501) + sum(first_isolate( + x = x, + col_date = "date", + col_patient_id = "patient", + col_mo = "mo", + col_specimen = "specimen", + filter_specimen = "Urine", + info = TRUE + ), na.rm = TRUE) < 1501 +) # same, but now exclude ICU expect_true( - sum(first_isolate(x = x, - col_date = "date", - col_patient_id = "patient", - col_mo = "mo", - col_specimen = "specimen", - filter_specimen = "Urine", - col_icu = x$ward == "ICU", - icu_exclude = TRUE, - info = TRUE), na.rm = TRUE) < 1501) + sum(first_isolate( + x = x, + col_date = "date", + col_patient_id = "patient", + col_mo = "mo", + col_specimen = "specimen", + filter_specimen = "Urine", + col_icu = x$ward == "ICU", + icu_exclude = TRUE, + info = TRUE + ), na.rm = TRUE) < 1501 +) # "No isolates found" test_iso <- example_isolates test_iso$specimen <- "test" -expect_message(first_isolate(test_iso, - "date", - "patient", - col_mo = "mo", - col_specimen = "specimen", - filter_specimen = "something_unexisting", - info = TRUE)) +expect_message(first_isolate(test_iso, + "date", + "patient", + col_mo = "mo", + col_specimen = "specimen", + filter_specimen = "something_unexisting", + info = TRUE +)) # printing of exclusion message expect_message(first_isolate(example_isolates, - col_date = "date", - col_mo = "mo", - col_patient_id = "patient", - col_testcode = "gender", - testcodes_exclude = "M", - info = TRUE)) + col_date = "date", + col_mo = "mo", + col_patient_id = "patient", + col_testcode = "gender", + testcodes_exclude = "M", + info = TRUE +)) # errors expect_error(first_isolate("date", "patient", col_mo = "mo")) expect_error(first_isolate(example_isolates, - col_date = "non-existing col", - col_mo = "mo")) + col_date = "non-existing col", + col_mo = "mo" +)) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { # if mo is not an mo class, result should be the same - expect_identical(example_isolates %>% - mutate(mo = as.character(mo)) %>% - first_isolate(col_date = "date", - col_mo = "mo", - col_patient_id = "patient", - info = FALSE), - example_isolates %>% - first_isolate(col_date = "date", - col_mo = "mo", - col_patient_id = "patient", - info = FALSE)) - + expect_identical( + example_isolates %>% + mutate(mo = as.character(mo)) %>% + first_isolate( + col_date = "date", + col_mo = "mo", + col_patient_id = "patient", + info = FALSE + ), + example_isolates %>% + first_isolate( + col_date = "date", + col_mo = "mo", + col_patient_id = "patient", + info = FALSE + ) + ) + # support for WHONET expect_message(example_isolates %>% - select(-patient_id) %>% - mutate(`First name` = "test", - `Last name` = "test", - Sex = "Female") %>% - first_isolate(info = TRUE)) - + select(-patient_id) %>% + mutate( + `First name` = "test", + `Last name` = "test", + Sex = "Female" + ) %>% + first_isolate(info = TRUE)) + # groups - x <- example_isolates %>% group_by(ward) %>% mutate(first = first_isolate()) - y <- example_isolates %>% group_by(ward) %>% mutate(first = first_isolate(.)) + x <- example_isolates %>% + group_by(ward) %>% + mutate(first = first_isolate()) + y <- example_isolates %>% + group_by(ward) %>% + mutate(first = first_isolate(.)) expect_identical(x, y) - } # missing dates should be no problem @@ -144,33 +183,47 @@ df <- example_isolates df[1:100, "date"] <- NA expect_equal( sum( - first_isolate(x = df, - col_date = "date", - col_patient_id = "patient", - col_mo = "mo", - info = TRUE), - na.rm = TRUE), - 1382) + first_isolate( + x = df, + col_date = "date", + col_patient_id = "patient", + col_mo = "mo", + info = TRUE + ), + na.rm = TRUE + ), + 1382 +) # unknown MOs test_unknown <- example_isolates test_unknown$mo <- ifelse(test_unknown$mo == "B_ESCHR_COLI", "UNKNOWN", test_unknown$mo) -expect_equal(sum(first_isolate(test_unknown, include_unknown = FALSE)), - 1108) -expect_equal(sum(first_isolate(test_unknown, include_unknown = TRUE)), - 1591) +expect_equal( + sum(first_isolate(test_unknown, include_unknown = FALSE)), + 1108 +) +expect_equal( + sum(first_isolate(test_unknown, include_unknown = TRUE)), + 1591 +) test_unknown$mo <- ifelse(test_unknown$mo == "UNKNOWN", NA, test_unknown$mo) -expect_equal(sum(first_isolate(test_unknown)), - 1108) +expect_equal( + sum(first_isolate(test_unknown)), + 1108 +) # empty rsi results -expect_equal(sum(first_isolate(example_isolates, include_untested_rsi = FALSE)), - 1366) +expect_equal( + sum(first_isolate(example_isolates, include_untested_rsi = FALSE)), + 1366 +) # shortcuts -expect_identical(filter_first_isolate(example_isolates), - subset(example_isolates, first_isolate(example_isolates))) +expect_identical( + filter_first_isolate(example_isolates), + subset(example_isolates, first_isolate(example_isolates)) +) # notice that all mo's are distinct, so all are TRUE diff --git a/inst/tinytest/test-g.test.R b/inst/tinytest/test-g.test.R index 134d8887e..adbc5de3b 100644 --- a/inst/tinytest/test-g.test.R +++ b/inst/tinytest/test-g.test.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -28,14 +28,16 @@ # example 1: clearfield rice vs. red rice x <- c(772, 1611, 737) expect_equal(g.test(x, p = c(0.25, 0.50, 0.25))$p.value, - 0.12574, - tolerance = 0.0001) + 0.12574, + tolerance = 0.0001 +) # example 2: red crossbills x <- c(1752, 1895) expect_equal(g.test(x)$p.value, - 0.017873, - tolerance = 0.0001) + 0.017873, + tolerance = 0.0001 +) expect_error(g.test(0)) expect_error(g.test(c(0, 1), 0)) @@ -46,18 +48,22 @@ expect_warning(g.test(c(1, 2, 3, 4), p = c(0.25, 0.25, 0.25, 0.24), rescale.p = # INDEPENDENCE x <- as.data.frame( - matrix(data = round(runif(4) * 100000, 0), - ncol = 2, - byrow = TRUE) + matrix( + data = round(runif(4) * 100000, 0), + ncol = 2, + byrow = TRUE + ) ) # fisher.test() is always better for 2x2 tables: expect_warning(g.test(x)) expect_true(suppressWarnings(g.test(x)$p.value) < 1) -expect_warning(g.test(x = c(772, 1611, 737), - y = c(780, 1560, 780), - rescale.p = TRUE)) +expect_warning(g.test( + x = c(772, 1611, 737), + y = c(780, 1560, 780), + rescale.p = TRUE +)) expect_error(g.test(matrix(data = c(-1, -2, -3, -4), ncol = 2, byrow = TRUE))) expect_error(g.test(matrix(data = c(0, 0, 0, 0), ncol = 2, byrow = TRUE))) diff --git a/inst/tinytest/test-get_locale.R b/inst/tinytest/test-get_locale.R index 7cf9a3b73..6e7afcdf1 100644 --- a/inst/tinytest/test-get_locale.R +++ b/inst/tinytest/test-get_locale.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # diff --git a/inst/tinytest/test-ggplot_rsi.R b/inst/tinytest/test-ggplot_rsi.R index 5bfcea450..e54e3f38a 100644 --- a/inst/tinytest/test-ggplot_rsi.R +++ b/inst/tinytest/test-ggplot_rsi.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,14 +24,13 @@ # ==================================================================== # if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0") & AMR:::pkg_is_available("ggplot2")) { - pdf(NULL) # prevent Rplots.pdf being created - + # data should be equal expect_equal( (example_isolates %>% - select(AMC, CIP) %>% - ggplot_rsi())$data %>% + select(AMC, CIP) %>% + ggplot_rsi())$data %>% summarise_all(resistance) %>% as.double(), example_isolates %>% @@ -39,18 +38,18 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0") & AMR:::pkg_is_availa summarise_all(resistance) %>% as.double() ) - + expect_stdout(print(example_isolates %>% - select(AMC, CIP) %>% - ggplot_rsi(x = "interpretation", facet = "antibiotic"))) + select(AMC, CIP) %>% + ggplot_rsi(x = "interpretation", facet = "antibiotic"))) expect_stdout(print(example_isolates %>% - select(AMC, CIP) %>% - ggplot_rsi(x = "antibiotic", facet = "interpretation"))) - + select(AMC, CIP) %>% + ggplot_rsi(x = "antibiotic", facet = "interpretation"))) + expect_equal( (example_isolates %>% - select(AMC, CIP) %>% - ggplot_rsi(x = "interpretation", facet = "antibiotic"))$data %>% + select(AMC, CIP) %>% + ggplot_rsi(x = "interpretation", facet = "antibiotic"))$data %>% summarise_all(resistance) %>% as.double(), example_isolates %>% @@ -58,11 +57,11 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0") & AMR:::pkg_is_availa summarise_all(resistance) %>% as.double() ) - + expect_equal( (example_isolates %>% - select(AMC, CIP) %>% - ggplot_rsi(x = "antibiotic", facet = "interpretation"))$data %>% + select(AMC, CIP) %>% + ggplot_rsi(x = "antibiotic", facet = "interpretation"))$data %>% summarise_all(resistance) %>% as.double(), example_isolates %>% @@ -70,11 +69,11 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0") & AMR:::pkg_is_availa summarise_all(resistance) %>% as.double() ) - + expect_equal( (example_isolates %>% - select(AMC, CIP) %>% - ggplot_rsi(x = "antibiotic", facet = "interpretation"))$data %>% + select(AMC, CIP) %>% + ggplot_rsi(x = "antibiotic", facet = "interpretation"))$data %>% summarise_all(count_resistant) %>% as.double(), example_isolates %>% @@ -82,31 +81,46 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0") & AMR:::pkg_is_availa summarise_all(count_resistant) %>% as.double() ) - + # support for scale_type ab and mo - expect_inherits((data.frame(mo = as.mo(c("e. coli", "s aureus")), - n = c(40, 100)) %>% - ggplot(aes(x = mo, y = n)) + - geom_col())$data, - "data.frame") - expect_inherits((data.frame(ab = as.ab(c("amx", "amc")), - n = c(40, 100)) %>% - ggplot(aes(x = ab, y = n)) + - geom_col())$data, - "data.frame") - - expect_inherits((data.frame(ab = as.ab(c("amx", "amc")), - n = c(40, 100)) %>% - ggplot(aes(x = ab, y = n)) + - geom_col())$data, - "data.frame") - + expect_inherits( + (data.frame( + mo = as.mo(c("e. coli", "s aureus")), + n = c(40, 100) + ) %>% + ggplot(aes(x = mo, y = n)) + + geom_col())$data, + "data.frame" + ) + expect_inherits( + (data.frame( + ab = as.ab(c("amx", "amc")), + n = c(40, 100) + ) %>% + ggplot(aes(x = ab, y = n)) + + geom_col())$data, + "data.frame" + ) + + expect_inherits( + (data.frame( + ab = as.ab(c("amx", "amc")), + n = c(40, 100) + ) %>% + ggplot(aes(x = ab, y = n)) + + geom_col())$data, + "data.frame" + ) + # support for manual colours - expect_inherits((ggplot(data.frame(x = c("Value1", "Value2", "Value3"), - y = c(1, 2, 3), - z = c("Value4", "Value5", "Value6"))) + - geom_col(aes(x = x, y = y, fill = z)) + - scale_rsi_colours(Value4 = "S", Value5 = "I", Value6 = "R"))$data, - "data.frame") - + expect_inherits( + (ggplot(data.frame( + x = c("Value1", "Value2", "Value3"), + y = c(1, 2, 3), + z = c("Value4", "Value5", "Value6") + )) + + geom_col(aes(x = x, y = y, fill = z)) + + scale_rsi_colours(Value4 = "S", Value5 = "I", Value6 = "R"))$data, + "data.frame" + ) } diff --git a/inst/tinytest/test-guess_ab_col.R b/inst/tinytest/test-guess_ab_col.R index fbeae92ca..9f178a242 100644 --- a/inst/tinytest/test-guess_ab_col.R +++ b/inst/tinytest/test-guess_ab_col.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -23,20 +23,36 @@ # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -expect_equal(guess_ab_col(example_isolates, "amox"), - "AMX") -expect_equal(guess_ab_col(example_isolates, "amoxicillin"), - "AMX") -expect_equal(guess_ab_col(example_isolates, "J01AA07"), - "TCY") -expect_equal(guess_ab_col(example_isolates, "tetracycline"), - "TCY") -expect_equal(guess_ab_col(example_isolates, "TETR"), - "TCY") +expect_equal( + guess_ab_col(example_isolates, "amox"), + "AMX" +) +expect_equal( + guess_ab_col(example_isolates, "amoxicillin"), + "AMX" +) +expect_equal( + guess_ab_col(example_isolates, "J01AA07"), + "TCY" +) +expect_equal( + guess_ab_col(example_isolates, "tetracycline"), + "TCY" +) +expect_equal( + guess_ab_col(example_isolates, "TETR"), + "TCY" +) -df <- data.frame(AMP_ND10 = "R", - AMC_ED20 = "S") -expect_equal(guess_ab_col(df, "ampicillin"), - "AMP_ND10") -expect_equal(guess_ab_col(df, "J01CR02"), - "AMC_ED20") +df <- data.frame( + AMP_ND10 = "R", + AMC_ED20 = "S" +) +expect_equal( + guess_ab_col(df, "ampicillin"), + "AMP_ND10" +) +expect_equal( + guess_ab_col(df, "J01CR02"), + "AMC_ED20" +) diff --git a/inst/tinytest/test-italicise_taxonomy.R b/inst/tinytest/test-italicise_taxonomy.R index 1b15d08e3..f9b5a8415 100644 --- a/inst/tinytest/test-italicise_taxonomy.R +++ b/inst/tinytest/test-italicise_taxonomy.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -23,11 +23,17 @@ # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -expect_identical(italicise_taxonomy("test for E. coli"), - "test for *E. coli*") -expect_identical(italicise_taxonomy("test for E. coli"), - italicize_taxonomy("test for E. coli")) +expect_identical( + italicise_taxonomy("test for E. coli"), + "test for *E. coli*" +) +expect_identical( + italicise_taxonomy("test for E. coli"), + italicize_taxonomy("test for E. coli") +) if (AMR:::has_colour()) { - expect_identical(italicise_taxonomy("test for E. coli", type = "ansi"), - "test for \033[3mE. coli\033[23m") + expect_identical( + italicise_taxonomy("test for E. coli", type = "ansi"), + "test for \033[3mE. coli\033[23m" + ) } diff --git a/inst/tinytest/test-join_microorganisms.R b/inst/tinytest/test-join_microorganisms.R index d35a87510..e1e43115f 100755 --- a/inst/tinytest/test-join_microorganisms.R +++ b/inst/tinytest/test-join_microorganisms.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # diff --git a/inst/tinytest/test-key_antimicrobials.R b/inst/tinytest/test-key_antimicrobials.R index 811e6b43a..457b042b8 100644 --- a/inst/tinytest/test-key_antimicrobials.R +++ b/inst/tinytest/test-key_antimicrobials.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # diff --git a/inst/tinytest/test-kurtosis.R b/inst/tinytest/test-kurtosis.R index 85cbe9550..f66061c03 100644 --- a/inst/tinytest/test-kurtosis.R +++ b/inst/tinytest/test-kurtosis.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,19 +24,24 @@ # ==================================================================== # expect_equal(kurtosis(example_isolates$age), - 5.227999, - tolerance = 0.00001) + 5.227999, + tolerance = 0.00001 +) expect_equal(unname(kurtosis(data.frame(example_isolates$age))), - 5.227999, - tolerance = 0.00001) + 5.227999, + tolerance = 0.00001 +) expect_equal(unname(kurtosis(data.frame(example_isolates$age), excess = TRUE)), - 2.227999, - tolerance = 0.00001) + 2.227999, + tolerance = 0.00001 +) expect_equal(kurtosis(matrix(example_isolates$age)), - 5.227999, - tolerance = 0.00001) + 5.227999, + tolerance = 0.00001 +) expect_equal(kurtosis(matrix(example_isolates$age), excess = TRUE), - 2.227999, - tolerance = 0.00001) + 2.227999, + tolerance = 0.00001 +) diff --git a/inst/tinytest/test-like.R b/inst/tinytest/test-like.R index f2a654b9e..d07c3dbdc 100644 --- a/inst/tinytest/test-like.R +++ b/inst/tinytest/test-like.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -32,9 +32,15 @@ expect_true(factor("test") %like% "t") expect_true("test" %like% factor("t")) expect_true(as.factor("test") %like% "TEST") -expect_identical(factor(c("Test case", "Something different", "Yet another thing")) %like% c("case", "diff", "yet"), - c(TRUE, TRUE, TRUE)) -expect_identical("test" %like% c("t", "e", "s", "t"), - c(TRUE, TRUE, TRUE, TRUE)) -expect_identical(factor("test") %like% factor(c("t", "e", "s", "t")), - c(TRUE, TRUE, TRUE, TRUE)) +expect_identical( + factor(c("Test case", "Something different", "Yet another thing")) %like% c("case", "diff", "yet"), + c(TRUE, TRUE, TRUE) +) +expect_identical( + "test" %like% c("t", "e", "s", "t"), + c(TRUE, TRUE, TRUE, TRUE) +) +expect_identical( + factor("test") %like% factor(c("t", "e", "s", "t")), + c(TRUE, TRUE, TRUE, TRUE) +) diff --git a/inst/tinytest/test-mdro.R b/inst/tinytest/test-mdro.R index 1a67009c3..ead48f021 100755 --- a/inst/tinytest/test-mdro.R +++ b/inst/tinytest/test-mdro.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -41,181 +41,222 @@ expect_stdout(outcome <- mdro(example_isolates, "nl", info = TRUE)) expect_identical(class(outcome), c("ordered", "factor")) # example_isolates should have these finding using Dutch guidelines -expect_equal(as.double(table(outcome)), - c(1954, 24, 6)) # 1954 neg, 24 unconfirmed, 6 pos, rest is NA +expect_equal( + as.double(table(outcome)), + c(1954, 24, 6) +) # 1954 neg, 24 unconfirmed, 6 pos, rest is NA -expect_equal(brmo(example_isolates, info = FALSE), - mdro(example_isolates, guideline = "BRMO", info = FALSE)) +expect_equal( + brmo(example_isolates, info = FALSE), + mdro(example_isolates, guideline = "BRMO", info = FALSE) +) # test Dutch P. aeruginosa MDRO expect_equal( - as.character(mdro(data.frame(mo = as.mo("P. aeruginosa"), - cfta = "S", - cipr = "S", - mero = "S", - imip = "S", - gent = "S", - tobr = "S", - pita = "S"), - guideline = "BRMO", - col_mo = "mo", - info = FALSE)), - "Negative") + as.character(mdro(data.frame( + mo = as.mo("P. aeruginosa"), + cfta = "S", + cipr = "S", + mero = "S", + imip = "S", + gent = "S", + tobr = "S", + pita = "S" + ), + guideline = "BRMO", + col_mo = "mo", + info = FALSE + )), + "Negative" +) expect_equal( - as.character(mdro(data.frame(mo = as.mo("P. aeruginosa"), - cefta = "R", - cipr = "R", - mero = "R", - imip = "R", - gent = "R", - tobr = "R", - pita = "R"), - guideline = "BRMO", - col_mo = "mo", - info = FALSE)), - "Positive") + as.character(mdro(data.frame( + mo = as.mo("P. aeruginosa"), + cefta = "R", + cipr = "R", + mero = "R", + imip = "R", + gent = "R", + tobr = "R", + pita = "R" + ), + guideline = "BRMO", + col_mo = "mo", + info = FALSE + )), + "Positive" +) # German 3MRGN and 4MRGN -expect_equal(as.character(mrgn( - data.frame(mo = c("E. coli", "E. coli", "K. pneumoniae", "E. coli", - "A. baumannii", "A. baumannii", "A. baumannii", - "P. aeruginosa", "P. aeruginosa", "P. aeruginosa"), - PIP = c("S", "R", "R", "S", - "S", "R", "R", - "S", "R", "R"), - CTX = c("S", "R", "R", "S", - "R", "R", "R", - "R", "R", "R"), - IPM = c("S", "R", "S", "R", - "R", "R", "S", - "S", "R", "R"), - CIP = c("S", "R", "R", "S", - "R", "R", "R", - "R", "S", "R"), - stringsAsFactors = FALSE))), - c("Negative", "4MRGN", "3MRGN", "4MRGN", "4MRGN", "4MRGN", "3MRGN", "Negative", "3MRGN", "4MRGN")) +expect_equal( + as.character(mrgn( + data.frame( + mo = c( + "E. coli", "E. coli", "K. pneumoniae", "E. coli", + "A. baumannii", "A. baumannii", "A. baumannii", + "P. aeruginosa", "P. aeruginosa", "P. aeruginosa" + ), + PIP = c( + "S", "R", "R", "S", + "S", "R", "R", + "S", "R", "R" + ), + CTX = c( + "S", "R", "R", "S", + "R", "R", "R", + "R", "R", "R" + ), + IPM = c( + "S", "R", "S", "R", + "R", "R", "S", + "S", "R", "R" + ), + CIP = c( + "S", "R", "R", "S", + "R", "R", "R", + "R", "S", "R" + ), + stringsAsFactors = FALSE + ) + )), + c("Negative", "4MRGN", "3MRGN", "4MRGN", "4MRGN", "4MRGN", "3MRGN", "Negative", "3MRGN", "4MRGN") +) # MDR TB expect_equal( # select only rifampicine, mo will be determined automatically (as M. tuberculosis), # number of mono-resistant strains should be equal to number of rifampicine-resistant strains as.double(table(mdr_tb(example_isolates[, "RIF", drop = FALSE])))[2], - count_R(example_isolates$RIF)) + count_R(example_isolates$RIF) +) -x <- data.frame(rifampicin = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), - inh = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), - gatifloxacin = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), - eth = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), - pza = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), - MFX = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), - KAN = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5))) +x <- data.frame( + rifampicin = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), + inh = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), + gatifloxacin = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), + eth = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), + pza = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), + MFX = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)), + KAN = random_rsi(5000, prob_RSI = c(0.4, 0.1, 0.5)) +) expect_true(length(unique(mdr_tb(x))) > 2) # check the guideline by Magiorakos et al. (2012), the default guideline -stau <- data.frame(mo = c("S. aureus", "S. aureus", "S. aureus", "S. aureus"), - GEN = c("R", "R", "S", "R"), - RIF = c("S", "R", "S", "R"), - CPT = c("S", "R", "R", "R"), - OXA = c("S", "R", "R", "R"), - CIP = c("S", "S", "R", "R"), - MFX = c("S", "S", "R", "R"), - SXT = c("S", "S", "R", "R"), - FUS = c("S", "S", "R", "R"), - VAN = c("S", "S", "R", "R"), - TEC = c("S", "S", "R", "R"), - TLV = c("S", "S", "R", "R"), - TGC = c("S", "S", "R", "R"), - CLI = c("S", "S", "R", "R"), - DAP = c("S", "S", "R", "R"), - ERY = c("S", "S", "R", "R"), - LNZ = c("S", "S", "R", "R"), - CHL = c("S", "S", "R", "R"), - FOS = c("S", "S", "R", "R"), - QDA = c("S", "S", "R", "R"), - TCY = c("S", "S", "R", "R"), - DOX = c("S", "S", "R", "R"), - MNO = c("S", "S", "R", "R"), - stringsAsFactors = FALSE) +stau <- data.frame( + mo = c("S. aureus", "S. aureus", "S. aureus", "S. aureus"), + GEN = c("R", "R", "S", "R"), + RIF = c("S", "R", "S", "R"), + CPT = c("S", "R", "R", "R"), + OXA = c("S", "R", "R", "R"), + CIP = c("S", "S", "R", "R"), + MFX = c("S", "S", "R", "R"), + SXT = c("S", "S", "R", "R"), + FUS = c("S", "S", "R", "R"), + VAN = c("S", "S", "R", "R"), + TEC = c("S", "S", "R", "R"), + TLV = c("S", "S", "R", "R"), + TGC = c("S", "S", "R", "R"), + CLI = c("S", "S", "R", "R"), + DAP = c("S", "S", "R", "R"), + ERY = c("S", "S", "R", "R"), + LNZ = c("S", "S", "R", "R"), + CHL = c("S", "S", "R", "R"), + FOS = c("S", "S", "R", "R"), + QDA = c("S", "S", "R", "R"), + TCY = c("S", "S", "R", "R"), + DOX = c("S", "S", "R", "R"), + MNO = c("S", "S", "R", "R"), + stringsAsFactors = FALSE +) expect_equal(as.integer(mdro(stau)), c(1:4)) expect_inherits(mdro(stau, verbose = TRUE), "data.frame") -ente <- data.frame(mo = c("Enterococcus", "Enterococcus", "Enterococcus", "Enterococcus"), - GEH = c("R", "R", "S", "R"), - STH = c("S", "R", "S", "R"), - IPM = c("S", "R", "R", "R"), - MEM = c("S", "R", "R", "R"), - DOR = c("S", "S", "R", "R"), - CIP = c("S", "S", "R", "R"), - LVX = c("S", "S", "R", "R"), - MFX = c("S", "S", "R", "R"), - VAN = c("S", "S", "R", "R"), - TEC = c("S", "S", "R", "R"), - TGC = c("S", "S", "R", "R"), - DAP = c("S", "S", "R", "R"), - LNZ = c("S", "S", "R", "R"), - AMP = c("S", "S", "R", "R"), - QDA = c("S", "S", "R", "R"), - DOX = c("S", "S", "R", "R"), - MNO = c("S", "S", "R", "R"), - stringsAsFactors = FALSE) +ente <- data.frame( + mo = c("Enterococcus", "Enterococcus", "Enterococcus", "Enterococcus"), + GEH = c("R", "R", "S", "R"), + STH = c("S", "R", "S", "R"), + IPM = c("S", "R", "R", "R"), + MEM = c("S", "R", "R", "R"), + DOR = c("S", "S", "R", "R"), + CIP = c("S", "S", "R", "R"), + LVX = c("S", "S", "R", "R"), + MFX = c("S", "S", "R", "R"), + VAN = c("S", "S", "R", "R"), + TEC = c("S", "S", "R", "R"), + TGC = c("S", "S", "R", "R"), + DAP = c("S", "S", "R", "R"), + LNZ = c("S", "S", "R", "R"), + AMP = c("S", "S", "R", "R"), + QDA = c("S", "S", "R", "R"), + DOX = c("S", "S", "R", "R"), + MNO = c("S", "S", "R", "R"), + stringsAsFactors = FALSE +) expect_equal(as.integer(mdro(ente)), c(1:4)) expect_inherits(mdro(ente, verbose = TRUE), "data.frame") -entero <- data.frame(mo = c("E. coli", "E. coli", "E. coli", "E. coli"), - GEN = c("R", "R", "S", "R"), TOB = c("S", "R", "S", "R"), - AMK = c("S", "R", "R", "R"), NET = c("S", "R", "R", "R"), - CPT = c("S", "R", "R", "R"), TCC = c("S", "R", "R", "R"), - TZP = c("S", "S", "R", "R"), ETP = c("S", "S", "R", "R"), - IPM = c("S", "S", "R", "R"), MEM = c("S", "S", "R", "R"), - DOR = c("S", "S", "R", "R"), CZO = c("S", "S", "R", "R"), - CXM = c("S", "S", "R", "R"), CTX = c("S", "S", "R", "R"), - CAZ = c("S", "S", "R", "R"), FEP = c("S", "S", "R", "R"), - FOX = c("S", "S", "R", "R"), CTT = c("S", "S", "R", "R"), - CIP = c("S", "S", "R", "R"), SXT = c("S", "S", "R", "R"), - TGC = c("S", "S", "R", "R"), ATM = c("S", "S", "R", "R"), - AMP = c("S", "S", "R", "R"), AMC = c("S", "S", "R", "R"), - SAM = c("S", "S", "R", "R"), CHL = c("S", "S", "R", "R"), - FOS = c("S", "S", "R", "R"), COL = c("S", "S", "R", "R"), - TCY = c("S", "S", "R", "R"), DOX = c("S", "S", "R", "R"), - MNO = c("S", "S", "R", "R"), - stringsAsFactors = FALSE) +entero <- data.frame( + mo = c("E. coli", "E. coli", "E. coli", "E. coli"), + GEN = c("R", "R", "S", "R"), TOB = c("S", "R", "S", "R"), + AMK = c("S", "R", "R", "R"), NET = c("S", "R", "R", "R"), + CPT = c("S", "R", "R", "R"), TCC = c("S", "R", "R", "R"), + TZP = c("S", "S", "R", "R"), ETP = c("S", "S", "R", "R"), + IPM = c("S", "S", "R", "R"), MEM = c("S", "S", "R", "R"), + DOR = c("S", "S", "R", "R"), CZO = c("S", "S", "R", "R"), + CXM = c("S", "S", "R", "R"), CTX = c("S", "S", "R", "R"), + CAZ = c("S", "S", "R", "R"), FEP = c("S", "S", "R", "R"), + FOX = c("S", "S", "R", "R"), CTT = c("S", "S", "R", "R"), + CIP = c("S", "S", "R", "R"), SXT = c("S", "S", "R", "R"), + TGC = c("S", "S", "R", "R"), ATM = c("S", "S", "R", "R"), + AMP = c("S", "S", "R", "R"), AMC = c("S", "S", "R", "R"), + SAM = c("S", "S", "R", "R"), CHL = c("S", "S", "R", "R"), + FOS = c("S", "S", "R", "R"), COL = c("S", "S", "R", "R"), + TCY = c("S", "S", "R", "R"), DOX = c("S", "S", "R", "R"), + MNO = c("S", "S", "R", "R"), + stringsAsFactors = FALSE +) expect_equal(as.integer(mdro(entero)), c(1:4)) expect_inherits(mdro(entero, verbose = TRUE), "data.frame") -pseud <- data.frame(mo = c("P. aeruginosa", "P. aeruginosa", "P. aeruginosa", "P. aeruginosa"), - GEN = c("R", "R", "S", "R"), TOB = c("S", "S", "S", "R"), - AMK = c("S", "S", "R", "R"), NET = c("S", "S", "R", "R"), - IPM = c("S", "R", "R", "R"), MEM = c("S", "S", "R", "R"), - DOR = c("S", "S", "R", "R"), CAZ = c("S", "S", "R", "R"), - FEP = c("S", "R", "R", "R"), CIP = c("S", "S", "R", "R"), - LVX = c("S", "S", "R", "R"), TCC = c("S", "S", "R", "R"), - TZP = c("S", "S", "R", "R"), ATM = c("S", "S", "R", "R"), - FOS = c("S", "S", "R", "R"), COL = c("S", "S", "R", "R"), - PLB = c("S", "S", "R", "R"), - stringsAsFactors = FALSE) +pseud <- data.frame( + mo = c("P. aeruginosa", "P. aeruginosa", "P. aeruginosa", "P. aeruginosa"), + GEN = c("R", "R", "S", "R"), TOB = c("S", "S", "S", "R"), + AMK = c("S", "S", "R", "R"), NET = c("S", "S", "R", "R"), + IPM = c("S", "R", "R", "R"), MEM = c("S", "S", "R", "R"), + DOR = c("S", "S", "R", "R"), CAZ = c("S", "S", "R", "R"), + FEP = c("S", "R", "R", "R"), CIP = c("S", "S", "R", "R"), + LVX = c("S", "S", "R", "R"), TCC = c("S", "S", "R", "R"), + TZP = c("S", "S", "R", "R"), ATM = c("S", "S", "R", "R"), + FOS = c("S", "S", "R", "R"), COL = c("S", "S", "R", "R"), + PLB = c("S", "S", "R", "R"), + stringsAsFactors = FALSE +) expect_equal(as.integer(mdro(pseud)), c(1:4)) expect_inherits(mdro(pseud, verbose = TRUE), "data.frame") -acin <- data.frame(mo = c("A. baumannii", "A. baumannii", "A. baumannii", "A. baumannii"), - GEN = c("R", "R", "S", "R"), TOB = c("S", "R", "S", "R"), - AMK = c("S", "R", "R", "R"), NET = c("S", "R", "R", "R"), - IPM = c("S", "S", "R", "R"), MEM = c("S", "R", "R", "R"), - DOR = c("S", "S", "R", "R"), CIP = c("S", "S", "R", "R"), - LVX = c("S", "S", "R", "R"), TZP = c("S", "S", "R", "R"), - TCC = c("S", "S", "R", "R"), CTX = c("S", "S", "R", "R"), - CRO = c("S", "S", "R", "R"), CAZ = c("S", "S", "R", "R"), - FEP = c("S", "R", "R", "R"), SXT = c("S", "S", "R", "R"), - SAM = c("S", "S", "R", "R"), COL = c("S", "S", "R", "R"), - PLB = c("S", "S", "R", "R"), TCY = c("S", "S", "R", "R"), - DOX = c("S", "S", "R", "R"), MNO = c("S", "S", "R", "R"), - stringsAsFactors = FALSE) +acin <- data.frame( + mo = c("A. baumannii", "A. baumannii", "A. baumannii", "A. baumannii"), + GEN = c("R", "R", "S", "R"), TOB = c("S", "R", "S", "R"), + AMK = c("S", "R", "R", "R"), NET = c("S", "R", "R", "R"), + IPM = c("S", "S", "R", "R"), MEM = c("S", "R", "R", "R"), + DOR = c("S", "S", "R", "R"), CIP = c("S", "S", "R", "R"), + LVX = c("S", "S", "R", "R"), TZP = c("S", "S", "R", "R"), + TCC = c("S", "S", "R", "R"), CTX = c("S", "S", "R", "R"), + CRO = c("S", "S", "R", "R"), CAZ = c("S", "S", "R", "R"), + FEP = c("S", "R", "R", "R"), SXT = c("S", "S", "R", "R"), + SAM = c("S", "S", "R", "R"), COL = c("S", "S", "R", "R"), + PLB = c("S", "S", "R", "R"), TCY = c("S", "S", "R", "R"), + DOX = c("S", "S", "R", "R"), MNO = c("S", "S", "R", "R"), + stringsAsFactors = FALSE +) expect_equal(as.integer(mdro(acin)), c(1:4)) expect_inherits(mdro(acin, verbose = TRUE), "data.frame") # custom rules custom <- custom_mdro_guideline("CIP == 'R' & age > 60" ~ "Elderly Type A", - "ERY == 'R' & age > 60" ~ "Elderly Type B", - as_factor = TRUE) + "ERY == 'R' & age > 60" ~ "Elderly Type B", + as_factor = TRUE +) expect_stdout(print(custom)) expect_stdout(print(c(custom, custom))) expect_stdout(print(as.list(custom, custom))) @@ -229,9 +270,10 @@ expect_error(custom_mdro_guideline("test")) expect_error(custom_mdro_guideline("test" ~ c(1:3))) expect_error(custom_mdro_guideline("test" ~ A)) expect_warning(mdro(example_isolates, - # since `test` gives an error, it will be ignored with a warning - guideline = custom_mdro_guideline(test ~ "A"), - info = FALSE)) + # since `test` gives an error, it will be ignored with a warning + guideline = custom_mdro_guideline(test ~ "A"), + info = FALSE +)) # print groups if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { diff --git a/inst/tinytest/test-mic.R b/inst/tinytest/test-mic.R index 714bf5dde..84fe99729 100755 --- a/inst/tinytest/test-mic.R +++ b/inst/tinytest/test-mic.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -33,9 +33,12 @@ expect_true(is.mic(as.mic(8))) expect_equal(as.double(as.mic(">=32")), 32) expect_equal(as.numeric(as.mic(">=32")), 32) -expect_equal(as.integer(as.mic(">=32")), # should be factor level, not the MIC - as.integer(factor(as.character(">=32"), - levels = levels(as.mic(">=32"))))) +expect_equal( + as.integer(as.mic(">=32")), # should be factor level, not the MIC + as.integer(factor(as.character(">=32"), + levels = levels(as.mic(">=32")) + )) +) expect_equal(suppressWarnings(as.logical(as.mic("INVALID VALUE"))), NA) # all levels should be valid MICs @@ -131,7 +134,7 @@ suppressWarnings(expect_identical(el1 + el2, el1_double + el2_double)) suppressWarnings(expect_identical(el1 - el2, el1_double - el2_double)) suppressWarnings(expect_identical(el1 * el2, el1_double * el2_double)) suppressWarnings(expect_identical(el1 / el2, el1_double / el2_double)) -suppressWarnings(expect_identical(el1 ^ el2, el1_double ^ el2_double)) +suppressWarnings(expect_identical(el1^el2, el1_double^el2_double)) suppressWarnings(expect_identical(el1 %% el2, el1_double %% el2_double)) suppressWarnings(expect_identical(el1 %/% el2, el1_double %/% el2_double)) suppressWarnings(expect_identical(el1 & el2, el1_double & el2_double)) diff --git a/inst/tinytest/test-mo.R b/inst/tinytest/test-mo.R index 8083cf2ad..2475aa985 100644 --- a/inst/tinytest/test-mo.R +++ b/inst/tinytest/test-mo.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -28,7 +28,8 @@ expect_identical(as.character(MOs$mo), as.character(as.mo(MOs$mo))) expect_identical( as.character(as.mo(c("E. coli", "H. influenzae"))), - c("B_ESCHR_COLI", "B_HMPHL_INFL")) + c("B_ESCHR_COLI", "B_HMPHL_INFL") +) expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI") expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI") @@ -79,29 +80,39 @@ expect_equal(as.character(as.mo(c("Gram negative", "Gram positive"))), c("B_GRAM # prevalent MO expect_identical( suppressWarnings(as.character( - as.mo(c("stau", - "STAU", - "staaur", - "S. aureus", - "S aureus", - "Sthafilokkockus aureeuzz", - "Staphylococcus aureus", - "MRSA", - "VISA", - "meth.-resis. S. aureus (MRSA)")))), - rep("B_STPHY_AURS", 10)) + as.mo(c( + "stau", + "STAU", + "staaur", + "S. aureus", + "S aureus", + "Sthafilokkockus aureeuzz", + "Staphylococcus aureus", + "MRSA", + "VISA", + "meth.-resis. S. aureus (MRSA)" + )) + )), + rep("B_STPHY_AURS", 10) +) expect_identical( as.character( - as.mo(c("EHEC", "EPEC", "EIEC", "STEC", "ATEC", "UPEC"))), - rep("B_ESCHR_COLI", 6)) + as.mo(c("EHEC", "EPEC", "EIEC", "STEC", "ATEC", "UPEC")) + ), + rep("B_ESCHR_COLI", 6) +) # unprevalent MO expect_identical( as.character( - as.mo(c("parnod", - "P. nodosa", - "P nodosa", - "Paraburkholderia nodosa"))), - rep("B_PRBRK_NODS", 4)) + as.mo(c( + "parnod", + "P. nodosa", + "P nodosa", + "Paraburkholderia nodosa" + )) + ), + rep("B_PRBRK_NODS", 4) +) # empty values expect_identical(as.character(as.mo(c("", " ", NA, NaN))), rep(NA_character_, 4)) @@ -109,40 +120,42 @@ expect_identical(as.character(as.mo(" ")), NA_character_) # too few characters expect_warning(as.mo("ab")) -expect_equal(suppressWarnings(as.character(as.mo(c("Qq species", "", "CRSM", "K. pneu rhino", "esco")))), - c("UNKNOWN", NA_character_, "B_STNTR_MLTP", "B_KLBSL_PNMN_RHNS", "B_ESCHR_COLI")) +expect_equal( + suppressWarnings(as.character(as.mo(c("Qq species", "", "CRSM", "K. pneu rhino", "esco")))), + c("UNKNOWN", NA_character_, "B_STNTR_MLTP", "B_KLBSL_PNMN_RHNS", "B_ESCHR_COLI") +) # check for Becker classification -expect_identical(as.character(as.mo("S. epidermidis", Becker = FALSE)), "B_STPHY_EPDR") -expect_identical(as.character(as.mo("S. epidermidis", Becker = TRUE)), "B_STPHY_CONS") -expect_identical(as.character(as.mo("STAEPI", Becker = TRUE)), "B_STPHY_CONS") +expect_identical(as.character(as.mo("S. epidermidis", Becker = FALSE)), "B_STPHY_EPDR") +expect_identical(as.character(as.mo("S. epidermidis", Becker = TRUE)), "B_STPHY_CONS") +expect_identical(as.character(as.mo("STAEPI", Becker = TRUE)), "B_STPHY_CONS") expect_identical(as.character(as.mo("Sta intermedius", Becker = FALSE)), "B_STPHY_INTR") -expect_identical(as.character(as.mo("Sta intermedius", Becker = TRUE)), "B_STPHY_COPS") -expect_identical(as.character(as.mo("STAINT", Becker = TRUE)), "B_STPHY_COPS") +expect_identical(as.character(as.mo("Sta intermedius", Becker = TRUE)), "B_STPHY_COPS") +expect_identical(as.character(as.mo("STAINT", Becker = TRUE)), "B_STPHY_COPS") # aureus must only be influenced if Becker = "all" expect_identical(as.character(as.mo("STAAUR", Becker = FALSE)), "B_STPHY_AURS") -expect_identical(as.character(as.mo("STAAUR", Becker = TRUE)), "B_STPHY_AURS") +expect_identical(as.character(as.mo("STAAUR", Becker = TRUE)), "B_STPHY_AURS") expect_identical(as.character(as.mo("STAAUR", Becker = "all")), "B_STPHY_COPS") # check for Lancefield classification -expect_identical(as.character(as.mo("S. pyogenes", Lancefield = FALSE)), "B_STRPT_PYGN") -expect_identical(as.character(as.mo("S. pyogenes", Lancefield = TRUE)), "B_STRPT_GRPA") -expect_identical(as.character(as.mo("STCPYO", Lancefield = TRUE)), "B_STRPT_GRPA") # group A -expect_identical(as.character(as.mo("S. agalactiae", Lancefield = FALSE)), "B_STRPT_AGLC") -expect_identical(as.character(as.mo("S. agalactiae", Lancefield = TRUE)), "B_STRPT_GRPB") # group B +expect_identical(as.character(as.mo("S. pyogenes", Lancefield = FALSE)), "B_STRPT_PYGN") +expect_identical(as.character(as.mo("S. pyogenes", Lancefield = TRUE)), "B_STRPT_GRPA") +expect_identical(as.character(as.mo("STCPYO", Lancefield = TRUE)), "B_STRPT_GRPA") # group A +expect_identical(as.character(as.mo("S. agalactiae", Lancefield = FALSE)), "B_STRPT_AGLC") +expect_identical(as.character(as.mo("S. agalactiae", Lancefield = TRUE)), "B_STRPT_GRPB") # group B expect_identical(as.character(suppressWarnings(as.mo("estreptococos grupo B"))), "B_STRPT_GRPB") expect_identical(as.character(as.mo("S. equisimilis", Lancefield = FALSE)), "B_STRPT_DYSG_EQSM") -expect_identical(as.character(as.mo("S. equisimilis", Lancefield = TRUE)), "B_STRPT_GRPC") # group C +expect_identical(as.character(as.mo("S. equisimilis", Lancefield = TRUE)), "B_STRPT_GRPC") # group C # Enterococci must only be influenced if Lancefield = "all" -expect_identical(as.character(as.mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_FACM") -expect_identical(as.character(as.mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_FACM") -expect_identical(as.character(as.mo("E. faecium", Lancefield = "all")), "B_STRPT_GRPD") # group D -expect_identical(as.character(as.mo("S. anginosus", Lancefield = FALSE)), "B_STRPT_ANGN") -expect_identical(as.character(as.mo("S. anginosus", Lancefield = TRUE)), "B_STRPT_GRPF") # group F -expect_identical(as.character(as.mo("S. sanguinis", Lancefield = FALSE)), "B_STRPT_SNGN") -expect_identical(as.character(as.mo("S. sanguinis", Lancefield = TRUE)), "B_STRPT_GRPH") # group H -expect_identical(as.character(as.mo("S. salivarius", Lancefield = FALSE)), "B_STRPT_SLVR") -expect_identical(as.character(as.mo("S. salivarius", Lancefield = TRUE)), "B_STRPT_GRPK") # group K +expect_identical(as.character(as.mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_FACM") +expect_identical(as.character(as.mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_FACM") +expect_identical(as.character(as.mo("E. faecium", Lancefield = "all")), "B_STRPT_GRPD") # group D +expect_identical(as.character(as.mo("S. anginosus", Lancefield = FALSE)), "B_STRPT_ANGN") +expect_identical(as.character(as.mo("S. anginosus", Lancefield = TRUE)), "B_STRPT_GRPF") # group F +expect_identical(as.character(as.mo("S. sanguinis", Lancefield = FALSE)), "B_STRPT_SNGN") +expect_identical(as.character(as.mo("S. sanguinis", Lancefield = TRUE)), "B_STRPT_GRPH") # group H +expect_identical(as.character(as.mo("S. salivarius", Lancefield = FALSE)), "B_STRPT_SLVR") +expect_identical(as.character(as.mo("S. salivarius", Lancefield = TRUE)), "B_STRPT_GRPK") # group K if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { # select with one column @@ -153,9 +166,12 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { select(genus) %>% as.mo() %>% as.character(), - c("B_ESCHR", "B_ESCHR", "B_STPHY", "B_STPHY", "B_STPHY", - "B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY")) - + c( + "B_ESCHR", "B_ESCHR", "B_STPHY", "B_STPHY", "B_STPHY", + "B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY" + ) + ) + # select with two columns expect_identical( example_isolates %>% @@ -165,14 +181,17 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { slice(1:10) %>% left_join_microorganisms() %>% select(genus, species) %>% - as.mo()) - + as.mo() + ) + # too many columns expect_error(example_isolates %>% select(1:3) %>% as.mo()) - + # test pull - expect_equal(nrow(example_isolates %>% mutate(mo = as.mo(mo))), - 2000) + expect_equal( + nrow(example_isolates %>% mutate(mo = as.mo(mo))), + 2000 + ) expect_true(example_isolates %>% pull(mo) %>% is.mo()) } @@ -183,12 +202,16 @@ expect_warning(as.mo(c("INVALID", "Yeah, unknown"))) expect_stdout(print(as.mo(c("B_ESCHR_COLI", NA)))) # test data.frame -expect_equal(nrow(data.frame(test = as.mo("B_ESCHR_COLI"))), - 1) +expect_equal( + nrow(data.frame(test = as.mo("B_ESCHR_COLI"))), + 1 +) # check empty values -expect_equal(as.character(suppressWarnings(as.mo(""))), - NA_character_) +expect_equal( + as.character(suppressWarnings(as.mo(""))), + NA_character_ +) # check less prevalent MOs expect_equal(as.character(as.mo("Gomphosphaeria aponina delicatula")), "B_GMPHS_APNN_DLCT") @@ -215,38 +238,56 @@ expect_equal(suppressMessages(as.character(as.mo("unexisting staphy", allow_unce expect_equal(suppressMessages(as.character(as.mo(c("s aure THISISATEST", "Staphylococcus aureus unexisting"), allow_uncertain = 3))), c("B_STPHY_AURS_AURS", "B_STPHY_AURS_AURS")) # predefined reference_df -expect_equal(as.character(as.mo("TestingOwnID", - reference_df = data.frame(mycol = "TestingOwnID", mo = "B_ESCHR_COLI"))), - "B_ESCHR_COLI") -expect_equal(as.character(as.mo(c("TestingOwnID", "E. coli"), - reference_df = data.frame(mycol = "TestingOwnID", mo = "B_ESCHR_COLI"))), - c("B_ESCHR_COLI", "B_ESCHR_COLI")) +expect_equal( + as.character(as.mo("TestingOwnID", + reference_df = data.frame(mycol = "TestingOwnID", mo = "B_ESCHR_COLI") + )), + "B_ESCHR_COLI" +) +expect_equal( + as.character(as.mo(c("TestingOwnID", "E. coli"), + reference_df = data.frame(mycol = "TestingOwnID", mo = "B_ESCHR_COLI") + )), + c("B_ESCHR_COLI", "B_ESCHR_COLI") +) expect_warning(as.mo("TestingOwnID", reference_df = NULL)) expect_error(as.mo("E. coli", reference_df = data.frame(mycol = "TestingOwnID"))) # combination of existing mo and other code -expect_identical(as.character(as.mo(c("B_ESCHR_COL", "ESCCOL"))), - c("B_ESCHR_COLI", "B_ESCHR_COLI")) +expect_identical( + as.character(as.mo(c("B_ESCHR_COL", "ESCCOL"))), + c("B_ESCHR_COLI", "B_ESCHR_COLI") +) # from different sources -expect_equal(as.character(as.mo( - c("PRTMIR", "bclcer", "B_ESCHR_COLI"))), - c("B_PROTS_MRBL", "B_BCLLS_CERS", "B_ESCHR_COLI")) +expect_equal( + as.character(as.mo( + c("PRTMIR", "bclcer", "B_ESCHR_COLI") + )), + c("B_PROTS_MRBL", "B_BCLLS_CERS", "B_ESCHR_COLI") +) # hard to find -expect_equal(as.character(suppressMessages(as.mo( - c("Microbacterium paraoxidans", - "Streptococcus suis (bovis gr)", - "Raoultella (here some text) terrigena")))), - c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_RLTLL_TRRG")) +expect_equal( + as.character(suppressMessages(as.mo( + c( + "Microbacterium paraoxidans", + "Streptococcus suis (bovis gr)", + "Raoultella (here some text) terrigena" + ) + ))), + c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_RLTLL_TRRG") +) expect_stdout(print(mo_uncertainties())) x <- as.mo("S. aur") # many hits expect_stdout(print(mo_uncertainties())) # Salmonella (City) are all actually Salmonella enterica spp (City) -expect_equal(suppressMessages(as.mo(c("Salmonella Goettingen", "Salmonella Typhimurium", "Salmonella Group A"))), - as.mo(c("Salmonella enterica", "Salmonella enterica", "Salmonella"))) +expect_equal( + suppressMessages(as.mo(c("Salmonella Goettingen", "Salmonella Typhimurium", "Salmonella Group A"))), + as.mo(c("Salmonella enterica", "Salmonella enterica", "Salmonella")) +) # no viruses expect_equal(as.character(as.mo("Virus")), NA_character_) @@ -255,13 +296,17 @@ expect_equal(as.character(as.mo("Virus")), NA_character_) expect_equal(length(summary(example_isolates$mo)), 6) # WHONET codes and NA/NaN -expect_equal(as.character(as.mo(c("xxx", "na", "nan"), debug = TRUE)), - rep(NA_character_, 3)) +expect_equal( + as.character(as.mo(c("xxx", "na", "nan"), debug = TRUE)), + rep(NA_character_, 3) +) expect_equal(as.character(as.mo("con")), "UNKNOWN") expect_equal(as.character(as.mo("xxx")), NA_character_) expect_equal(as.character(as.mo(c("xxx", "con", "eco"))), c(NA_character_, "UNKNOWN", "B_ESCHR_COLI")) -expect_equal(as.character(as.mo(c("other", "none", "unknown"))), - rep("UNKNOWN", 3)) +expect_equal( + as.character(as.mo(c("other", "none", "unknown"))), + rep("UNKNOWN", 3) +) expect_null(mo_failures()) @@ -271,11 +316,15 @@ expect_error(translate_allow_uncertain(5)) expect_stdout(print(suppressMessages(suppressWarnings(as.mo("kshgcjkhsdgkshjdfsfvsdfv", debug = TRUE, allow_uncertain = 3))))) # ..coccus -expect_equal(as.character(as.mo(c("meningococ", "gonococ", "pneumococ"))), - c("B_NESSR_MNNG", "B_NESSR_GNRR", "B_STRPT_PNMN")) +expect_equal( + as.character(as.mo(c("meningococ", "gonococ", "pneumococ"))), + c("B_NESSR_MNNG", "B_NESSR_GNRR", "B_STRPT_PNMN") +) # yeasts and fungi -expect_equal(suppressWarnings(as.character(as.mo(c("yeasts", "fungi")))), - c("F_YEAST", "F_FUNGUS")) +expect_equal( + suppressWarnings(as.character(as.mo(c("yeasts", "fungi")))), + c("F_YEAST", "F_FUNGUS") +) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { # print tibble @@ -292,8 +341,10 @@ expect_warning(x[[1]] <- "invalid code") expect_warning(c(x[1], "test")) # ignoring patterns -expect_equal(as.character(as.mo(c("E. coli", "E. coli ignorethis"), ignore_pattern = "this")), - c("B_ESCHR_COLI", NA)) +expect_equal( + as.character(as.mo(c("E. coli", "E. coli ignorethis"), ignore_pattern = "this")), + c("B_ESCHR_COLI", NA) +) # frequency tables if (AMR:::pkg_is_available("cleaner")) { diff --git a/inst/tinytest/test-mo_property.R b/inst/tinytest/test-mo_property.R index de4904b01..2eac9d6ab 100644 --- a/inst/tinytest/test-mo_property.R +++ b/inst/tinytest/test-mo_property.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -45,15 +45,19 @@ expect_equal(mo_subspecies("Escherichia coli"), "") expect_equal(mo_type("Escherichia coli", language = "en"), "Bacteria") expect_equal(mo_gramstain("Escherichia coli", language = "en"), "Gram-negative") expect_inherits(mo_taxonomy("Escherichia coli"), "list") -expect_equal(names(mo_taxonomy("Escherichia coli")), c("kingdom", "phylum", "class", "order", - "family", "genus", "species", "subspecies")) +expect_equal(names(mo_taxonomy("Escherichia coli")), c( + "kingdom", "phylum", "class", "order", + "family", "genus", "species", "subspecies" +)) expect_equal(mo_synonyms("Escherichia coli"), NULL) expect_true(length(mo_synonyms("Candida albicans")) > 1) expect_inherits(mo_synonyms(c("Candida albicans", "Escherichia coli")), "list") -expect_equal(names(mo_info("Escherichia coli")), c("kingdom", "phylum", "class", "order", - "family", "genus", "species", "subspecies", - "synonyms", "gramstain", "url", "ref", - "snomed")) +expect_equal(names(mo_info("Escherichia coli")), c( + "kingdom", "phylum", "class", "order", + "family", "genus", "species", "subspecies", + "synonyms", "gramstain", "url", "ref", + "snomed" +)) expect_inherits(mo_info(c("Escherichia coli", "Staphylococcus aureus")), "list") expect_equal(mo_ref("Escherichia coli"), "Castellani et al., 1919") @@ -86,14 +90,22 @@ expect_identical(mo_name(dutch, language = NULL), microorganisms$fullname) # gig # manual property function expect_error(mo_property("Escherichia coli", property = c("tsn", "fullname"))) expect_error(mo_property("Escherichia coli", property = "UNKNOWN")) -expect_identical(mo_property("Escherichia coli", property = "fullname"), - mo_fullname("Escherichia coli")) -expect_identical(mo_property("Escherichia coli", property = "genus"), - mo_genus("Escherichia coli")) -expect_identical(mo_property("Escherichia coli", property = "species"), - mo_species("Escherichia coli")) -expect_identical(mo_property("Escherichia coli", property = "species_id"), - mo_lpsn("Escherichia coli")) +expect_identical( + mo_property("Escherichia coli", property = "fullname"), + mo_fullname("Escherichia coli") +) +expect_identical( + mo_property("Escherichia coli", property = "genus"), + mo_genus("Escherichia coli") +) +expect_identical( + mo_property("Escherichia coli", property = "species"), + mo_species("Escherichia coli") +) +expect_identical( + mo_property("Escherichia coli", property = "species_id"), + mo_lpsn("Escherichia coli") +) expect_identical(suppressWarnings(mo_ref("Chlamydia psittaci")), "Page, 1968") expect_identical(mo_ref("Chlamydophila psittaci"), "Everett et al., 1999") @@ -102,30 +114,48 @@ expect_true(112283007 %in% mo_snomed("Escherichia coli")) # old codes must throw a warning in mo_* family expect_warning(mo_name(c("B_ESCHR_COL", "B_STPHY_AUR"))) # outcome of mo_fullname must always return the fullname from the data set -x <- data.frame(mo = microorganisms$mo, - # fullname from the original data: - f1 = microorganisms$fullname, - # newly created fullname based on MO code: - f2 = mo_fullname(microorganisms$mo, language = "en"), - stringsAsFactors = FALSE) +x <- data.frame( + mo = microorganisms$mo, + # fullname from the original data: + f1 = microorganisms$fullname, + # newly created fullname based on MO code: + f2 = mo_fullname(microorganisms$mo, language = "en"), + stringsAsFactors = FALSE +) expect_equal(nrow(subset(x, f1 != f2)), 0) # is gram pos/neg (also return FALSE for all non-bacteria) -expect_equal(mo_is_gram_negative(c("Escherichia coli", "Staphylococcus aureus", "Candida albicans")), - c(TRUE, FALSE, FALSE)) -expect_equal(mo_is_gram_positive(c("Escherichia coli", "Staphylococcus aureus", "Candida albicans")), - c(FALSE, TRUE, FALSE)) +expect_equal( + mo_is_gram_negative(c("Escherichia coli", "Staphylococcus aureus", "Candida albicans")), + c(TRUE, FALSE, FALSE) +) +expect_equal( + mo_is_gram_positive(c("Escherichia coli", "Staphylococcus aureus", "Candida albicans")), + c(FALSE, TRUE, FALSE) +) # is intrinsic resistant -expect_equal(mo_is_intrinsic_resistant(c("Escherichia coli", "Staphylococcus aureus", "Candida albicans"), - "vanco"), - c(TRUE, FALSE, FALSE)) +expect_equal( + mo_is_intrinsic_resistant( + c("Escherichia coli", "Staphylococcus aureus", "Candida albicans"), + "vanco" + ), + c(TRUE, FALSE, FALSE) +) # with reference data -expect_equal(mo_name("test", reference_df = data.frame(col1 = "test", mo = "B_ESCHR_COLI")), - "Escherichia coli") +expect_equal( + mo_name("test", reference_df = data.frame(col1 = "test", mo = "B_ESCHR_COLI")), + "Escherichia coli" +) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { expect_equal(example_isolates %>% filter(mo_is_gram_negative()) %>% nrow(), - 730, tolerance = 0.5) + 730, + tolerance = 0.5 + ) expect_equal(example_isolates %>% filter(mo_is_gram_positive()) %>% nrow(), - 1238, tolerance = 0.5) + 1238, + tolerance = 0.5 + ) expect_equal(example_isolates %>% filter(mo_is_intrinsic_resistant(ab = "Vancomycin")) %>% nrow(), - 710, tolerance = 0.5) + 710, + tolerance = 0.5 + ) } diff --git a/inst/tinytest/test-pca.R b/inst/tinytest/test-pca.R index 6c391af82..f8ada6cde 100644 --- a/inst/tinytest/test-pca.R +++ b/inst/tinytest/test-pca.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -23,20 +23,26 @@ # how to conduct AMR data analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -resistance_data <- structure(list(order = c("Bacillales", "Enterobacterales", "Enterobacterales"), - genus = c("Staphylococcus", "Escherichia", "Klebsiella"), - AMC = c(0.00425, 0.13062, 0.10344), - CXM = c(0.00425, 0.05376, 0.10344), - CTX = c(0.00000, 0.02396, 0.05172), - TOB = c(0.02325, 0.02597, 0.10344), - TMP = c(0.08387, 0.39141, 0.18367)), - class = c("grouped_df", "tbl_df", "tbl", "data.frame"), - row.names = c(NA, -3L), - groups = structure(list(order = c("Bacillales", "Enterobacterales"), - .rows = list(1L, 2:3)), - row.names = c(NA, -2L), - class = c("tbl_df", "tbl", "data.frame"), - .drop = TRUE)) +resistance_data <- structure(list( + order = c("Bacillales", "Enterobacterales", "Enterobacterales"), + genus = c("Staphylococcus", "Escherichia", "Klebsiella"), + AMC = c(0.00425, 0.13062, 0.10344), + CXM = c(0.00425, 0.05376, 0.10344), + CTX = c(0.00000, 0.02396, 0.05172), + TOB = c(0.02325, 0.02597, 0.10344), + TMP = c(0.08387, 0.39141, 0.18367) +), +class = c("grouped_df", "tbl_df", "tbl", "data.frame"), +row.names = c(NA, -3L), +groups = structure(list( + order = c("Bacillales", "Enterobacterales"), + .rows = list(1L, 2:3) +), +row.names = c(NA, -2L), +class = c("tbl_df", "tbl", "data.frame"), +.drop = TRUE +) +) pca_model <- pca(resistance_data) expect_inherits(pca_model, "pca") @@ -48,14 +54,16 @@ if (AMR:::pkg_is_available("ggplot2")) { } if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { - resistance_data <- example_isolates %>% - group_by(order = mo_order(mo), - genus = mo_genus(mo)) %>% + resistance_data <- example_isolates %>% + group_by( + order = mo_order(mo), + genus = mo_genus(mo) + ) %>% summarise_if(is.rsi, resistance, minimum = 0) - pca_result <- resistance_data %>% - pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, "SXT") + pca_result <- resistance_data %>% + pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, "SXT") expect_inherits(pca_result, "prcomp") - + if (AMR:::pkg_is_available("ggplot2")) { ggplot_pca(pca_result, ellipse = TRUE) ggplot_pca(pca_result, ellipse = FALSE, arrows_textangled = FALSE, scale = FALSE) diff --git a/inst/tinytest/test-proportion.R b/inst/tinytest/test-proportion.R index bfc0e7585..bfaab47ac 100755 --- a/inst/tinytest/test-proportion.R +++ b/inst/tinytest/test-proportion.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -28,77 +28,108 @@ expect_equal(proportion_SI(example_isolates$AMX), susceptibility(example_isolate # AMX resistance in `example_isolates` expect_equal(proportion_R(example_isolates$AMX), 0.5955556, tolerance = 0.0001) expect_equal(proportion_I(example_isolates$AMX), 0.002222222, tolerance = 0.0001) -expect_equal(1 - proportion_R(example_isolates$AMX) - proportion_I(example_isolates$AMX), - proportion_S(example_isolates$AMX)) -expect_equal(proportion_R(example_isolates$AMX) + proportion_I(example_isolates$AMX), - proportion_IR(example_isolates$AMX)) -expect_equal(proportion_S(example_isolates$AMX) + proportion_I(example_isolates$AMX), - proportion_SI(example_isolates$AMX)) +expect_equal( + 1 - proportion_R(example_isolates$AMX) - proportion_I(example_isolates$AMX), + proportion_S(example_isolates$AMX) +) +expect_equal( + proportion_R(example_isolates$AMX) + proportion_I(example_isolates$AMX), + proportion_IR(example_isolates$AMX) +) +expect_equal( + proportion_S(example_isolates$AMX) + proportion_I(example_isolates$AMX), + proportion_SI(example_isolates$AMX) +) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { expect_equal(example_isolates %>% proportion_SI(AMC), - 0.7626397, - tolerance = 0.0001) + 0.7626397, + tolerance = 0.0001 + ) expect_equal(example_isolates %>% proportion_SI(AMC, GEN), - 0.9408, - tolerance = 0.0001) + 0.9408, + tolerance = 0.0001 + ) expect_equal(example_isolates %>% proportion_SI(AMC, GEN, only_all_tested = TRUE), - 0.9382647, - tolerance = 0.0001) - + 0.9382647, + tolerance = 0.0001 + ) + # percentages - expect_equal(example_isolates %>% - group_by(ward) %>% - summarise(R = proportion_R(CIP, as_percent = TRUE), - I = proportion_I(CIP, as_percent = TRUE), - S = proportion_S(CIP, as_percent = TRUE), - n = n_rsi(CIP), - total = n()) %>% - pull(n) %>% - sum(), - 1409) - + expect_equal( + example_isolates %>% + group_by(ward) %>% + summarise( + R = proportion_R(CIP, as_percent = TRUE), + I = proportion_I(CIP, as_percent = TRUE), + S = proportion_S(CIP, as_percent = TRUE), + n = n_rsi(CIP), + total = n() + ) %>% + pull(n) %>% + sum(), + 1409 + ) + # count of cases - expect_equal(example_isolates %>% - group_by(ward) %>% - summarise(cipro_p = proportion_SI(CIP, as_percent = TRUE), - cipro_n = n_rsi(CIP), - genta_p = proportion_SI(GEN, as_percent = TRUE), - genta_n = n_rsi(GEN), - combination_p = proportion_SI(CIP, GEN, as_percent = TRUE), - combination_n = n_rsi(CIP, GEN)) %>% - pull(combination_n), - c(305, 617, 241, 711)) - + expect_equal( + example_isolates %>% + group_by(ward) %>% + summarise( + cipro_p = proportion_SI(CIP, as_percent = TRUE), + cipro_n = n_rsi(CIP), + genta_p = proportion_SI(GEN, as_percent = TRUE), + genta_n = n_rsi(GEN), + combination_p = proportion_SI(CIP, GEN, as_percent = TRUE), + combination_n = n_rsi(CIP, GEN) + ) %>% + pull(combination_n), + c(305, 617, 241, 711) + ) + # proportion_df expect_equal( example_isolates %>% select(AMX) %>% proportion_df() %>% pull(value), - c(example_isolates$AMX %>% proportion_SI(), - example_isolates$AMX %>% proportion_R()) + c( + example_isolates$AMX %>% proportion_SI(), + example_isolates$AMX %>% proportion_R() + ) ) expect_equal( example_isolates %>% select(AMX) %>% proportion_df(combine_IR = TRUE) %>% pull(value), - c(example_isolates$AMX %>% proportion_S(), - example_isolates$AMX %>% proportion_IR()) + c( + example_isolates$AMX %>% proportion_S(), + example_isolates$AMX %>% proportion_IR() + ) ) expect_equal( example_isolates %>% select(AMX) %>% proportion_df(combine_SI = FALSE) %>% pull(value), - c(example_isolates$AMX %>% proportion_S(), + c( + example_isolates$AMX %>% proportion_S(), example_isolates$AMX %>% proportion_I(), - example_isolates$AMX %>% proportion_R()) + example_isolates$AMX %>% proportion_R() + ) ) } expect_warning(proportion_R(as.character(example_isolates$AMC))) expect_warning(proportion_S(as.character(example_isolates$AMC))) -expect_warning(proportion_S(as.character(example_isolates$AMC, - example_isolates$GEN))) +expect_warning(proportion_S(as.character( + example_isolates$AMC, + example_isolates$GEN +))) -expect_warning(n_rsi(as.character(example_isolates$AMC, - example_isolates$GEN))) -expect_equal(suppressWarnings(n_rsi(as.character(example_isolates$AMC, - example_isolates$GEN))), - 1879) +expect_warning(n_rsi(as.character( + example_isolates$AMC, + example_isolates$GEN +))) +expect_equal( + suppressWarnings(n_rsi(as.character( + example_isolates$AMC, + example_isolates$GEN + ))), + 1879 +) # check for errors expect_error(proportion_IR("test", minimum = "test")) @@ -110,12 +141,18 @@ expect_error(proportion_S("test", as_percent = "test")) expect_error(proportion_S("test", also_single_tested = TRUE)) # check too low amount of isolates -expect_identical(suppressWarnings(proportion_R(example_isolates$AMX, minimum = nrow(example_isolates) + 1)), - NA_real_) -expect_identical(suppressWarnings(proportion_I(example_isolates$AMX, minimum = nrow(example_isolates) + 1)), - NA_real_) -expect_identical(suppressWarnings(proportion_S(example_isolates$AMX, minimum = nrow(example_isolates) + 1)), - NA_real_) +expect_identical( + suppressWarnings(proportion_R(example_isolates$AMX, minimum = nrow(example_isolates) + 1)), + NA_real_ +) +expect_identical( + suppressWarnings(proportion_I(example_isolates$AMX, minimum = nrow(example_isolates) + 1)), + NA_real_ +) +expect_identical( + suppressWarnings(proportion_S(example_isolates$AMX, minimum = nrow(example_isolates) + 1)), + NA_real_ +) # warning for speed loss expect_warning(proportion_R(as.character(example_isolates$GEN))) diff --git a/inst/tinytest/test-random.R b/inst/tinytest/test-random.R index 0f00da0b8..98e304e00 100644 --- a/inst/tinytest/test-random.R +++ b/inst/tinytest/test-random.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # diff --git a/inst/tinytest/test-resistance_predict.R b/inst/tinytest/test-resistance_predict.R index 12740b1e3..baf03e3f6 100644 --- a/inst/tinytest/test-resistance_predict.R +++ b/inst/tinytest/test-resistance_predict.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -25,22 +25,25 @@ if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { expect_stdout(AMX_R <- example_isolates %>% - filter(mo == "B_ESCHR_COLI") %>% - rsi_predict(col_ab = "AMX", - col_date = "date", - model = "binomial", - minimum = 10, - info = TRUE) %>% - pull("value")) + filter(mo == "B_ESCHR_COLI") %>% + rsi_predict( + col_ab = "AMX", + col_date = "date", + model = "binomial", + minimum = 10, + info = TRUE + ) %>% + pull("value")) # AMX resistance will increase according to data set `example_isolates` expect_true(AMX_R[3] < AMX_R[20]) } expect_stdout(x <- suppressMessages(resistance_predict(example_isolates, - col_ab = "AMX", - year_min = 2010, - model = "binomial", - info = TRUE))) + col_ab = "AMX", + year_min = 2010, + model = "binomial", + info = TRUE +))) pdf(NULL) # prevent Rplots.pdf being created expect_silent(plot(x)) if (AMR:::pkg_is_available("ggplot2")) { @@ -48,48 +51,66 @@ if (AMR:::pkg_is_available("ggplot2")) { expect_silent(autoplot(x)) expect_error(ggplot_rsi_predict(example_isolates)) } -expect_stdout(rsi_predict(x = subset(example_isolates, mo == "B_ESCHR_COLI"), - model = "binomial", - col_ab = "AMX", - col_date = "date", - info = TRUE)) -expect_stdout(rsi_predict(x = subset(example_isolates, mo == "B_ESCHR_COLI"), - model = "loglin", - col_ab = "AMX", - col_date = "date", - info = TRUE)) -expect_stdout(rsi_predict(x = subset(example_isolates, mo == "B_ESCHR_COLI"), - model = "lin", - col_ab = "AMX", - col_date = "date", - info = TRUE)) +expect_stdout(rsi_predict( + x = subset(example_isolates, mo == "B_ESCHR_COLI"), + model = "binomial", + col_ab = "AMX", + col_date = "date", + info = TRUE +)) +expect_stdout(rsi_predict( + x = subset(example_isolates, mo == "B_ESCHR_COLI"), + model = "loglin", + col_ab = "AMX", + col_date = "date", + info = TRUE +)) +expect_stdout(rsi_predict( + x = subset(example_isolates, mo == "B_ESCHR_COLI"), + model = "lin", + col_ab = "AMX", + col_date = "date", + info = TRUE +)) -expect_error(rsi_predict(x = subset(example_isolates, mo == "B_ESCHR_COLI"), - model = "INVALID MODEL", - col_ab = "AMX", - col_date = "date", - info = TRUE)) -expect_error(rsi_predict(x = subset(example_isolates, mo == "B_ESCHR_COLI"), - model = "binomial", - col_ab = "NOT EXISTING COLUMN", - col_date = "date", - info = TRUE)) -expect_error(rsi_predict(x = subset(example_isolates, mo == "B_ESCHR_COLI"), - model = "binomial", - col_ab = "AMX", - col_date = "NOT EXISTING COLUMN", - info = TRUE)) -expect_error(rsi_predict(x = subset(example_isolates, mo == "B_ESCHR_COLI"), - col_ab = "AMX", - col_date = "NOT EXISTING COLUMN", - info = TRUE)) -expect_error(rsi_predict(x = subset(example_isolates, mo == "B_ESCHR_COLI"), - col_ab = "AMX", - col_date = "date", - info = TRUE)) +expect_error(rsi_predict( + x = subset(example_isolates, mo == "B_ESCHR_COLI"), + model = "INVALID MODEL", + col_ab = "AMX", + col_date = "date", + info = TRUE +)) +expect_error(rsi_predict( + x = subset(example_isolates, mo == "B_ESCHR_COLI"), + model = "binomial", + col_ab = "NOT EXISTING COLUMN", + col_date = "date", + info = TRUE +)) +expect_error(rsi_predict( + x = subset(example_isolates, mo == "B_ESCHR_COLI"), + model = "binomial", + col_ab = "AMX", + col_date = "NOT EXISTING COLUMN", + info = TRUE +)) +expect_error(rsi_predict( + x = subset(example_isolates, mo == "B_ESCHR_COLI"), + col_ab = "AMX", + col_date = "NOT EXISTING COLUMN", + info = TRUE +)) +expect_error(rsi_predict( + x = subset(example_isolates, mo == "B_ESCHR_COLI"), + col_ab = "AMX", + col_date = "date", + info = TRUE +)) # almost all E. coli are MEM S in the Netherlands :) -expect_error(resistance_predict(x = subset(example_isolates, mo == "B_ESCHR_COLI"), - model = "binomial", - col_ab = "MEM", - col_date = "date", - info = TRUE)) +expect_error(resistance_predict( + x = subset(example_isolates, mo == "B_ESCHR_COLI"), + model = "binomial", + col_ab = "MEM", + col_date = "date", + info = TRUE +)) diff --git a/inst/tinytest/test-rsi.R b/inst/tinytest/test-rsi.R index 6a41f75a3..b1024c93f 100644 --- a/inst/tinytest/test-rsi.R +++ b/inst/tinytest/test-rsi.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,8 +24,10 @@ # ==================================================================== # # we must only have EUCAST and CLSI, because otherwise the rules in as.rsi() will fail -expect_identical(unique(gsub("[^A-Z]", "", AMR::rsi_translation$guideline)), - c("EUCAST", "CLSI")) +expect_identical( + unique(gsub("[^A-Z]", "", AMR::rsi_translation$guideline)), + c("EUCAST", "CLSI") +) expect_true(as.rsi("S") < as.rsi("I")) expect_true(as.rsi("I") < as.rsi("R")) @@ -45,97 +47,140 @@ expect_stdout(print(as.rsi(c("S", "I", "R")))) expect_equal(as.character(as.rsi(c(1:3))), c("S", "I", "R")) expect_equal(as.character(as.rsi(c(1:3))), c("S", "I", "R")) expect_equal(suppressWarnings(as.logical(as.rsi("INVALID VALUE"))), NA) -expect_equal(summary(as.rsi(c("S", "R"))), - structure(c("Class" = "rsi", - "%R" = "50.0% (n=1)", - "%SI" = "50.0% (n=1)", - "- %S" = "50.0% (n=1)", - "- %I" = " 0.0% (n=0)"), class = c("summaryDefault", "table"))) -expect_identical(as.logical(lapply(example_isolates, is.rsi.eligible)), - as.logical(lapply(example_isolates, is.rsi))) +expect_equal( + summary(as.rsi(c("S", "R"))), + structure(c( + "Class" = "rsi", + "%R" = "50.0% (n=1)", + "%SI" = "50.0% (n=1)", + "- %S" = "50.0% (n=1)", + "- %I" = " 0.0% (n=0)" + ), class = c("summaryDefault", "table")) +) +expect_identical( + as.logical(lapply(example_isolates, is.rsi.eligible)), + as.logical(lapply(example_isolates, is.rsi)) +) expect_error(as.rsi.mic(as.mic(16))) expect_error(as.rsi.disk(as.disk(16))) expect_error(get_guideline("this one does not exist")) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { # 40 rsi columns - expect_equal(example_isolates %>% - mutate_at(vars(PEN:RIF), as.character) %>% - lapply(is.rsi.eligible) %>% - as.logical() %>% - sum(), - 40) + expect_equal( + example_isolates %>% + mutate_at(vars(PEN:RIF), as.character) %>% + lapply(is.rsi.eligible) %>% + as.logical() %>% + sum(), + 40 + ) expect_equal(sum(is.rsi(example_isolates)), 40) - + expect_stdout(print(tibble(ab = as.rsi("S")))) } if (AMR:::pkg_is_available("skimr", min_version = "2.0.0")) { - expect_inherits(skim(example_isolates), - "data.frame") + expect_inherits( + skim(example_isolates), + "data.frame" + ) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { - expect_inherits(example_isolates %>% - mutate(m = as.mic(2), - d = as.disk(20)) %>% - skim(), - "data.frame") + expect_inherits( + example_isolates %>% + mutate( + m = as.mic(2), + d = as.disk(20) + ) %>% + skim(), + "data.frame" + ) } } expect_equal(as.rsi(c("", "-", NA, "NULL")), c(NA_rsi_, NA_rsi_, NA_rsi_, NA_rsi_)) # S. pneumoniae/ampicillin in EUCAST 2020: 0.5-2 ug/ml (R is only > 2) -expect_equal(as.character( - as.rsi(x = as.mic(c(0.125, 0.5, 1, 2, 4)), - mo = "B_STRPT_PNMN", - ab = "AMP", - guideline = "EUCAST 2020")), - c("S", "S", "I", "I", "R")) +expect_equal( + as.character( + as.rsi( + x = as.mic(c(0.125, 0.5, 1, 2, 4)), + mo = "B_STRPT_PNMN", + ab = "AMP", + guideline = "EUCAST 2020" + ) + ), + c("S", "S", "I", "I", "R") +) # S. pneumoniae/amoxicillin in CLSI 2019: 2-8 ug/ml (R is 8 and > 8) -expect_equal(as.character( - as.rsi(x = as.mic(c(1, 2, 4, 8, 16)), - mo = "B_STRPT_PNMN", - ab = "AMX", - guideline = "CLSI 2019")), - c("S", "S", "I", "R", "R")) +expect_equal( + as.character( + as.rsi( + x = as.mic(c(1, 2, 4, 8, 16)), + mo = "B_STRPT_PNMN", + ab = "AMX", + guideline = "CLSI 2019" + ) + ), + c("S", "S", "I", "R", "R") +) # cutoffs at MIC = 8 -expect_equal(as.rsi(as.mic(2), "E. coli", "ampicillin", guideline = "EUCAST 2020"), - as.rsi("S")) -expect_equal(as.rsi(as.mic(32), "E. coli", "ampicillin", guideline = "EUCAST 2020"), - as.rsi("R")) +expect_equal( + as.rsi(as.mic(2), "E. coli", "ampicillin", guideline = "EUCAST 2020"), + as.rsi("S") +) +expect_equal( + as.rsi(as.mic(32), "E. coli", "ampicillin", guideline = "EUCAST 2020"), + as.rsi("R") +) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { expect_true(suppressWarnings(example_isolates %>% - mutate(amox_mic = as.mic(2)) %>% - select(mo, amox_mic) %>% - as.rsi() %>% - pull(amox_mic) %>% - is.rsi())) + mutate(amox_mic = as.mic(2)) %>% + select(mo, amox_mic) %>% + as.rsi() %>% + pull(amox_mic) %>% + is.rsi())) } -expect_equal(as.character( - as.rsi(x = as.disk(22), - mo = "B_STRPT_PNMN", - ab = "ERY", - guideline = "CLSI")), - "S") -expect_equal(as.character( - as.rsi(x = as.disk(18), - mo = "B_STRPT_PNMN", - ab = "ERY", - guideline = "CLSI")), - "I") -expect_equal(as.character( - as.rsi(x = as.disk(10), - mo = "B_STRPT_PNMN", - ab = "ERY", - guideline = "CLSI")), - "R") +expect_equal( + as.character( + as.rsi( + x = as.disk(22), + mo = "B_STRPT_PNMN", + ab = "ERY", + guideline = "CLSI" + ) + ), + "S" +) +expect_equal( + as.character( + as.rsi( + x = as.disk(18), + mo = "B_STRPT_PNMN", + ab = "ERY", + guideline = "CLSI" + ) + ), + "I" +) +expect_equal( + as.character( + as.rsi( + x = as.disk(10), + mo = "B_STRPT_PNMN", + ab = "ERY", + guideline = "CLSI" + ) + ), + "R" +) if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0")) { expect_true(example_isolates %>% - mutate(amox_disk = as.disk(15)) %>% - select(mo, amox_disk) %>% - as.rsi(guideline = "CLSI") %>% - pull(amox_disk) %>% - is.rsi()) + mutate(amox_disk = as.disk(15)) %>% + select(mo, amox_disk) %>% + as.rsi(guideline = "CLSI") %>% + pull(amox_disk) %>% + is.rsi()) } # frequency tables if (AMR:::pkg_is_available("cleaner")) { @@ -143,23 +188,37 @@ if (AMR:::pkg_is_available("cleaner")) { } -df <- data.frame(microorganism = "Escherichia coli", - AMP = as.mic(8), - CIP = as.mic(0.256), - GEN = as.disk(18), - TOB = as.disk(16), - ERY = "R", # note about assigning class - CLR = "V") # note about cleaning -expect_inherits(suppressWarnings(as.rsi(df)), - "data.frame") -expect_inherits(suppressWarnings(as.rsi(data.frame(mo = "Escherichia coli", - amoxi = c("R", "S", "I", "invalid")))$amoxi), - "rsi") -expect_warning(as.rsi(data.frame(mo = "E. coli", - NIT = c("<= 2", 32)))) -expect_message(as.rsi(data.frame(mo = "E. coli", - NIT = c("<= 2", 32), - uti = TRUE))) -expect_message(as.rsi(data.frame(mo = "E. coli", - NIT = c("<= 2", 32), - specimen = c("urine", "blood")))) +df <- data.frame( + microorganism = "Escherichia coli", + AMP = as.mic(8), + CIP = as.mic(0.256), + GEN = as.disk(18), + TOB = as.disk(16), + ERY = "R", # note about assigning class + CLR = "V" +) # note about cleaning +expect_inherits( + suppressWarnings(as.rsi(df)), + "data.frame" +) +expect_inherits( + suppressWarnings(as.rsi(data.frame( + mo = "Escherichia coli", + amoxi = c("R", "S", "I", "invalid") + ))$amoxi), + "rsi" +) +expect_warning(as.rsi(data.frame( + mo = "E. coli", + NIT = c("<= 2", 32) +))) +expect_message(as.rsi(data.frame( + mo = "E. coli", + NIT = c("<= 2", 32), + uti = TRUE +))) +expect_message(as.rsi(data.frame( + mo = "E. coli", + NIT = c("<= 2", 32), + specimen = c("urine", "blood") +))) diff --git a/inst/tinytest/test-skewness.R b/inst/tinytest/test-skewness.R index 46d6d5ed2..374f3ab65 100644 --- a/inst/tinytest/test-skewness.R +++ b/inst/tinytest/test-skewness.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,11 +24,14 @@ # ==================================================================== # expect_equal(skewness(example_isolates$age), - -1.212888, - tolerance = 0.00001) + -1.212888, + tolerance = 0.00001 +) expect_equal(unname(skewness(data.frame(example_isolates$age))), - -1.212888, - tolerance = 0.00001) + -1.212888, + tolerance = 0.00001 +) expect_equal(skewness(matrix(example_isolates$age)), - -1.212888, - tolerance = 0.00001) + -1.212888, + tolerance = 0.00001 +) diff --git a/inst/tinytest/test-zzz.R b/inst/tinytest/test-zzz.R index da610655a..62414e3ae 100644 --- a/inst/tinytest/test-zzz.R +++ b/inst/tinytest/test-zzz.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -46,7 +46,8 @@ import_functions <- c( "read_html" = "xml2", "right_join" = "dplyr", "semi_join" = "dplyr", - "showQuestion" = "rstudioapi") + "showQuestion" = "rstudioapi" +) # functions that are called directly call_functions <- c( @@ -84,9 +85,10 @@ call_functions <- c( ) if (AMR:::pkg_is_available("skimr", also_load = FALSE, min_version = "2.0.0")) { call_functions <- c(call_functions, - # skimr - "inline_hist" = "skimr", - "sfl" = "skimr") + # skimr + "inline_hist" = "skimr", + "sfl" = "skimr" + ) } extended_functions <- c( @@ -105,12 +107,15 @@ for (i in seq_len(length(import_functions))) { pkg <- unname(import_functions[i]) # function should exist in foreign pkg namespace if (AMR:::pkg_is_available(pkg, - also_load = FALSE, - min_version = if (pkg == "dplyr") "1.0.0" else NULL)) { + also_load = FALSE, + min_version = if (pkg == "dplyr") "1.0.0" else NULL + )) { tst <- !is.null(AMR:::import_fn(name = fn, pkg = pkg, error_on_fail = FALSE)) expect_true(tst, - info = ifelse(tst, - "All external function references exist.", - paste0("Function ", pkg, "::", fn, "() does not exist anymore"))) + info = ifelse(tst, + "All external function references exist.", + paste0("Function ", pkg, "::", fn, "() does not exist anymore") + ) + ) } } diff --git a/man/ab_from_text.Rd b/man/ab_from_text.Rd index f74a909ec..91cbf33db 100644 --- a/man/ab_from_text.Rd +++ b/man/ab_from_text.Rd @@ -58,7 +58,7 @@ With using \code{collapse}, this function will return a \link{character}:\cr } } \examples{ -# mind the bad spelling of amoxicillin in this line, +# mind the bad spelling of amoxicillin in this line, # straight from a true health care record: ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds") @@ -73,22 +73,28 @@ abx <- ab_from_text("500 mg amoxi po and 400mg cipro iv") ab_group(abx[[1]]) if (require("dplyr")) { - tibble(clinical_text = c("given 400mg cipro and 500 mg amox", - "started on doxy iv today")) \%>\% - mutate(abx_codes = ab_from_text(clinical_text), - abx_doses = ab_from_text(clinical_text, type = "doses"), - abx_admin = ab_from_text(clinical_text, type = "admin"), - abx_coll = ab_from_text(clinical_text, collapse = "|"), - abx_coll_names = ab_from_text(clinical_text, - collapse = "|", - translate_ab = "name"), - abx_coll_doses = ab_from_text(clinical_text, - type = "doses", - collapse = "|"), - abx_coll_admin = ab_from_text(clinical_text, - type = "admin", - collapse = "|")) - + tibble(clinical_text = c( + "given 400mg cipro and 500 mg amox", + "started on doxy iv today" + )) \%>\% + mutate( + abx_codes = ab_from_text(clinical_text), + abx_doses = ab_from_text(clinical_text, type = "doses"), + abx_admin = ab_from_text(clinical_text, type = "admin"), + abx_coll = ab_from_text(clinical_text, collapse = "|"), + abx_coll_names = ab_from_text(clinical_text, + collapse = "|", + translate_ab = "name" + ), + abx_coll_doses = ab_from_text(clinical_text, + type = "doses", + collapse = "|" + ), + abx_coll_admin = ab_from_text(clinical_text, + type = "admin", + collapse = "|" + ) + ) } } } diff --git a/man/ab_property.Rd b/man/ab_property.Rd index 96c611deb..5bd4ca613 100644 --- a/man/ab_property.Rd +++ b/man/ab_property.Rd @@ -109,36 +109,38 @@ All data sets in this \code{AMR} package (about microorganisms, antibiotics, R/S \examples{ # all properties: -ab_name("AMX") # "Amoxicillin" -ab_atc("AMX") # "J01CA04" (ATC code from the WHO) -ab_cid("AMX") # 33613 (Compound ID from PubChem) -ab_synonyms("AMX") # a list with brand names of amoxicillin +ab_name("AMX") # "Amoxicillin" +ab_atc("AMX") # "J01CA04" (ATC code from the WHO) +ab_cid("AMX") # 33613 (Compound ID from PubChem) +ab_synonyms("AMX") # a list with brand names of amoxicillin ab_tradenames("AMX") # same -ab_group("AMX") # "Beta-lactams/penicillins" +ab_group("AMX") # "Beta-lactams/penicillins" ab_atc_group1("AMX") # "Beta-lactam antibacterials, penicillins" ab_atc_group2("AMX") # "Penicillins with extended spectrum" -ab_url("AMX") # link to the official WHO page +ab_url("AMX") # link to the official WHO page # smart lowercase tranformation -ab_name(x = c("AMC", "PLB")) # "Amoxicillin/clavulanic acid" "Polymyxin B" -ab_name(x = c("AMC", "PLB"), - tolower = TRUE) # "amoxicillin/clavulanic acid" "polymyxin B" +ab_name(x = c("AMC", "PLB")) # "Amoxicillin/clavulanic acid" "Polymyxin B" +ab_name( + x = c("AMC", "PLB"), + tolower = TRUE +) # "amoxicillin/clavulanic acid" "polymyxin B" # defined daily doses (DDD) -ab_ddd("AMX", "oral") # 1.5 +ab_ddd("AMX", "oral") # 1.5 ab_ddd_units("AMX", "oral") # "g" -ab_ddd("AMX", "iv") # 3 -ab_ddd_units("AMX", "iv") # "g" +ab_ddd("AMX", "iv") # 3 +ab_ddd_units("AMX", "iv") # "g" -ab_info("AMX") # all properties as a list +ab_info("AMX") # all properties as a list # all ab_* functions use as.ab() internally, so you can go from 'any' to 'any': -ab_atc("AMP") # ATC code of AMP (ampicillin) -ab_group("J01CA01") # Drug group of ampicillins ATC code -ab_loinc("ampicillin") # LOINC codes of ampicillin -ab_name("21066-6") # "Ampicillin" (using LOINC) -ab_name(6249) # "Ampicillin" (using CID) -ab_name("J01CA01") # "Ampicillin" (using ATC) +ab_atc("AMP") # ATC code of AMP (ampicillin) +ab_group("J01CA01") # Drug group of ampicillins ATC code +ab_loinc("ampicillin") # LOINC codes of ampicillin +ab_name("21066-6") # "Ampicillin" (using LOINC) +ab_name(6249) # "Ampicillin" (using CID) +ab_name("J01CA01") # "Ampicillin" (using ATC) # spelling from different languages and dyslexia are no problem ab_atc("ceftriaxon") @@ -155,24 +157,24 @@ if (require("dplyr")) { example_isolates \%>\% set_ab_names() \%>\% head() - + # this does the same: example_isolates \%>\% - rename_with(set_ab_names)\%>\% + rename_with(set_ab_names) \%>\% head() - + # set_ab_names() works with any AB property: example_isolates \%>\% - set_ab_names(property = "atc")\%>\% + set_ab_names(property = "atc") \%>\% head() - - example_isolates \%>\% - set_ab_names(where(is.rsi)) \%>\% - colnames() - - example_isolates \%>\% - set_ab_names(NIT:VAN) \%>\% - colnames() + + example_isolates \%>\% + set_ab_names(where(is.rsi)) \%>\% + colnames() + + example_isolates \%>\% + set_ab_names(NIT:VAN) \%>\% + colnames() } } } diff --git a/man/age_groups.Rd b/man/age_groups.Rd index 0260aa199..2736605fc 100644 --- a/man/age_groups.Rd +++ b/man/age_groups.Rd @@ -62,10 +62,12 @@ if (require("dplyr")) { filter(mo == as.mo("Escherichia coli")) \%>\% group_by(age_group = age_groups(age)) \%>\% select(age_group, CIP) \%>\% - ggplot_rsi(x = "age_group", - minimum = 0, - x.title = "Age Group", - title = "Ciprofloxacin resistance per age group") + ggplot_rsi( + x = "age_group", + minimum = 0, + x.title = "Age Group", + title = "Ciprofloxacin resistance per age group" + ) } } } diff --git a/man/antibiotic_class_selectors.Rd b/man/antibiotic_class_selectors.Rd index deb9a9456..d4ddf7c25 100644 --- a/man/antibiotic_class_selectors.Rd +++ b/man/antibiotic_class_selectors.Rd @@ -217,101 +217,92 @@ if (require("dplyr")) { # get AMR for all aminoglycosides e.g., per ward: example_isolates \%>\% - group_by(ward) \%>\% + group_by(ward) \%>\% summarise(across(aminoglycosides(), resistance)) - } if (require("dplyr")) { - + # You can combine selectors with '&' to be more specific: example_isolates \%>\% select(penicillins() & administrable_per_os()) - } if (require("dplyr")) { - + # get AMR for only drugs that matter - no intrinsic resistance: example_isolates \%>\% - filter(mo_genus() \%in\% c("Escherichia", "Klebsiella")) \%>\% - group_by(ward) \%>\% + filter(mo_genus() \%in\% c("Escherichia", "Klebsiella")) \%>\% + group_by(ward) \%>\% summarise(across(not_intrinsic_resistant(), resistance)) - } if (require("dplyr")) { - + # get susceptibility for antibiotics whose name contains "trim": example_isolates \%>\% - filter(first_isolate()) \%>\% - group_by(ward) \%>\% + filter(first_isolate()) \%>\% + group_by(ward) \%>\% summarise(across(ab_selector(name \%like\% "trim"), susceptibility)) - } if (require("dplyr")) { # this will select columns 'IPM' (imipenem) and 'MEM' (meropenem): - example_isolates \%>\% + example_isolates \%>\% select(carbapenems()) - -} -if (require("dplyr")) { - - # this will select columns 'mo', 'AMK', 'GEN', 'KAN' and 'TOB': - example_isolates \%>\% - select(mo, aminoglycosides()) - -} -if (require("dplyr")) { - - # any() and all() work in dplyr's filter() too: - example_isolates \%>\% - filter(any(aminoglycosides() == "R"), - all(cephalosporins_2nd() == "R")) - -} -if (require("dplyr")) { - - # also works with c(): - example_isolates \%>\% - filter(any(c(carbapenems(), aminoglycosides()) == "R")) - -} -if (require("dplyr")) { - - # not setting any/all will automatically apply all(): - example_isolates \%>\% - filter(aminoglycosides() == "R") - -} -if (require("dplyr")) { - - # this will select columns 'mo' and all antimycobacterial drugs ('RIF'): - example_isolates \%>\% - select(mo, ab_class("mycobact")) - -} -if (require("dplyr")) { - - # get bug/drug combinations for only glycopeptides in Gram-positives: - example_isolates \%>\% - filter(mo_is_gram_positive()) \%>\% - select(mo, glycopeptides()) \%>\% - bug_drug_combinations() \%>\% - format() - } if (require("dplyr")) { - data.frame(some_column = "some_value", - J01CA01 = "S") \%>\% # ATC code of ampicillin - select(penicillins()) # only the 'J01CA01' column will be selected - + # this will select columns 'mo', 'AMK', 'GEN', 'KAN' and 'TOB': + example_isolates \%>\% + select(mo, aminoglycosides()) +} +if (require("dplyr")) { + + # any() and all() work in dplyr's filter() too: + example_isolates \%>\% + filter( + any(aminoglycosides() == "R"), + all(cephalosporins_2nd() == "R") + ) +} +if (require("dplyr")) { + + # also works with c(): + example_isolates \%>\% + filter(any(c(carbapenems(), aminoglycosides()) == "R")) +} +if (require("dplyr")) { + + # not setting any/all will automatically apply all(): + example_isolates \%>\% + filter(aminoglycosides() == "R") +} +if (require("dplyr")) { + + # this will select columns 'mo' and all antimycobacterial drugs ('RIF'): + example_isolates \%>\% + select(mo, ab_class("mycobact")) +} +if (require("dplyr")) { + + # get bug/drug combinations for only glycopeptides in Gram-positives: + example_isolates \%>\% + filter(mo_is_gram_positive()) \%>\% + select(mo, glycopeptides()) \%>\% + bug_drug_combinations() \%>\% + format() +} +if (require("dplyr")) { + data.frame( + some_column = "some_value", + J01CA01 = "S" + ) \%>\% # ATC code of ampicillin + select(penicillins()) # only the 'J01CA01' column will be selected } if (require("dplyr")) { # with recent versions of dplyr this is all equal: x <- example_isolates[carbapenems() == "R", ] y <- example_isolates \%>\% filter(carbapenems() == "R") - z <- example_isolates \%>\% filter(if_all(carbapenems(), ~.x == "R")) + z <- example_isolates \%>\% filter(if_all(carbapenems(), ~ .x == "R")) identical(x, y) && identical(y, z) } } diff --git a/man/as.ab.Rd b/man/as.ab.Rd index 8380feed1..138d19dfb 100644 --- a/man/as.ab.Rd +++ b/man/as.ab.Rd @@ -74,26 +74,25 @@ as.ab(" eryt 123") as.ab("ERYT") as.ab("ERY") as.ab("eritromicine") # spelled wrong, yet works -as.ab("Erythrocin") # trade name -as.ab("Romycin") # trade name +as.ab("Erythrocin") # trade name +as.ab("Romycin") # trade name # spelling from different languages and dyslexia are no problem ab_atc("ceftriaxon") -ab_atc("cephtriaxone") # small spelling error -ab_atc("cephthriaxone") # or a bit more severe +ab_atc("cephtriaxone") # small spelling error +ab_atc("cephthriaxone") # or a bit more severe ab_atc("seephthriaaksone") # and even this works # use ab_* functions to get a specific properties (see ?ab_property); # they use as.ab() internally: -ab_name("J01FA01") # "Erythromycin" -ab_name("eryt") # "Erythromycin" +ab_name("J01FA01") # "Erythromycin" +ab_name("eryt") # "Erythromycin" \donttest{ if (require("dplyr")) { # you can quickly rename columns using dplyr >= 1.0.0: example_isolates \%>\% rename_with(as.ab, where(is.rsi)) - } } } diff --git a/man/as.disk.Rd b/man/as.disk.Rd index b2ec23393..050da92d9 100644 --- a/man/as.disk.Rd +++ b/man/as.disk.Rd @@ -35,11 +35,13 @@ Interpret disk values as RSI values with \code{\link[=as.rsi]{as.rsi()}}. It sup } \examples{ # transform existing disk zones to the `disk` class (using base R) -df <- data.frame(microorganism = "Escherichia coli", - AMP = 20, - CIP = 14, - GEN = 18, - TOB = 16) +df <- data.frame( + microorganism = "Escherichia coli", + AMP = 20, + CIP = 14, + GEN = 18, + TOB = 16 +) df[, 2:5] <- lapply(df[, 2:5], as.disk) str(df) @@ -51,10 +53,12 @@ if (require("dplyr")) { } # interpret disk values, see ?as.rsi -as.rsi(x = as.disk(18), - mo = "Strep pneu", # `mo` will be coerced with as.mo() - ab = "ampicillin", # and `ab` with as.ab() - guideline = "EUCAST") +as.rsi( + x = as.disk(18), + mo = "Strep pneu", # `mo` will be coerced with as.mo() + ab = "ampicillin", # and `ab` with as.ab() + guideline = "EUCAST" +) # interpret whole data set, pretend to be all from urinary tract infections: as.rsi(df, uti = TRUE) diff --git a/man/as.mic.Rd b/man/as.mic.Rd index 2e3597d20..0596798f1 100755 --- a/man/as.mic.Rd +++ b/man/as.mic.Rd @@ -91,14 +91,18 @@ quantile(mic_data) all(mic_data < 512) # interpret MIC values -as.rsi(x = as.mic(2), - mo = as.mo("Streptococcus pneumoniae"), - ab = "AMX", - guideline = "EUCAST") -as.rsi(x = as.mic(c(0.01, 2, 4, 8)), - mo = as.mo("Streptococcus pneumoniae"), - ab = "AMX", - guideline = "EUCAST") +as.rsi( + x = as.mic(2), + mo = as.mo("Streptococcus pneumoniae"), + ab = "AMX", + guideline = "EUCAST" +) +as.rsi( + x = as.mic(c(0.01, 2, 4, 8)), + mo = as.mo("Streptococcus pneumoniae"), + ab = "AMX", + guideline = "EUCAST" +) # plot MIC values, see ?plot plot(mic_data) diff --git a/man/as.mo.Rd b/man/as.mo.Rd index a0b0ce2ed..fcb940553 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -188,9 +188,9 @@ as.mo("S aureus") as.mo("Staphylococcus aureus") as.mo("Staphylococcus aureus (MRSA)") as.mo("Zthafilokkoockus oureuz") # handles incorrect spelling -as.mo("MRSA") # Methicillin Resistant S. aureus -as.mo("VISA") # Vancomycin Intermediate S. aureus -as.mo("VRSA") # Vancomycin Resistant S. aureus +as.mo("MRSA") # Methicillin Resistant S. aureus +as.mo("VISA") # Vancomycin Intermediate S. aureus +as.mo("VRSA") # Vancomycin Resistant S. aureus as.mo(115329001) # SNOMED CT code # Dyslexia is no problem - these all work: @@ -203,15 +203,15 @@ as.mo("Streptococcus group A") as.mo("GAS") # Group A Streptococci as.mo("GBS") # Group B Streptococci -as.mo("S. epidermidis") # will remain species: B_STPHY_EPDR -as.mo("S. epidermidis", Becker = TRUE) # will not remain species: B_STPHY_CONS +as.mo("S. epidermidis") # will remain species: B_STPHY_EPDR +as.mo("S. epidermidis", Becker = TRUE) # will not remain species: B_STPHY_CONS -as.mo("S. pyogenes") # will remain species: B_STRPT_PYGN +as.mo("S. pyogenes") # will remain species: B_STRPT_PYGN as.mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPT_GRPA # All mo_* functions use as.mo() internally too (see ?mo_property): -mo_genus("E. coli") # returns "Escherichia" -mo_gramstain("E. coli") # returns "Gram negative" +mo_genus("E. coli") # returns "Escherichia" +mo_gramstain("E. coli") # returns "Gram negative" mo_is_intrinsic_resistant("E. coli", "vanco") # returns TRUE } } diff --git a/man/as.rsi.Rd b/man/as.rsi.Rd index ceeaafb4c..afe2a8a80 100755 --- a/man/as.rsi.Rd +++ b/man/as.rsi.Rd @@ -164,27 +164,33 @@ example_isolates summary(example_isolates) # see all R/SI results at a glance # For INTERPRETING disk diffusion and MIC values ----------------------- - + # a whole data set, even with combined MIC values and disk zones -df <- data.frame(microorganism = "Escherichia coli", - AMP = as.mic(8), - CIP = as.mic(0.256), - GEN = as.disk(18), - TOB = as.disk(16), - NIT = as.mic(32), - ERY = "R") +df <- data.frame( + microorganism = "Escherichia coli", + AMP = as.mic(8), + CIP = as.mic(0.256), + GEN = as.disk(18), + TOB = as.disk(16), + NIT = as.mic(32), + ERY = "R" +) as.rsi(df) # for single values -as.rsi(x = as.mic(2), - mo = as.mo("S. pneumoniae"), - ab = "AMP", - guideline = "EUCAST") +as.rsi( + x = as.mic(2), + mo = as.mo("S. pneumoniae"), + ab = "AMP", + guideline = "EUCAST" +) -as.rsi(x = as.disk(18), - mo = "Strep pneu", # `mo` will be coerced with as.mo() - ab = "ampicillin", # and `ab` with as.ab() - guideline = "EUCAST") +as.rsi( + x = as.disk(18), + mo = "Strep pneu", # `mo` will be coerced with as.mo() + ab = "ampicillin", # and `ab` with as.ab() + guideline = "EUCAST" +) \donttest{ # the dplyr way @@ -194,23 +200,27 @@ if (require("dplyr")) { df \%>\% mutate(across(where(is.mic), as.rsi)) df \%>\% mutate_at(vars(AMP:TOB), as.rsi) df \%>\% mutate(across(AMP:TOB, as.rsi)) - + df \%>\% mutate_at(vars(AMP:TOB), as.rsi, mo = .$microorganism) - + # to include information about urinary tract infections (UTI) - data.frame(mo = "E. coli", - NIT = c("<= 2", 32), - from_the_bladder = c(TRUE, FALSE)) \%>\% + data.frame( + mo = "E. coli", + NIT = c("<= 2", 32), + from_the_bladder = c(TRUE, FALSE) + ) \%>\% as.rsi(uti = "from_the_bladder") - - data.frame(mo = "E. coli", - NIT = c("<= 2", 32), - specimen = c("urine", "blood")) \%>\% + + data.frame( + mo = "E. coli", + NIT = c("<= 2", 32), + specimen = c("urine", "blood") + ) \%>\% as.rsi() # automatically determines urine isolates - + df \%>\% - mutate_at(vars(AMP:NIT), as.rsi, mo = "E. coli", uti = TRUE) + mutate_at(vars(AMP:NIT), as.rsi, mo = "E. coli", uti = TRUE) } # For CLEANING existing R/SI values ------------------------------------ @@ -219,22 +229,22 @@ as.rsi(c("S", "I", "R", "A", "B", "C")) as.rsi("<= 0.002; S") # will return "S" rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370))) is.rsi(rsi_data) -plot(rsi_data) # for percentages +plot(rsi_data) # for percentages barplot(rsi_data) # for frequencies # the dplyr way if (require("dplyr")) { example_isolates \%>\% mutate_at(vars(PEN:RIF), as.rsi) - # same: + # same: example_isolates \%>\% as.rsi(PEN:RIF) - + # fastest way to transform all columns with already valid AMR results to class `rsi`: example_isolates \%>\% mutate_if(is.rsi.eligible, as.rsi) - - # since dplyr 1.0.0, this can also be: + + # since dplyr 1.0.0, this can also be: # example_isolates \%>\% # mutate(across(where(is.rsi.eligible), as.rsi)) } diff --git a/man/atc_online.Rd b/man/atc_online.Rd index 8fceab112..d68c4eba4 100644 --- a/man/atc_online.Rd +++ b/man/atc_online.Rd @@ -71,7 +71,7 @@ Abbreviations of return values when using \code{property = "U"} (unit): } \examples{ \donttest{ -if (requireNamespace("curl") && requireNamespace("rvest") && requireNamespace("xml2")) { +if (requireNamespace("curl") && requireNamespace("rvest") && requireNamespace("xml2")) { # oral DDD (Defined Daily Dose) of amoxicillin atc_online_property("J01CA04", "DDD", "O") atc_online_ddd(ab_atc("amox")) diff --git a/man/bug_drug_combinations.Rd b/man/bug_drug_combinations.Rd index d59042243..3fbda479f 100644 --- a/man/bug_drug_combinations.Rd +++ b/man/bug_drug_combinations.Rd @@ -70,12 +70,17 @@ head(x) format(x, translate_ab = "name (atc)") # Use FUN to change to transformation of microorganism codes -bug_drug_combinations(example_isolates, - FUN = mo_gramstain) - bug_drug_combinations(example_isolates, - FUN = function(x) ifelse(x == as.mo("Escherichia coli"), - "E. coli", - "Others")) + FUN = mo_gramstain +) + +bug_drug_combinations(example_isolates, + FUN = function(x) { + ifelse(x == as.mo("Escherichia coli"), + "E. coli", + "Others" + ) + } +) } } diff --git a/man/count.Rd b/man/count.Rd index ac3088251..8f0d4a91f 100644 --- a/man/count.Rd +++ b/man/count.Rd @@ -129,9 +129,9 @@ Using \code{only_all_tested} has no impact when only using one antibiotic as inp # run ?example_isolates for more info. # base R ------------------------------------------------------------ -count_resistant(example_isolates$AMX) # counts "R" +count_resistant(example_isolates$AMX) # counts "R" count_susceptible(example_isolates$AMX) # counts "S" and "I" -count_all(example_isolates$AMX) # counts "S", "I" and "R" +count_all(example_isolates$AMX) # counts "S", "I" and "R" # be more specific count_S(example_isolates$AMX) @@ -156,36 +156,38 @@ susceptibility(example_isolates$AMX) * n_rsi(example_isolates$AMX) if (require("dplyr")) { example_isolates \%>\% group_by(ward) \%>\% - summarise(R = count_R(CIP), - I = count_I(CIP), - S = count_S(CIP), - n1 = count_all(CIP), # the actual total; sum of all three - n2 = n_rsi(CIP), # same - analogous to n_distinct - total = n()) # NOT the number of tested isolates! - + summarise( + R = count_R(CIP), + I = count_I(CIP), + S = count_S(CIP), + n1 = count_all(CIP), # the actual total; sum of all three + n2 = n_rsi(CIP), # same - analogous to n_distinct + total = n() + ) # NOT the number of tested isolates! + # Number of available isolates for a whole antibiotic class # (i.e., in this data set columns GEN, TOB, AMK, KAN) example_isolates \%>\% group_by(ward) \%>\% summarise(across(aminoglycosides(), n_rsi)) - + # Count co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy. # Please mind that `susceptibility()` calculates percentages right away instead. example_isolates \%>\% count_susceptible(AMC) # 1433 - example_isolates \%>\% count_all(AMC) # 1879 - + example_isolates \%>\% count_all(AMC) # 1879 + example_isolates \%>\% count_susceptible(GEN) # 1399 - example_isolates \%>\% count_all(GEN) # 1855 - + example_isolates \%>\% count_all(GEN) # 1855 + example_isolates \%>\% count_susceptible(AMC, GEN) # 1764 - example_isolates \%>\% count_all(AMC, GEN) # 1936 - + example_isolates \%>\% count_all(AMC, GEN) # 1936 + # Get number of S+I vs. R immediately of selected columns example_isolates \%>\% select(AMX, CIP) \%>\% count_df(translate = FALSE) - + # It also supports grouping variables example_isolates \%>\% select(ward, AMX, CIP) \%>\% diff --git a/man/custom_eucast_rules.Rd b/man/custom_eucast_rules.Rd index 5fb7b1141..70ede096b 100644 --- a/man/custom_eucast_rules.Rd +++ b/man/custom_eucast_rules.Rd @@ -109,19 +109,24 @@ It is possible to define antibiotic groups instead of single antibiotics for the } \examples{ -x <- custom_eucast_rules(AMC == "R" & genus == "Klebsiella" ~ aminopenicillins == "R", - AMC == "I" & genus == "Klebsiella" ~ aminopenicillins == "I") +x <- custom_eucast_rules( + AMC == "R" & genus == "Klebsiella" ~ aminopenicillins == "R", + AMC == "I" & genus == "Klebsiella" ~ aminopenicillins == "I" +) x # run the custom rule set (verbose = TRUE will return a logbook instead of the data set): eucast_rules(example_isolates, - rules = "custom", - custom_rules = x, - info = FALSE, - verbose = TRUE) - + rules = "custom", + custom_rules = x, + info = FALSE, + verbose = TRUE +) + # combine rule sets -x2 <- c(x, - custom_eucast_rules(TZP == "R" ~ carbapenems == "R")) +x2 <- c( + x, + custom_eucast_rules(TZP == "R" ~ carbapenems == "R") +) x2 } diff --git a/man/eucast_rules.Rd b/man/eucast_rules.Rd index de880742a..fb7f41465 100644 --- a/man/eucast_rules.Rd +++ b/man/eucast_rules.Rd @@ -136,19 +136,23 @@ All data sets in this \code{AMR} package (about microorganisms, antibiotics, R/S \examples{ \donttest{ -a <- data.frame(mo = c("Staphylococcus aureus", - "Enterococcus faecalis", - "Escherichia coli", - "Klebsiella pneumoniae", - "Pseudomonas aeruginosa"), - VAN = "-", # Vancomycin - AMX = "-", # Amoxicillin - COL = "-", # Colistin - CAZ = "-", # Ceftazidime - CXM = "-", # Cefuroxime - PEN = "S", # Benzylpenicillin - FOX = "S", # Cefoxitin - stringsAsFactors = FALSE) +a <- data.frame( + mo = c( + "Staphylococcus aureus", + "Enterococcus faecalis", + "Escherichia coli", + "Klebsiella pneumoniae", + "Pseudomonas aeruginosa" + ), + VAN = "-", # Vancomycin + AMX = "-", # Amoxicillin + COL = "-", # Colistin + CAZ = "-", # Ceftazidime + CXM = "-", # Cefuroxime + PEN = "S", # Benzylpenicillin + FOX = "S", # Cefoxitin + stringsAsFactors = FALSE +) head(a) diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd index 58ae08a2f..a178b74e3 100755 --- a/man/first_isolate.Rd +++ b/man/first_isolate.Rd @@ -177,22 +177,20 @@ if (require("dplyr")) { # filter on first isolates using dplyr: example_isolates \%>\% filter(first_isolate()) - } if (require("dplyr")) { - + # short-hand version: example_isolates \%>\% filter_first_isolate(info = FALSE) - } if (require("dplyr")) { - - # flag the first isolates per group: - example_isolates \%>\% - group_by(ward) \%>\% - mutate(first = first_isolate()) \%>\% - select(ward, date, patient, mo, first) + + # flag the first isolates per group: + example_isolates \%>\% + group_by(ward) \%>\% + mutate(first = first_isolate()) \%>\% + select(ward, date, patient, mo, first) } } } diff --git a/man/get_episode.Rd b/man/get_episode.Rd index 563ae93f3..f25238287 100644 --- a/man/get_episode.Rd +++ b/man/get_episode.Rd @@ -37,68 +37,72 @@ The \code{dplyr} package is not required for these functions to work, but these # See ?example_isolates df <- example_isolates[sample(seq_len(2000), size = 200), ] -get_episode(df$date, episode_days = 60) # indices +get_episode(df$date, episode_days = 60) # indices is_new_episode(df$date, episode_days = 60) # TRUE/FALSE # filter on results from the third 60-day episode only, using base R df[which(get_episode(df$date, 60) == 3), ] # the functions also work for less than a day, e.g. to include one per hour: -get_episode(c(Sys.time(), - Sys.time() + 60 * 60), - episode_days = 1/24) +get_episode(c( + Sys.time(), + Sys.time() + 60 * 60 +), +episode_days = 1 / 24 +) \donttest{ if (require("dplyr")) { # is_new_episode() can also be used in dplyr verbs to determine patient # episodes based on any (combination of) grouping variables: df \%>\% - mutate(condition = sample(x = c("A", "B", "C"), - size = 200, - replace = TRUE)) \%>\% + mutate(condition = sample( + x = c("A", "B", "C"), + size = 200, + replace = TRUE + )) \%>\% group_by(condition) \%>\% mutate(new_episode = is_new_episode(date, 365)) \%>\% select(patient, date, condition, new_episode) - } if (require("dplyr")) { - df \%>\% group_by(ward, patient) \%>\% - transmute(date, - patient, - new_index = get_episode(date, 60), - new_logical = is_new_episode(date, 60)) - + transmute(date, + patient, + new_index = get_episode(date, 60), + new_logical = is_new_episode(date, 60) + ) } if (require("dplyr")) { - df \%>\% - group_by(ward) \%>\% - summarise(n_patients = n_distinct(patient), - n_episodes_365 = sum(is_new_episode(date, episode_days = 365)), - n_episodes_60 = sum(is_new_episode(date, episode_days = 60)), - n_episodes_30 = sum(is_new_episode(date, episode_days = 30))) - + group_by(ward) \%>\% + summarise( + n_patients = n_distinct(patient), + n_episodes_365 = sum(is_new_episode(date, episode_days = 365)), + n_episodes_60 = sum(is_new_episode(date, episode_days = 60)), + n_episodes_30 = sum(is_new_episode(date, episode_days = 30)) + ) } if (require("dplyr")) { - + # grouping on patients and microorganisms leads to the same # results as first_isolate() when using 'episode-based': x <- df \%>\% - filter_first_isolate(include_unknown = TRUE, - method = "episode-based") - + filter_first_isolate( + include_unknown = TRUE, + method = "episode-based" + ) + y <- df \%>\% group_by(patient, mo) \%>\% filter(is_new_episode(date, 365)) \%>\% ungroup() identical(x, y) - } if (require("dplyr")) { - + # but is_new_episode() has a lot more flexibility than first_isolate(), # since you can now group on anything that seems relevant: df \%>\% diff --git a/man/ggplot_pca.Rd b/man/ggplot_pca.Rd index 75fa8aec6..7e948260e 100644 --- a/man/ggplot_pca.Rd +++ b/man/ggplot_pca.Rd @@ -114,24 +114,26 @@ The colours for labels and points can be changed by adding another scale layer f \donttest{ if (require("dplyr")) { - # calculate the resistance per group first - resistance_data <- example_isolates \%>\% - group_by(order = mo_order(mo), # group on anything, like order - genus = mo_genus(mo)) \%>\% # and genus as we do here; - filter(n() >= 30) \%>\% # filter on only 30 results per group - summarise_if(is.rsi, resistance) # then get resistance of all drugs - + # calculate the resistance per group first + resistance_data <- example_isolates \%>\% + group_by( + order = mo_order(mo), # group on anything, like order + genus = mo_genus(mo) + ) \%>\% # and genus as we do here; + filter(n() >= 30) \%>\% # filter on only 30 results per group + summarise_if(is.rsi, resistance) # then get resistance of all drugs + # now conduct PCA for certain antimicrobial agents - pca_result <- resistance_data \%>\% - pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT) - + pca_result <- resistance_data \%>\% + pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT) + summary(pca_result) - + # old base R plotting method: biplot(pca_result) # new ggplot2 plotting method using this package: ggplot_pca(pca_result) - + if (require("ggplot2")) { ggplot_pca(pca_result) + scale_colour_viridis_d() + diff --git a/man/ggplot_rsi.Rd b/man/ggplot_rsi.Rd index 269afbee9..ce3e2e6c0 100644 --- a/man/ggplot_rsi.Rd +++ b/man/ggplot_rsi.Rd @@ -143,14 +143,13 @@ At default, the names of antibiotics will be shown on the plots using \code{\lin \examples{ \donttest{ if (require("ggplot2") && require("dplyr")) { - + # get antimicrobial results for drugs against a UTI: ggplot(example_isolates \%>\% select(AMX, NIT, FOS, TMP, CIP)) + geom_rsi() - } if (require("ggplot2") && require("dplyr")) { - + # prettify the plot using some additional functions: df <- example_isolates \%>\% select(AMX, NIT, FOS, TMP, CIP) ggplot(df) + @@ -159,35 +158,33 @@ if (require("ggplot2") && require("dplyr")) { scale_rsi_colours() + labels_rsi_count() + theme_rsi() - } if (require("ggplot2") && require("dplyr")) { - + # or better yet, simplify this using the wrapper function - a single command: example_isolates \%>\% select(AMX, NIT, FOS, TMP, CIP) \%>\% ggplot_rsi() - } if (require("ggplot2") && require("dplyr")) { - + # get only proportions and no counts: example_isolates \%>\% select(AMX, NIT, FOS, TMP, CIP) \%>\% ggplot_rsi(datalabels = FALSE) - } if (require("ggplot2") && require("dplyr")) { - + # add other ggplot2 arguments as you like: example_isolates \%>\% select(AMX, NIT, FOS, TMP, CIP) \%>\% - ggplot_rsi(width = 0.5, - colour = "black", - size = 1, - linetype = 2, - alpha = 0.25) - + ggplot_rsi( + width = 0.5, + colour = "black", + size = 1, + linetype = 2, + alpha = 0.25 + ) } if (require("ggplot2") && require("dplyr")) { @@ -195,55 +192,57 @@ if (require("ggplot2") && require("dplyr")) { example_isolates \%>\% select(AMX) \%>\% ggplot_rsi(colours = c(SI = "yellow")) - } if (require("ggplot2") && require("dplyr")) { # but you can also use the built-in colour-blind friendly colours for # your plots, where "S" is green, "I" is yellow and "R" is red: - data.frame(x = c("Value1", "Value2", "Value3"), - y = c(1, 2, 3), - z = c("Value4", "Value5", "Value6")) \%>\% + data.frame( + x = c("Value1", "Value2", "Value3"), + y = c(1, 2, 3), + z = c("Value4", "Value5", "Value6") + ) \%>\% ggplot() + geom_col(aes(x = x, y = y, fill = z)) + scale_rsi_colours(Value4 = "S", Value5 = "I", Value6 = "R") - } if (require("ggplot2") && require("dplyr")) { - + # resistance of ciprofloxacine per age group example_isolates \%>\% mutate(first_isolate = first_isolate()) \%>\% - filter(first_isolate == TRUE, - mo == as.mo("Escherichia coli")) \%>\% + filter( + first_isolate == TRUE, + mo == as.mo("Escherichia coli") + ) \%>\% # age_groups() is also a function in this AMR package: group_by(age_group = age_groups(age)) \%>\% select(age_group, CIP) \%>\% ggplot_rsi(x = "age_group") - } if (require("ggplot2") && require("dplyr")) { - + # a shorter version which also adjusts data label colours: example_isolates \%>\% select(AMX, NIT, FOS, TMP, CIP) \%>\% ggplot_rsi(colours = FALSE) - } if (require("ggplot2") && require("dplyr")) { # it also supports groups (don't forget to use the group var on `x` or `facet`): example_isolates \%>\% - filter(mo_is_gram_negative(), ward != "Outpatient") \%>\% + filter(mo_is_gram_negative(), ward != "Outpatient") \%>\% # select only UTI-specific drugs select(ward, AMX, NIT, FOS, TMP, CIP) \%>\% group_by(ward) \%>\% - ggplot_rsi(x = "ward", - facet = "antibiotic", - nrow = 1, - title = "AMR of Anti-UTI Drugs Per Ward", - x.title = "Ward", - datalabels = FALSE) + ggplot_rsi( + x = "ward", + facet = "antibiotic", + nrow = 1, + title = "AMR of Anti-UTI Drugs Per Ward", + x.title = "Ward", + datalabels = FALSE + ) } } } diff --git a/man/guess_ab_col.Rd b/man/guess_ab_col.Rd index 4f06e966a..70386cefe 100644 --- a/man/guess_ab_col.Rd +++ b/man/guess_ab_col.Rd @@ -30,8 +30,10 @@ This tries to find a column name in a data set based on information from the \li You can look for an antibiotic (trade) name or abbreviation and it will search \code{x} and the \link{antibiotics} data set for any column containing a name or code of that antibiotic. \strong{Longer columns names take precedence over shorter column names.} } \examples{ -df <- data.frame(amox = "S", - tetr = "R") +df <- data.frame( + amox = "S", + tetr = "R" +) guess_ab_col(df, "amoxicillin") # [1] "amox" @@ -43,8 +45,10 @@ guess_ab_col(df, "J01AA07", verbose = TRUE) # [1] "tetr" # WHONET codes -df <- data.frame(AMP_ND10 = "R", - AMC_ED20 = "S") +df <- data.frame( + AMP_ND10 = "R", + AMC_ED20 = "S" +) guess_ab_col(df, "ampicillin") # [1] "AMP_ND10" guess_ab_col(df, "J01CR02") @@ -53,8 +57,10 @@ guess_ab_col(df, as.ab("augmentin")) # [1] "AMC_ED20" # Longer names take precendence: -df <- data.frame(AMP_ED2 = "S", - AMP_ED20 = "S") +df <- data.frame( + AMP_ED2 = "S", + AMP_ED20 = "S" +) guess_ab_col(df, "ampicillin") # [1] "AMP_ED20" } diff --git a/man/join.Rd b/man/join.Rd index 828da1474..0c7e85cb1 100755 --- a/man/join.Rd +++ b/man/join.Rd @@ -47,12 +47,18 @@ If the \code{dplyr} package is installed, their join functions will be used. Oth left_join_microorganisms(as.mo("K. pneumoniae")) left_join_microorganisms("B_KLBSL_PNMN") -df <- data.frame(date = seq(from = as.Date("2018-01-01"), - to = as.Date("2018-01-07"), - by = 1), - bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR", - "E. coli", "E. coli", "E. coli")), - stringsAsFactors = FALSE) +df <- data.frame( + date = seq( + from = as.Date("2018-01-01"), + to = as.Date("2018-01-07"), + by = 1 + ), + bacteria = as.mo(c( + "S. aureus", "MRSA", "MSSA", "STAAUR", + "E. coli", "E. coli", "E. coli" + )), + stringsAsFactors = FALSE +) colnames(df) df_joined <- left_join_microorganisms(df, "bacteria") @@ -61,7 +67,7 @@ colnames(df_joined) \donttest{ if (require("dplyr")) { example_isolates \%>\% - left_join_microorganisms() \%>\% + left_join_microorganisms() \%>\% colnames() } } diff --git a/man/key_antimicrobials.Rd b/man/key_antimicrobials.Rd index 529fd1954..63b7a32fd 100644 --- a/man/key_antimicrobials.Rd +++ b/man/key_antimicrobials.Rd @@ -133,7 +133,7 @@ if (require("dplyr")) { # and first WEIGHTED isolates first_weighted = first_isolate(col_keyantimicrobials = "keyab") ) - + # Check the difference in this data set, 'weighted' results in more isolates: sum(my_patients$first_regular, na.rm = TRUE) sum(my_patients$first_weighted, na.rm = TRUE) diff --git a/man/like.Rd b/man/like.Rd index cf93675f0..bdc77c4c4 100755 --- a/man/like.Rd +++ b/man/like.Rd @@ -53,7 +53,7 @@ b \%like\% a # also supports multiple patterns a <- c("Test case", "Something different", "Yet another thing") -b <- c( "case", "diff", "yet") +b <- c("case", "diff", "yet") a \%like\% b a \%unlike\% b diff --git a/man/mdro.Rd b/man/mdro.Rd index 6c9aed657..965350d0a 100644 --- a/man/mdro.Rd +++ b/man/mdro.Rd @@ -137,7 +137,7 @@ You can print the rules set in the console for an overview. Colours will help re #> 1. CIP is "R" and age is higher than 60 -> Elderly Type A #> 2. ERY is "R" and age is higher than 60 -> Elderly Type B #> 3. Otherwise -> Negative -#> +#> #> Unmatched rows will return NA. }\if{html}{\out{}} @@ -153,10 +153,10 @@ table(x) Rules can also be combined with other custom rules by using \code{\link[=c]{c()}}: \if{html}{\out{
}}\preformatted{x <- mdro(example_isolates, - guideline = c(custom, + guideline = c(custom, custom_mdro_guideline(ERY == "R" & age > 50 ~ "Elderly Type C"))) table(x) -#> Negative Elderly Type A Elderly Type B Elderly Type C +#> Negative Elderly Type A Elderly Type B Elderly Type C #> 961 198 732 109 }\if{html}{\out{
}} @@ -193,8 +193,11 @@ str(out) table(out) out <- mdro(example_isolates, - guideline = custom_mdro_guideline(AMX == "R" ~ "Custom MDRO 1", - VAN == "R" ~ "Custom MDRO 2")) + guideline = custom_mdro_guideline( + AMX == "R" ~ "Custom MDRO 1", + VAN == "R" ~ "Custom MDRO 2" + ) +) table(out) \donttest{ @@ -202,13 +205,12 @@ if (require("dplyr")) { example_isolates \%>\% mdro() \%>\% table() - + # no need to define `x` when used inside dplyr verbs: example_isolates \%>\% mutate(MDRO = mdro()) \%>\% pull(MDRO) \%>\% table() - } } } diff --git a/man/mo_matching_score.Rd b/man/mo_matching_score.Rd index 922f3dedd..2158edf27 100644 --- a/man/mo_matching_score.Rd +++ b/man/mo_matching_score.Rd @@ -48,8 +48,10 @@ All data sets in this \code{AMR} package (about microorganisms, antibiotics, R/S as.mo("E. coli") mo_uncertainties() -mo_matching_score(x = "E. coli", - n = c("Escherichia coli", "Entamoeba coli")) +mo_matching_score( + x = "E. coli", + n = c("Escherichia coli", "Entamoeba coli") +) } \author{ Dr Matthijs Berends diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 8f9432e5c..4222e0618 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -263,11 +263,13 @@ mo_type("Klebsiella pneumoniae") mo_type("Klebsiella pneumoniae") mo_fullname("S. pyogenes", - Lancefield = TRUE, - language = "de") + Lancefield = TRUE, + language = "de" +) mo_fullname("S. pyogenes", - Lancefield = TRUE, - language = "nl") + Lancefield = TRUE, + language = "nl" +) # other -------------------------------------------------------------------- @@ -278,7 +280,7 @@ mo_is_yeast(c("Candida", "Trichophyton", "Klebsiella")) if (require("dplyr")) { example_isolates \%>\% filter(mo_is_gram_positive()) - + example_isolates \%>\% filter(mo_is_intrinsic_resistant(ab = "vanco")) } diff --git a/man/mo_source.Rd b/man/mo_source.Rd index 90f8cc5ef..790cb52c0 100644 --- a/man/mo_source.Rd +++ b/man/mo_source.Rd @@ -51,7 +51,7 @@ We save it as \code{"home/me/ourcodes.xlsx"}. Now we have to set it as a source: \if{html}{\out{
}}\preformatted{set_mo_source("home/me/ourcodes.xlsx") #> NOTE: Created mo_source file '/Users/me/mo_source.rds' (0.3 kB) from -#> '/Users/me/Documents/ourcodes.xlsx' (9 kB), columns +#> '/Users/me/Documents/ourcodes.xlsx' (9 kB), columns #> "Organisation XYZ" and "mo" }\if{html}{\out{
}} @@ -88,7 +88,7 @@ If we edit the Excel file by, let's say, adding row 4 like this: ...any new usage of an MO function in this package will update your data file: \if{html}{\out{
}}\preformatted{as.mo("lab_mo_ecoli") -#> NOTE: Updated mo_source file '/Users/me/mo_source.rds' (0.3 kB) from +#> NOTE: Updated mo_source file '/Users/me/mo_source.rds' (0.3 kB) from #> '/Users/me/Documents/ourcodes.xlsx' (9 kB), columns #> "Organisation XYZ" and "mo" #> Class diff --git a/man/pca.Rd b/man/pca.Rd index b5643f909..28e07ba1b 100644 --- a/man/pca.Rd +++ b/man/pca.Rd @@ -65,25 +65,27 @@ The result of the \code{\link[=pca]{pca()}} function is a \link{prcomp} object, \donttest{ if (require("dplyr")) { - # calculate the resistance per group first - resistance_data <- example_isolates \%>\% - group_by(order = mo_order(mo), # group on anything, like order - genus = mo_genus(mo)) \%>\% # and genus as we do here; - filter(n() >= 30) \%>\% # filter on only 30 results per group - summarise_if(is.rsi, resistance) # then get resistance of all drugs - + # calculate the resistance per group first + resistance_data <- example_isolates \%>\% + group_by( + order = mo_order(mo), # group on anything, like order + genus = mo_genus(mo) + ) \%>\% # and genus as we do here; + filter(n() >= 30) \%>\% # filter on only 30 results per group + summarise_if(is.rsi, resistance) # then get resistance of all drugs + # now conduct PCA for certain antimicrobial agents - pca_result <- resistance_data \%>\% - pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT) - + pca_result <- resistance_data \%>\% + pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT) + pca_result summary(pca_result) - + # old base R plotting method: biplot(pca_result) # new ggplot2 plotting method using this package: ggplot_pca(pca_result) - + if (require("ggplot2")) { ggplot_pca(pca_result) + scale_colour_viridis_d() + diff --git a/man/proportion.Rd b/man/proportion.Rd index 3637dcbac..68736e939 100644 --- a/man/proportion.Rd +++ b/man/proportion.Rd @@ -144,7 +144,7 @@ This AMR package honours this (new) insight. Use \code{\link[=susceptibility]{su # run ?example_isolates for more info. # base R ------------------------------------------------------------ -resistance(example_isolates$AMX) # determines \%R +resistance(example_isolates$AMX) # determines \%R susceptibility(example_isolates$AMX) # determines \%S+I # be more specific @@ -159,55 +159,65 @@ proportion_R(example_isolates$AMX) if (require("dplyr")) { example_isolates \%>\% group_by(ward) \%>\% - summarise(r = resistance(CIP), - n = n_rsi(CIP)) # n_rsi works like n_distinct in dplyr, see ?n_rsi - + summarise( + r = resistance(CIP), + n = n_rsi(CIP) + ) # n_rsi works like n_distinct in dplyr, see ?n_rsi + example_isolates \%>\% group_by(ward) \%>\% - summarise(R = resistance(CIP, as_percent = TRUE), - SI = susceptibility(CIP, as_percent = TRUE), - n1 = count_all(CIP), # the actual total; sum of all three - n2 = n_rsi(CIP), # same - analogous to n_distinct - total = n()) # NOT the number of tested isolates! - + summarise( + R = resistance(CIP, as_percent = TRUE), + SI = susceptibility(CIP, as_percent = TRUE), + n1 = count_all(CIP), # the actual total; sum of all three + n2 = n_rsi(CIP), # same - analogous to n_distinct + total = n() + ) # NOT the number of tested isolates! + # Calculate co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy: - example_isolates \%>\% susceptibility(AMC) # \%SI = 76.3\% - example_isolates \%>\% count_all(AMC) # n = 1879 - - example_isolates \%>\% susceptibility(GEN) # \%SI = 75.4\% - example_isolates \%>\% count_all(GEN) # n = 1855 - + example_isolates \%>\% susceptibility(AMC) # \%SI = 76.3\% + example_isolates \%>\% count_all(AMC) # n = 1879 + + example_isolates \%>\% susceptibility(GEN) # \%SI = 75.4\% + example_isolates \%>\% count_all(GEN) # n = 1855 + example_isolates \%>\% susceptibility(AMC, GEN) # \%SI = 94.1\% - example_isolates \%>\% count_all(AMC, GEN) # n = 1939 - - + example_isolates \%>\% count_all(AMC, GEN) # n = 1939 + + # See Details on how `only_all_tested` works. Example: example_isolates \%>\% - summarise(numerator = count_susceptible(AMC, GEN), - denominator = count_all(AMC, GEN), - proportion = susceptibility(AMC, GEN)) - + summarise( + numerator = count_susceptible(AMC, GEN), + denominator = count_all(AMC, GEN), + proportion = susceptibility(AMC, GEN) + ) + example_isolates \%>\% - summarise(numerator = count_susceptible(AMC, GEN, only_all_tested = TRUE), - denominator = count_all(AMC, GEN, only_all_tested = TRUE), - proportion = susceptibility(AMC, GEN, only_all_tested = TRUE)) - - + summarise( + numerator = count_susceptible(AMC, GEN, only_all_tested = TRUE), + denominator = count_all(AMC, GEN, only_all_tested = TRUE), + proportion = susceptibility(AMC, GEN, only_all_tested = TRUE) + ) + + example_isolates \%>\% group_by(ward) \%>\% - summarise(cipro_p = susceptibility(CIP, as_percent = TRUE), - cipro_n = count_all(CIP), - genta_p = susceptibility(GEN, as_percent = TRUE), - genta_n = count_all(GEN), - combination_p = susceptibility(CIP, GEN, as_percent = TRUE), - combination_n = count_all(CIP, GEN)) - + summarise( + cipro_p = susceptibility(CIP, as_percent = TRUE), + cipro_n = count_all(CIP), + genta_p = susceptibility(GEN, as_percent = TRUE), + genta_n = count_all(GEN), + combination_p = susceptibility(CIP, GEN, as_percent = TRUE), + combination_n = count_all(CIP, GEN) + ) + # Get proportions S/I/R immediately of all rsi columns example_isolates \%>\% select(AMX, CIP) \%>\% proportion_df(translate = FALSE) - + # It also supports grouping variables # (use rsi_df to also include the count) example_isolates \%>\% diff --git a/man/random.Rd b/man/random.Rd index 46eef6de6..c9207c524 100644 --- a/man/random.Rd +++ b/man/random.Rd @@ -42,12 +42,12 @@ random_rsi(25) \donttest{ # make the random generation more realistic by setting a bug and/or drug: -random_mic(25, "Klebsiella pneumoniae") # range 0.0625-64 -random_mic(25, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16 +random_mic(25, "Klebsiella pneumoniae") # range 0.0625-64 +random_mic(25, "Klebsiella pneumoniae", "meropenem") # range 0.0625-16 random_mic(25, "Streptococcus pneumoniae", "meropenem") # range 0.0625-4 -random_disk(25, "Klebsiella pneumoniae") # range 8-50 -random_disk(25, "Klebsiella pneumoniae", "ampicillin") # range 11-17 +random_disk(25, "Klebsiella pneumoniae") # range 8-50 +random_disk(25, "Klebsiella pneumoniae", "ampicillin") # range 11-17 random_disk(25, "Streptococcus pneumoniae", "ampicillin") # range 12-27 } } diff --git a/man/resistance_predict.Rd b/man/resistance_predict.Rd index fe83f7e95..5cc71a9f5 100644 --- a/man/resistance_predict.Rd +++ b/man/resistance_predict.Rd @@ -126,10 +126,11 @@ This AMR package honours this (new) insight. Use \code{\link[=susceptibility]{su } \examples{ -x <- resistance_predict(example_isolates, - col_ab = "AMX", - year_min = 2010, - model = "binomial") +x <- resistance_predict(example_isolates, + col_ab = "AMX", + year_min = 2010, + model = "binomial" +) plot(x) \donttest{ if (require("ggplot2")) { @@ -151,14 +152,15 @@ if (require("dplyr")) { # create nice plots with ggplot2 yourself if (require("dplyr") && require("ggplot2")) { - data <- example_isolates \%>\% filter(mo == as.mo("E. coli")) \%>\% - resistance_predict(col_ab = "AMX", - col_date = "date", - model = "binomial", - info = FALSE, - minimum = 15) + resistance_predict( + col_ab = "AMX", + col_date = "date", + model = "binomial", + info = FALSE, + minimum = 15 + ) head(data) autoplot(data) } diff --git a/tests/tinytest.R b/tests/tinytest.R index 09ca6835f..2753f0828 100644 --- a/tests/tinytest.R +++ b/tests/tinytest.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -30,11 +30,13 @@ if (identical(Sys.getenv("R_RUN_TINYTEST"), "true")) { if (AMR:::pkg_is_available("tinytest")) { library(AMR) out <- test_package("AMR", - testdir = ifelse(AMR:::dir.exists("inst/tinytest"), - "inst/tinytest", - "tinytest"), - verbose = 99, - color = FALSE) + testdir = ifelse(AMR:::dir.exists("inst/tinytest"), + "inst/tinytest", + "tinytest" + ), + verbose = 99, + color = FALSE + ) cat("SUMMARY:\n") print(summary(out)) } diff --git a/vignettes/AMR.Rmd b/vignettes/AMR.Rmd index 787bc5ebb..bcfbe1f82 100755 --- a/vignettes/AMR.Rmd +++ b/vignettes/AMR.Rmd @@ -48,13 +48,16 @@ For this tutorial, we will create fake demonstration data to work with. You can skip to [Cleaning the data](#cleaning-the-data) if you already have your own data ready. If you start your analysis, try to make the structure of your data generally look like this: ```{r example table, echo = FALSE, results = 'asis'} -knitr::kable(data.frame(date = Sys.Date(), - patient_id = c("abcd", "abcd", "efgh"), - mo = "Escherichia coli", - AMX = c("S", "S", "R"), - CIP = c("S", "R", "S"), - stringsAsFactors = FALSE), - align = "c") +knitr::kable(data.frame( + date = Sys.Date(), + patient_id = c("abcd", "abcd", "efgh"), + mo = "Escherichia coli", + AMX = c("S", "S", "R"), + CIP = c("S", "R", "S"), + stringsAsFactors = FALSE +), +align = "c" +) ``` ## Needed R packages @@ -87,9 +90,13 @@ patients <- unlist(lapply(LETTERS, paste0, 1:10)) The `LETTERS` object is available in R - it's a vector with 26 characters: `A` to `Z`. The `patients` object we just created is now a vector of length `r length(patients)`, with values (patient IDs) varying from ``r patients[1]`` to ``r patients[length(patients)]``. Now we we also set the gender of our patients, by putting the ID and the gender in a table: ```{r create gender} -patients_table <- data.frame(patient_id = patients, - gender = c(rep("M", 135), - rep("F", 125))) +patients_table <- data.frame( + patient_id = patients, + gender = c( + rep("M", 135), + rep("F", 125) + ) +) ``` The first 135 patient IDs are now male, the other 125 are female. @@ -107,8 +114,10 @@ This `dates` object now contains all days in our date range. For this tutorial, we will uses four different microorganisms: *Escherichia coli*, *Staphylococcus aureus*, *Streptococcus pneumoniae*, and *Klebsiella pneumoniae*: ```{r mo} -bacteria <- c("Escherichia coli", "Staphylococcus aureus", - "Streptococcus pneumoniae", "Klebsiella pneumoniae") +bacteria <- c( + "Escherichia coli", "Staphylococcus aureus", + "Streptococcus pneumoniae", "Klebsiella pneumoniae" +) ``` ## Put everything together @@ -117,20 +126,27 @@ Using the `sample()` function, we can randomly select items from all objects we ```{r merge data} sample_size <- 20000 -data <- data.frame(date = sample(dates, size = sample_size, replace = TRUE), - patient_id = sample(patients, size = sample_size, replace = TRUE), - hospital = sample(c("Hospital A", - "Hospital B", - "Hospital C", - "Hospital D"), - size = sample_size, replace = TRUE, - prob = c(0.30, 0.35, 0.15, 0.20)), - bacteria = sample(bacteria, size = sample_size, replace = TRUE, - prob = c(0.50, 0.25, 0.15, 0.10)), - AMX = random_rsi(sample_size, prob_RSI = c(0.35, 0.60, 0.05)), - AMC = random_rsi(sample_size, prob_RSI = c(0.15, 0.75, 0.10)), - CIP = random_rsi(sample_size, prob_RSI = c(0.20, 0.80, 0.00)), - GEN = random_rsi(sample_size, prob_RSI = c(0.08, 0.92, 0.00))) +data <- data.frame( + date = sample(dates, size = sample_size, replace = TRUE), + patient_id = sample(patients, size = sample_size, replace = TRUE), + hospital = sample(c( + "Hospital A", + "Hospital B", + "Hospital C", + "Hospital D" + ), + size = sample_size, replace = TRUE, + prob = c(0.30, 0.35, 0.15, 0.20) + ), + bacteria = sample(bacteria, + size = sample_size, replace = TRUE, + prob = c(0.50, 0.25, 0.15, 0.10) + ), + AMX = random_rsi(sample_size, prob_RSI = c(0.35, 0.60, 0.05)), + AMC = random_rsi(sample_size, prob_RSI = c(0.15, 0.75, 0.10)), + CIP = random_rsi(sample_size, prob_RSI = c(0.20, 0.80, 0.00)), + GEN = random_rsi(sample_size, prob_RSI = c(0.08, 0.92, 0.00)) +) ``` Using the `left_join()` function from the `dplyr` package, we can 'map' the gender to the patient ID using the `patients_table` object we created earlier: @@ -192,10 +208,12 @@ data <- eucast_rules(data, col_mo = "bacteria", rules = "all") Now that we have the microbial ID, we can add some taxonomic properties: ```{r new taxo} -data <- data %>% - mutate(gramstain = mo_gramstain(bacteria), - genus = mo_genus(bacteria), - species = mo_species(bacteria)) +data <- data %>% + mutate( + gramstain = mo_gramstain(bacteria), + genus = mo_genus(bacteria), + species = mo_species(bacteria) + ) ``` ## First isolates @@ -213,21 +231,21 @@ This `AMR` package includes this methodology with the `first_isolate()` function The outcome of the function can easily be added to our data: ```{r 1st isolate} -data <- data %>% +data <- data %>% mutate(first = first_isolate(info = TRUE)) ``` So only `r percentage(sum(data$first) / nrow(data))` is suitable for resistance analysis! We can now filter on it with the `filter()` function, also from the `dplyr` package: ```{r 1st isolate filter} -data_1st <- data %>% +data_1st <- data %>% filter(first == TRUE) ``` For future use, the above two syntaxes can be shortened: ```{r 1st isolate filter 2} -data_1st <- data %>% +data_1st <- data %>% filter_first_isolate() ``` @@ -261,7 +279,7 @@ Or can be used like the `dplyr` way, which is easier readable: data_1st %>% freq(genus, species) ``` ```{r freq 2b, results = 'asis', echo = FALSE} -data_1st %>% +data_1st %>% freq(genus, species, header = TRUE) ``` @@ -270,45 +288,48 @@ data_1st %>% Using [tidyverse selections](https://tidyselect.r-lib.org/reference/language.html), you can also select or filter columns based on the antibiotic class they are in: ```{r bug_drg 2a, eval = FALSE} -data_1st %>% +data_1st %>% filter(any(aminoglycosides() == "R")) ``` ```{r bug_drg 2b, echo = FALSE, results = 'asis'} -knitr::kable(data_1st %>% - filter(any(aminoglycosides() == "R")) %>% - head(), - align = "c") +knitr::kable(data_1st %>% + filter(any(aminoglycosides() == "R")) %>% + head(), +align = "c" +) ``` If you want to get a quick glance of the number of isolates in different bug/drug combinations, you can use the `bug_drug_combinations()` function: ```{r bug_drg 1a, eval = FALSE} -data_1st %>% - bug_drug_combinations() %>% +data_1st %>% + bug_drug_combinations() %>% head() # show first 6 rows ``` ```{r bug_drg 1b, echo = FALSE, results = 'asis'} -knitr::kable(data_1st %>% - bug_drug_combinations() %>% - head(), - align = "c") +knitr::kable(data_1st %>% + bug_drug_combinations() %>% + head(), +align = "c" +) ``` ```{r bug_drg 3a, eval = FALSE} -data_1st %>% - select(bacteria, aminoglycosides()) %>% +data_1st %>% + select(bacteria, aminoglycosides()) %>% bug_drug_combinations() ``` ```{r bug_drg 3b, echo = FALSE, results = 'asis'} -knitr::kable(data_1st %>% - select(bacteria, aminoglycosides()) %>% - bug_drug_combinations(), - align = "c") +knitr::kable(data_1st %>% + select(bacteria, aminoglycosides()) %>% + bug_drug_combinations(), +align = "c" +) ``` This will only give you the crude numbers in the data. To calculate antimicrobial resistance in a more sensible way, also by correcting for too few results, we use the `resistance()` and `susceptibility()` functions. @@ -328,86 +349,98 @@ data_1st %>% resistance(AMX) Or can be used in conjunction with `group_by()` and `summarise()`, both from the `dplyr` package: ```{r, eval = FALSE} -data_1st %>% - group_by(hospital) %>% +data_1st %>% + group_by(hospital) %>% summarise(amoxicillin = resistance(AMX)) ``` ```{r, echo = FALSE} -data_1st %>% - group_by(hospital) %>% - summarise(amoxicillin = resistance(AMX)) %>% +data_1st %>% + group_by(hospital) %>% + summarise(amoxicillin = resistance(AMX)) %>% knitr::kable(align = "c", big.mark = ",") ``` Of course it would be very convenient to know the number of isolates responsible for the percentages. For that purpose the `n_rsi()` can be used, which works exactly like `n_distinct()` from the `dplyr` package. It counts all isolates available for every group (i.e. values S, I or R): ```{r, eval = FALSE} -data_1st %>% - group_by(hospital) %>% - summarise(amoxicillin = resistance(AMX), - available = n_rsi(AMX)) +data_1st %>% + group_by(hospital) %>% + summarise( + amoxicillin = resistance(AMX), + available = n_rsi(AMX) + ) ``` ```{r, echo = FALSE} -data_1st %>% - group_by(hospital) %>% - summarise(amoxicillin = resistance(AMX), - available = n_rsi(AMX)) %>% +data_1st %>% + group_by(hospital) %>% + summarise( + amoxicillin = resistance(AMX), + available = n_rsi(AMX) + ) %>% knitr::kable(align = "c", big.mark = ",") ``` These functions can also be used to get the proportion of multiple antibiotics, to calculate empiric susceptibility of combination therapies very easily: ```{r, eval = FALSE} -data_1st %>% - group_by(genus) %>% - summarise(amoxiclav = susceptibility(AMC), - gentamicin = susceptibility(GEN), - amoxiclav_genta = susceptibility(AMC, GEN)) +data_1st %>% + group_by(genus) %>% + summarise( + amoxiclav = susceptibility(AMC), + gentamicin = susceptibility(GEN), + amoxiclav_genta = susceptibility(AMC, GEN) + ) ``` ```{r, echo = FALSE} -data_1st %>% - group_by(genus) %>% - summarise(amoxiclav = susceptibility(AMC), - gentamicin = susceptibility(GEN), - amoxiclav_genta = susceptibility(AMC, GEN)) %>% +data_1st %>% + group_by(genus) %>% + summarise( + amoxiclav = susceptibility(AMC), + gentamicin = susceptibility(GEN), + amoxiclav_genta = susceptibility(AMC, GEN) + ) %>% knitr::kable(align = "c", big.mark = ",") ``` Or if you are curious for the resistance within certain antibiotic classes, use a antibiotic class selector such as `penicillins()`, which automatically will include the columns `AMX` and `AMC` of our data: ```{r, eval = FALSE} -data_1st %>% +data_1st %>% # group by hospital - group_by(hospital) %>% + group_by(hospital) %>% # / -> select all penicillins in the data for calculation # | / -> use resistance() for all peni's per hospital # | | / -> print as percentages - summarise(across(penicillins(), resistance, as_percent = TRUE)) %>% + summarise(across(penicillins(), resistance, as_percent = TRUE)) %>% # format the antibiotic column names, using so-called snake case, # so 'Amoxicillin/clavulanic acid' becomes 'amoxicillin_clavulanic_acid' rename_with(set_ab_names, penicillins()) ``` ```{r, echo = FALSE, message = FALSE} -data_1st %>% - group_by(hospital) %>% - summarise(across(penicillins(), resistance, as_percent = TRUE)) %>% - rename_with(set_ab_names, penicillins()) %>% +data_1st %>% + group_by(hospital) %>% + summarise(across(penicillins(), resistance, as_percent = TRUE)) %>% + rename_with(set_ab_names, penicillins()) %>% knitr::kable(align = "lrr") ``` To make a transition to the next part, let's see how differences in the previously calculated combination therapies could be plotted: ```{r plot 1} -data_1st %>% - group_by(genus) %>% - summarise("1. Amoxi/clav" = susceptibility(AMC), - "2. Gentamicin" = susceptibility(GEN), - "3. Amoxi/clav + genta" = susceptibility(AMC, GEN)) %>% +data_1st %>% + group_by(genus) %>% + summarise( + "1. Amoxi/clav" = susceptibility(AMC), + "2. Gentamicin" = susceptibility(GEN), + "3. Amoxi/clav + genta" = susceptibility(AMC, GEN) + ) %>% # pivot_longer() from the tidyr package "lengthens" data: - tidyr::pivot_longer(-genus, names_to = "antibiotic") %>% - ggplot(aes(x = genus, - y = value, - fill = antibiotic)) + + tidyr::pivot_longer(-genus, names_to = "antibiotic") %>% + ggplot(aes( + x = genus, + y = value, + fill = antibiotic + )) + geom_col(position = "dodge2") ``` @@ -416,14 +449,20 @@ data_1st %>% To show results in plots, most R users would nowadays use the `ggplot2` package. This package lets you create plots in layers. You can read more about it [on their website](https://ggplot2.tidyverse.org/). A quick example would look like these syntaxes: ```{r plot 2, eval = FALSE} -ggplot(data = a_data_set, - mapping = aes(x = year, - y = value)) + +ggplot( + data = a_data_set, + mapping = aes( + x = year, + y = value + ) +) + geom_col() + - labs(title = "A title", - subtitle = "A subtitle", - x = "My X axis", - y = "My Y axis") + labs( + title = "A title", + subtitle = "A subtitle", + x = "My X axis", + y = "My Y axis" + ) # or as short as: ggplot(a_data_set) + @@ -443,11 +482,11 @@ If we group on e.g. the `genus` column and add some additional functions from ou ```{r plot 4} # group the data on `genus` -ggplot(data_1st %>% group_by(genus)) + +ggplot(data_1st %>% group_by(genus)) + # create bars with genus on x axis # it looks for variables with class `rsi`, # of which we have 4 (earlier created with `as.rsi`) - geom_rsi(x = "genus") + + geom_rsi(x = "genus") + # split plots on antibiotic facet_rsi(facet = "antibiotic") + # set colours to the R/SI interpretations (colour-blind friendly) @@ -457,8 +496,10 @@ ggplot(data_1st %>% group_by(genus)) + # turn 90 degrees, to make it bars instead of columns coord_flip() + # add labels - labs(title = "Resistance per genus and antibiotic", - subtitle = "(this is fake data)") + + labs( + title = "Resistance per genus and antibiotic", + subtitle = "(this is fake data)" + ) + # and print genus in italic to follow our convention # (is now y axis because we turned the plot) theme(axis.text.y = element_text(face = "italic")) @@ -467,12 +508,14 @@ ggplot(data_1st %>% group_by(genus)) + To simplify this, we also created the `ggplot_rsi()` function, which combines almost all above functions: ```{r plot 5} -data_1st %>% +data_1st %>% group_by(genus) %>% - ggplot_rsi(x = "genus", - facet = "antibiotic", - breaks = 0:4 * 25, - datalabels = FALSE) + + ggplot_rsi( + x = "genus", + facet = "antibiotic", + breaks = 0:4 * 25, + datalabels = FALSE + ) + coord_flip() ``` @@ -527,9 +570,10 @@ And when using the `ggplot2` package, but now choosing the latest implemented CL ```{r disk_plots_mo_ab, message = FALSE, warning = FALSE} autoplot(disk_values, - mo = "E. coli", - ab = "cipro", - guideline = "CLSI") + mo = "E. coli", + ab = "cipro", + guideline = "CLSI" +) ``` ## Independence test @@ -544,13 +588,15 @@ library(tidyr) check_FOS <- example_isolates %>% filter(ward %in% c("A", "D")) %>% # filter on only hospitals A and D - select(ward, FOS) %>% # select the hospitals and fosfomycin - group_by(ward) %>% # group on the hospitals - count_df(combine_SI = TRUE) %>% # count all isolates per group (ward) - pivot_wider(names_from = ward, # transform output so A and D are columns - values_from = value) %>% - select(A, D) %>% # and only select these columns - as.matrix() # transform to a good old matrix for fisher.test() + select(ward, FOS) %>% # select the hospitals and fosfomycin + group_by(ward) %>% # group on the hospitals + count_df(combine_SI = TRUE) %>% # count all isolates per group (ward) + pivot_wider( + names_from = ward, # transform output so A and D are columns + values_from = value + ) %>% + select(A, D) %>% # and only select these columns + as.matrix() # transform to a good old matrix for fisher.test() check_FOS ``` @@ -559,7 +605,7 @@ We can apply the test now with: ```{r} # do Fisher's Exact Test -fisher.test(check_FOS) +fisher.test(check_FOS) ``` As can be seen, the p value is `r round(fisher.test(check_FOS)$p.value, 3)`, which means that the fosfomycin resistance found in isolates from patients in hospital A and D are really different. diff --git a/vignettes/AMR_intro.png b/vignettes/AMR_intro.png deleted file mode 100644 index d3b1e1c03..000000000 Binary files a/vignettes/AMR_intro.png and /dev/null differ diff --git a/vignettes/EUCAST.Rmd b/vignettes/EUCAST.Rmd index 6f2bcef73..1c4cb899e 100644 --- a/vignettes/EUCAST.Rmd +++ b/vignettes/EUCAST.Rmd @@ -39,9 +39,13 @@ These rules can be used to discard impossible bug-drug combinations in your data Sometimes, laboratory data can still contain such strains with ampicillin being susceptible to ampicillin. This could be because an antibiogram is available before an identification is available, and the antibiogram is then not re-interpreted based on the identification (namely, *Klebsiella*). EUCAST expert rules solve this, that can be applied using `eucast_rules()`: ```{r, warning = FALSE, message = FALSE} -oops <- data.frame(mo = c("Klebsiella", - "Escherichia"), - ampicillin = "S") +oops <- data.frame( + mo = c( + "Klebsiella", + "Escherichia" + ), + ampicillin = "S" +) oops eucast_rules(oops, info = FALSE) @@ -50,29 +54,37 @@ eucast_rules(oops, info = FALSE) A more convenient function is `mo_is_intrinsic_resistant()` that uses the same guideline, but allows to check for one or more specific microorganisms or antibiotics: ```{r, warning = FALSE, message = FALSE} -mo_is_intrinsic_resistant(c("Klebsiella", "Escherichia"), - "ampicillin") +mo_is_intrinsic_resistant( + c("Klebsiella", "Escherichia"), + "ampicillin" +) -mo_is_intrinsic_resistant("Klebsiella", - c("ampicillin", "kanamycin")) +mo_is_intrinsic_resistant( + "Klebsiella", + c("ampicillin", "kanamycin") +) ``` EUCAST rules can not only be used for correction, they can also be used for filling in known resistance and susceptibility based on results of other antimicrobials drugs. This process is called *interpretive reading*, is basically a form of imputation, and is part of the `eucast_rules()` function as well: ```{r, warning = FALSE, message = FALSE} -data <- data.frame(mo = c("Staphylococcus aureus", - "Enterococcus faecalis", - "Escherichia coli", - "Klebsiella pneumoniae", - "Pseudomonas aeruginosa"), - VAN = "-", # Vancomycin - AMX = "-", # Amoxicillin - COL = "-", # Colistin - CAZ = "-", # Ceftazidime - CXM = "-", # Cefuroxime - PEN = "S", # Benzylenicillin - FOX = "S", # Cefoxitin - stringsAsFactors = FALSE) +data <- data.frame( + mo = c( + "Staphylococcus aureus", + "Enterococcus faecalis", + "Escherichia coli", + "Klebsiella pneumoniae", + "Pseudomonas aeruginosa" + ), + VAN = "-", # Vancomycin + AMX = "-", # Amoxicillin + COL = "-", # Colistin + CAZ = "-", # Ceftazidime + CXM = "-", # Cefuroxime + PEN = "S", # Benzylenicillin + FOX = "S", # Cefoxitin + stringsAsFactors = FALSE +) ``` ```{r, eval = FALSE} data diff --git a/vignettes/MDR.Rmd b/vignettes/MDR.Rmd index 1d8fe337c..8fafd561c 100644 --- a/vignettes/MDR.Rmd +++ b/vignettes/MDR.Rmd @@ -64,8 +64,10 @@ You can also use your own custom guideline. Custom guidelines can be set with th If you are familiar with `case_when()` of the `dplyr` package, you will recognise the input method to set your own rules. Rules must be set using what R considers to be the 'formula notation': ```{r} -custom <- custom_mdro_guideline(CIP == "R" & age > 60 ~ "Elderly Type A", - ERY == "R" & age > 60 ~ "Elderly Type B") +custom <- custom_mdro_guideline( + CIP == "R" & age > 60 ~ "Elderly Type A", + ERY == "R" & age > 60 ~ "Elderly Type B" +) ``` If a row/an isolate matches the first rule, the value after the first `~` (in this case *'Elderly Type A'*) will be set as MDRO value. Otherwise, the second rule will be tried and so on. The maximum number of rules is unlimited. @@ -92,17 +94,17 @@ The `mdro()` function always returns an ordered `factor` for predefined guidelin The next example uses the `example_isolates` data set. This is a data set included with this package and contains full antibiograms of 2,000 microbial isolates. It reflects reality and can be used to practise AMR data analysis. If we test the MDR/XDR/PDR guideline on this data set, we get: ```{r, message = FALSE} -library(dplyr) # to support pipes: %>% +library(dplyr) # to support pipes: %>% library(cleaner) # to create frequency tables ``` ```{r, results = 'hide'} -example_isolates %>% - mdro() %>% +example_isolates %>% + mdro() %>% freq() # show frequency table of the result ``` ```{r, echo = FALSE, results = 'asis', message = FALSE, warning = FALSE} -example_isolates %>% - mdro(info = FALSE) %>% +example_isolates %>% + mdro(info = FALSE) %>% freq() # show frequency table of the result ``` @@ -111,25 +113,29 @@ For another example, I will create a data set to determine multi-drug resistant ```{r} # random_rsi() is a helper function to generate # a random vector with values S, I and R -my_TB_data <- data.frame(rifampicin = random_rsi(5000), - isoniazid = random_rsi(5000), - gatifloxacin = random_rsi(5000), - ethambutol = random_rsi(5000), - pyrazinamide = random_rsi(5000), - moxifloxacin = random_rsi(5000), - kanamycin = random_rsi(5000)) +my_TB_data <- data.frame( + rifampicin = random_rsi(5000), + isoniazid = random_rsi(5000), + gatifloxacin = random_rsi(5000), + ethambutol = random_rsi(5000), + pyrazinamide = random_rsi(5000), + moxifloxacin = random_rsi(5000), + kanamycin = random_rsi(5000) +) ``` Because all column names are automatically verified for valid drug names or codes, this would have worked exactly the same way: ```{r, eval = FALSE} -my_TB_data <- data.frame(RIF = random_rsi(5000), - INH = random_rsi(5000), - GAT = random_rsi(5000), - ETH = random_rsi(5000), - PZA = random_rsi(5000), - MFX = random_rsi(5000), - KAN = random_rsi(5000)) +my_TB_data <- data.frame( + RIF = random_rsi(5000), + INH = random_rsi(5000), + GAT = random_rsi(5000), + ETH = random_rsi(5000), + PZA = random_rsi(5000), + MFX = random_rsi(5000), + KAN = random_rsi(5000) +) ``` The data set now looks like this: diff --git a/vignettes/PCA.Rmd b/vignettes/PCA.Rmd index 61fbbe4b9..3f31a5d43 100755 --- a/vignettes/PCA.Rmd +++ b/vignettes/PCA.Rmd @@ -39,12 +39,16 @@ glimpse(example_isolates) Now to transform this to a data set with only resistance percentages per taxonomic order and genus: ```{r, warning = FALSE} -resistance_data <- example_isolates %>% - group_by(order = mo_order(mo), # group on anything, like order - genus = mo_genus(mo)) %>% # and genus as we do here +resistance_data <- example_isolates %>% + group_by( + order = mo_order(mo), # group on anything, like order + genus = mo_genus(mo) + ) %>% # and genus as we do here summarise_if(is.rsi, resistance) %>% # then get resistance of all drugs - select(order, genus, AMC, CXM, CTX, - CAZ, GEN, TOB, TMP, SXT) # and select only relevant columns + select( + order, genus, AMC, CXM, CTX, + CAZ, GEN, TOB, TMP, SXT + ) # and select only relevant columns head(resistance_data) ``` diff --git a/vignettes/SPSS.Rmd b/vignettes/SPSS.Rmd index 5e276e295..c28f85cb6 100755 --- a/vignettes/SPSS.Rmd +++ b/vignettes/SPSS.Rmd @@ -80,9 +80,11 @@ as.mic("testvalue") mo_gramstain("E. coli") # Klebsiella is intrinsic resistant to amoxicillin, according to EUCAST: -klebsiella_test <- data.frame(mo = "klebsiella", - amox = "S", - stringsAsFactors = FALSE) +klebsiella_test <- data.frame( + mo = "klebsiella", + amox = "S", + stringsAsFactors = FALSE +) klebsiella_test # (our original data) eucast_rules(klebsiella_test, info = FALSE) # (the edited data by EUCAST rules) @@ -153,7 +155,7 @@ To import data from SPSS, SAS or Stata, you can use the [great `haven` package]( # download and install the latest version: install.packages("haven") # load the package you just installed: -library(haven) +library(haven) ``` You can now import files as follows: @@ -203,7 +205,7 @@ To export your R objects to the SAS file format: # save as regular SAS file: write_sas(data = yourdata, path = "path/to/file") -# the SAS transport format is an open format +# the SAS transport format is an open format # (required for submission of the data to the FDA) write_xpt(data = yourdata, path = "path/to/file", version = 8) ``` diff --git a/vignettes/WHONET.Rmd b/vignettes/WHONET.Rmd index 6216f620b..b4f38f6d3 100644 --- a/vignettes/WHONET.Rmd +++ b/vignettes/WHONET.Rmd @@ -39,9 +39,9 @@ This package comes with an [example data set `WHONET`](https://msberends.github. First, load the relevant packages if you did not yet did this. I use the tidyverse for all of my analyses. All of them. If you don't know it yet, I suggest you read about it on their website: https://www.tidyverse.org/. ```{r, message = FALSE} -library(dplyr) # part of tidyverse +library(dplyr) # part of tidyverse library(ggplot2) # part of tidyverse -library(AMR) # this package +library(AMR) # this package library(cleaner) # to create frequency tables ``` @@ -54,7 +54,7 @@ We will have to transform some variables to simplify and automate the analysis: # transform variables data <- WHONET %>% # get microbial ID based on given organism - mutate(mo = as.mo(Organism)) %>% + mutate(mo = as.mo(Organism)) %>% # transform everything from "AMP_ND10" to "CIP_EE" to the new `rsi` class mutate_at(vars(AMP_ND10:CIP_EE), as.rsi) ``` @@ -83,15 +83,16 @@ An easy `ggplot` will already give a lot of information, using the included `ggp data %>% group_by(Country) %>% select(Country, AMP_ND2, AMC_ED20, CAZ_ED10, CIP_ED5) %>% - ggplot_rsi(translate_ab = 'ab', facet = "Country", datalabels = FALSE) + ggplot_rsi(translate_ab = "ab", facet = "Country", datalabels = FALSE) ``` ```{r, echo = FALSE} # on very old and some new releases of R, this may lead to an error tryCatch(data %>% - group_by(Country) %>% - select(Country, AMP_ND2, AMC_ED20, CAZ_ED10, CIP_ED5) %>% - ggplot_rsi(translate_ab = 'ab', facet = "Country", datalabels = FALSE) %>% - print(), - error = function(e) base::invisible()) + group_by(Country) %>% + select(Country, AMP_ND2, AMC_ED20, CAZ_ED10, CIP_ED5) %>% + ggplot_rsi(translate_ab = "ab", facet = "Country", datalabels = FALSE) %>% + print(), +error = function(e) base::invisible() +) ``` diff --git a/vignettes/benchmarks.Rmd.not b/vignettes/benchmarks.Rmd.not index 93b3c6a03..12effa5b0 100755 --- a/vignettes/benchmarks.Rmd.not +++ b/vignettes/benchmarks.Rmd.not @@ -33,19 +33,25 @@ ggplot.bm <- function(df) { summ <- tapply(.x, .f, .fun) factor(.f, levels = names(summ)[order(summ, decreasing = .desc)], ordered = is.ordered(.f)) } - ggplot(df, - aes(x = reorder(expr, time, median), y = time / 1000 / 1000)) + - stat_boxplot(geom = "errorbar", width = 0.5) + - geom_boxplot(outlier.alpha = 0) + - coord_flip() + - scale_y_continuous(trans = "log", breaks = c(1, 2, 5, - 10, 20, 50, - 100, 200, 500, - 1000, 2000, 5000)) + - labs(x = "Expression", - y = "Time in milliseconds (log scale)") + - theme_minimal() + - theme(axis.text.y = element_text(family = "mono")) + ggplot( + df, + aes(x = reorder(expr, time, median), y = time / 1000 / 1000) + ) + + stat_boxplot(geom = "errorbar", width = 0.5) + + geom_boxplot(outlier.alpha = 0) + + coord_flip() + + scale_y_continuous(trans = "log", breaks = c( + 1, 2, 5, + 10, 20, 50, + 100, 200, 500, + 1000, 2000, 5000 + )) + + labs( + x = "Expression", + y = "Time in milliseconds (log scale)" + ) + + theme_minimal() + + theme(axis.text.y = element_text(family = "mono")) } ``` @@ -75,7 +81,8 @@ S.aureus <- microbenchmark( as.mo("Sthafilokkockus aaureuz"), # incorrect spelling as.mo("MRSA"), # Methicillin Resistant S. aureus as.mo("VISA"), # Vancomycin Intermediate S. aureus - times = 25) + times = 25 +) print(S.aureus, unit = "ms", signif = 2) ``` ```{r, echo = FALSE} @@ -95,7 +102,7 @@ To prove this, we will use `mo_name()` for testing - a helper function that retu ```{r, message = FALSE} # start with the example_isolates data set -x <- example_isolates %>% +x <- example_isolates %>% # take all MO codes from the 'mo' column pull(mo) %>% # and copy them a thousand times @@ -105,7 +112,7 @@ x <- example_isolates %>% # what do these values look like? They are of class : head(x) - + # as the example_isolates data set has 2,000 rows, we should have 2 million items length(x) @@ -114,7 +121,8 @@ n_distinct(x) # now let's see: run_it <- microbenchmark(mo_name(x), - times = 10) + times = 10 +) print(run_it, unit = "ms", signif = 3) ``` @@ -125,25 +133,29 @@ So getting official taxonomic names of `r format(length(x), big.mark = ",")` (!! What about precalculated results? If the input is an already precalculated result of a helper function such as `mo_name()`, it almost doesn't take any time at all. In other words, if you run `mo_name()` on a valid taxonomic name, it will return the results immediately (see 'C' below): ```{r, warning=FALSE, message=FALSE} -run_it <- microbenchmark(A = mo_name("STAAUR"), - B = mo_name("S. aureus"), - C = mo_name("Staphylococcus aureus"), - times = 10) +run_it <- microbenchmark( + A = mo_name("STAAUR"), + B = mo_name("S. aureus"), + C = mo_name("Staphylococcus aureus"), + times = 10 +) print(run_it, unit = "ms", signif = 3) ``` So going from `mo_name("Staphylococcus aureus")` to `"Staphylococcus aureus"` takes `r format(round(run_it %>% filter(expr == "C") %>% pull(time) %>% median() / 1e9, 4), scientific = FALSE)` seconds - it doesn't even start calculating *if the result would be the same as the expected resulting value*. That goes for all helper functions: ```{r} -run_it <- microbenchmark(A = mo_species("aureus"), - B = mo_genus("Staphylococcus"), - C = mo_name("Staphylococcus aureus"), - D = mo_family("Staphylococcaceae"), - E = mo_order("Bacillales"), - F = mo_class("Bacilli"), - G = mo_phylum("Firmicutes"), - H = mo_kingdom("Bacteria"), - times = 10) +run_it <- microbenchmark( + A = mo_species("aureus"), + B = mo_genus("Staphylococcus"), + C = mo_name("Staphylococcus aureus"), + D = mo_family("Staphylococcaceae"), + E = mo_order("Bacillales"), + F = mo_class("Bacilli"), + G = mo_phylum("Firmicutes"), + H = mo_kingdom("Bacteria"), + times = 10 +) print(run_it, unit = "ms", signif = 3) ``` @@ -163,17 +175,19 @@ mo_name(CoNS, language = "es") # or just mo_name(CoNS) on a Spanish system mo_name(CoNS, language = "nl") # or just mo_name(CoNS) on a Dutch system -run_it <- microbenchmark(da = mo_name(CoNS, language = "da"), - de = mo_name(CoNS, language = "de"), - en = mo_name(CoNS, language = "en"), - es = mo_name(CoNS, language = "es"), - fr = mo_name(CoNS, language = "fr"), - it = mo_name(CoNS, language = "it"), - nl = mo_name(CoNS, language = "nl"), - pt = mo_name(CoNS, language = "pt"), - ru = mo_name(CoNS, language = "ru"), - sv = mo_name(CoNS, language = "sv"), - times = 100) +run_it <- microbenchmark( + da = mo_name(CoNS, language = "da"), + de = mo_name(CoNS, language = "de"), + en = mo_name(CoNS, language = "en"), + es = mo_name(CoNS, language = "es"), + fr = mo_name(CoNS, language = "fr"), + it = mo_name(CoNS, language = "it"), + nl = mo_name(CoNS, language = "nl"), + pt = mo_name(CoNS, language = "pt"), + ru = mo_name(CoNS, language = "ru"), + sv = mo_name(CoNS, language = "sv"), + times = 100 +) print(run_it, unit = "ms", signif = 4) ``` diff --git a/vignettes/datasets.Rmd b/vignettes/datasets.Rmd index f91408e58..f97aedb41 100644 --- a/vignettes/datasets.Rmd +++ b/vignettes/datasets.Rmd @@ -28,16 +28,20 @@ library(dplyr) options(knitr.kable.NA = "") structure_txt <- function(dataset) { - paste0("A data set with ", - format(nrow(dataset), big.mark = ","), " rows and ", - ncol(dataset), " columns, containing the following column names: \n", - AMR:::vector_or(colnames(dataset), quotes = "*", last_sep = " and ", sort = FALSE), ".") + paste0( + "A data set with ", + format(nrow(dataset), big.mark = ","), " rows and ", + ncol(dataset), " columns, containing the following column names: \n", + AMR:::vector_or(colnames(dataset), quotes = "*", last_sep = " and ", sort = FALSE), "." + ) } download_txt <- function(filename) { - msg <- paste0("It was last updated on ", - trimws(format(file.mtime(paste0("../data/", filename, ".rda")), "%e %B %Y %H:%M:%S %Z", tz = "UTC")), - ". Find more info about the structure of this data set [here](https://msberends.github.io/AMR/reference/", ifelse(filename == "antivirals", "antibiotics", filename), ".html).\n") + msg <- paste0( + "It was last updated on ", + trimws(format(file.mtime(paste0("../data/", filename, ".rda")), "%e %B %Y %H:%M:%S %Z", tz = "UTC")), + ". Find more info about the structure of this data set [here](https://msberends.github.io/AMR/reference/", ifelse(filename == "antivirals", "antibiotics", filename), ".html).\n" + ) github_base <- "https://github.com/msberends/AMR/raw/main/data-raw/" filename <- paste0("../data-raw/", filename) rds <- paste0(filename, ".rds") @@ -50,38 +54,44 @@ download_txt <- function(filename) { stata <- paste0(filename, ".dta") create_txt <- function(filename, type, software, exists) { if (isTRUE(exists)) { - paste0("* Download as [", software, "](", github_base, filename, ") (", - AMR:::formatted_filesize(filename), ") \n") + paste0( + "* Download as [", software, "](", github_base, filename, ") (", + AMR:::formatted_filesize(filename), ") \n" + ) } else { paste0("* *(unavailable as ", software, ")*\n") } } - - if (any(file.exists(rds), - file.exists(txt), - file.exists(excel), - file.exists(feather), - file.exists(parquet), - file.exists(sas), - file.exists(spss), - file.exists(stata))) { - msg <- c(msg, "\n**Direct download links:**\n\n", - create_txt(rds, "rds", "original R Data Structure (RDS) file", file.exists(rds)), - create_txt(txt, "txt", "tab-separated text file", file.exists(txt)), - create_txt(excel, "xlsx", "Microsoft Excel workbook", file.exists(excel)), - create_txt(feather, "feather", "Apache Feather file", file.exists(feather)), - create_txt(parquet, "parquet", "Apache Parquet file", file.exists(parquet)), - create_txt(sas, "sas", "SAS data file", file.exists(sas)), - create_txt(spss, "sav", "IBM SPSS Statistics data file", file.exists(spss)), - create_txt(stata, "dta", "Stata DTA file", file.exists(stata))) + + if (any( + file.exists(rds), + file.exists(txt), + file.exists(excel), + file.exists(feather), + file.exists(parquet), + file.exists(sas), + file.exists(spss), + file.exists(stata) + )) { + msg <- c( + msg, "\n**Direct download links:**\n\n", + create_txt(rds, "rds", "original R Data Structure (RDS) file", file.exists(rds)), + create_txt(txt, "txt", "tab-separated text file", file.exists(txt)), + create_txt(excel, "xlsx", "Microsoft Excel workbook", file.exists(excel)), + create_txt(feather, "feather", "Apache Feather file", file.exists(feather)), + create_txt(parquet, "parquet", "Apache Parquet file", file.exists(parquet)), + create_txt(sas, "sas", "SAS data file", file.exists(sas)), + create_txt(spss, "sav", "IBM SPSS Statistics data file", file.exists(spss)), + create_txt(stata, "dta", "Stata DTA file", file.exists(stata)) + ) } paste0(msg, collapse = "") } print_df <- function(x, rows = 6) { - x %>% - as.data.frame(stringsAsFactors = FALSE) %>% - head(n = rows) %>% + x %>% + as.data.frame(stringsAsFactors = FALSE) %>% + head(n = rows) %>% mutate_all(function(x) { if (is.list(x)) { sapply(x, function(y) { @@ -128,10 +138,10 @@ Our full taxonomy of microorganisms is based on the authoritative and comprehens Included (sub)species per taxonomic kingdom: ```{r, echo = FALSE} -microorganisms %>% - count(kingdom) %>% - mutate(n = format(n, big.mark = ",")) %>% - setNames(c("Kingdom", "Number of (sub)species")) %>% +microorganisms %>% + count(kingdom) %>% + mutate(n = format(n, big.mark = ",")) %>% + setNames(c("Kingdom", "Number of (sub)species")) %>% print_df() ``` @@ -139,7 +149,7 @@ Example rows when filtering on genus *Escherichia*: ```{r, echo = FALSE} microorganisms %>% - filter(genus == "Escherichia") %>% + filter(genus == "Escherichia") %>% print_df() ``` @@ -166,7 +176,7 @@ Example rows when filtering on *Escherichia*: ```{r, echo = FALSE} microorganisms.old %>% - filter(fullname %like% "^Escherichia") %>% + filter(fullname %like% "^Escherichia") %>% print_df() ``` @@ -191,7 +201,7 @@ This data set contains all EARS-Net and ATC codes gathered from WHO and WHONET, ```{r, echo = FALSE} antibiotics %>% - filter(ab %in% colnames(example_isolates)) %>% + filter(ab %in% colnames(example_isolates)) %>% print_df() ``` @@ -233,9 +243,9 @@ This data set contains interpretation rules for MIC values and disk diffusion di ### Example content ```{r, echo = FALSE} -rsi_translation %>% - mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>% - mutate(ab_name = ab_name(ab, language = NULL), .after = ab) %>% +rsi_translation %>% + mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>% + mutate(ab_name = ab_name(ab, language = NULL), .after = ab) %>% print_df() ``` @@ -258,9 +268,11 @@ Example rows when filtering on *Enterobacter cloacae*: ```{r, echo = FALSE} intrinsic_resistant %>% - transmute(microorganism = mo_name(mo), - antibiotic = ab_name(ab)) %>% - filter(microorganism == "Enterobacter cloacae") %>% + transmute( + microorganism = mo_name(mo), + antibiotic = ab_name(ab) + ) %>% + filter(microorganism == "Enterobacter cloacae") %>% arrange(antibiotic) %>% print_df(rows = Inf) ``` @@ -283,7 +295,7 @@ Currently included dosages in the data set are meant for: `r AMR:::format_eucast ### Example content ```{r, echo = FALSE} -dosage %>% +dosage %>% print_df() ``` @@ -303,7 +315,7 @@ This data set contains randomised fictitious data, but reflects reality and can ### Example content ```{r, echo = FALSE} -example_isolates %>% +example_isolates %>% print_df() ``` @@ -322,6 +334,6 @@ This data set contains randomised fictitious data, but reflects reality and can ### Example content ```{r, echo = FALSE} -example_isolates_unclean %>% +example_isolates_unclean %>% print_df() ``` diff --git a/vignettes/resistance_predict.Rmd b/vignettes/resistance_predict.Rmd index f54dfd32d..97d0cd75d 100755 --- a/vignettes/resistance_predict.Rmd +++ b/vignettes/resistance_predict.Rmd @@ -43,14 +43,18 @@ It is basically as easy as: resistance_predict(tbl = example_isolates, col_date = "date", col_ab = "TZP", model = "binomial") # or: -example_isolates %>% - resistance_predict(col_ab = "TZP", - model "binomial") +example_isolates %>% + resistance_predict( + col_ab = "TZP", + model = "binomial" + ) # to bind it to object 'predict_TZP' for example: -predict_TZP <- example_isolates %>% - resistance_predict(col_ab = "TZP", - model = "binomial") +predict_TZP <- example_isolates %>% + resistance_predict( + col_ab = "TZP", + model = "binomial" + ) ``` The function will look for a date column itself if `col_date` is not set. @@ -58,7 +62,7 @@ The function will look for a date column itself if `col_date` is not set. When running any of these commands, a summary of the regression model will be printed unless using `resistance_predict(..., info = FALSE)`. ```{r, echo = FALSE, message = FALSE} -predict_TZP <- example_isolates %>% +predict_TZP <- example_isolates %>% resistance_predict(col_ab = "TZP", model = "binomial") ``` @@ -92,7 +96,7 @@ Resistance is not easily predicted; if we look at vancomycin resistance in Gram- ```{r} example_isolates %>% filter(mo_gramstain(mo, language = NULL) == "Gram-positive") %>% - resistance_predict(col_ab = "VAN", year_min = 2010, info = FALSE, model = "binomial") %>% + resistance_predict(col_ab = "VAN", year_min = 2010, info = FALSE, model = "binomial") %>% ggplot_rsi_predict() ``` @@ -113,7 +117,7 @@ For the vancomycin resistance in Gram-positive bacteria, a linear model might be ```{r} example_isolates %>% filter(mo_gramstain(mo, language = NULL) == "Gram-positive") %>% - resistance_predict(col_ab = "VAN", year_min = 2010, info = FALSE, model = "linear") %>% + resistance_predict(col_ab = "VAN", year_min = 2010, info = FALSE, model = "linear") %>% ggplot_rsi_predict() ``` diff --git a/vignettes/welcome_to_AMR.Rmd b/vignettes/welcome_to_AMR.Rmd index 725b23874..f9d95bf0f 100644 --- a/vignettes/welcome_to_AMR.Rmd +++ b/vignettes/welcome_to_AMR.Rmd @@ -28,10 +28,6 @@ Note: to keep the package size as small as possible, we only included this vigne The `AMR` package is a [free and open-source](https://msberends.github.io/AMR/#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. -```{r, echo = FALSE, out.width = "555px"} -knitr::include_graphics("AMR_intro.png") -``` - After installing this package, R knows `r AMR:::format_included_data_number(AMR::microorganisms)` distinct microbial species and all `r AMR:::format_included_data_number(rbind(AMR::antibiotics[, "atc", drop = FALSE], AMR::antivirals[, "atc", drop = FALSE]))` antibiotic, antimycotic and antiviral drugs by name and code (including ATC, EARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. It supports any data format, including WHONET/EARS-Net data. The `AMR` package is available in English, Chinese, Danish, Dutch, French, German, Greek, Italian, Japanese, Polish, Portuguese, Russian, Spanish, Swedish, Turkish and Ukrainian. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages.