From 36ec8b0d816511da8a26f2bf06d56380af4d241b Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Mon, 28 Sep 2020 11:00:59 +0200 Subject: [PATCH] (v1.3.0.9033) skimr fix --- DESCRIPTION | 2 +- NEWS.md | 2 +- R/disk.R | 6 +++--- R/eucast_rules.R | 8 ++++---- R/join_microorganisms.R | 8 ++++---- R/mic.R | 2 +- R/mo.R | 26 ++++++++++---------------- R/mo_matching_score.R | 4 ++-- R/rsi.R | 25 ++++++++++++++++--------- docs/404.html | 2 +- docs/LICENSE-text.html | 2 +- docs/articles/index.html | 2 +- docs/authors.html | 2 +- docs/index.html | 2 +- docs/news/index.html | 8 ++++---- docs/pkgdown.yml | 2 +- docs/reference/as.mo.html | 4 ++-- docs/reference/eucast_rules.html | 6 +++--- docs/reference/index.html | 2 +- docs/reference/mo_matching_score.html | 4 ++-- docs/reference/mo_property.html | 4 ++-- docs/survey.html | 2 +- man/as.mo.Rd | 2 +- man/eucast_rules.Rd | 4 ++-- man/mo_matching_score.Rd | 2 +- man/mo_property.Rd | 2 +- tests/testthat/test-proportion.R | 6 +++--- 27 files changed, 71 insertions(+), 70 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 11fa4ffb..67d1666b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 1.3.0.9032 +Version: 1.3.0.9033 Date: 2020-09-28 Title: Antimicrobial Resistance Analysis Authors@R: c( diff --git a/NEWS.md b/NEWS.md index c9b5e694..5b9ed41a 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.3.0.9032 +# AMR 1.3.0.9033 ## Last updated: 28 September 2020 Note: some changes in this version were suggested by anonymous reviewers from the journal we submitted our manuscipt to. We are those reviewers very grateful for going through our code so thoroughly! diff --git a/R/disk.R b/R/disk.R index 65a7bc15..007fac85 100644 --- a/R/disk.R +++ b/R/disk.R @@ -193,10 +193,10 @@ get_skimmers.disk <- function(column) { inline_hist <- import_fn("inline_hist", "skimr", error_on_fail = FALSE) sfl( skim_type = "disk", - smallest = ~min(as.double(.), na.rm = TRUE), - largest = ~max(as.double(.), na.rm = TRUE), + min = ~min(as.double(.), na.rm = TRUE), + max = ~max(as.double(.), na.rm = TRUE), median = ~stats::median(as.double(.), na.rm = TRUE), - n_unique = n_unique, + n_unique = ~pm_n_distinct(., na.rm = TRUE), hist = ~inline_hist(stats::na.omit(as.double(.))) ) } diff --git a/R/eucast_rules.R b/R/eucast_rules.R index a4ef37bf..42a71bf8 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -42,8 +42,8 @@ EUCAST_VERSION_EXPERT_RULES <- list("3.1" = list(version_txt = "v3.1", #' @param info print progress #' @param rules a character vector that specifies which rules should be applied. Must be one or more of `"breakpoints"`, `"expert"`, `"other"`, `"all"`, and defaults to `c("breakpoints", "expert")`. The default value can be set to another value, e.g. using `options(AMR_eucastrules = "all")`. #' @param verbose a [logical] to turn Verbose mode on and off (default is off). In Verbose mode, the function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way. Using Verbose mode takes a lot more time. -#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline -#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline +#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline. Currently supported: `r paste0(names(EUCAST_VERSION_BREAKPOINTS), collapse = ", ")`. +#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Currently supported: `r paste0(names(EUCAST_VERSION_EXPERT_RULES), collapse = ", ")`. #' @param ... column name of an antibiotic, please see section *Antibiotics* below #' @inheritParams first_isolate #' @details @@ -143,8 +143,8 @@ eucast_rules <- function(x, check_dataset_integrity() - version_breakpoints <- as.double(version_breakpoints) - version_expertrules <- as.double(version_expertrules) + version_breakpoints <- as.double(gsub("[^0-9.]+", "", version_breakpoints)) + version_expertrules <- as.double(gsub("[^0-9.]+", "", version_expertrules)) stop_ifnot(version_breakpoints %in% as.double(names(EUCAST_VERSION_BREAKPOINTS)), "EUCAST version ", version_breakpoints, " for clinical breakpoints not found") stop_ifnot(version_expertrules %in% as.double(names(EUCAST_VERSION_EXPERT_RULES)), diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R index 2dd64a58..e5b85bdd 100755 --- a/R/join_microorganisms.R +++ b/R/join_microorganisms.R @@ -172,11 +172,11 @@ semi_join_microorganisms <- function(x, by = NULL, ...) { dplyr_semi <- import_fn("semi_join", "dplyr", error_on_fail = FALSE) if (!is.null(dplyr_semi)) { join <- suppressWarnings( - dplyr_semi(x = x, y = microorganisms, by = by,...) + dplyr_semi(x = x, y = microorganisms, by = by, ...) ) } else { join <- suppressWarnings( - pm_semi_join(x = x, y = microorganisms, by = by,...) + pm_semi_join(x = x, y = microorganisms, by = by, ...) ) } class(join) <- x_class @@ -196,11 +196,11 @@ anti_join_microorganisms <- function(x, by = NULL, ...) { dplyr_anti <- import_fn("anti_join", "dplyr", error_on_fail = FALSE) if (!is.null(dplyr_anti)) { join <- suppressWarnings( - dplyr_anti(x = x, y = microorganisms, by = by,...) + dplyr_anti(x = x, y = microorganisms, by = by, ...) ) } else { join <- suppressWarnings( - pm_anti_join(x = x, y = microorganisms, by = by,...) + pm_anti_join(x = x, y = microorganisms, by = by, ...) ) } class(join) <- x_class diff --git a/R/mic.R b/R/mic.R index 7a8019f4..d7545060 100755 --- a/R/mic.R +++ b/R/mic.R @@ -306,7 +306,7 @@ get_skimmers.mic <- function(column) { min = ~as.character(sort(na.omit(.))[1]), max = ~as.character(sort(stats::na.omit(.))[length(stats::na.omit(.))]), median = ~as.character(stats::na.omit(.)[as.double(stats::na.omit(.)) == median(as.double(stats::na.omit(.)))])[1], - n_unique = n_unique, + n_unique = ~pm_n_distinct(., na.rm = TRUE), hist_log2 = ~inline_hist(log2(as.double(stats::na.omit(.)))) ) } diff --git a/R/mo.R b/R/mo.R index 841c8878..cda2f305 100755 --- a/R/mo.R +++ b/R/mo.R @@ -1647,7 +1647,7 @@ get_skimmers.mo <- function(column) { sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE) sfl( skim_type = "mo", - unique_total = n_unique, + unique_total = ~pm_n_distinct(., na.rm = TRUE), gram_negative = ~sum(mo_gramstain(stats::na.omit(.), language = NULL) == "Gram-negative", na.rm = TRUE), gram_positive = ~sum(mo_gramstain(stats::na.omit(.), language = NULL) == "Gram-positive", na.rm = TRUE), top_genus = ~names(sort(-table(mo_genus(stats::na.omit(.), language = NULL))))[1L], @@ -1778,20 +1778,20 @@ print.mo_uncertainties <- function(x, ...) { if (NROW(x) == 0) { return(NULL) } - cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Furthermore, an indication is given about the certainty of the match - the more transformations are needed for coercion, the less certain the result.")), collapse = "\n")) + cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Please see ?mo_matching_score.")), collapse = "\n")) cat("\n") msg <- "" for (i in seq_len(nrow(x))) { if (x[i, ]$candidates != "") { candidates <- unlist(strsplit(x[i, ]$candidates, ", ", fixed = TRUE)) - scores <- mo_matching_score(x = x[i, ]$input, - n = candidates) + scores <- mo_matching_score(x = x[i, ]$input, n = candidates) # sort on descending scores candidates <- candidates[order(1 - scores)] + scores_formatted <- trimws(formatC(round(scores, 3), format = "f", digits = 3)) n_candidates <- length(candidates) candidates <- paste0(font_italic(candidates, collapse = NULL), - " (", trimws(percentage(scores[order(1 - scores)], digits = 1)), ")") + " (", scores_formatted[order(1 - scores)], ")") candidates <- paste(candidates, collapse = ", ") # align with input after arrow candidates <- paste0("\n", strrep(" ", nchar(x[i, ]$input) + 6), @@ -1799,23 +1799,17 @@ print.mo_uncertainties <- function(x, ...) { } else { candidates <- "" } - if (x[i, ]$uncertainty == 1) { - uncertainty_interpretation <- font_green("* very certain *") - } else if (x[i, ]$uncertainty == 1) { - uncertainty_interpretation <- font_yellow("* certain *") - } else { - uncertainty_interpretation <- font_red("* not certain *") - } + score <- trimws(formatC(round(mo_matching_score(x = x[i, ]$input, + n = x[i, ]$fullname), + 3), + format = "f", digits = 3)) msg <- paste(msg, paste0('"', x[i, ]$input, '" -> ', paste0(font_bold(font_italic(x[i, ]$fullname)), ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""), " (", x[i, ]$mo, - ", matching score = ", trimws(percentage(mo_matching_score(x = x[i, ]$input, - n = x[i, ]$fullname), - digits = 1)), + ", matching score = ", score, ") "), - uncertainty_interpretation, candidates), sep = "\n") } diff --git a/R/mo_matching_score.R b/R/mo_matching_score.R index 38d3bf23..a4fd1f83 100755 --- a/R/mo_matching_score.R +++ b/R/mo_matching_score.R @@ -25,7 +25,7 @@ #' @param x Any user input value(s) #' @param n A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms] #' @section Matching score for microorganisms: -#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, ranging from 0 to 100%, is calculated as: +#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as: #' #' \deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \operatorname{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )} #' @@ -66,7 +66,7 @@ mo_matching_score <- function(x, n) { var_F <- nchar(n) # L = modified Levenshtein distance var_L <- levenshtein - # P = Prevalence (1 to 3) + # P = prevalence (1 to 3), see ?as.mo var_P <- MO_lookup[match(n, MO_lookup$fullname), "prevalence", drop = TRUE] # K = kingdom index (Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5) var_K <- MO_lookup[match(n, MO_lookup$fullname), "kingdom_index", drop = TRUE] diff --git a/R/rsi.R b/R/rsi.R index 0f9fe37f..11b3bf0d 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -746,20 +746,27 @@ freq.rsi <- function(x, ...) { # will be exported using s3_register() in R/zzz.R get_skimmers.rsi <- function(column) { - # a bit of a crazy hack to get the variable name - name_call <- function(.data, name = deparse(substitute(column))) { - vars <- tryCatch(eval(parse(text = ".data$skim_variable"), envir = sys.frame(2)), - error = function(e) NULL) + # get the variable name 'skim_variable' + name_call <- function(.data) { calls <- sys.calls() + calls_txt <- vapply(calls, function(x) paste(deparse(x), collapse = ""), FUN.VALUE = character(1)) + if (any(calls_txt %like% "skim_variable", na.rm = TRUE)) { + ind <- which(calls_txt %like% "skim_variable")[1L] + vars <- tryCatch(eval(parse(text = ".data$skim_variable"), envir = sys.frame(ind)), + error = function(e) NULL) + } else { + vars <- NULL + } i <- tryCatch(attributes(calls[[length(calls)]])$position, error = function(e) NULL) if (is.null(vars) | is.null(i)) { NA_character_ - } else{ + } else { lengths <- sapply(vars, length) - lengths <- sum(lengths[!names(lengths) == "rsi"]) - var <- vars$rsi[i - lengths] - if (var == "data") { + when_starts_rsi <- which(names(sapply(vars, length)) == "rsi") + offset <- sum(lengths[c(1:when_starts_rsi - 1)]) + var <- vars$rsi[i - offset] + if (!isFALSE(var == "data")) { NA_character_ } else{ ab_name(var) @@ -770,7 +777,7 @@ get_skimmers.rsi <- function(column) { sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE) sfl( skim_type = "rsi", - name = name_call, + ab_name = name_call, count_R = count_R, count_S = count_susceptible, count_I = count_I, diff --git a/docs/404.html b/docs/404.html index fef15802..50ca67e0 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9032 + 1.3.0.9033 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 7203feb9..81c88b82 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9032 + 1.3.0.9033 diff --git a/docs/articles/index.html b/docs/articles/index.html index 14e8d05c..28d56e20 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9032 + 1.3.0.9033 diff --git a/docs/authors.html b/docs/authors.html index ed47c105..8098f2e7 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9032 + 1.3.0.9033 diff --git a/docs/index.html b/docs/index.html index b739ff7e..6f844d15 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.3.0.9032 + 1.3.0.9033 diff --git a/docs/news/index.html b/docs/news/index.html index 8f95a432..76ccaae9 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9032 + 1.3.0.9033 @@ -236,9 +236,9 @@ Source: NEWS.md -
-

-AMR 1.3.0.9032 Unreleased +
+

+AMR 1.3.0.9033 Unreleased

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index c63bbf64..69506ae3 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -2,7 +2,7 @@ pandoc: 2.7.3 pkgdown: 1.5.1.9000 pkgdown_sha: eae56f08694abebf93cdfc0dd8e9ede06d8c815f articles: [] -last_built: 2020-09-27T23:07Z +last_built: 2020-09-28T09:00Z urls: reference: https://msberends.github.io/AMR/reference article: https://msberends.github.io/AMR/articles diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 33ceb82e..5ddfb2c6 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9032 + 1.3.0.9033

@@ -388,7 +388,7 @@ The lifecycle of this function is stableWith ambiguous user input in as.mo() and all the mo_* functions, the returned results are chosen based on their matching score using mo_matching_score(). This matching score \(m\), ranging from 0 to 100%, is calculated as:

+

With ambiguous user input in as.mo() and all the mo_* functions, the returned results are chosen based on their matching score using mo_matching_score(). This matching score \(m\), is calculated as:

$$m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \operatorname{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}$$

where:

  • \(x\) is the user input;

  • diff --git a/docs/reference/eucast_rules.html b/docs/reference/eucast_rules.html index d1981557..1a7ad6b5 100644 --- a/docs/reference/eucast_rules.html +++ b/docs/reference/eucast_rules.html @@ -83,7 +83,7 @@ To improve the interpretation of the antibiogram before EUCAST rules are applied AMR (for R) - 1.3.0.9028 + 1.3.0.9033
@@ -280,11 +280,11 @@ To improve the interpretation of the antibiogram before EUCAST rules are applied version_breakpoints -

the version number to use for the EUCAST Clinical Breakpoints guideline

+

the version number to use for the EUCAST Clinical Breakpoints guideline. Currently supported: 10.0.

version_expertrules -

the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline

+

the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Currently supported: 3.1, 3.2.

... diff --git a/docs/reference/index.html b/docs/reference/index.html index b937ee57..c4492f08 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.3.0.9032 + 1.3.0.9033

diff --git a/docs/reference/mo_matching_score.html b/docs/reference/mo_matching_score.html index ac0067dc..dd340718 100644 --- a/docs/reference/mo_matching_score.html +++ b/docs/reference/mo_matching_score.html @@ -82,7 +82,7 @@ AMR (for R) - 1.3.0.9032 + 1.3.0.9033 @@ -261,7 +261,7 @@ -

With ambiguous user input in as.mo() and all the mo_* functions, the returned results are chosen based on their matching score using mo_matching_score(). This matching score \(m\), ranging from 0 to 100%, is calculated as:

+

With ambiguous user input in as.mo() and all the mo_* functions, the returned results are chosen based on their matching score using mo_matching_score(). This matching score \(m\), is calculated as:

$$m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \operatorname{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}$$

where: