(v1.3.0.9033) skimr fix

2025-07-13 04:42:09 +02:00 · 2020-09-28 11:00:59 +02:00
parent 519aada54f
commit 36ec8b0d81
27 changed files with 71 additions and 70 deletions
--- a/R/disk.R
+++ b/R/disk.R
@ -193,10 +193,10 @@ get_skimmers.disk <- function(column) {
  inline_hist <- import_fn("inline_hist", "skimr", error_on_fail = FALSE)
  sfl(
    skim_type = "disk",
-    smallest = ~min(as.double(.), na.rm = TRUE),
-    largest = ~max(as.double(.), na.rm = TRUE),
+    min = ~min(as.double(.), na.rm = TRUE),
+    max = ~max(as.double(.), na.rm = TRUE),
    median = ~stats::median(as.double(.), na.rm = TRUE),
-    n_unique = n_unique,
+    n_unique = ~pm_n_distinct(., na.rm = TRUE),
    hist = ~inline_hist(stats::na.omit(as.double(.)))
  )
 }
--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
@ -42,8 +42,8 @@ EUCAST_VERSION_EXPERT_RULES <- list("3.1" = list(version_txt = "v3.1",
 #' @param info print progress
 #' @param rules a character vector that specifies which rules should be applied. Must be one or more of `"breakpoints"`, `"expert"`, `"other"`, `"all"`, and defaults to `c("breakpoints", "expert")`. The default value can be set to another value, e.g. using `options(AMR_eucastrules = "all")`.
 #' @param verbose a [logical] to turn Verbose mode on and off (default is off). In Verbose mode, the function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way. Using Verbose mode takes a lot more time.
-#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline
-#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline
+#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline. Currently supported: `r paste0(names(EUCAST_VERSION_BREAKPOINTS), collapse = ", ")`.
+#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Currently supported: `r paste0(names(EUCAST_VERSION_EXPERT_RULES), collapse = ", ")`.
 #' @param ... column name of an antibiotic, please see section *Antibiotics* below
 #' @inheritParams first_isolate
 #' @details
@ -143,8 +143,8 @@ eucast_rules <- function(x,
  
  check_dataset_integrity()
  
-  version_breakpoints <- as.double(version_breakpoints)
-  version_expertrules <- as.double(version_expertrules)
+  version_breakpoints <- as.double(gsub("[^0-9.]+", "", version_breakpoints))
+  version_expertrules <- as.double(gsub("[^0-9.]+", "", version_expertrules))
  stop_ifnot(version_breakpoints %in% as.double(names(EUCAST_VERSION_BREAKPOINTS)),
             "EUCAST version ", version_breakpoints, " for clinical breakpoints not found")
  stop_ifnot(version_expertrules %in% as.double(names(EUCAST_VERSION_EXPERT_RULES)),
--- a/R/join_microorganisms.R
+++ b/R/join_microorganisms.R
@ -172,11 +172,11 @@ semi_join_microorganisms <- function(x, by = NULL, ...) {
  dplyr_semi <- import_fn("semi_join", "dplyr", error_on_fail = FALSE)
  if (!is.null(dplyr_semi)) {
    join <- suppressWarnings(
-      dplyr_semi(x = x, y = microorganisms, by = by,...)
+      dplyr_semi(x = x, y = microorganisms, by = by, ...)
    )
  } else {
    join <- suppressWarnings(
-      pm_semi_join(x = x, y = microorganisms, by = by,...)
+      pm_semi_join(x = x, y = microorganisms, by = by, ...)
    )
  }
  class(join) <- x_class
@ -196,11 +196,11 @@ anti_join_microorganisms <- function(x, by = NULL, ...) {
  dplyr_anti <- import_fn("anti_join", "dplyr", error_on_fail = FALSE)
  if (!is.null(dplyr_anti)) {
    join <- suppressWarnings(
-      dplyr_anti(x = x, y = microorganisms, by = by,...)
+      dplyr_anti(x = x, y = microorganisms, by = by, ...)
    )
  } else {
    join <- suppressWarnings(
-      pm_anti_join(x = x, y = microorganisms, by = by,...)
+      pm_anti_join(x = x, y = microorganisms, by = by, ...)
    )
  }
  class(join) <- x_class
--- a/R/mic.R
+++ b/R/mic.R
@ -306,7 +306,7 @@ get_skimmers.mic <- function(column) {
    min = ~as.character(sort(na.omit(.))[1]),
    max = ~as.character(sort(stats::na.omit(.))[length(stats::na.omit(.))]),
    median = ~as.character(stats::na.omit(.)[as.double(stats::na.omit(.)) == median(as.double(stats::na.omit(.)))])[1],
-    n_unique = n_unique,
+    n_unique = ~pm_n_distinct(., na.rm = TRUE),
    hist_log2 = ~inline_hist(log2(as.double(stats::na.omit(.))))
  )
 }
--- a/R/mo.R
+++ b/R/mo.R
@ -1647,7 +1647,7 @@ get_skimmers.mo <- function(column) {
  sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE)
  sfl(
    skim_type = "mo",
-    unique_total = n_unique,
+    unique_total = ~pm_n_distinct(., na.rm = TRUE),
    gram_negative = ~sum(mo_gramstain(stats::na.omit(.), language = NULL) == "Gram-negative", na.rm = TRUE),
    gram_positive = ~sum(mo_gramstain(stats::na.omit(.), language = NULL) == "Gram-positive", na.rm = TRUE),
    top_genus = ~names(sort(-table(mo_genus(stats::na.omit(.), language = NULL))))[1L],
@ -1778,20 +1778,20 @@ print.mo_uncertainties <- function(x, ...) {
  if (NROW(x) == 0) {
    return(NULL)
  }
-  cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Furthermore, an indication is given about the certainty of the match - the more transformations are needed for coercion, the less certain the result.")), collapse = "\n"))
+  cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Please see ?mo_matching_score.")), collapse = "\n"))
  cat("\n")
  
  msg <- ""
  for (i in seq_len(nrow(x))) {
    if (x[i, ]$candidates != "") {
      candidates <- unlist(strsplit(x[i, ]$candidates, ", ", fixed = TRUE))
-      scores <- mo_matching_score(x = x[i, ]$input,
-                                  n = candidates)
+      scores <- mo_matching_score(x = x[i, ]$input, n = candidates)
      # sort on descending scores
      candidates <- candidates[order(1 - scores)]
+      scores_formatted <- trimws(formatC(round(scores, 3), format = "f", digits = 3))
      n_candidates <- length(candidates)
      candidates <- paste0(font_italic(candidates, collapse = NULL), 
-                           " (", trimws(percentage(scores[order(1 - scores)], digits = 1)), ")")
+                           " (", scores_formatted[order(1 - scores)], ")")
      candidates <- paste(candidates, collapse = ", ")
      # align with input after arrow
      candidates <- paste0("\n", strrep(" ", nchar(x[i, ]$input) + 6), 
@ -1799,23 +1799,17 @@ print.mo_uncertainties <- function(x, ...) {
    } else {
      candidates <- ""
    }
-    if (x[i, ]$uncertainty == 1) {
-      uncertainty_interpretation <- font_green("* very certain *")
-    } else if (x[i, ]$uncertainty == 1) {
-      uncertainty_interpretation <- font_yellow("* certain *")
-    } else {
-      uncertainty_interpretation <- font_red("* not certain *")
-    }
+    score <- trimws(formatC(round(mo_matching_score(x = x[i, ]$input,
+                                                    n = x[i, ]$fullname),
+                                  3),
+                            format = "f", digits = 3))
    msg <- paste(msg,
                 paste0('"', x[i, ]$input, '" -> ',
                        paste0(font_bold(font_italic(x[i, ]$fullname)),
                               ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""),
                               " (", x[i, ]$mo,
-                               ", matching score = ", trimws(percentage(mo_matching_score(x = x[i, ]$input,
-                                                                                          n = x[i, ]$fullname),
-                                                                        digits = 1)),
+                               ", matching score = ", score,
                               ") "),
-                        uncertainty_interpretation,
                        candidates),
                 sep = "\n")
  }
--- a/R/mo_matching_score.R
+++ b/R/mo_matching_score.R
@ -25,7 +25,7 @@
 #' @param x Any user input value(s)
 #' @param n A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms]
 #' @section Matching score for microorganisms:
-#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, ranging from 0 to 100%, is calculated as:
+#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as:
 #' 
 #' \deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \operatorname{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}
 #' 
@ -66,7 +66,7 @@ mo_matching_score <- function(x, n) {
  var_F <- nchar(n)
  # L = modified Levenshtein distance
  var_L <- levenshtein
-  # P = Prevalence (1 to 3)
+  # P = prevalence (1 to 3), see ?as.mo
  var_P <- MO_lookup[match(n, MO_lookup$fullname), "prevalence", drop = TRUE]
  # K = kingdom index (Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5)
  var_K <- MO_lookup[match(n, MO_lookup$fullname), "kingdom_index", drop = TRUE]
--- a/R/rsi.R
+++ b/R/rsi.R
@ -746,20 +746,27 @@ freq.rsi <- function(x, ...) {

 # will be exported using s3_register() in R/zzz.R
 get_skimmers.rsi <- function(column) {
-  # a bit of a crazy hack to get the variable name
-  name_call <- function(.data, name = deparse(substitute(column))) {
-    vars <- tryCatch(eval(parse(text = ".data$skim_variable"), envir = sys.frame(2)), 
-                     error = function(e) NULL)
+  # get the variable name 'skim_variable'
+  name_call <- function(.data) {
    calls <- sys.calls()
+    calls_txt <- vapply(calls, function(x) paste(deparse(x), collapse = ""), FUN.VALUE = character(1))
+    if (any(calls_txt %like% "skim_variable", na.rm = TRUE)) {
+      ind <- which(calls_txt %like% "skim_variable")[1L]
+      vars <- tryCatch(eval(parse(text = ".data$skim_variable"), envir = sys.frame(ind)), 
+                       error = function(e) NULL)
+    } else {
+      vars <- NULL
+    }
    i <- tryCatch(attributes(calls[[length(calls)]])$position, 
                  error = function(e) NULL)
    if (is.null(vars) | is.null(i)) {
      NA_character_
-    } else{
+    } else {
      lengths <- sapply(vars, length)
-      lengths <-  sum(lengths[!names(lengths) == "rsi"])
-      var <- vars$rsi[i - lengths]
-      if (var == "data") {
+      when_starts_rsi <- which(names(sapply(vars, length)) == "rsi")
+      offset <- sum(lengths[c(1:when_starts_rsi - 1)])
+      var <- vars$rsi[i - offset]
+      if (!isFALSE(var == "data")) {
        NA_character_
      } else{
        ab_name(var)
@ -770,7 +777,7 @@ get_skimmers.rsi <- function(column) {
  sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE)
  sfl(
    skim_type = "rsi",
-    name = name_call,
+    ab_name = name_call,
    count_R = count_R,
    count_S = count_susceptible,
    count_I = count_I,