1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-10 10:21:56 +02:00

(v1.3.0.9033) skimr fix

This commit is contained in:
2020-09-28 11:00:59 +02:00
parent 519aada54f
commit 36ec8b0d81
27 changed files with 71 additions and 70 deletions

View File

@ -193,10 +193,10 @@ get_skimmers.disk <- function(column) {
inline_hist <- import_fn("inline_hist", "skimr", error_on_fail = FALSE)
sfl(
skim_type = "disk",
smallest = ~min(as.double(.), na.rm = TRUE),
largest = ~max(as.double(.), na.rm = TRUE),
min = ~min(as.double(.), na.rm = TRUE),
max = ~max(as.double(.), na.rm = TRUE),
median = ~stats::median(as.double(.), na.rm = TRUE),
n_unique = n_unique,
n_unique = ~pm_n_distinct(., na.rm = TRUE),
hist = ~inline_hist(stats::na.omit(as.double(.)))
)
}

View File

@ -42,8 +42,8 @@ EUCAST_VERSION_EXPERT_RULES <- list("3.1" = list(version_txt = "v3.1",
#' @param info print progress
#' @param rules a character vector that specifies which rules should be applied. Must be one or more of `"breakpoints"`, `"expert"`, `"other"`, `"all"`, and defaults to `c("breakpoints", "expert")`. The default value can be set to another value, e.g. using `options(AMR_eucastrules = "all")`.
#' @param verbose a [logical] to turn Verbose mode on and off (default is off). In Verbose mode, the function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way. Using Verbose mode takes a lot more time.
#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline
#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline
#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline. Currently supported: `r paste0(names(EUCAST_VERSION_BREAKPOINTS), collapse = ", ")`.
#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Currently supported: `r paste0(names(EUCAST_VERSION_EXPERT_RULES), collapse = ", ")`.
#' @param ... column name of an antibiotic, please see section *Antibiotics* below
#' @inheritParams first_isolate
#' @details
@ -143,8 +143,8 @@ eucast_rules <- function(x,
check_dataset_integrity()
version_breakpoints <- as.double(version_breakpoints)
version_expertrules <- as.double(version_expertrules)
version_breakpoints <- as.double(gsub("[^0-9.]+", "", version_breakpoints))
version_expertrules <- as.double(gsub("[^0-9.]+", "", version_expertrules))
stop_ifnot(version_breakpoints %in% as.double(names(EUCAST_VERSION_BREAKPOINTS)),
"EUCAST version ", version_breakpoints, " for clinical breakpoints not found")
stop_ifnot(version_expertrules %in% as.double(names(EUCAST_VERSION_EXPERT_RULES)),

View File

@ -172,11 +172,11 @@ semi_join_microorganisms <- function(x, by = NULL, ...) {
dplyr_semi <- import_fn("semi_join", "dplyr", error_on_fail = FALSE)
if (!is.null(dplyr_semi)) {
join <- suppressWarnings(
dplyr_semi(x = x, y = microorganisms, by = by,...)
dplyr_semi(x = x, y = microorganisms, by = by, ...)
)
} else {
join <- suppressWarnings(
pm_semi_join(x = x, y = microorganisms, by = by,...)
pm_semi_join(x = x, y = microorganisms, by = by, ...)
)
}
class(join) <- x_class
@ -196,11 +196,11 @@ anti_join_microorganisms <- function(x, by = NULL, ...) {
dplyr_anti <- import_fn("anti_join", "dplyr", error_on_fail = FALSE)
if (!is.null(dplyr_anti)) {
join <- suppressWarnings(
dplyr_anti(x = x, y = microorganisms, by = by,...)
dplyr_anti(x = x, y = microorganisms, by = by, ...)
)
} else {
join <- suppressWarnings(
pm_anti_join(x = x, y = microorganisms, by = by,...)
pm_anti_join(x = x, y = microorganisms, by = by, ...)
)
}
class(join) <- x_class

View File

@ -306,7 +306,7 @@ get_skimmers.mic <- function(column) {
min = ~as.character(sort(na.omit(.))[1]),
max = ~as.character(sort(stats::na.omit(.))[length(stats::na.omit(.))]),
median = ~as.character(stats::na.omit(.)[as.double(stats::na.omit(.)) == median(as.double(stats::na.omit(.)))])[1],
n_unique = n_unique,
n_unique = ~pm_n_distinct(., na.rm = TRUE),
hist_log2 = ~inline_hist(log2(as.double(stats::na.omit(.))))
)
}

26
R/mo.R
View File

@ -1647,7 +1647,7 @@ get_skimmers.mo <- function(column) {
sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE)
sfl(
skim_type = "mo",
unique_total = n_unique,
unique_total = ~pm_n_distinct(., na.rm = TRUE),
gram_negative = ~sum(mo_gramstain(stats::na.omit(.), language = NULL) == "Gram-negative", na.rm = TRUE),
gram_positive = ~sum(mo_gramstain(stats::na.omit(.), language = NULL) == "Gram-positive", na.rm = TRUE),
top_genus = ~names(sort(-table(mo_genus(stats::na.omit(.), language = NULL))))[1L],
@ -1778,20 +1778,20 @@ print.mo_uncertainties <- function(x, ...) {
if (NROW(x) == 0) {
return(NULL)
}
cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Furthermore, an indication is given about the certainty of the match - the more transformations are needed for coercion, the less certain the result.")), collapse = "\n"))
cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Please see ?mo_matching_score.")), collapse = "\n"))
cat("\n")
msg <- ""
for (i in seq_len(nrow(x))) {
if (x[i, ]$candidates != "") {
candidates <- unlist(strsplit(x[i, ]$candidates, ", ", fixed = TRUE))
scores <- mo_matching_score(x = x[i, ]$input,
n = candidates)
scores <- mo_matching_score(x = x[i, ]$input, n = candidates)
# sort on descending scores
candidates <- candidates[order(1 - scores)]
scores_formatted <- trimws(formatC(round(scores, 3), format = "f", digits = 3))
n_candidates <- length(candidates)
candidates <- paste0(font_italic(candidates, collapse = NULL),
" (", trimws(percentage(scores[order(1 - scores)], digits = 1)), ")")
" (", scores_formatted[order(1 - scores)], ")")
candidates <- paste(candidates, collapse = ", ")
# align with input after arrow
candidates <- paste0("\n", strrep(" ", nchar(x[i, ]$input) + 6),
@ -1799,23 +1799,17 @@ print.mo_uncertainties <- function(x, ...) {
} else {
candidates <- ""
}
if (x[i, ]$uncertainty == 1) {
uncertainty_interpretation <- font_green("* very certain *")
} else if (x[i, ]$uncertainty == 1) {
uncertainty_interpretation <- font_yellow("* certain *")
} else {
uncertainty_interpretation <- font_red("* not certain *")
}
score <- trimws(formatC(round(mo_matching_score(x = x[i, ]$input,
n = x[i, ]$fullname),
3),
format = "f", digits = 3))
msg <- paste(msg,
paste0('"', x[i, ]$input, '" -> ',
paste0(font_bold(font_italic(x[i, ]$fullname)),
ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""),
" (", x[i, ]$mo,
", matching score = ", trimws(percentage(mo_matching_score(x = x[i, ]$input,
n = x[i, ]$fullname),
digits = 1)),
", matching score = ", score,
") "),
uncertainty_interpretation,
candidates),
sep = "\n")
}

View File

@ -25,7 +25,7 @@
#' @param x Any user input value(s)
#' @param n A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms]
#' @section Matching score for microorganisms:
#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, ranging from 0 to 100%, is calculated as:
#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as:
#'
#' \deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \operatorname{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}
#'
@ -66,7 +66,7 @@ mo_matching_score <- function(x, n) {
var_F <- nchar(n)
# L = modified Levenshtein distance
var_L <- levenshtein
# P = Prevalence (1 to 3)
# P = prevalence (1 to 3), see ?as.mo
var_P <- MO_lookup[match(n, MO_lookup$fullname), "prevalence", drop = TRUE]
# K = kingdom index (Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5)
var_K <- MO_lookup[match(n, MO_lookup$fullname), "kingdom_index", drop = TRUE]

25
R/rsi.R
View File

@ -746,20 +746,27 @@ freq.rsi <- function(x, ...) {
# will be exported using s3_register() in R/zzz.R
get_skimmers.rsi <- function(column) {
# a bit of a crazy hack to get the variable name
name_call <- function(.data, name = deparse(substitute(column))) {
vars <- tryCatch(eval(parse(text = ".data$skim_variable"), envir = sys.frame(2)),
error = function(e) NULL)
# get the variable name 'skim_variable'
name_call <- function(.data) {
calls <- sys.calls()
calls_txt <- vapply(calls, function(x) paste(deparse(x), collapse = ""), FUN.VALUE = character(1))
if (any(calls_txt %like% "skim_variable", na.rm = TRUE)) {
ind <- which(calls_txt %like% "skim_variable")[1L]
vars <- tryCatch(eval(parse(text = ".data$skim_variable"), envir = sys.frame(ind)),
error = function(e) NULL)
} else {
vars <- NULL
}
i <- tryCatch(attributes(calls[[length(calls)]])$position,
error = function(e) NULL)
if (is.null(vars) | is.null(i)) {
NA_character_
} else{
} else {
lengths <- sapply(vars, length)
lengths <- sum(lengths[!names(lengths) == "rsi"])
var <- vars$rsi[i - lengths]
if (var == "data") {
when_starts_rsi <- which(names(sapply(vars, length)) == "rsi")
offset <- sum(lengths[c(1:when_starts_rsi - 1)])
var <- vars$rsi[i - offset]
if (!isFALSE(var == "data")) {
NA_character_
} else{
ab_name(var)
@ -770,7 +777,7 @@ get_skimmers.rsi <- function(column) {
sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE)
sfl(
skim_type = "rsi",
name = name_call,
ab_name = name_call,
count_R = count_R,
count_S = count_susceptible,
count_I = count_I,