mirror of
https://github.com/msberends/AMR.git
synced 2025-07-10 10:21:56 +02:00
(v1.3.0.9033) skimr fix
This commit is contained in:
6
R/disk.R
6
R/disk.R
@ -193,10 +193,10 @@ get_skimmers.disk <- function(column) {
|
||||
inline_hist <- import_fn("inline_hist", "skimr", error_on_fail = FALSE)
|
||||
sfl(
|
||||
skim_type = "disk",
|
||||
smallest = ~min(as.double(.), na.rm = TRUE),
|
||||
largest = ~max(as.double(.), na.rm = TRUE),
|
||||
min = ~min(as.double(.), na.rm = TRUE),
|
||||
max = ~max(as.double(.), na.rm = TRUE),
|
||||
median = ~stats::median(as.double(.), na.rm = TRUE),
|
||||
n_unique = n_unique,
|
||||
n_unique = ~pm_n_distinct(., na.rm = TRUE),
|
||||
hist = ~inline_hist(stats::na.omit(as.double(.)))
|
||||
)
|
||||
}
|
||||
|
@ -42,8 +42,8 @@ EUCAST_VERSION_EXPERT_RULES <- list("3.1" = list(version_txt = "v3.1",
|
||||
#' @param info print progress
|
||||
#' @param rules a character vector that specifies which rules should be applied. Must be one or more of `"breakpoints"`, `"expert"`, `"other"`, `"all"`, and defaults to `c("breakpoints", "expert")`. The default value can be set to another value, e.g. using `options(AMR_eucastrules = "all")`.
|
||||
#' @param verbose a [logical] to turn Verbose mode on and off (default is off). In Verbose mode, the function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way. Using Verbose mode takes a lot more time.
|
||||
#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline
|
||||
#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline
|
||||
#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline. Currently supported: `r paste0(names(EUCAST_VERSION_BREAKPOINTS), collapse = ", ")`.
|
||||
#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Currently supported: `r paste0(names(EUCAST_VERSION_EXPERT_RULES), collapse = ", ")`.
|
||||
#' @param ... column name of an antibiotic, please see section *Antibiotics* below
|
||||
#' @inheritParams first_isolate
|
||||
#' @details
|
||||
@ -143,8 +143,8 @@ eucast_rules <- function(x,
|
||||
|
||||
check_dataset_integrity()
|
||||
|
||||
version_breakpoints <- as.double(version_breakpoints)
|
||||
version_expertrules <- as.double(version_expertrules)
|
||||
version_breakpoints <- as.double(gsub("[^0-9.]+", "", version_breakpoints))
|
||||
version_expertrules <- as.double(gsub("[^0-9.]+", "", version_expertrules))
|
||||
stop_ifnot(version_breakpoints %in% as.double(names(EUCAST_VERSION_BREAKPOINTS)),
|
||||
"EUCAST version ", version_breakpoints, " for clinical breakpoints not found")
|
||||
stop_ifnot(version_expertrules %in% as.double(names(EUCAST_VERSION_EXPERT_RULES)),
|
||||
|
@ -172,11 +172,11 @@ semi_join_microorganisms <- function(x, by = NULL, ...) {
|
||||
dplyr_semi <- import_fn("semi_join", "dplyr", error_on_fail = FALSE)
|
||||
if (!is.null(dplyr_semi)) {
|
||||
join <- suppressWarnings(
|
||||
dplyr_semi(x = x, y = microorganisms, by = by,...)
|
||||
dplyr_semi(x = x, y = microorganisms, by = by, ...)
|
||||
)
|
||||
} else {
|
||||
join <- suppressWarnings(
|
||||
pm_semi_join(x = x, y = microorganisms, by = by,...)
|
||||
pm_semi_join(x = x, y = microorganisms, by = by, ...)
|
||||
)
|
||||
}
|
||||
class(join) <- x_class
|
||||
@ -196,11 +196,11 @@ anti_join_microorganisms <- function(x, by = NULL, ...) {
|
||||
dplyr_anti <- import_fn("anti_join", "dplyr", error_on_fail = FALSE)
|
||||
if (!is.null(dplyr_anti)) {
|
||||
join <- suppressWarnings(
|
||||
dplyr_anti(x = x, y = microorganisms, by = by,...)
|
||||
dplyr_anti(x = x, y = microorganisms, by = by, ...)
|
||||
)
|
||||
} else {
|
||||
join <- suppressWarnings(
|
||||
pm_anti_join(x = x, y = microorganisms, by = by,...)
|
||||
pm_anti_join(x = x, y = microorganisms, by = by, ...)
|
||||
)
|
||||
}
|
||||
class(join) <- x_class
|
||||
|
2
R/mic.R
2
R/mic.R
@ -306,7 +306,7 @@ get_skimmers.mic <- function(column) {
|
||||
min = ~as.character(sort(na.omit(.))[1]),
|
||||
max = ~as.character(sort(stats::na.omit(.))[length(stats::na.omit(.))]),
|
||||
median = ~as.character(stats::na.omit(.)[as.double(stats::na.omit(.)) == median(as.double(stats::na.omit(.)))])[1],
|
||||
n_unique = n_unique,
|
||||
n_unique = ~pm_n_distinct(., na.rm = TRUE),
|
||||
hist_log2 = ~inline_hist(log2(as.double(stats::na.omit(.))))
|
||||
)
|
||||
}
|
||||
|
26
R/mo.R
26
R/mo.R
@ -1647,7 +1647,7 @@ get_skimmers.mo <- function(column) {
|
||||
sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE)
|
||||
sfl(
|
||||
skim_type = "mo",
|
||||
unique_total = n_unique,
|
||||
unique_total = ~pm_n_distinct(., na.rm = TRUE),
|
||||
gram_negative = ~sum(mo_gramstain(stats::na.omit(.), language = NULL) == "Gram-negative", na.rm = TRUE),
|
||||
gram_positive = ~sum(mo_gramstain(stats::na.omit(.), language = NULL) == "Gram-positive", na.rm = TRUE),
|
||||
top_genus = ~names(sort(-table(mo_genus(stats::na.omit(.), language = NULL))))[1L],
|
||||
@ -1778,20 +1778,20 @@ print.mo_uncertainties <- function(x, ...) {
|
||||
if (NROW(x) == 0) {
|
||||
return(NULL)
|
||||
}
|
||||
cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Furthermore, an indication is given about the certainty of the match - the more transformations are needed for coercion, the less certain the result.")), collapse = "\n"))
|
||||
cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Please see ?mo_matching_score.")), collapse = "\n"))
|
||||
cat("\n")
|
||||
|
||||
msg <- ""
|
||||
for (i in seq_len(nrow(x))) {
|
||||
if (x[i, ]$candidates != "") {
|
||||
candidates <- unlist(strsplit(x[i, ]$candidates, ", ", fixed = TRUE))
|
||||
scores <- mo_matching_score(x = x[i, ]$input,
|
||||
n = candidates)
|
||||
scores <- mo_matching_score(x = x[i, ]$input, n = candidates)
|
||||
# sort on descending scores
|
||||
candidates <- candidates[order(1 - scores)]
|
||||
scores_formatted <- trimws(formatC(round(scores, 3), format = "f", digits = 3))
|
||||
n_candidates <- length(candidates)
|
||||
candidates <- paste0(font_italic(candidates, collapse = NULL),
|
||||
" (", trimws(percentage(scores[order(1 - scores)], digits = 1)), ")")
|
||||
" (", scores_formatted[order(1 - scores)], ")")
|
||||
candidates <- paste(candidates, collapse = ", ")
|
||||
# align with input after arrow
|
||||
candidates <- paste0("\n", strrep(" ", nchar(x[i, ]$input) + 6),
|
||||
@ -1799,23 +1799,17 @@ print.mo_uncertainties <- function(x, ...) {
|
||||
} else {
|
||||
candidates <- ""
|
||||
}
|
||||
if (x[i, ]$uncertainty == 1) {
|
||||
uncertainty_interpretation <- font_green("* very certain *")
|
||||
} else if (x[i, ]$uncertainty == 1) {
|
||||
uncertainty_interpretation <- font_yellow("* certain *")
|
||||
} else {
|
||||
uncertainty_interpretation <- font_red("* not certain *")
|
||||
}
|
||||
score <- trimws(formatC(round(mo_matching_score(x = x[i, ]$input,
|
||||
n = x[i, ]$fullname),
|
||||
3),
|
||||
format = "f", digits = 3))
|
||||
msg <- paste(msg,
|
||||
paste0('"', x[i, ]$input, '" -> ',
|
||||
paste0(font_bold(font_italic(x[i, ]$fullname)),
|
||||
ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""),
|
||||
" (", x[i, ]$mo,
|
||||
", matching score = ", trimws(percentage(mo_matching_score(x = x[i, ]$input,
|
||||
n = x[i, ]$fullname),
|
||||
digits = 1)),
|
||||
", matching score = ", score,
|
||||
") "),
|
||||
uncertainty_interpretation,
|
||||
candidates),
|
||||
sep = "\n")
|
||||
}
|
||||
|
@ -25,7 +25,7 @@
|
||||
#' @param x Any user input value(s)
|
||||
#' @param n A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms]
|
||||
#' @section Matching score for microorganisms:
|
||||
#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, ranging from 0 to 100%, is calculated as:
|
||||
#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as:
|
||||
#'
|
||||
#' \deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \operatorname{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}
|
||||
#'
|
||||
@ -66,7 +66,7 @@ mo_matching_score <- function(x, n) {
|
||||
var_F <- nchar(n)
|
||||
# L = modified Levenshtein distance
|
||||
var_L <- levenshtein
|
||||
# P = Prevalence (1 to 3)
|
||||
# P = prevalence (1 to 3), see ?as.mo
|
||||
var_P <- MO_lookup[match(n, MO_lookup$fullname), "prevalence", drop = TRUE]
|
||||
# K = kingdom index (Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5)
|
||||
var_K <- MO_lookup[match(n, MO_lookup$fullname), "kingdom_index", drop = TRUE]
|
||||
|
25
R/rsi.R
25
R/rsi.R
@ -746,20 +746,27 @@ freq.rsi <- function(x, ...) {
|
||||
|
||||
# will be exported using s3_register() in R/zzz.R
|
||||
get_skimmers.rsi <- function(column) {
|
||||
# a bit of a crazy hack to get the variable name
|
||||
name_call <- function(.data, name = deparse(substitute(column))) {
|
||||
vars <- tryCatch(eval(parse(text = ".data$skim_variable"), envir = sys.frame(2)),
|
||||
error = function(e) NULL)
|
||||
# get the variable name 'skim_variable'
|
||||
name_call <- function(.data) {
|
||||
calls <- sys.calls()
|
||||
calls_txt <- vapply(calls, function(x) paste(deparse(x), collapse = ""), FUN.VALUE = character(1))
|
||||
if (any(calls_txt %like% "skim_variable", na.rm = TRUE)) {
|
||||
ind <- which(calls_txt %like% "skim_variable")[1L]
|
||||
vars <- tryCatch(eval(parse(text = ".data$skim_variable"), envir = sys.frame(ind)),
|
||||
error = function(e) NULL)
|
||||
} else {
|
||||
vars <- NULL
|
||||
}
|
||||
i <- tryCatch(attributes(calls[[length(calls)]])$position,
|
||||
error = function(e) NULL)
|
||||
if (is.null(vars) | is.null(i)) {
|
||||
NA_character_
|
||||
} else{
|
||||
} else {
|
||||
lengths <- sapply(vars, length)
|
||||
lengths <- sum(lengths[!names(lengths) == "rsi"])
|
||||
var <- vars$rsi[i - lengths]
|
||||
if (var == "data") {
|
||||
when_starts_rsi <- which(names(sapply(vars, length)) == "rsi")
|
||||
offset <- sum(lengths[c(1:when_starts_rsi - 1)])
|
||||
var <- vars$rsi[i - offset]
|
||||
if (!isFALSE(var == "data")) {
|
||||
NA_character_
|
||||
} else{
|
||||
ab_name(var)
|
||||
@ -770,7 +777,7 @@ get_skimmers.rsi <- function(column) {
|
||||
sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE)
|
||||
sfl(
|
||||
skim_type = "rsi",
|
||||
name = name_call,
|
||||
ab_name = name_call,
|
||||
count_R = count_R,
|
||||
count_S = count_susceptible,
|
||||
count_I = count_I,
|
||||
|
Reference in New Issue
Block a user