diff --git a/DESCRIPTION b/DESCRIPTION
index 11fa4ffb..67d1666b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
Package: AMR
-Version: 1.3.0.9032
+Version: 1.3.0.9033
Date: 2020-09-28
Title: Antimicrobial Resistance Analysis
Authors@R: c(
diff --git a/NEWS.md b/NEWS.md
index c9b5e694..5b9ed41a 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# AMR 1.3.0.9032
+# AMR 1.3.0.9033
## Last updated: 28 September 2020
Note: some changes in this version were suggested by anonymous reviewers from the journal we submitted our manuscipt to. We are those reviewers very grateful for going through our code so thoroughly!
diff --git a/R/disk.R b/R/disk.R
index 65a7bc15..007fac85 100644
--- a/R/disk.R
+++ b/R/disk.R
@@ -193,10 +193,10 @@ get_skimmers.disk <- function(column) {
inline_hist <- import_fn("inline_hist", "skimr", error_on_fail = FALSE)
sfl(
skim_type = "disk",
- smallest = ~min(as.double(.), na.rm = TRUE),
- largest = ~max(as.double(.), na.rm = TRUE),
+ min = ~min(as.double(.), na.rm = TRUE),
+ max = ~max(as.double(.), na.rm = TRUE),
median = ~stats::median(as.double(.), na.rm = TRUE),
- n_unique = n_unique,
+ n_unique = ~pm_n_distinct(., na.rm = TRUE),
hist = ~inline_hist(stats::na.omit(as.double(.)))
)
}
diff --git a/R/eucast_rules.R b/R/eucast_rules.R
index a4ef37bf..42a71bf8 100755
--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
@@ -42,8 +42,8 @@ EUCAST_VERSION_EXPERT_RULES <- list("3.1" = list(version_txt = "v3.1",
#' @param info print progress
#' @param rules a character vector that specifies which rules should be applied. Must be one or more of `"breakpoints"`, `"expert"`, `"other"`, `"all"`, and defaults to `c("breakpoints", "expert")`. The default value can be set to another value, e.g. using `options(AMR_eucastrules = "all")`.
#' @param verbose a [logical] to turn Verbose mode on and off (default is off). In Verbose mode, the function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way. Using Verbose mode takes a lot more time.
-#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline
-#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline
+#' @param version_breakpoints the version number to use for the EUCAST Clinical Breakpoints guideline. Currently supported: `r paste0(names(EUCAST_VERSION_BREAKPOINTS), collapse = ", ")`.
+#' @param version_expertrules the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Currently supported: `r paste0(names(EUCAST_VERSION_EXPERT_RULES), collapse = ", ")`.
#' @param ... column name of an antibiotic, please see section *Antibiotics* below
#' @inheritParams first_isolate
#' @details
@@ -143,8 +143,8 @@ eucast_rules <- function(x,
check_dataset_integrity()
- version_breakpoints <- as.double(version_breakpoints)
- version_expertrules <- as.double(version_expertrules)
+ version_breakpoints <- as.double(gsub("[^0-9.]+", "", version_breakpoints))
+ version_expertrules <- as.double(gsub("[^0-9.]+", "", version_expertrules))
stop_ifnot(version_breakpoints %in% as.double(names(EUCAST_VERSION_BREAKPOINTS)),
"EUCAST version ", version_breakpoints, " for clinical breakpoints not found")
stop_ifnot(version_expertrules %in% as.double(names(EUCAST_VERSION_EXPERT_RULES)),
diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R
index 2dd64a58..e5b85bdd 100755
--- a/R/join_microorganisms.R
+++ b/R/join_microorganisms.R
@@ -172,11 +172,11 @@ semi_join_microorganisms <- function(x, by = NULL, ...) {
dplyr_semi <- import_fn("semi_join", "dplyr", error_on_fail = FALSE)
if (!is.null(dplyr_semi)) {
join <- suppressWarnings(
- dplyr_semi(x = x, y = microorganisms, by = by,...)
+ dplyr_semi(x = x, y = microorganisms, by = by, ...)
)
} else {
join <- suppressWarnings(
- pm_semi_join(x = x, y = microorganisms, by = by,...)
+ pm_semi_join(x = x, y = microorganisms, by = by, ...)
)
}
class(join) <- x_class
@@ -196,11 +196,11 @@ anti_join_microorganisms <- function(x, by = NULL, ...) {
dplyr_anti <- import_fn("anti_join", "dplyr", error_on_fail = FALSE)
if (!is.null(dplyr_anti)) {
join <- suppressWarnings(
- dplyr_anti(x = x, y = microorganisms, by = by,...)
+ dplyr_anti(x = x, y = microorganisms, by = by, ...)
)
} else {
join <- suppressWarnings(
- pm_anti_join(x = x, y = microorganisms, by = by,...)
+ pm_anti_join(x = x, y = microorganisms, by = by, ...)
)
}
class(join) <- x_class
diff --git a/R/mic.R b/R/mic.R
index 7a8019f4..d7545060 100755
--- a/R/mic.R
+++ b/R/mic.R
@@ -306,7 +306,7 @@ get_skimmers.mic <- function(column) {
min = ~as.character(sort(na.omit(.))[1]),
max = ~as.character(sort(stats::na.omit(.))[length(stats::na.omit(.))]),
median = ~as.character(stats::na.omit(.)[as.double(stats::na.omit(.)) == median(as.double(stats::na.omit(.)))])[1],
- n_unique = n_unique,
+ n_unique = ~pm_n_distinct(., na.rm = TRUE),
hist_log2 = ~inline_hist(log2(as.double(stats::na.omit(.))))
)
}
diff --git a/R/mo.R b/R/mo.R
index 841c8878..cda2f305 100755
--- a/R/mo.R
+++ b/R/mo.R
@@ -1647,7 +1647,7 @@ get_skimmers.mo <- function(column) {
sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE)
sfl(
skim_type = "mo",
- unique_total = n_unique,
+ unique_total = ~pm_n_distinct(., na.rm = TRUE),
gram_negative = ~sum(mo_gramstain(stats::na.omit(.), language = NULL) == "Gram-negative", na.rm = TRUE),
gram_positive = ~sum(mo_gramstain(stats::na.omit(.), language = NULL) == "Gram-positive", na.rm = TRUE),
top_genus = ~names(sort(-table(mo_genus(stats::na.omit(.), language = NULL))))[1L],
@@ -1778,20 +1778,20 @@ print.mo_uncertainties <- function(x, ...) {
if (NROW(x) == 0) {
return(NULL)
}
- cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Furthermore, an indication is given about the certainty of the match - the more transformations are needed for coercion, the less certain the result.")), collapse = "\n"))
+ cat(font_blue(strwrap(c("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Please see ?mo_matching_score.")), collapse = "\n"))
cat("\n")
msg <- ""
for (i in seq_len(nrow(x))) {
if (x[i, ]$candidates != "") {
candidates <- unlist(strsplit(x[i, ]$candidates, ", ", fixed = TRUE))
- scores <- mo_matching_score(x = x[i, ]$input,
- n = candidates)
+ scores <- mo_matching_score(x = x[i, ]$input, n = candidates)
# sort on descending scores
candidates <- candidates[order(1 - scores)]
+ scores_formatted <- trimws(formatC(round(scores, 3), format = "f", digits = 3))
n_candidates <- length(candidates)
candidates <- paste0(font_italic(candidates, collapse = NULL),
- " (", trimws(percentage(scores[order(1 - scores)], digits = 1)), ")")
+ " (", scores_formatted[order(1 - scores)], ")")
candidates <- paste(candidates, collapse = ", ")
# align with input after arrow
candidates <- paste0("\n", strrep(" ", nchar(x[i, ]$input) + 6),
@@ -1799,23 +1799,17 @@ print.mo_uncertainties <- function(x, ...) {
} else {
candidates <- ""
}
- if (x[i, ]$uncertainty == 1) {
- uncertainty_interpretation <- font_green("* very certain *")
- } else if (x[i, ]$uncertainty == 1) {
- uncertainty_interpretation <- font_yellow("* certain *")
- } else {
- uncertainty_interpretation <- font_red("* not certain *")
- }
+ score <- trimws(formatC(round(mo_matching_score(x = x[i, ]$input,
+ n = x[i, ]$fullname),
+ 3),
+ format = "f", digits = 3))
msg <- paste(msg,
paste0('"', x[i, ]$input, '" -> ',
paste0(font_bold(font_italic(x[i, ]$fullname)),
ifelse(!is.na(x[i, ]$renamed_to), paste(", renamed to", font_italic(x[i, ]$renamed_to)), ""),
" (", x[i, ]$mo,
- ", matching score = ", trimws(percentage(mo_matching_score(x = x[i, ]$input,
- n = x[i, ]$fullname),
- digits = 1)),
+ ", matching score = ", score,
") "),
- uncertainty_interpretation,
candidates),
sep = "\n")
}
diff --git a/R/mo_matching_score.R b/R/mo_matching_score.R
index 38d3bf23..a4fd1f83 100755
--- a/R/mo_matching_score.R
+++ b/R/mo_matching_score.R
@@ -25,7 +25,7 @@
#' @param x Any user input value(s)
#' @param n A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms]
#' @section Matching score for microorganisms:
-#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, ranging from 0 to 100%, is calculated as:
+#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as:
#'
#' \deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \operatorname{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}
#'
@@ -66,7 +66,7 @@ mo_matching_score <- function(x, n) {
var_F <- nchar(n)
# L = modified Levenshtein distance
var_L <- levenshtein
- # P = Prevalence (1 to 3)
+ # P = prevalence (1 to 3), see ?as.mo
var_P <- MO_lookup[match(n, MO_lookup$fullname), "prevalence", drop = TRUE]
# K = kingdom index (Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5)
var_K <- MO_lookup[match(n, MO_lookup$fullname), "kingdom_index", drop = TRUE]
diff --git a/R/rsi.R b/R/rsi.R
index 0f9fe37f..11b3bf0d 100755
--- a/R/rsi.R
+++ b/R/rsi.R
@@ -746,20 +746,27 @@ freq.rsi <- function(x, ...) {
# will be exported using s3_register() in R/zzz.R
get_skimmers.rsi <- function(column) {
- # a bit of a crazy hack to get the variable name
- name_call <- function(.data, name = deparse(substitute(column))) {
- vars <- tryCatch(eval(parse(text = ".data$skim_variable"), envir = sys.frame(2)),
- error = function(e) NULL)
+ # get the variable name 'skim_variable'
+ name_call <- function(.data) {
calls <- sys.calls()
+ calls_txt <- vapply(calls, function(x) paste(deparse(x), collapse = ""), FUN.VALUE = character(1))
+ if (any(calls_txt %like% "skim_variable", na.rm = TRUE)) {
+ ind <- which(calls_txt %like% "skim_variable")[1L]
+ vars <- tryCatch(eval(parse(text = ".data$skim_variable"), envir = sys.frame(ind)),
+ error = function(e) NULL)
+ } else {
+ vars <- NULL
+ }
i <- tryCatch(attributes(calls[[length(calls)]])$position,
error = function(e) NULL)
if (is.null(vars) | is.null(i)) {
NA_character_
- } else{
+ } else {
lengths <- sapply(vars, length)
- lengths <- sum(lengths[!names(lengths) == "rsi"])
- var <- vars$rsi[i - lengths]
- if (var == "data") {
+ when_starts_rsi <- which(names(sapply(vars, length)) == "rsi")
+ offset <- sum(lengths[c(1:when_starts_rsi - 1)])
+ var <- vars$rsi[i - offset]
+ if (!isFALSE(var == "data")) {
NA_character_
} else{
ab_name(var)
@@ -770,7 +777,7 @@ get_skimmers.rsi <- function(column) {
sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE)
sfl(
skim_type = "rsi",
- name = name_call,
+ ab_name = name_call,
count_R = count_R,
count_S = count_susceptible,
count_I = count_I,
diff --git a/docs/404.html b/docs/404.html
index fef15802..50ca67e0 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -81,7 +81,7 @@
NEWS.md
-
as.mo()
and all the mo_*
functions, the returned results are chosen based on their matching score using mo_matching_score()
. This matching score \(m\), ranging from 0 to 100%, is calculated as:
+With ambiguous user input in as.mo()
and all the mo_*
functions, the returned results are chosen based on their matching score using mo_matching_score()
. This matching score \(m\), is calculated as:
$$m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \operatorname{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}$$
where:
\(x\) is the user input;
the version number to use for the EUCAST Clinical Breakpoints guideline
the version number to use for the EUCAST Clinical Breakpoints guideline. Currently supported: 10.0.
the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline
the version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Currently supported: 3.1, 3.2.
With ambiguous user input in as.mo()
and all the mo_*
functions, the returned results are chosen based on their matching score using mo_matching_score()
. This matching score \(m\), ranging from 0 to 100%, is calculated as:
With ambiguous user input in as.mo()
and all the mo_*
functions, the returned results are chosen based on their matching score using mo_matching_score()
. This matching score \(m\), is calculated as:
$$m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \operatorname{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}$$
where:
\(x\) is the user input;
as.mo()
and all the mo_*
functions, the returned results are chosen based on their matching score using mo_matching_score()
. This matching score \(m\), ranging from 0 to 100%, is calculated as:
+With ambiguous user input in as.mo()
and all the mo_*
functions, the returned results are chosen based on their matching score using mo_matching_score()
. This matching score \(m\), is calculated as:
$$m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \operatorname{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}$$
where:
\(x\) is the user input;