1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 19:41:49 +02:00

(v1.3.0.9016) mo_uncertainties() overhaul

This commit is contained in:
2020-09-12 08:49:01 +02:00
parent 68e9cb78e9
commit 3ff871afeb
71 changed files with 820 additions and 169 deletions

View File

@ -492,7 +492,8 @@ create_pillar_column <- function(x, ...) {
}
}
# copied from vctrs::s3_register by their permission
# copied from vctrs::s3_register by their permission:
# https://github.com/r-lib/vctrs/blob/05968ce8e669f73213e3e894b5f4424af4f46316/R/register-s3.R
s3_register <- function(generic, class, method = NULL) {
stopifnot(is.character(generic), length(generic) == 1)
stopifnot(is.character(class), length(class) == 1)

13
R/amr.R
View File

@ -68,3 +68,16 @@
#' @name AMR
#' @rdname AMR
NULL
#' Plotting for classes `rsi` and `disk`
#'
#' Functions to print classes of the `AMR` package.
#' @inheritSection lifecycle Stable lifecycle
#' @inheritSection AMR Read more on our website!
#' @param ... Parameters passed on to functions
#' @inheritParams base::plot
#' @inheritParams graphics::barplot
#' @name plot
#' @rdname plot
#' @keywords internal
NULL

View File

@ -194,7 +194,7 @@ catalogue_of_life <- list(
#' Data set with `r format(nrow(WHONET), big.mark = ",")` isolates - WHONET example
#'
#' This example data set has the exact same structure as an export file from WHONET. Such files can be used with this package, as this example data set shows. The antibiotic results are based on our [example_isolates] data set. All patient names are created using online surname generators and are only in place for practice purposes.
#' This example data set has the exact same structure as an export file from WHONET. Such files can be used with this package, as this example data set shows. The antibiotic results are from our [example_isolates] data set. All patient names are created using online surname generators and are only in place for practice purposes.
#' @format A [`data.frame`] with `r format(nrow(WHONET), big.mark = ",")` observations and `r ncol(WHONET)` variables:
#' - `Identification number`\cr ID of the sample
#' - `Specimen number`\cr ID of the specimen

View File

@ -210,7 +210,7 @@ summary.mic <- function(object, ...) {
#' @method plot mic
#' @export
#' @importFrom graphics barplot axis par
#' @noRd
#' @rdname plot
plot.mic <- function(x,
main = paste("MIC values of", deparse(substitute(x))),
ylab = "Frequency",
@ -229,7 +229,7 @@ plot.mic <- function(x,
#' @method barplot mic
#' @export
#' @importFrom graphics barplot axis
#' @noRd
#' @rdname plot
barplot.mic <- function(height,
main = paste("MIC values of", deparse(substitute(height))),
ylab = "Frequency",

155
R/mo.R
View File

@ -86,7 +86,7 @@
#' - `"Fluoroquinolone-resistant Neisseria gonorrhoeae"`. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result *Neisseria gonorrhoeae* (``r as.mo("Neisseria gonorrhoeae")``) needs review.
#'
#' There are three helper functions that can be run after using the [as.mo()] function:
#' - Use [mo_uncertainties()] to get a [`data.frame`] with all values that were coerced to a valid value, but with uncertainty. The output contains a score, that is calculated as \eqn{(n - 0.5 * L) / n}, where *n* is the number of characters of the full taxonomic name of the microorganism, and *L* is the [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) between that full name and the user input.
#' - Use [mo_uncertainties()] to get a [`data.frame`] that prints in a pretty format with all taxonomic names that were guessed. The output contains a score that is based on the human pathogenic prevalence and the [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) between the full taxonomic name and the user input.
#' - Use [mo_failures()] to get a [`character`] [`vector`] with all values that could not be coerced to a valid value.
#' - Use [mo_renamed()] to get a [`data.frame`] with all values that could be coerced based on old, previously accepted taxonomic names.
#'
@ -178,6 +178,14 @@ as.mo <- function(x,
...) {
check_dataset_integrity()
if (tryCatch(all(x %in% MO_lookup$mo, na.rm = TRUE)
& isFALSE(Becker)
& isFALSE(Lancefield), error = function(e) FALSE)) {
# don't look into valid MO codes, just return them
# is.mo() won't work - codes might change between package versions
return(to_class_mo(x))
}
if (tryCatch(all(tolower(x) %in% MO_lookup$fullname_lower, na.rm = TRUE)
& isFALSE(Becker)
@ -273,36 +281,7 @@ exec_as.mo <- function(x,
reference_data_to_use = MO_lookup) {
check_dataset_integrity()
lookup <- function(needle, column = property, haystack = reference_data_to_use, n = 1, debug_mode = debug) {
# `column` can be NULL for all columns, or a selection
# returns a character (vector) - if `column` > length 1 then with columns as names
if (isTRUE(debug_mode)) {
cat(font_silver("looking up: ", substitute(needle), "\n", collapse = ""))
}
if (length(column) == 1) {
res <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), column, drop = TRUE]
res <- as.character(res)
if (length(res) == 0) {
NA_character_
} else {
res[seq_len(min(n, length(res)))]
}
} else {
if (is.null(column)) {
column <- names(haystack)
}
res <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), , drop = FALSE]
res <- res[seq_len(min(n, nrow(res))), column, drop = TRUE]
if (NROW(res) == 0) {
res <- rep(NA_character_, length(column))
}
res <- as.character(res)
names(res) <- column
res
}
}
# start off with replaced language-specific non-ASCII characters with ASCII characters
x <- parse_and_convert(x)
# replace mo codes used in older package versions
@ -323,14 +302,15 @@ exec_as.mo <- function(x,
}
options(mo_renamed_last_run = NULL)
failures <- character(0)
uncertainty_level <- translate_allow_uncertain(allow_uncertain)
uncertainties <- data.frame(uncertainty = integer(0),
input = character(0),
fullname = character(0),
renamed_to = character(0),
mo = character(0),
mo = character(0),
candidates = character(0),
stringsAsFactors = FALSE)
failures <- character(0)
uncertainty_level <- translate_allow_uncertain(allow_uncertain)
old_mo_warning <- FALSE
x_input <- x
@ -403,6 +383,43 @@ exec_as.mo <- function(x,
} else if (!all(x %in% microorganisms[, property])) {
lookup <- function(needle, column = property, haystack = reference_data_to_use, n = 1, debug_mode = debug, input = "") {
# `column` can be NULL for all columns, or a selection
# returns a character (vector) - if `column` > length 1 then with columns as names
if (isTRUE(debug_mode)) {
cat(font_silver("looking up: ", substitute(needle), "\n", collapse = ""))
}
if (length(column) == 1) {
res_df <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), , drop = FALSE]
res <- as.character(res_df[, column, drop = TRUE])
if (length(res) == 0) {
NA_character_
} else {
if (length(res) > n) {
# save the other possible results as well
uncertainties <<- rbind(uncertainties,
format_uncertainty_as_df(uncertainty_level = 1,
input = x_backup[i],
result_mo = res_df[1, "mo", drop = TRUE],
candidates = as.character(res_df[, "fullname", drop = TRUE])))
}
res[seq_len(min(n, length(res)))]
}
} else {
if (is.null(column)) {
column <- names(haystack)
}
res <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), , drop = FALSE]
res <- res[seq_len(min(n, nrow(res))), column, drop = TRUE]
if (NROW(res) == 0) {
res <- rep(NA_character_, length(column))
}
res <- as.character(res)
names(res) <- column
res
}
}
strip_whitespace <- function(x, dyslexia_mode) {
# all whitespaces (tab, new lines, etc.) should be one space
# and spaces before and after should be omitted
@ -1387,9 +1404,7 @@ exec_as.mo <- function(x,
}
msg <- paste0("Result", plural[1], " of ", nr2char(NROW(uncertainties)), " value", plural[1],
" ", plural[3], " guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".")
warning(font_red(paste0("\n", msg)),
call. = FALSE,
immediate. = TRUE) # thus will always be shown, even if >= warnings
message(font_blue(msg))
}
# Becker ----
@ -1514,25 +1529,25 @@ was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "")
format_uncertainty_as_df <- function(uncertainty_level,
input,
result_mo) {
result_mo,
candidates = NULL) {
if (!is.null(getOption("mo_renamed_last_run", default = NULL))) {
# was found as a renamed mo
df <- data.frame(uncertainty = uncertainty_level,
input = input,
fullname = getOption("mo_renamed_last_run"),
renamed_to = MO_lookup[which(MO_lookup$mo == result_mo), "fullname"][1],
mo = result_mo,
stringsAsFactors = FALSE)
fullname <- getOption("mo_renamed_last_run")
options(mo_renamed_last_run = NULL)
renamed_to <- MO_lookup[match(result_mo, MO_lookup$mo), "fullname", drop = TRUE][1]
} else {
df <- data.frame(uncertainty = uncertainty_level,
input = input,
fullname = MO_lookup[which(MO_lookup$mo == result_mo), "fullname"][1],
renamed_to = NA_character_,
mo = result_mo,
stringsAsFactors = FALSE)
fullname <- MO_lookup[match(result_mo, MO_lookup$mo), "fullname", drop = TRUE][1]
renamed_to <- NA_character_
}
df
data.frame(uncertainty = uncertainty_level,
input = input,
fullname = fullname,
renamed_to = renamed_to,
mo = result_mo,
# save max 25 entries
candidates = if (length(candidates) > 1) paste(candidates[c(2:min(25, length(candidates)))], collapse = ", ") else "",
stringsAsFactors = FALSE)
}
# will be exported using s3_register() in R/zzz.R
@ -1714,13 +1729,27 @@ print.mo_uncertainties <- function(x, ...) {
colour1 <- font_red
colour2 <- function(...) font_red_bg(font_white(...))
}
if (x[i, "candidates"] != "") {
candidates <- unlist(strsplit(x[i, "candidates"], ", ", fixed = TRUE))
scores <- finding_score(x[i, "input"], candidates)
# sort on descending scores
candidates <- candidates[order(1 - scores)]
candidates <- paste0(font_italic(candidates, collapse = NULL),
" (", trimws(percentage(scores[order(1 - scores)], digits = 1)), ")")
candidates <- paste(candidates, collapse = ", ")
# align with input after arrow
candidates <- paste0("\n", strrep(" ", nchar(x[i, "input"]) + 12), "Other: ", candidates)
} else {
candidates <- ""
}
msg <- paste(msg,
paste0(colour2(paste0(" [", x[i, "uncertainty"], "] ")), ' "', x[i, "input"], '" -> ',
colour1(paste0(font_italic(x[i, "fullname"]),
ifelse(!is.na(x[i, "renamed_to"]), paste(", renamed to", font_italic(x[i, "renamed_to"])), ""),
" (", x[i, "mo"],
", score: ", percentage(levenshtein_fraction(x[i, "input"], x[i, "fullname"]), digits = 1),
")"))),
", score: ", trimws(percentage(finding_score(x[i, "input"], x[i, "fullname"]), digits = 1)),
")")),
candidates),
sep = "\n")
}
cat(msg)
@ -1729,7 +1758,7 @@ print.mo_uncertainties <- function(x, ...) {
#' @rdname as.mo
#' @export
mo_renamed <- function() {
items <- getOption("mo_renamed")
items <- getOption("mo_renamed", default = NULL)
if (is.null(items)) {
items <- data.frame()
} else {
@ -1805,15 +1834,25 @@ load_mo_failures_uncertainties_renamed <- function(metadata) {
options("mo_renamed" = metadata$renamed)
}
levenshtein_fraction <- function(input, output) {
finding_score <- function(input, output) {
# output is always a valid fullname
levenshtein <- double(length = length(input))
if (length(output) == 1) {
output <- rep(output, length(input))
}
if (length(input) == 1) {
input <- rep(input, length(output))
}
for (i in seq_len(length(input))) {
# determine Levenshtein distance, but maximise to nchar of output
levenshtein[i] <- min(as.double(utils::adist(input[i], output[i], ignore.case = TRUE)),
nchar(output[i]))
nchar(output[i]))
}
# self-made score between 0 and 1 (for % certainty, so 0 means huge distance, 1 means no distance)
(nchar(output) - 0.5 * levenshtein) / nchar(output)
dist <- (nchar(output) - 0.5 * levenshtein) / nchar(output)
index_in_MO_lookup <- tryCatch((nrow(MO_lookup) - match(output, MO_lookup$fullname)) / nrow(MO_lookup),
error = function(e) rep(1, length(output)))
dist * index_in_MO_lookup
}
trimws2 <- function(x) {

View File

@ -755,7 +755,7 @@ summary.rsi <- function(object, ...) {
#' @method plot rsi
#' @export
#' @importFrom graphics text axis
#' @noRd
#' @rdname plot
plot.rsi <- function(x,
lwd = 2,
ylim = NULL,
@ -812,7 +812,7 @@ plot.rsi <- function(x,
#' @method barplot rsi
#' @export
#' @importFrom graphics barplot axis par
#' @noRd
#' @rdname plot
barplot.rsi <- function(height,
col = c("chartreuse4", "chartreuse3", "brown3"),
xlab = ifelse(beside, "Antimicrobial Interpretation", ""),

View File

@ -63,13 +63,18 @@
#' mo_name("CoNS", language = "pt")
#' #> "Staphylococcus coagulase negativo (CoNS)"
get_locale <- function() {
# AMR versions prior to 1.3.0 used the environmental variable:
if (!identical("", Sys.getenv("AMR_locale"))) {
options(AMR_locale = Sys.getenv("AMR_locale"))
}
if (!is.null(getOption("AMR_locale", default = NULL))) {
if (!language %in% LANGUAGES_SUPPORTED) {
stop_("unsupported language: '", language, "' - use one of: ",
paste0("'", LANGUAGES_SUPPORTED, "'", collapse = ", "),
call = FALSE)
lang <- getOption("AMR_locale")
if (lang %in% LANGUAGES_SUPPORTED) {
return(lang)
} else {
return(getOption("AMR_locale"))
stop_("unsupported language: '", lang, "' - use one of: ",
paste0("'", LANGUAGES_SUPPORTED, "'", collapse = ", "))
}
}

View File

@ -32,8 +32,9 @@
value = sort(c("en", unique(translations_file$lang))),
envir = asNamespace("AMR"))
# support for tibble headers (type_sum) and tibble columns content (pillar_shaft)
# without the need to depend on other packages
# support for tibble headers (type_sum) and tibble columns content (pillar_shaft) without the need to depend on other packages
# this was suggested by the developers of the vctrs package:
# https://github.com/r-lib/vctrs/blob/05968ce8e669f73213e3e894b5f4424af4f46316/R/register-s3.R
s3_register("pillar::pillar_shaft", "ab")
s3_register("tibble::type_sum", "ab")
s3_register("pillar::pillar_shaft", "mo")