mirror of
https://github.com/msberends/AMR.git
synced 2025-07-09 13:42:04 +02:00
(v0.7.1.9073) as.mo() self-learning algorithm
This commit is contained in:
26
R/like.R
26
R/like.R
@ -21,7 +21,7 @@
|
||||
|
||||
#' Pattern Matching
|
||||
#'
|
||||
#' Convenient wrapper around \code{\link[base]{grep}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors.
|
||||
#' Convenient wrapper around \code{\link[base]{grep}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive (use \code{a \%like_case\% b} for case-sensitive matching). Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors, or can both have the same length to iterate over all cases.
|
||||
#' @inheritParams base::grepl
|
||||
#' @return A \code{logical} vector
|
||||
#' @name like
|
||||
@ -53,14 +53,14 @@
|
||||
#' left_join_microorganisms() %>%
|
||||
#' filter(genus %like% '^ent') %>%
|
||||
#' freq(genus, species)
|
||||
like <- function(x, pattern) {
|
||||
like <- function(x, pattern, ignore.case = TRUE) {
|
||||
if (length(pattern) > 1) {
|
||||
if (length(x) != length(pattern)) {
|
||||
if (length(x) == 1) {
|
||||
x <- rep(x, length(pattern))
|
||||
}
|
||||
# return TRUE for every 'x' that matches any 'pattern', FALSE otherwise
|
||||
res <- sapply(pattern, function(pttrn) x %like% pttrn)
|
||||
res <- sapply(pattern, function(pttrn) base::grepl(pttrn, x, ignore.case = ignore.case))
|
||||
res2 <- as.logical(rowSums(res))
|
||||
# get only first item of every hit in pattern
|
||||
res2[duplicated(res)] <- FALSE
|
||||
@ -71,9 +71,9 @@ like <- function(x, pattern) {
|
||||
res <- vector(length = length(pattern))
|
||||
for (i in 1:length(res)) {
|
||||
if (is.factor(x[i])) {
|
||||
res[i] <- as.integer(x[i]) %in% base::grep(pattern[i], levels(x[i]), ignore.case = TRUE)
|
||||
res[i] <- as.integer(x[i]) %in% base::grep(pattern[i], levels(x[i]), ignore.case = ignore.case)
|
||||
} else {
|
||||
res[i] <- base::grepl(pattern[i], x[i], ignore.case = TRUE)
|
||||
res[i] <- base::grepl(pattern[i], x[i], ignore.case = ignore.case)
|
||||
}
|
||||
}
|
||||
return(res)
|
||||
@ -82,16 +82,24 @@ like <- function(x, pattern) {
|
||||
|
||||
# the regular way how grepl works; just one pattern against one or more x
|
||||
if (is.factor(x)) {
|
||||
as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE)
|
||||
as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = ignore.case)
|
||||
} else {
|
||||
tryCatch(base::grepl(pattern, x, ignore.case = TRUE),
|
||||
tryCatch(base::grepl(pattern, x, ignore.case = ignore.case),
|
||||
error = function(e) ifelse(test = grepl("Invalid regexp", e$message),
|
||||
# try with perl = TRUE:
|
||||
yes = return(base::grepl(pattern, x, ignore.case = TRUE, perl = TRUE)),
|
||||
yes = return(base::grepl(pattern, x, ignore.case = ignore.case, perl = TRUE)),
|
||||
no = stop(e$message)))
|
||||
}
|
||||
}
|
||||
|
||||
#' @rdname like
|
||||
#' @export
|
||||
"%like%" <- like
|
||||
"%like%" <- function(x, pattern) {
|
||||
like(x, pattern, ignore.case = TRUE)
|
||||
}
|
||||
|
||||
#' @rdname like
|
||||
#' @export
|
||||
"%like_case%" <- function(x, pattern) {
|
||||
like(x, pattern, ignore.case = FALSE)
|
||||
}
|
||||
|
4
R/misc.R
4
R/misc.R
@ -41,8 +41,8 @@ percent <- function(x, round = 1, force_zero = FALSE, decimal.mark = getOption("
|
||||
big.mark <- " "
|
||||
}
|
||||
}
|
||||
x <- percent_clean(x = x, round = round, force_zero = force_zero,
|
||||
decimal.mark = decimal.mark, big.mark = big.mark, ...)
|
||||
percent_clean(x = x, round = round, force_zero = force_zero,
|
||||
decimal.mark = decimal.mark, big.mark = big.mark, ...)
|
||||
}
|
||||
|
||||
#' @importFrom crayon blue bold red
|
||||
|
238
R/mo_history.R
238
R/mo_history.R
@ -19,116 +19,152 @@
|
||||
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
|
||||
# ==================================================================== #
|
||||
|
||||
# print successful as.mo coercions to AMR environment
|
||||
#' @importFrom dplyr %>% distinct filter
|
||||
set_mo_history <- function(x, mo, uncertainty_level, force = FALSE) {
|
||||
# disable function for now
|
||||
return(base::invisible())
|
||||
mo_history_file <- file.path(file.path(system.file(package = "AMR"), "mo_history"), "mo_history.csv")
|
||||
|
||||
# if (base::interactive() | force == TRUE) {
|
||||
# mo_hist <- read_mo_history(uncertainty_level = uncertainty_level, force = force)
|
||||
# df <- data.frame(x, mo, stringsAsFactors = FALSE) %>%
|
||||
# distinct(x, .keep_all = TRUE) %>%
|
||||
# filter(!is.na(x) & !is.na(mo))
|
||||
# if (nrow(df) == 0) {
|
||||
# return(base::invisible())
|
||||
# }
|
||||
# x <- toupper(df$x)
|
||||
# mo <- df$mo
|
||||
# for (i in 1:length(x)) {
|
||||
# # save package version too, as both the as.mo() algorithm and the reference data set may change
|
||||
# if (NROW(mo_hist[base::which(mo_hist$x == x[i] &
|
||||
# mo_hist$uncertainty_level >= uncertainty_level &
|
||||
# mo_hist$package_v == utils::packageVersion("AMR")),]) == 0) {
|
||||
# tryCatch(
|
||||
# assign(x = "mo_history",
|
||||
# value = rbind(mo_hist,
|
||||
# data.frame(
|
||||
# x = x[i],
|
||||
# mo = mo[i],
|
||||
# uncertainty_level = uncertainty_level,
|
||||
# package_v = base::as.character(utils::packageVersion("AMR")),
|
||||
# stringsAsFactors = FALSE)),
|
||||
# envir = asNamespace("AMR")),
|
||||
# error = function(e) invisible())
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
# return(base::invisible())
|
||||
# print successful as.mo coercions to a options entry
|
||||
#' @importFrom dplyr %>% distinct filter
|
||||
set_mo_history <- function(x, mo, uncertainty_level, force = FALSE, disable = FALSE) {
|
||||
if (isTRUE(disable)) {
|
||||
return(base::invisible())
|
||||
}
|
||||
|
||||
if (base::interactive() | force == TRUE) {
|
||||
mo_hist <- read_mo_history(uncertainty_level = uncertainty_level, force = force)
|
||||
df <- data.frame(x, mo, stringsAsFactors = FALSE) %>%
|
||||
distinct(x, .keep_all = TRUE) %>%
|
||||
filter(!is.na(x) & !is.na(mo))
|
||||
if (nrow(df) == 0) {
|
||||
return(base::invisible())
|
||||
}
|
||||
x <- toupper(df$x)
|
||||
mo <- df$mo
|
||||
for (i in 1:length(x)) {
|
||||
# save package version too, as both the as.mo() algorithm and the reference data set may change
|
||||
if (NROW(mo_hist[base::which(mo_hist$x == x[i] &
|
||||
mo_hist$uncertainty_level >= uncertainty_level &
|
||||
mo_hist$package_v == utils::packageVersion("AMR")),]) == 0) {
|
||||
# # Not using the file system:
|
||||
# tryCatch(options(mo_remembered_results = rbind(mo_hist,
|
||||
# data.frame(
|
||||
# x = x[i],
|
||||
# mo = mo[i],
|
||||
# uncertainty_level = uncertainty_level,
|
||||
# package_v = base::as.character(utils::packageVersion("AMR")),
|
||||
# stringsAsFactors = FALSE))),
|
||||
# error = function(e) base::invisible())
|
||||
# # don't remember more than 1,000 different input values
|
||||
# if (tryCatch(nrow(getOption("mo_remembered_results")), error = function(e) 1001) > 1000) {
|
||||
# return(base::invisible())
|
||||
# }
|
||||
if (is.null(mo_hist)) {
|
||||
message(blue(paste0("NOTE: results are saved to ", mo_history_file, ".")))
|
||||
}
|
||||
tryCatch(write.csv(rbind(mo_hist,
|
||||
data.frame(
|
||||
x = x[i],
|
||||
mo = mo[i],
|
||||
uncertainty_level = uncertainty_level,
|
||||
package_v = base::as.character(utils::packageVersion("AMR")),
|
||||
stringsAsFactors = FALSE)),
|
||||
file = mo_history_file, row.names = FALSE),
|
||||
error = function(e) base::invisible())
|
||||
}
|
||||
}
|
||||
}
|
||||
return(base::invisible())
|
||||
}
|
||||
|
||||
get_mo_history <- function(x, uncertainty_level, force = FALSE) {
|
||||
# disable function for now
|
||||
return(NA)
|
||||
get_mo_history <- function(x, uncertainty_level, force = FALSE, disable = FALSE) {
|
||||
if (isTRUE(disable)) {
|
||||
return(to_class_mo(NA))
|
||||
}
|
||||
|
||||
# history <- read_mo_history(uncertainty_level = uncertainty_level, force = force)
|
||||
# if (base::is.null(history)) {
|
||||
# NA
|
||||
# } else {
|
||||
# data.frame(x = toupper(x), stringsAsFactors = FALSE) %>%
|
||||
# left_join(history, by = "x") %>%
|
||||
# pull(mo)
|
||||
# }
|
||||
history <- read_mo_history(uncertainty_level = uncertainty_level, force = force)
|
||||
if (base::is.null(history)) {
|
||||
result <- NA
|
||||
} else {
|
||||
result <- data.frame(x = toupper(x), stringsAsFactors = FALSE) %>%
|
||||
left_join(history, by = "x") %>%
|
||||
pull(mo)
|
||||
}
|
||||
to_class_mo(result)
|
||||
}
|
||||
|
||||
#' @importFrom dplyr %>% filter distinct
|
||||
read_mo_history <- function(uncertainty_level = 2, force = FALSE, unfiltered = FALSE) {
|
||||
# disable function for now
|
||||
return(NULL)
|
||||
read_mo_history <- function(uncertainty_level = 2, force = FALSE, unfiltered = FALSE, disable = FALSE) {
|
||||
if (isTRUE(disable)) {
|
||||
return(NULL)
|
||||
}
|
||||
|
||||
# if ((!base::interactive() & force == FALSE)) {
|
||||
# return(NULL)
|
||||
# }
|
||||
# uncertainty_level_param <- uncertainty_level
|
||||
#
|
||||
# history <- tryCatch(get("mo_history", envir = asNamespace("AMR")),
|
||||
# error = function(e) NULL)
|
||||
# if (is.null(history)) {
|
||||
# return(NULL)
|
||||
# }
|
||||
# # Below: filter on current package version.
|
||||
# # Even current fullnames may be replaced by new taxonomic names, so new versions of
|
||||
# # the Catalogue of Life must not lead to data corruption.
|
||||
#
|
||||
# if (unfiltered == FALSE) {
|
||||
# history <- history %>%
|
||||
# filter(package_v == as.character(utils::packageVersion("AMR")),
|
||||
# # only take unknowns if uncertainty_level_param is higher
|
||||
# ((mo == "UNKNOWN" & uncertainty_level_param == uncertainty_level) |
|
||||
# (mo != "UNKNOWN" & uncertainty_level_param >= uncertainty_level))) %>%
|
||||
# arrange(desc(uncertainty_level)) %>%
|
||||
# distinct(x, mo, .keep_all = TRUE)
|
||||
# }
|
||||
#
|
||||
# if (nrow(history) == 0) {
|
||||
# NULL
|
||||
# } else {
|
||||
# history
|
||||
# }
|
||||
if ((!base::interactive() & force == FALSE)) {
|
||||
return(NULL)
|
||||
}
|
||||
uncertainty_level_param <- uncertainty_level
|
||||
|
||||
# # Not using the file system:
|
||||
# history <- tryCatch(getOption("mo_remembered_results"),
|
||||
# error = function(e) NULL)
|
||||
history <- tryCatch(read.csv(mo_history_file, stringsAsFactors = FALSE),
|
||||
warning = function(w) invisible(),
|
||||
error = function(e) NULL)
|
||||
if (is.null(history)) {
|
||||
return(NULL)
|
||||
}
|
||||
# Below: filter on current package version.
|
||||
# Even current fullnames may be replaced by new taxonomic names, so new versions of
|
||||
# the Catalogue of Life must not lead to data corruption.
|
||||
|
||||
if (unfiltered == FALSE) {
|
||||
history <- history %>%
|
||||
filter(package_v == as.character(utils::packageVersion("AMR")),
|
||||
# only take unknowns if uncertainty_level_param is higher
|
||||
((mo == "UNKNOWN" & uncertainty_level_param == uncertainty_level) |
|
||||
(mo != "UNKNOWN" & uncertainty_level_param >= uncertainty_level))) %>%
|
||||
arrange(desc(uncertainty_level)) %>%
|
||||
distinct(x, mo, .keep_all = TRUE)
|
||||
}
|
||||
|
||||
if (nrow(history) == 0) {
|
||||
NULL
|
||||
} else {
|
||||
history
|
||||
}
|
||||
}
|
||||
|
||||
# @rdname as.mo
|
||||
# @importFrom crayon red
|
||||
# @importFrom utils menu
|
||||
# @export
|
||||
clean_mo_history <- function(...) {
|
||||
# if (!is.null(read_mo_history())) {
|
||||
# if (interactive() & !isTRUE(list(...)$force)) {
|
||||
# q <- menu(title = paste("This will remove all",
|
||||
# format(nrow(read_mo_history(999, unfiltered = TRUE)), big.mark = ","),
|
||||
# "microbial IDs determined previously in this session. Are you sure?"),
|
||||
# choices = c("Yes", "No"),
|
||||
# graphics = FALSE)
|
||||
# if (q != 1) {
|
||||
# return(invisible())
|
||||
# }
|
||||
# }
|
||||
# tryCatch(
|
||||
# assign(x = "mo_history",
|
||||
# value = NULL,
|
||||
# envir = asNamespace("AMR")),
|
||||
# error = function(e) invisible())
|
||||
# cat(red("History removed."))
|
||||
# }
|
||||
#' @rdname as.mo
|
||||
#' @importFrom crayon red
|
||||
#' @importFrom utils menu
|
||||
#' @export
|
||||
clear_mo_history <- function(...) {
|
||||
if (!is.null(read_mo_history())) {
|
||||
if (interactive() & !isTRUE(list(...)$force)) {
|
||||
q <- menu(title = paste("This will clear all",
|
||||
format(nrow(read_mo_history(999, unfiltered = TRUE)), big.mark = ","),
|
||||
"previously determined microbial IDs. Are you sure?"),
|
||||
choices = c("Yes", "No"),
|
||||
graphics = FALSE)
|
||||
if (q != 1) {
|
||||
return(invisible())
|
||||
}
|
||||
}
|
||||
# # Not using the file system:
|
||||
# success <- tryCatch(options(mo_remembered_results = NULL),
|
||||
# error = function(e) FALSE)
|
||||
success <- create_blank_mo_history()
|
||||
if (!isFALSE(success)) {
|
||||
cat(red(paste("File", mo_history_file, "cleared.")))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
create_blank_mo_history <- function() {
|
||||
tryCatch(
|
||||
write.csv(x = data.frame(x = character(0),
|
||||
mo = character(0),
|
||||
uncertainty_level = integer(0),
|
||||
package_v = character(0),
|
||||
stringsAsFactors = FALSE),
|
||||
file = mo_history_file),
|
||||
warning = function(w) invisible(),
|
||||
error = function(e) TRUE)
|
||||
}
|
||||
|
@ -408,7 +408,7 @@ mo_validate <- function(x, property, ...) {
|
||||
|
||||
if (!"AMR" %in% base::.packages()) {
|
||||
require("AMR")
|
||||
# check onLoad() in R/zzz.R: data tables are created there.
|
||||
# check .onLoad() in R/zzz.R: data tables are created there.
|
||||
}
|
||||
|
||||
# try to catch an error when inputting an invalid parameter
|
||||
|
6
R/zzz.R
6
R/zzz.R
@ -26,7 +26,8 @@
|
||||
|
||||
# register data
|
||||
microorganisms.oldDT <- as.data.table(AMR::microorganisms.old)
|
||||
microorganisms.oldDT$fullname_lower <- tolower(microorganisms.oldDT$fullname)
|
||||
# for fullname_lower: keep only dots, letters, numbers, slashes, spaces and dashes
|
||||
microorganisms.oldDT$fullname_lower <- gsub("[^.a-z0-9/ \\-]+", "", tolower(microorganisms.oldDT$fullname))
|
||||
setkey(microorganisms.oldDT, col_id, fullname)
|
||||
|
||||
assign(x = "microorganismsDT",
|
||||
@ -81,7 +82,8 @@
|
||||
#' @importFrom data.table as.data.table setkey
|
||||
make_DT <- function() {
|
||||
microorganismsDT <- as.data.table(AMR::microorganisms)
|
||||
microorganismsDT$fullname_lower <- tolower(microorganismsDT$fullname)
|
||||
# for fullname_lower: keep only dots, letters, numbers, slashes, spaces and dashes
|
||||
microorganismsDT$fullname_lower <- gsub("[^.a-z0-9/ \\-]+", "", tolower(microorganismsDT$fullname))
|
||||
setkey(microorganismsDT,
|
||||
prevalence,
|
||||
kingdom,
|
||||
|
Reference in New Issue
Block a user