mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 11:11:54 +02:00
(v1.5.0.9021) improve speed of %like%
This commit is contained in:
@ -1077,6 +1077,14 @@ percentage <- function(x, digits = NULL, ...) {
|
||||
digits = digits, ...)
|
||||
}
|
||||
|
||||
time_start_tracking <- function() {
|
||||
pkg_env$time_start <- round(as.numeric(Sys.time()) * 1000)
|
||||
}
|
||||
|
||||
time_track <- function(name = NULL) {
|
||||
paste("(until now:", trimws(round(as.numeric(Sys.time()) * 1000) - pkg_env$time_start), "ms)")
|
||||
}
|
||||
|
||||
# prevent dependency on package 'backports'
|
||||
# these functions were not available in previous versions of R (last checked: R 4.0.3)
|
||||
# see here for the full list: https://github.com/r-lib/backports
|
||||
|
@ -896,7 +896,7 @@ eucast_rules <- function(x,
|
||||
target_value <- eucast_rules_df[i, "to_value", drop = TRUE]
|
||||
|
||||
if (is.na(source_antibiotics)) {
|
||||
rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like_perl% mo_value),
|
||||
rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like% mo_value),
|
||||
error = function(e) integer(0))
|
||||
} else {
|
||||
source_antibiotics <- get_antibiotic_columns(source_antibiotics, x)
|
||||
@ -906,17 +906,17 @@ eucast_rules <- function(x,
|
||||
if (length(source_antibiotics) == 0) {
|
||||
rows <- integer(0)
|
||||
} else if (length(source_antibiotics) == 1) {
|
||||
rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like_perl% mo_value
|
||||
rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like% mo_value
|
||||
& as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]),
|
||||
error = function(e) integer(0))
|
||||
} else if (length(source_antibiotics) == 2) {
|
||||
rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like_perl% mo_value
|
||||
rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like% mo_value
|
||||
& as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]
|
||||
& as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]),
|
||||
error = function(e) integer(0))
|
||||
# nolint start
|
||||
# } else if (length(source_antibiotics) == 3) {
|
||||
# rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like_perl% mo_value
|
||||
# rows <- tryCatch(which(x[, if_mo_property, drop = TRUE] %like% mo_value
|
||||
# & as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]
|
||||
# & as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]
|
||||
# & as.rsi_no_warning(x[, source_antibiotics[3L]]) == source_value[3L]),
|
||||
|
37
R/like.R
37
R/like.R
@ -39,7 +39,7 @@
|
||||
#' * Is case-insensitive (use `%like_case%` for case-sensitive matching)
|
||||
#' * Supports multiple patterns
|
||||
#' * Checks if `pattern` is a regular expression and sets `fixed = TRUE` if not, to greatly improve speed
|
||||
#' * Tries again with `perl = TRUE` if regex fails
|
||||
#' * Always uses compatibility with Perl
|
||||
#'
|
||||
#' Using RStudio? The text `%like%` can also be directly inserted in your code from the Addins menu and can have its own Keyboard Shortcut like `Ctrl+Shift+L` or `Cmd+Shift+L` (see `Tools` > `Modify Keyboard Shortcuts...`).
|
||||
#' @source Idea from the [`like` function from the `data.table` package](https://github.com/Rdatatable/data.table/blob/master/R/like.R)
|
||||
@ -99,7 +99,7 @@ like <- function(x, pattern, ignore.case = TRUE) {
|
||||
if (is.factor(x[i])) {
|
||||
res[i] <- as.integer(x[i]) %in% grep(pattern[i], levels(x[i]), ignore.case = FALSE, fixed = fixed)
|
||||
} else {
|
||||
res[i] <- grepl(pattern[i], x[i], ignore.case = FALSE, fixed = fixed)
|
||||
res[i] <- grepl(pattern[i], x[i], ignore.case = FALSE, fixed = fixed, perl = !fixed)
|
||||
}
|
||||
}
|
||||
res <- vapply(FUN.VALUE = logical(1), pattern, function(pttrn) grepl(pttrn, x, ignore.case = FALSE, fixed = fixed))
|
||||
@ -112,9 +112,9 @@ like <- function(x, pattern, ignore.case = TRUE) {
|
||||
# x and pattern are of same length, so items with each other
|
||||
for (i in seq_len(length(res))) {
|
||||
if (is.factor(x[i])) {
|
||||
res[i] <- as.integer(x[i]) %in% grep(pattern[i], levels(x[i]), ignore.case = FALSE, fixed = fixed)
|
||||
res[i] <- as.integer(x[i]) %in% grep(pattern[i], levels(x[i]), ignore.case = FALSE, fixed = fixed, perl = !fixed)
|
||||
} else {
|
||||
res[i] <- grepl(pattern[i], x[i], ignore.case = FALSE, fixed = fixed)
|
||||
res[i] <- grepl(pattern[i], x[i], ignore.case = FALSE, fixed = fixed, perl = !fixed)
|
||||
}
|
||||
}
|
||||
return(res)
|
||||
@ -123,22 +123,9 @@ like <- function(x, pattern, ignore.case = TRUE) {
|
||||
|
||||
# the regular way how grepl works; just one pattern against one or more x
|
||||
if (is.factor(x)) {
|
||||
as.integer(x) %in% grep(pattern, levels(x), ignore.case = FALSE, fixed = fixed)
|
||||
as.integer(x) %in% grep(pattern, levels(x), ignore.case = FALSE, fixed = fixed, perl = !fixed)
|
||||
} else {
|
||||
tryCatch(grepl(pattern, x, ignore.case = FALSE, fixed = fixed),
|
||||
error = function(e) {
|
||||
if (grepl("invalid reg(ular )?exp", e$message, ignore.case = TRUE)) {
|
||||
# try with perl = TRUE:
|
||||
return(grepl(pattern = pattern,
|
||||
x = x,
|
||||
ignore.case = FALSE,
|
||||
fixed = fixed,
|
||||
perl = TRUE))
|
||||
} else {
|
||||
# stop otherwise
|
||||
stop(e$message)
|
||||
}
|
||||
})
|
||||
grepl(pattern, x, ignore.case = FALSE, fixed = fixed, perl = !fixed)
|
||||
}
|
||||
}
|
||||
|
||||
@ -157,15 +144,3 @@ like <- function(x, pattern, ignore.case = TRUE) {
|
||||
meet_criteria(pattern, allow_NA = FALSE)
|
||||
like(x, pattern, ignore.case = FALSE)
|
||||
}
|
||||
|
||||
"%like_perl%" <- function(x, pattern) {
|
||||
meet_criteria(x, allow_NA = TRUE)
|
||||
meet_criteria(pattern, allow_NA = FALSE)
|
||||
# convenient for e.g. matching all Klebsiella and Raoultella, but not
|
||||
# K. aerogenes: fullname %like_perl% "^(Klebsiella(?! aerogenes)|Raoultella)"
|
||||
grepl(x = tolower(x),
|
||||
pattern = tolower(pattern),
|
||||
perl = TRUE,
|
||||
fixed = FALSE,
|
||||
ignore.case = TRUE)
|
||||
}
|
||||
|
38
R/mo.R
38
R/mo.R
@ -277,6 +277,10 @@ exec_as.mo <- function(x,
|
||||
|
||||
check_dataset_integrity()
|
||||
|
||||
if (isTRUE(debug) && initial_search == TRUE) {
|
||||
time_start_tracking()
|
||||
}
|
||||
|
||||
lookup <- function(needle,
|
||||
column = property,
|
||||
haystack = reference_data_to_use,
|
||||
@ -295,7 +299,8 @@ exec_as.mo <- function(x,
|
||||
# `column` can be NULL for all columns, or a selection
|
||||
# returns a character (vector) - if `column` > length 1 then with columns as names
|
||||
if (isTRUE(debug_mode)) {
|
||||
cat(font_silver("looking up: ", substitute(needle), collapse = ""))
|
||||
cat(font_silver("Looking up: ", substitute(needle), collapse = ""),
|
||||
"\n ", time_track())
|
||||
}
|
||||
if (length(column) == 1) {
|
||||
res_df <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), , drop = FALSE]
|
||||
@ -313,7 +318,7 @@ exec_as.mo <- function(x,
|
||||
NA_character_
|
||||
} else {
|
||||
if (isTRUE(debug_mode)) {
|
||||
cat(font_green(paste0(" **MATCH** (", NROW(res_df), " results)\n")))
|
||||
cat(font_green(paste0(" MATCH (", NROW(res_df), " results)\n")))
|
||||
}
|
||||
if ((length(res) > n | uncertainty > 1) & uncertainty != -1) {
|
||||
# save the other possible results as well, but not for forced certain results (then uncertainty == -1)
|
||||
@ -327,16 +332,20 @@ exec_as.mo <- function(x,
|
||||
res[seq_len(min(n, length(res)))]
|
||||
}
|
||||
} else {
|
||||
if (isTRUE(debug_mode)) {
|
||||
cat("\n")
|
||||
}
|
||||
if (is.null(column)) {
|
||||
column <- names(haystack)
|
||||
}
|
||||
res <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), , drop = FALSE]
|
||||
res <- res[seq_len(min(n, nrow(res))), column, drop = TRUE]
|
||||
if (NROW(res) == 0) {
|
||||
if (isTRUE(debug_mode)) {
|
||||
cat(font_red(" (no rows)\n"))
|
||||
}
|
||||
res <- rep(NA_character_, length(column))
|
||||
} else {
|
||||
if (isTRUE(debug_mode)) {
|
||||
cat(font_green(paste0(" MATCH (", NROW(res), " rows)\n")))
|
||||
}
|
||||
}
|
||||
res <- as.character(res)
|
||||
names(res) <- column
|
||||
@ -394,7 +403,7 @@ exec_as.mo <- function(x,
|
||||
check_validity_mo_source(reference_df)
|
||||
reference_df <- repair_reference_df(reference_df)
|
||||
}
|
||||
|
||||
|
||||
# all empty
|
||||
if (all(identical(trimws(x_input), "") | is.na(x_input) | length(x) == 0)) {
|
||||
if (property == "mo") {
|
||||
@ -471,10 +480,10 @@ exec_as.mo <- function(x,
|
||||
x_backup[x_backup_untouched == "Fungi"] <- "Fungi" # is literally the kingdom
|
||||
|
||||
# Fill in fullnames and MO codes at once
|
||||
known_names <- x_backup %in% MO_lookup$fullname
|
||||
x[known_names] <- MO_lookup[match(x_backup[known_names], MO_lookup$fullname), property, drop = TRUE]
|
||||
known_codes <- x_backup %in% MO_lookup$mo
|
||||
x[known_codes] <- MO_lookup[match(x_backup[known_codes], MO_lookup$mo), property, drop = TRUE]
|
||||
known_names <- tolower(x_backup) %in% MO_lookup$fullname_lower
|
||||
x[known_names] <- MO_lookup[match(tolower(x_backup)[known_names], MO_lookup$fullname), property, drop = TRUE]
|
||||
known_codes <- toupper(x_backup) %in% MO_lookup$mo
|
||||
x[known_codes] <- MO_lookup[match(toupper(x_backup)[known_codes], MO_lookup$mo), property, drop = TRUE]
|
||||
already_known <- known_names | known_codes
|
||||
|
||||
# now only continue where the right taxonomic output is not already known
|
||||
@ -975,6 +984,7 @@ exec_as.mo <- function(x,
|
||||
g.x_backup_without_spp %pm>% substr(1, x_length / 2),
|
||||
".* ",
|
||||
g.x_backup_without_spp %pm>% substr((x_length / 2) + 1, x_length))
|
||||
print(x_split)
|
||||
found <- lookup(fullname_lower %like_case% x_split,
|
||||
haystack = data_to_check)
|
||||
if (!is.na(found)) {
|
||||
@ -1414,6 +1424,10 @@ exec_as.mo <- function(x,
|
||||
close(progress)
|
||||
}
|
||||
|
||||
if (isTRUE(debug) && initial_search == TRUE) {
|
||||
cat("Ended search", time_track(), "\n")
|
||||
}
|
||||
|
||||
|
||||
# handling failures ----
|
||||
failures <- failures[!failures %in% c(NA, NULL, NaN)]
|
||||
@ -1571,6 +1585,10 @@ exec_as.mo <- function(x,
|
||||
}
|
||||
}
|
||||
|
||||
if (isTRUE(debug) && initial_search == TRUE) {
|
||||
cat("Finished function", time_track(), "\n")
|
||||
}
|
||||
|
||||
x
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user