1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 06:51:48 +02:00

(v0.8.0.9032) fix regex error

This commit is contained in:
2019-11-15 16:50:46 +01:00
parent 09e2730b53
commit 67f3f4387b
11 changed files with 49 additions and 43 deletions

34
R/mo.R
View File

@ -545,7 +545,7 @@ exec_as.mo <- function(x,
x <- gsub("o+", "o+", x)
x <- gsub("(.)\\1+", "\\1+", x)
# allow multiplication of all other consonants
x <- gsub("([bdghjlnrw]+)", "\\1+", x)
x <- gsub("([bdgjlnrw]+)", "\\1+", x)
# allow ending in -en or -us
x <- gsub("e\\+n(?![a-z[])", "(e+n|u+(c|k|q|qu|s|z|x|ks)+)", x, perl = TRUE)
# if the input is longer than 10 characters, allow any forgotten consonant between all characters, as some might just have forgotten one...
@ -555,10 +555,11 @@ exec_as.mo <- function(x,
# allow au and ou after all these regex implementations
x <- gsub("a+[bcdfghjklmnpqrstvwxyz]?u+[bcdfghjklmnpqrstvwxyz]?", "(a+u+|o+u+)[bcdfghjklmnpqrstvwxyz]?", x, fixed = TRUE)
x <- gsub("o+[bcdfghjklmnpqrstvwxyz]?u+[bcdfghjklmnpqrstvwxyz]?", "(a+u+|o+u+)[bcdfghjklmnpqrstvwxyz]?", x, fixed = TRUE)
# make sure to remove regex overkill (will lead to errors)
x <- gsub("++", "+", x, fixed = TRUE)
}
x <- strip_whitespace(x, dyslexia_mode)
# make sure to remove regex overkill (will lead to errors)
x <- gsub("++", "+", x, fixed = TRUE)
x <- gsub("?+", "?", x, fixed = TRUE)
x_trimmed <- x
x_trimmed_species <- paste(x_trimmed, "species")
@ -571,21 +572,22 @@ exec_as.mo <- function(x,
# add start and stop regex
x <- paste0("^", x, "$")
x_withspaces_start_only <- paste0("^", x_withspaces)
x_withspaces_end_only <- paste0(x_withspaces, "$")
x_withspaces_start_end <- paste0("^", x_withspaces, "$")
if (isTRUE(debug)) {
cat(paste0('x "', x, '"\n'))
cat(paste0('x_species "', x_species, '"\n'))
cat(paste0('x_withspaces_start_only "', x_withspaces_start_only, '"\n'))
cat(paste0('x_withspaces_end_only "', x_withspaces_end_only, '"\n'))
cat(paste0('x_withspaces_start_end "', x_withspaces_start_end, '"\n'))
cat(paste0('x_backup "', x_backup, '"\n'))
cat(paste0('x_backup_without_spp "', x_backup_without_spp, '"\n'))
cat(paste0('x_trimmed "', x_trimmed, '"\n'))
cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n'))
cat(paste0('x_trimmed_without_group "', x_trimmed_without_group, '"\n'))
cat(paste0(blue('x'), ' "', x, '"\n'))
cat(paste0(blue('x_species'), ' "', x_species, '"\n'))
cat(paste0(blue('x_withspaces_start_only'), ' "', x_withspaces_start_only, '"\n'))
cat(paste0(blue('x_withspaces_end_only'), ' "', x_withspaces_end_only, '"\n'))
cat(paste0(blue('x_withspaces_start_end'), ' "', x_withspaces_start_end, '"\n'))
cat(paste0(blue('x_backup'), ' "', x_backup, '"\n'))
cat(paste0(blue('x_backup_without_spp'), ' "', x_backup_without_spp, '"\n'))
cat(paste0(blue('x_trimmed'), ' "', x_trimmed, '"\n'))
cat(paste0(blue('x_trimmed_species'), ' "', x_trimmed_species, '"\n'))
cat(paste0(blue('x_trimmed_without_group'), ' "', x_trimmed_without_group, '"\n'))
}
progress <- progress_estimated(n = length(x), min_time = 3)
@ -1590,12 +1592,12 @@ exec_as.mo <- function(x,
if (NROW(uncertainties) > 0 & initial_search == TRUE) {
options(mo_uncertainties = as.list(distinct(uncertainties, input, .keep_all = TRUE)))
plural <- c("", "it")
plural <- c("", "it", "was")
if (NROW(uncertainties) > 1) {
plural <- c("s", "them")
plural <- c("s", "them", "were")
}
msg <- paste0("\nResult", plural[1], " of ", nr2char(NROW(uncertainties)), " value", plural[1],
" was guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".")
" ", plural[3], " guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".")
warning(red(msg),
call. = FALSE,
immediate. = TRUE) # thus will always be shown, even if >= warnings

30
R/zzz.R
View File

@ -49,23 +49,27 @@
#' @importFrom data.table as.data.table setkey
#' @importFrom dplyr %>% mutate case_when
make_DT <- function() {
# Build the microorganisms lookup table: take AMR::microorganisms, add a
# numeric kingdom_index and a cleaned, lower-case fullname_lower column,
# convert the result to a data.table, set its key and return it.
# NOTE(review): this span comes from a rendered diff, so two versions of the
# pipeline and two candidates for the last setkey() column appear back to
# back. The first of each pair looks like the removed side and the second like
# the added side -- confirm against the repository before running this text.
# Version 1: as.data.table() wraps the whole mutate() pipeline.
microorganismsDT <- as.data.table(AMR::microorganisms %>%
# kingdom_index: Bacteria 1, Fungi 2, Protozoa 3, Archaea 4, anything else 99
mutate(kingdom_index = case_when(kingdom == "Bacteria" ~ 1,
kingdom == "Fungi" ~ 2,
kingdom == "Protozoa" ~ 3,
kingdom == "Archaea" ~ 4,
TRUE ~ 99),
# for fullname_lower: keep only dots, letters,
# numbers, slashes, spaces and dashes
fullname_lower = gsub("[^.a-z0-9/ \\-]+", "",
# use this paste instead of `fullname` to
# work with Viridans Group Streptococci, etc.
tolower(trimws(paste(genus, species, subspecies))))))
# Version 2: same kingdom_index mapping, but fullname_lower is built from
# `fullname` when `genus` is an empty string, and the result is piped into
# as.data.table() at the end instead of being wrapped by it.
microorganismsDT <- AMR::microorganisms %>%
mutate(kingdom_index = case_when(kingdom == "Bacteria" ~ 1,
kingdom == "Fungi" ~ 2,
kingdom == "Protozoa" ~ 3,
kingdom == "Archaea" ~ 4,
TRUE ~ 99),
# for fullname_lower: keep only dots, letters,
# numbers, slashes, spaces and dashes
fullname_lower = gsub("[^.a-z0-9/ \\-]+", "",
# use this paste instead of `fullname` to
# work with Viridans Group Streptococci, etc.
tolower(trimws(ifelse(genus == "",
fullname,
paste(genus, species, subspecies)))))) %>%
as.data.table()
# so arrange data on prevalence first, then kingdom, then full name
# (`fullname` and `fullname_lower` below are the two diff candidates for the
# last key column; presumably only `fullname_lower` remains -- TODO confirm)
setkey(microorganismsDT,
prevalence,
kingdom_index,
fullname)
fullname_lower)
# return the keyed table
microorganismsDT
}