mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 11:51:59 +02:00
(v1.6.0.9003) like() fix
This commit is contained in:
@ -211,10 +211,20 @@ search_type_in_df <- function(x, type, info = TRUE) {
|
||||
found
|
||||
}
|
||||
|
||||
is_possibly_regex <- function(x) {
|
||||
tryCatch(vapply(FUN.VALUE = character(1), strsplit(x, ""),
|
||||
function(y) any(y %in% c("$", "(", ")", "*", "+", "-", ".", "?", "[", "]", "^", "{", "|", "}", "\\"), na.rm = TRUE)),
|
||||
error = function(e) rep(TRUE, length(x)))
|
||||
# Decide, element by element, whether `x` should be treated as a regular
# expression rather than as a fixed string.
#
# An element qualifies only if BOTH hold:
#   1. it contains at least one regex metacharacter, and
#   2. it compiles as a regular expression without raising an error.
#
# x: vector of candidate patterns (normally character).
# Returns: logical vector with one value per element of `x`; TRUE means
#   "use as regex", FALSE means "safe to match with fixed = TRUE".
is_valid_regex <- function(x) {
  regex_chars <- c("$", "(", ")", "*", "+", "-",
                   ".", "?", "[", "]", "^", "{",
                   "|", "}", "\\")
  # strsplit() fails on non-character input; in that case stay on the safe
  # side and assume every element might be a regex
  regex_at_all <- tryCatch(vapply(FUN.VALUE = logical(1),
                                  X = strsplit(x, ""),
                                  FUN = function(y) any(y %in% regex_chars,
                                                        na.rm = TRUE),
                                  USE.NAMES = FALSE),
                           error = function(e) rep(TRUE, length(x)))
  # test-compile every element of `x` itself (not a hard-coded sample), using
  # the Perl-compatible engine since that is what like() uses for non-fixed
  # matching; warnings that accompany a compile failure are silenced because
  # only the error matters here
  regex_valid <- vapply(FUN.VALUE = logical(1),
                        X = x,
                        FUN = function(y) {
                          tryCatch({
                            suppressWarnings(grepl(y, "", perl = TRUE))
                            TRUE
                          },
                          error = function(e) FALSE)
                        },
                        USE.NAMES = FALSE)
  regex_at_all & regex_valid
}
|
||||
|
||||
stop_ifnot_installed <- function(package) {
|
||||
|
21
R/like.R
21
R/like.R
@ -28,21 +28,21 @@
|
||||
#' Convenient wrapper around [grepl()] to match a pattern: `x %like% pattern`. It always returns a [`logical`] vector and is always case-insensitive (use `x %like_case% pattern` for case-sensitive matching). Also, `pattern` can be as long as `x` to compare items of each index in both vectors, or they both can have the same length to iterate over all cases.
|
||||
#' @inheritSection lifecycle Stable Lifecycle
|
||||
#' @param x a character vector where matches are sought, or an object which can be coerced by [as.character()] to a character vector.
|
||||
#' @param pattern a character string containing a regular expression (or [character] string for `fixed = TRUE`) to be matched in the given character vector. Coerced by [as.character()] to a character string if possible. If a [character] vector of length 2 or more is supplied, the first element is used with a warning.
|
||||
#' @param pattern a character vector containing regular expressions (or a [character] string for `fixed = TRUE`) to be matched in the given character vector. Coerced by [as.character()] to a character string if possible.
|
||||
#' @param ignore.case if `FALSE`, the pattern matching is *case sensitive* and if `TRUE`, case is ignored during matching.
|
||||
#' @return A [logical] vector
|
||||
#' @name like
|
||||
#' @rdname like
|
||||
#' @export
|
||||
#' @details
|
||||
#' The `%like%` function:
|
||||
#' This `%like%` function:
|
||||
#' * Is case-insensitive (use `%like_case%` for case-sensitive matching)
|
||||
#' * Supports multiple patterns
|
||||
#' * Checks if `pattern` is a regular expression and sets `fixed = TRUE` if not, to greatly improve speed
|
||||
#' * Checks if `pattern` is a valid regular expression and sets `fixed = TRUE` if not, to greatly improve speed (vectorised over `pattern`)
|
||||
#' * Always uses compatibility with Perl unless `fixed = TRUE`, to greatly improve speed
|
||||
#'
|
||||
#' Using RStudio? The text `%like%` can also be directly inserted in your code from the Addins menu and can have its own Keyboard Shortcut like `Ctrl+Shift+L` or `Cmd+Shift+L` (see `Tools` > `Modify Keyboard Shortcuts...`).
|
||||
#' @source Idea from the [`like` function from the `data.table` package](https://github.com/Rdatatable/data.table/blob/ec1259af1bf13fc0c96a1d3f9e84d55d8106a9a4/R/like.R)
|
||||
#' @source Idea from the [`like` function from the `data.table` package](https://github.com/Rdatatable/data.table/blob/ec1259af1bf13fc0c96a1d3f9e84d55d8106a9a4/R/like.R), although altered as explained in *Details*.
|
||||
#' @seealso [grepl()]
|
||||
#' @inheritSection AMR Read more on Our Website!
|
||||
#' @examples
|
||||
@ -79,9 +79,10 @@ like <- function(x, pattern, ignore.case = TRUE) {
|
||||
if (all(is.na(x))) {
|
||||
return(rep(FALSE, length(x)))
|
||||
}
|
||||
|
||||
# set to fixed if no regex found
|
||||
fixed <- !any(is_possibly_regex(pattern))
|
||||
|
||||
# set to fixed if no valid regex (vectorised)
|
||||
fixed <- !is_valid_regex(pattern)
|
||||
|
||||
if (ignore.case == TRUE) {
|
||||
# set here, otherwise if fixed = TRUE, this warning will be thrown: argument `ignore.case = TRUE` will be ignored
|
||||
x <- tolower(x)
|
||||
@ -91,7 +92,7 @@ like <- function(x, pattern, ignore.case = TRUE) {
|
||||
if (is.factor(x)) {
|
||||
x <- as.character(x)
|
||||
}
|
||||
|
||||
|
||||
if (length(pattern) == 1) {
|
||||
grepl(pattern, x, ignore.case = FALSE, fixed = fixed, perl = !fixed)
|
||||
} else {
|
||||
@ -105,7 +106,9 @@ like <- function(x, pattern, ignore.case = TRUE) {
|
||||
mapply(FUN = grepl,
|
||||
x = x,
|
||||
pattern = pattern,
|
||||
MoreArgs = list(ignore.case = FALSE, fixed = fixed, perl = !fixed),
|
||||
fixed = fixed,
|
||||
perl = !fixed,
|
||||
MoreArgs = list(ignore.case = FALSE),
|
||||
SIMPLIFY = FALSE,
|
||||
USE.NAMES = FALSE)
|
||||
)
|
||||
|
@ -44,7 +44,7 @@
|
||||
#' * \ifelse{html}{\out{<i>p<sub>n</sub></i> is the human pathogenic prevalence group of <i>n</i>, as described below;}}{p_n is the human pathogenic prevalence group of \eqn{n}, as described below;}
|
||||
#' * \ifelse{html}{\out{<i>k<sub>n</sub></i> is the taxonomic kingdom of <i>n</i>, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.}}{k_n is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.}
|
||||
#'
|
||||
#' The grouping into human pathogenic prevalence (\eqn{p}) is based on experience from several microbiological laboratories in the Netherlands in conjunction with international reports on pathogen prevalence. **Group 1** (most prevalent microorganisms) consists of all microorganisms where the taxonomic class is Gammaproteobacteria or where the taxonomic genus is *Enterococcus*, *Staphylococcus* or *Streptococcus*. This group consequently contains all common Gram-negative bacteria, such as *Pseudomonas* and *Legionella* and all species within the order Enterobacterales. **Group 2** consists of all microorganisms where the taxonomic phylum is Proteobacteria, Firmicutes, Actinobacteria or Sarcomastigophora, or where the taxonomic genus is *Absidia*, *Acremonium*, *Actinotignum*, *Alternaria*, *Anaerosalibacter*, *Apophysomyces*, *Arachnia*, *Aspergillus*, *Aureobacterium*, *Aureobasidium*, *Bacteroides*, *Basidiobolus*, *Beauveria*, *Blastocystis*, *Branhamella*, *Calymmatobacterium*, *Candida*, *Capnocytophaga*, *Catabacter*, *Chaetomium*, *Chryseobacterium*, *Chryseomonas*, *Chrysonilia*, *Cladophialophora*, *Cladosporium*, *Conidiobolus*, *Cryptococcus*, *Curvularia*, *Exophiala*, *Exserohilum*, *Flavobacterium*, *Fonsecaea*, *Fusarium*, *Fusobacterium*, *Hendersonula*, *Hypomyces*, *Koserella*, *Lelliottia*, *Leptosphaeria*, *Leptotrichia*, *Malassezia*, *Malbranchea*, *Mortierella*, *Mucor*, *Mycocentrospora*, *Mycoplasma*, *Nectria*, *Ochroconis*, *Oidiodendron*, *Phoma*, *Piedraia*, *Pithomyces*, *Pityrosporum*, *Prevotella*, *Pseudallescheria*, *Rhizomucor*, *Rhizopus*, *Rhodotorula*, *Scolecobasidium*, *Scopulariopsis*, *Scytalidium*,*Sporobolomyces*, *Stachybotrys*, *Stomatococcus*, *Treponema*, *Trichoderma*, *Trichophyton*, *Trichosporon*, *Tritirachium* or *Ureaplasma*. **Group 3** consists of all other microorganisms.
|
||||
#' The grouping into human pathogenic prevalence (\eqn{p}) is based on experience from several microbiological laboratories in the Netherlands in conjunction with international reports on pathogen prevalence. **Group 1** (most prevalent microorganisms) consists of all microorganisms where the taxonomic class is Gammaproteobacteria or where the taxonomic genus is *Enterococcus*, *Staphylococcus* or *Streptococcus*. This group consequently contains all common Gram-negative bacteria, such as *Pseudomonas* and *Legionella* and all species within the order Enterobacterales. **Group 2** consists of all microorganisms where the taxonomic phylum is Proteobacteria, Firmicutes, Actinobacteria or Sarcomastigophora, or where the taxonomic genus is *Absidia*, *Acremonium*, *Actinotignum*, *Alternaria*, *Anaerosalibacter*, *Apophysomyces*, *Arachnia*, *Aspergillus*, *Aureobacterium*, *Aureobasidium*, *Bacteroides*, *Basidiobolus*, *Beauveria*, *Blastocystis*, *Branhamella*, *Calymmatobacterium*, *Candida*, *Capnocytophaga*, *Catabacter*, *Chaetomium*, *Chryseobacterium*, *Chryseomonas*, *Chrysonilia*, *Cladophialophora*, *Cladosporium*, *Conidiobolus*, *Cryptococcus*, *Curvularia*, *Exophiala*, *Exserohilum*, *Flavobacterium*, *Fonsecaea*, *Fusarium*, *Fusobacterium*, *Hendersonula*, *Hypomyces*, *Koserella*, *Lelliottia*, *Leptosphaeria*, *Leptotrichia*, *Malassezia*, *Malbranchea*, *Mortierella*, *Mucor*, *Mycocentrospora*, *Mycoplasma*, *Nectria*, *Ochroconis*, *Oidiodendron*, *Phoma*, *Piedraia*, *Pithomyces*, *Pityrosporum*, *Prevotella*, *Pseudallescheria*, *Rhizomucor*, *Rhizopus*, *Rhodotorula*, *Scolecobasidium*, *Scopulariopsis*, *Scytalidium*, *Sporobolomyces*, *Stachybotrys*, *Stomatococcus*, *Treponema*, *Trichoderma*, *Trichophyton*, *Trichosporon*, *Tritirachium* or *Ureaplasma*. **Group 3** consists of all other microorganisms.
|
||||
#'
|
||||
#' All matches are sorted descending on their matching score and for all user input values, the top match will be returned. This will lead to the effect that e.g., `"E. coli"` will return the microbial ID of *Escherichia coli* (\eqn{m = `r round(mo_matching_score("E. coli", "Escherichia coli"), 3)`}, a highly prevalent microorganism found in humans) and not *Entamoeba coli* (\eqn{m = `r round(mo_matching_score("E. coli", "Entamoeba coli"), 3)`}, a less prevalent microorganism in humans), although the latter would alphabetically come first.
|
||||
#' @export
|
||||
|
BIN
R/sysdata.rda
BIN
R/sysdata.rda
Binary file not shown.
Reference in New Issue
Block a user