mirror of
https://github.com/msberends/AMR.git
synced 2024-12-25 18:46:11 +01:00
(v0.7.1.9073) as.mo() self-learning algorithm
This commit is contained in:
parent
cd178ee569
commit
398c5bdc4f
@ -21,3 +21,4 @@
|
|||||||
^pkgdown$
|
^pkgdown$
|
||||||
^public$
|
^public$
|
||||||
^data-raw$
|
^data-raw$
|
||||||
|
R/aa_test.R$
|
||||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -23,3 +23,4 @@ packrat/src/
|
|||||||
cran-comments.md
|
cran-comments.md
|
||||||
data-raw/taxon.tab
|
data-raw/taxon.tab
|
||||||
data-raw/DSMZ_bactnames.xlsx
|
data-raw/DSMZ_bactnames.xlsx
|
||||||
|
R/aa_test.R
|
||||||
|
@ -118,7 +118,7 @@ coverage:
|
|||||||
# install missing and outdated packages
|
# install missing and outdated packages
|
||||||
- Rscript -e 'source(".gitlab-ci.R"); gl_update_pkg_all(repos = "https://cran.rstudio.com", quiet = TRUE, install_pkgdown = FALSE)'
|
- Rscript -e 'source(".gitlab-ci.R"); gl_update_pkg_all(repos = "https://cran.rstudio.com", quiet = TRUE, install_pkgdown = FALSE)'
|
||||||
# codecov token is set in https://gitlab.com/msberends/AMR/settings/ci_cd
|
# codecov token is set in https://gitlab.com/msberends/AMR/settings/ci_cd
|
||||||
- Rscript -e "cc <- covr::package_coverage(line_exclusions = list('R/atc_online.R', 'R/mo_source.R')); covr::codecov(coverage = cc, token = '$codecov'); cat('Code coverage:', covr::percent_coverage(cc))"
|
- Rscript -e "cc <- covr::package_coverage(); covr::codecov(coverage = cc, token = '$codecov'); cat('Code coverage:', covr::percent_coverage(cc))"
|
||||||
coverage: '/Code coverage: \d+\.\d+/'
|
coverage: '/Code coverage: \d+\.\d+/'
|
||||||
|
|
||||||
pages:
|
pages:
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
Package: AMR
|
Package: AMR
|
||||||
Version: 0.7.1.9072
|
Version: 0.7.1.9073
|
||||||
Date: 2019-09-12
|
Date: 2019-09-15
|
||||||
Title: Antimicrobial Resistance Analysis
|
Title: Antimicrobial Resistance Analysis
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person(role = c("aut", "cre"),
|
person(role = c("aut", "cre"),
|
||||||
@ -17,6 +17,8 @@ Authors@R: c(
|
|||||||
family = "Glasner", given = "Corinna", email = "c.glasner@umcg.nl", comment = c(ORCID = "0000-0003-1241-1328")),
|
family = "Glasner", given = "Corinna", email = "c.glasner@umcg.nl", comment = c(ORCID = "0000-0003-1241-1328")),
|
||||||
person(role = "ctb",
|
person(role = "ctb",
|
||||||
family = "Hassing", given = c("Erwin", "E.", "A."), email = "e.hassing@certe.nl"),
|
family = "Hassing", given = c("Erwin", "E.", "A."), email = "e.hassing@certe.nl"),
|
||||||
|
person(role = "ctb",
|
||||||
|
family = "Hazenberg", given = c("Eric", "H.", "L.", "C.", "M."), email = "e.hazenberg@jbz.nl"),
|
||||||
person(role = "ctb",
|
person(role = "ctb",
|
||||||
family = "Lenglet", given = "Annick", email = "annick.lenglet@amsterdam.msf.org"),
|
family = "Lenglet", given = "Annick", email = "annick.lenglet@amsterdam.msf.org"),
|
||||||
person(role = "ctb",
|
person(role = "ctb",
|
||||||
@ -57,3 +59,4 @@ License: GPL-2 | file LICENSE
|
|||||||
Encoding: UTF-8
|
Encoding: UTF-8
|
||||||
LazyData: true
|
LazyData: true
|
||||||
RoxygenNote: 6.1.1
|
RoxygenNote: 6.1.1
|
||||||
|
StagedInstall: false
|
||||||
|
@ -58,6 +58,7 @@ S3method(type_sum,mic)
|
|||||||
S3method(type_sum,mo)
|
S3method(type_sum,mo)
|
||||||
S3method(type_sum,rsi)
|
S3method(type_sum,rsi)
|
||||||
export("%like%")
|
export("%like%")
|
||||||
|
export("%like_case%")
|
||||||
export(ab_atc)
|
export(ab_atc)
|
||||||
export(ab_atc_group1)
|
export(ab_atc_group1)
|
||||||
export(ab_atc_group2)
|
export(ab_atc_group2)
|
||||||
@ -85,6 +86,7 @@ export(availability)
|
|||||||
export(brmo)
|
export(brmo)
|
||||||
export(bug_drug_combinations)
|
export(bug_drug_combinations)
|
||||||
export(catalogue_of_life_version)
|
export(catalogue_of_life_version)
|
||||||
|
export(clear_mo_history)
|
||||||
export(count_I)
|
export(count_I)
|
||||||
export(count_IR)
|
export(count_IR)
|
||||||
export(count_R)
|
export(count_R)
|
||||||
|
19
NEWS.md
19
NEWS.md
@ -1,5 +1,5 @@
|
|||||||
# AMR 0.7.1.9072
|
# AMR 0.7.1.9073
|
||||||
<small>Last updated: 12-Sep-2019</small>
|
<small>Last updated: 15-Sep-2019</small>
|
||||||
|
|
||||||
### Breaking
|
### Breaking
|
||||||
* Determination of first isolates now **excludes** all 'unknown' microorganisms at default, i.e. microbial code `"UNKNOWN"`. They can be included with the new parameter `include_unknown`:
|
* Determination of first isolates now **excludes** all 'unknown' microorganisms at default, i.e. microbial code `"UNKNOWN"`. They can be included with the new parameter `include_unknown`:
|
||||||
@ -72,6 +72,14 @@
|
|||||||
```
|
```
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
* Many algorithm improvements for `as.mo()` (of which some led to additions to the `microorganisms` data set):
|
||||||
|
* Self-learning algorithm - the function now gains experience from previously determined microorganism IDs and learns from it (yielding 80-95% speed improvement for any guess after the first try)
|
||||||
|
* Big improvement for misspelled input
|
||||||
|
* These new trivial names known to the field are now understood: meningococcus, gonococcus, pneumococcus
|
||||||
|
* Updated to the latest taxonomic data (updated to August 2019, from the International Journal of Systematic and Evolutionary Microbiology)
|
||||||
|
* Added support for Viridans Group Streptococci (VGS) and Milleri Group Streptococci (MGS)
|
||||||
|
* Added support for 5,000 new fungi
|
||||||
|
* Added support for unknown yeasts and fungi
|
||||||
* Renamed data set `septic_patients` to `example_isolates`
|
* Renamed data set `septic_patients` to `example_isolates`
|
||||||
* Function `eucast_rules()`:
|
* Function `eucast_rules()`:
|
||||||
* Fixed a bug for *Yersinia pseudotuberculosis*
|
* Fixed a bug for *Yersinia pseudotuberculosis*
|
||||||
@ -83,13 +91,6 @@
|
|||||||
* Removed class `atc` - using `as.atc()` is now deprecated in favour of `ab_atc()` and this will return a character, not the `atc` class anymore
|
* Removed class `atc` - using `as.atc()` is now deprecated in favour of `ab_atc()` and this will return a character, not the `atc` class anymore
|
||||||
* Removed deprecated functions `abname()`, `ab_official()`, `atc_name()`, `atc_official()`, `atc_property()`, `atc_tradenames()`, `atc_trivial_nl()`
|
* Removed deprecated functions `abname()`, `ab_official()`, `atc_name()`, `atc_official()`, `atc_property()`, `atc_tradenames()`, `atc_trivial_nl()`
|
||||||
* Fix and speed improvement for `mo_shortname()`
|
* Fix and speed improvement for `mo_shortname()`
|
||||||
* Algorithm improvements for `as.mo()` (by which some additions were made to the `microorganisms` data set:
|
|
||||||
* Big improvement for misspelled input
|
|
||||||
* These new trivial names known to the field are now understood: meningococcus, gonococcus, pneumococcus
|
|
||||||
* Updated to the latest taxonomic data (updated to August 2019, from the International Journal of Systematic and Evolutionary Microbiology
|
|
||||||
* Added support for Viridans Group Streptococci (VGS) and Milleri Group Streptococci (MGS)
|
|
||||||
* Added support for 5,000 new fungi
|
|
||||||
* Added support for unknown yeasts and fungi
|
|
||||||
* Fix for using `mo_*` functions where the coercion uncertainties and failures would not be available through `mo_uncertainties()` and `mo_failures()` anymore
|
* Fix for using `mo_*` functions where the coercion uncertainties and failures would not be available through `mo_uncertainties()` and `mo_failures()` anymore
|
||||||
* Deprecated the `country` parameter of `mdro()` in favour of the already existing `guideline` parameter to support multiple guidelines within one country
|
* Deprecated the `country` parameter of `mdro()` in favour of the already existing `guideline` parameter to support multiple guidelines within one country
|
||||||
* The `name` of `RIF` is now Rifampicin instead of Rifampin
|
* The `name` of `RIF` is now Rifampicin instead of Rifampin
|
||||||
|
26
R/like.R
26
R/like.R
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
#' Pattern Matching
|
#' Pattern Matching
|
||||||
#'
|
#'
|
||||||
#' Convenient wrapper around \code{\link[base]{grep}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors.
|
#' Convenient wrapper around \code{\link[base]{grep}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive (use \code{a \%like_case\% b} for case-sensitive matching). Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors, or can both have the same length to iterate over all cases.
|
||||||
#' @inheritParams base::grepl
|
#' @inheritParams base::grepl
|
||||||
#' @return A \code{logical} vector
|
#' @return A \code{logical} vector
|
||||||
#' @name like
|
#' @name like
|
||||||
@ -53,14 +53,14 @@
|
|||||||
#' left_join_microorganisms() %>%
|
#' left_join_microorganisms() %>%
|
||||||
#' filter(genus %like% '^ent') %>%
|
#' filter(genus %like% '^ent') %>%
|
||||||
#' freq(genus, species)
|
#' freq(genus, species)
|
||||||
like <- function(x, pattern) {
|
like <- function(x, pattern, ignore.case = TRUE) {
|
||||||
if (length(pattern) > 1) {
|
if (length(pattern) > 1) {
|
||||||
if (length(x) != length(pattern)) {
|
if (length(x) != length(pattern)) {
|
||||||
if (length(x) == 1) {
|
if (length(x) == 1) {
|
||||||
x <- rep(x, length(pattern))
|
x <- rep(x, length(pattern))
|
||||||
}
|
}
|
||||||
# return TRUE for every 'x' that matches any 'pattern', FALSE otherwise
|
# return TRUE for every 'x' that matches any 'pattern', FALSE otherwise
|
||||||
res <- sapply(pattern, function(pttrn) x %like% pttrn)
|
res <- sapply(pattern, function(pttrn) base::grepl(pttrn, x, ignore.case = ignore.case))
|
||||||
res2 <- as.logical(rowSums(res))
|
res2 <- as.logical(rowSums(res))
|
||||||
# get only first item of every hit in pattern
|
# get only first item of every hit in pattern
|
||||||
res2[duplicated(res)] <- FALSE
|
res2[duplicated(res)] <- FALSE
|
||||||
@ -71,9 +71,9 @@ like <- function(x, pattern) {
|
|||||||
res <- vector(length = length(pattern))
|
res <- vector(length = length(pattern))
|
||||||
for (i in 1:length(res)) {
|
for (i in 1:length(res)) {
|
||||||
if (is.factor(x[i])) {
|
if (is.factor(x[i])) {
|
||||||
res[i] <- as.integer(x[i]) %in% base::grep(pattern[i], levels(x[i]), ignore.case = TRUE)
|
res[i] <- as.integer(x[i]) %in% base::grep(pattern[i], levels(x[i]), ignore.case = ignore.case)
|
||||||
} else {
|
} else {
|
||||||
res[i] <- base::grepl(pattern[i], x[i], ignore.case = TRUE)
|
res[i] <- base::grepl(pattern[i], x[i], ignore.case = ignore.case)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return(res)
|
return(res)
|
||||||
@ -82,16 +82,24 @@ like <- function(x, pattern) {
|
|||||||
|
|
||||||
# the regular way how grepl works; just one pattern against one or more x
|
# the regular way how grepl works; just one pattern against one or more x
|
||||||
if (is.factor(x)) {
|
if (is.factor(x)) {
|
||||||
as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE)
|
as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = ignore.case)
|
||||||
} else {
|
} else {
|
||||||
tryCatch(base::grepl(pattern, x, ignore.case = TRUE),
|
tryCatch(base::grepl(pattern, x, ignore.case = ignore.case),
|
||||||
error = function(e) ifelse(test = grepl("Invalid regexp", e$message),
|
error = function(e) ifelse(test = grepl("Invalid regexp", e$message),
|
||||||
# try with perl = TRUE:
|
# try with perl = TRUE:
|
||||||
yes = return(base::grepl(pattern, x, ignore.case = TRUE, perl = TRUE)),
|
yes = return(base::grepl(pattern, x, ignore.case = ignore.case, perl = TRUE)),
|
||||||
no = stop(e$message)))
|
no = stop(e$message)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @rdname like
|
#' @rdname like
|
||||||
#' @export
|
#' @export
|
||||||
"%like%" <- like
|
"%like%" <- function(x, pattern) {
|
||||||
|
like(x, pattern, ignore.case = TRUE)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' @rdname like
|
||||||
|
#' @export
|
||||||
|
"%like_case%" <- function(x, pattern) {
|
||||||
|
like(x, pattern, ignore.case = FALSE)
|
||||||
|
}
|
||||||
|
2
R/misc.R
2
R/misc.R
@ -41,7 +41,7 @@ percent <- function(x, round = 1, force_zero = FALSE, decimal.mark = getOption("
|
|||||||
big.mark <- " "
|
big.mark <- " "
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
x <- percent_clean(x = x, round = round, force_zero = force_zero,
|
percent_clean(x = x, round = round, force_zero = force_zero,
|
||||||
decimal.mark = decimal.mark, big.mark = big.mark, ...)
|
decimal.mark = decimal.mark, big.mark = big.mark, ...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
222
R/mo_history.R
222
R/mo_history.R
@ -19,116 +19,152 @@
|
|||||||
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
|
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
|
||||||
# ==================================================================== #
|
# ==================================================================== #
|
||||||
|
|
||||||
# print successful as.mo coercions to AMR environment
|
mo_history_file <- file.path(file.path(system.file(package = "AMR"), "mo_history"), "mo_history.csv")
|
||||||
#' @importFrom dplyr %>% distinct filter
|
|
||||||
set_mo_history <- function(x, mo, uncertainty_level, force = FALSE) {
|
|
||||||
# disable function for now
|
|
||||||
return(base::invisible())
|
|
||||||
|
|
||||||
# if (base::interactive() | force == TRUE) {
|
# print successful as.mo coercions to a options entry
|
||||||
# mo_hist <- read_mo_history(uncertainty_level = uncertainty_level, force = force)
|
#' @importFrom dplyr %>% distinct filter
|
||||||
# df <- data.frame(x, mo, stringsAsFactors = FALSE) %>%
|
set_mo_history <- function(x, mo, uncertainty_level, force = FALSE, disable = FALSE) {
|
||||||
# distinct(x, .keep_all = TRUE) %>%
|
if (isTRUE(disable)) {
|
||||||
# filter(!is.na(x) & !is.na(mo))
|
return(base::invisible())
|
||||||
# if (nrow(df) == 0) {
|
}
|
||||||
# return(base::invisible())
|
|
||||||
# }
|
if (base::interactive() | force == TRUE) {
|
||||||
# x <- toupper(df$x)
|
mo_hist <- read_mo_history(uncertainty_level = uncertainty_level, force = force)
|
||||||
# mo <- df$mo
|
df <- data.frame(x, mo, stringsAsFactors = FALSE) %>%
|
||||||
# for (i in 1:length(x)) {
|
distinct(x, .keep_all = TRUE) %>%
|
||||||
# # save package version too, as both the as.mo() algorithm and the reference data set may change
|
filter(!is.na(x) & !is.na(mo))
|
||||||
# if (NROW(mo_hist[base::which(mo_hist$x == x[i] &
|
if (nrow(df) == 0) {
|
||||||
# mo_hist$uncertainty_level >= uncertainty_level &
|
return(base::invisible())
|
||||||
# mo_hist$package_v == utils::packageVersion("AMR")),]) == 0) {
|
}
|
||||||
# tryCatch(
|
x <- toupper(df$x)
|
||||||
# assign(x = "mo_history",
|
mo <- df$mo
|
||||||
# value = rbind(mo_hist,
|
for (i in 1:length(x)) {
|
||||||
|
# save package version too, as both the as.mo() algorithm and the reference data set may change
|
||||||
|
if (NROW(mo_hist[base::which(mo_hist$x == x[i] &
|
||||||
|
mo_hist$uncertainty_level >= uncertainty_level &
|
||||||
|
mo_hist$package_v == utils::packageVersion("AMR")),]) == 0) {
|
||||||
|
# # Not using the file system:
|
||||||
|
# tryCatch(options(mo_remembered_results = rbind(mo_hist,
|
||||||
# data.frame(
|
# data.frame(
|
||||||
# x = x[i],
|
# x = x[i],
|
||||||
# mo = mo[i],
|
# mo = mo[i],
|
||||||
# uncertainty_level = uncertainty_level,
|
# uncertainty_level = uncertainty_level,
|
||||||
# package_v = base::as.character(utils::packageVersion("AMR")),
|
# package_v = base::as.character(utils::packageVersion("AMR")),
|
||||||
# stringsAsFactors = FALSE)),
|
# stringsAsFactors = FALSE))),
|
||||||
# envir = asNamespace("AMR")),
|
# error = function(e) base::invisible())
|
||||||
# error = function(e) invisible())
|
# # don't remember more than 1,000 different input values
|
||||||
# }
|
# if (tryCatch(nrow(getOption("mo_remembered_results")), error = function(e) 1001) > 1000) {
|
||||||
# }
|
|
||||||
# }
|
|
||||||
# return(base::invisible())
|
# return(base::invisible())
|
||||||
|
# }
|
||||||
|
if (is.null(mo_hist)) {
|
||||||
|
message(blue(paste0("NOTE: results are saved to ", mo_history_file, ".")))
|
||||||
|
}
|
||||||
|
tryCatch(write.csv(rbind(mo_hist,
|
||||||
|
data.frame(
|
||||||
|
x = x[i],
|
||||||
|
mo = mo[i],
|
||||||
|
uncertainty_level = uncertainty_level,
|
||||||
|
package_v = base::as.character(utils::packageVersion("AMR")),
|
||||||
|
stringsAsFactors = FALSE)),
|
||||||
|
file = mo_history_file, row.names = FALSE),
|
||||||
|
error = function(e) base::invisible())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(base::invisible())
|
||||||
}
|
}
|
||||||
|
|
||||||
get_mo_history <- function(x, uncertainty_level, force = FALSE) {
|
get_mo_history <- function(x, uncertainty_level, force = FALSE, disable = FALSE) {
|
||||||
# disable function for now
|
if (isTRUE(disable)) {
|
||||||
return(NA)
|
return(to_class_mo(NA))
|
||||||
|
}
|
||||||
|
|
||||||
# history <- read_mo_history(uncertainty_level = uncertainty_level, force = force)
|
history <- read_mo_history(uncertainty_level = uncertainty_level, force = force)
|
||||||
# if (base::is.null(history)) {
|
if (base::is.null(history)) {
|
||||||
# NA
|
result <- NA
|
||||||
# } else {
|
} else {
|
||||||
# data.frame(x = toupper(x), stringsAsFactors = FALSE) %>%
|
result <- data.frame(x = toupper(x), stringsAsFactors = FALSE) %>%
|
||||||
# left_join(history, by = "x") %>%
|
left_join(history, by = "x") %>%
|
||||||
# pull(mo)
|
pull(mo)
|
||||||
# }
|
}
|
||||||
|
to_class_mo(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @importFrom dplyr %>% filter distinct
|
#' @importFrom dplyr %>% filter distinct
|
||||||
read_mo_history <- function(uncertainty_level = 2, force = FALSE, unfiltered = FALSE) {
|
read_mo_history <- function(uncertainty_level = 2, force = FALSE, unfiltered = FALSE, disable = FALSE) {
|
||||||
# disable function for now
|
if (isTRUE(disable)) {
|
||||||
return(NULL)
|
return(NULL)
|
||||||
|
}
|
||||||
|
|
||||||
# if ((!base::interactive() & force == FALSE)) {
|
if ((!base::interactive() & force == FALSE)) {
|
||||||
# return(NULL)
|
return(NULL)
|
||||||
# }
|
}
|
||||||
# uncertainty_level_param <- uncertainty_level
|
uncertainty_level_param <- uncertainty_level
|
||||||
#
|
|
||||||
# history <- tryCatch(get("mo_history", envir = asNamespace("AMR")),
|
# # Not using the file system:
|
||||||
|
# history <- tryCatch(getOption("mo_remembered_results"),
|
||||||
# error = function(e) NULL)
|
# error = function(e) NULL)
|
||||||
# if (is.null(history)) {
|
history <- tryCatch(read.csv(mo_history_file, stringsAsFactors = FALSE),
|
||||||
# return(NULL)
|
warning = function(w) invisible(),
|
||||||
# }
|
error = function(e) NULL)
|
||||||
# # Below: filter on current package version.
|
if (is.null(history)) {
|
||||||
# # Even current fullnames may be replaced by new taxonomic names, so new versions of
|
return(NULL)
|
||||||
# # the Catalogue of Life must not lead to data corruption.
|
}
|
||||||
#
|
# Below: filter on current package version.
|
||||||
# if (unfiltered == FALSE) {
|
# Even current fullnames may be replaced by new taxonomic names, so new versions of
|
||||||
# history <- history %>%
|
# the Catalogue of Life must not lead to data corruption.
|
||||||
# filter(package_v == as.character(utils::packageVersion("AMR")),
|
|
||||||
# # only take unknowns if uncertainty_level_param is higher
|
if (unfiltered == FALSE) {
|
||||||
# ((mo == "UNKNOWN" & uncertainty_level_param == uncertainty_level) |
|
history <- history %>%
|
||||||
# (mo != "UNKNOWN" & uncertainty_level_param >= uncertainty_level))) %>%
|
filter(package_v == as.character(utils::packageVersion("AMR")),
|
||||||
# arrange(desc(uncertainty_level)) %>%
|
# only take unknowns if uncertainty_level_param is higher
|
||||||
# distinct(x, mo, .keep_all = TRUE)
|
((mo == "UNKNOWN" & uncertainty_level_param == uncertainty_level) |
|
||||||
# }
|
(mo != "UNKNOWN" & uncertainty_level_param >= uncertainty_level))) %>%
|
||||||
#
|
arrange(desc(uncertainty_level)) %>%
|
||||||
# if (nrow(history) == 0) {
|
distinct(x, mo, .keep_all = TRUE)
|
||||||
# NULL
|
}
|
||||||
# } else {
|
|
||||||
# history
|
if (nrow(history) == 0) {
|
||||||
# }
|
NULL
|
||||||
|
} else {
|
||||||
|
history
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# @rdname as.mo
|
#' @rdname as.mo
|
||||||
# @importFrom crayon red
|
#' @importFrom crayon red
|
||||||
# @importFrom utils menu
|
#' @importFrom utils menu
|
||||||
# @export
|
#' @export
|
||||||
clean_mo_history <- function(...) {
|
clear_mo_history <- function(...) {
|
||||||
# if (!is.null(read_mo_history())) {
|
if (!is.null(read_mo_history())) {
|
||||||
# if (interactive() & !isTRUE(list(...)$force)) {
|
if (interactive() & !isTRUE(list(...)$force)) {
|
||||||
# q <- menu(title = paste("This will remove all",
|
q <- menu(title = paste("This will clear all",
|
||||||
# format(nrow(read_mo_history(999, unfiltered = TRUE)), big.mark = ","),
|
format(nrow(read_mo_history(999, unfiltered = TRUE)), big.mark = ","),
|
||||||
# "microbial IDs determined previously in this session. Are you sure?"),
|
"previously determined microbial IDs. Are you sure?"),
|
||||||
# choices = c("Yes", "No"),
|
choices = c("Yes", "No"),
|
||||||
# graphics = FALSE)
|
graphics = FALSE)
|
||||||
# if (q != 1) {
|
if (q != 1) {
|
||||||
# return(invisible())
|
return(invisible())
|
||||||
# }
|
}
|
||||||
# }
|
}
|
||||||
# tryCatch(
|
# # Not using the file system:
|
||||||
# assign(x = "mo_history",
|
# success <- tryCatch(options(mo_remembered_results = NULL),
|
||||||
# value = NULL,
|
# error = function(e) FALSE)
|
||||||
# envir = asNamespace("AMR")),
|
success <- create_blank_mo_history()
|
||||||
# error = function(e) invisible())
|
if (!isFALSE(success)) {
|
||||||
# cat(red("History removed."))
|
cat(red(paste("File", mo_history_file, "cleared.")))
|
||||||
# }
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
create_blank_mo_history <- function() {
|
||||||
|
tryCatch(
|
||||||
|
write.csv(x = data.frame(x = character(0),
|
||||||
|
mo = character(0),
|
||||||
|
uncertainty_level = integer(0),
|
||||||
|
package_v = character(0),
|
||||||
|
stringsAsFactors = FALSE),
|
||||||
|
file = mo_history_file),
|
||||||
|
warning = function(w) invisible(),
|
||||||
|
error = function(e) TRUE)
|
||||||
|
}
|
||||||
|
@ -408,7 +408,7 @@ mo_validate <- function(x, property, ...) {
|
|||||||
|
|
||||||
if (!"AMR" %in% base::.packages()) {
|
if (!"AMR" %in% base::.packages()) {
|
||||||
require("AMR")
|
require("AMR")
|
||||||
# check onLoad() in R/zzz.R: data tables are created there.
|
# check .onLoad() in R/zzz.R: data tables are created there.
|
||||||
}
|
}
|
||||||
|
|
||||||
# try to catch an error when inputting an invalid parameter
|
# try to catch an error when inputting an invalid parameter
|
||||||
|
6
R/zzz.R
6
R/zzz.R
@ -26,7 +26,8 @@
|
|||||||
|
|
||||||
# register data
|
# register data
|
||||||
microorganisms.oldDT <- as.data.table(AMR::microorganisms.old)
|
microorganisms.oldDT <- as.data.table(AMR::microorganisms.old)
|
||||||
microorganisms.oldDT$fullname_lower <- tolower(microorganisms.oldDT$fullname)
|
# for fullname_lower: keep only dots, letters, numbers, slashes, spaces and dashes
|
||||||
|
microorganisms.oldDT$fullname_lower <- gsub("[^.a-z0-9/ \\-]+", "", tolower(microorganisms.oldDT$fullname))
|
||||||
setkey(microorganisms.oldDT, col_id, fullname)
|
setkey(microorganisms.oldDT, col_id, fullname)
|
||||||
|
|
||||||
assign(x = "microorganismsDT",
|
assign(x = "microorganismsDT",
|
||||||
@ -81,7 +82,8 @@
|
|||||||
#' @importFrom data.table as.data.table setkey
|
#' @importFrom data.table as.data.table setkey
|
||||||
make_DT <- function() {
|
make_DT <- function() {
|
||||||
microorganismsDT <- as.data.table(AMR::microorganisms)
|
microorganismsDT <- as.data.table(AMR::microorganisms)
|
||||||
microorganismsDT$fullname_lower <- tolower(microorganismsDT$fullname)
|
# for fullname_lower: keep only dots, letters, numbers, slashes, spaces and dashes
|
||||||
|
microorganismsDT$fullname_lower <- gsub("[^.a-z0-9/ \\-]+", "", tolower(microorganismsDT$fullname))
|
||||||
setkey(microorganismsDT,
|
setkey(microorganismsDT,
|
||||||
prevalence,
|
prevalence,
|
||||||
kingdom,
|
kingdom,
|
||||||
|
@ -118,7 +118,7 @@ reference:
|
|||||||
- "`ab_property`"
|
- "`ab_property`"
|
||||||
- "`age`"
|
- "`age`"
|
||||||
- "`age_groups`"
|
- "`age_groups`"
|
||||||
- "`p.symbol`"
|
- "`p_symbol`"
|
||||||
- "`join`"
|
- "`join`"
|
||||||
- "`atc_online_property`"
|
- "`atc_online_property`"
|
||||||
- title: "Analysing your data"
|
- title: "Analysing your data"
|
||||||
|
@ -19,3 +19,4 @@ ignore:
|
|||||||
- "R/atc_online.R"
|
- "R/atc_online.R"
|
||||||
- "R/mo_source.R"
|
- "R/mo_source.R"
|
||||||
- "R/resistance_predict.R"
|
- "R/resistance_predict.R"
|
||||||
|
- "R/aa_test.R"
|
||||||
|
@ -19,7 +19,7 @@ eucast_rules_file <- dplyr::arrange(
|
|||||||
reference.rule_group,
|
reference.rule_group,
|
||||||
reference.rule)
|
reference.rule)
|
||||||
|
|
||||||
# Translations -----
|
# Translations ----
|
||||||
translations_file <- utils::read.delim(file = "data-raw/translations.tsv",
|
translations_file <- utils::read.delim(file = "data-raw/translations.tsv",
|
||||||
sep = "\t",
|
sep = "\t",
|
||||||
stringsAsFactors = FALSE,
|
stringsAsFactors = FALSE,
|
||||||
@ -42,3 +42,16 @@ usethis::use_data(eucast_rules_file, translations_file,
|
|||||||
# Remove from global environment ----
|
# Remove from global environment ----
|
||||||
rm(eucast_rules_file)
|
rm(eucast_rules_file)
|
||||||
rm(translations_file)
|
rm(translations_file)
|
||||||
|
|
||||||
|
# Clean mo history ----
|
||||||
|
mo_history_file <- file.path(file.path(system.file(package = "AMR"), "mo_history"), "mo_history.csv")
|
||||||
|
usethis::ui_done(paste0("Resetting {usethis::ui_value('", mo_history_file, "')}"))
|
||||||
|
tryCatch(
|
||||||
|
write.csv(x = data.frame(x = character(0),
|
||||||
|
mo = character(0),
|
||||||
|
uncertainty_level = integer(0),
|
||||||
|
package_v = character(0),
|
||||||
|
stringsAsFactors = FALSE),
|
||||||
|
file = mo_history_file),
|
||||||
|
warning = function(w) invisible(),
|
||||||
|
error = function(e) TRUE)
|
||||||
|
@ -78,7 +78,7 @@
|
|||||||
</button>
|
</button>
|
||||||
<span class="navbar-brand">
|
<span class="navbar-brand">
|
||||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9072</span>
|
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9073</span>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 219 KiB After Width: | Height: | Size: 93 KiB |
Binary file not shown.
Before Width: | Height: | Size: 103 KiB After Width: | Height: | Size: 60 KiB |
@ -78,7 +78,7 @@
|
|||||||
</button>
|
</button>
|
||||||
<span class="navbar-brand">
|
<span class="navbar-brand">
|
||||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9072</span>
|
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9073</span>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -78,7 +78,7 @@
|
|||||||
</button>
|
</button>
|
||||||
<span class="navbar-brand">
|
<span class="navbar-brand">
|
||||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9072</span>
|
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9073</span>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -253,6 +253,10 @@
|
|||||||
<p><strong>Erwin E. A. Hassing</strong>. Contributor.
|
<p><strong>Erwin E. A. Hassing</strong>. Contributor.
|
||||||
</p>
|
</p>
|
||||||
</li>
|
</li>
|
||||||
|
<li>
|
||||||
|
<p><strong>Eric H. L. C. M. Hazenberg</strong>. Contributor.
|
||||||
|
</p>
|
||||||
|
</li>
|
||||||
<li>
|
<li>
|
||||||
<p><strong>Annick Lenglet</strong>. Contributor.
|
<p><strong>Annick Lenglet</strong>. Contributor.
|
||||||
</p>
|
</p>
|
||||||
|
@ -42,7 +42,7 @@
|
|||||||
</button>
|
</button>
|
||||||
<span class="navbar-brand">
|
<span class="navbar-brand">
|
||||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9072</span>
|
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9073</span>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@
|
|||||||
</button>
|
</button>
|
||||||
<span class="navbar-brand">
|
<span class="navbar-brand">
|
||||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9055</span>
|
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9073</span>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -243,7 +243,9 @@
|
|||||||
|
|
||||||
<span class='fu'>mo_uncertainties</span>()
|
<span class='fu'>mo_uncertainties</span>()
|
||||||
|
|
||||||
<span class='fu'>mo_renamed</span>()</pre>
|
<span class='fu'>mo_renamed</span>()
|
||||||
|
|
||||||
|
<span class='fu'>clear_mo_history</span>(<span class='no'>...</span>)</pre>
|
||||||
|
|
||||||
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
||||||
<table class="ref-arguments">
|
<table class="ref-arguments">
|
||||||
@ -283,7 +285,7 @@
|
|||||||
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
|
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
|
||||||
|
|
||||||
<p><strong>General info</strong> <br />
|
<p><strong>General info</strong> <br />
|
||||||
A microbial ID from this package (class: <code>mo</code>) typically looks like these examples:<br /></p><pre>
|
A microorganism ID from this package (class: <code>mo</code>) typically looks like these examples:<br /></p><pre>
|
||||||
Code Full name
|
Code Full name
|
||||||
--------------- --------------------------------------
|
--------------- --------------------------------------
|
||||||
B_KLBSL Klebsiella
|
B_KLBSL Klebsiella
|
||||||
@ -299,8 +301,11 @@ A microbial ID from this package (class: <code>mo</code>) typically looks like t
|
|||||||
</pre>
|
</pre>
|
||||||
<p>Values that cannot be coerced will be considered 'unknown' and will get the MO code <code>UNKNOWN</code>.</p>
|
<p>Values that cannot be coerced will be considered 'unknown' and will get the MO code <code>UNKNOWN</code>.</p>
|
||||||
<p>Use the <code><a href='mo_property.html'>mo_property</a>_*</code> functions to get properties based on the returned code, see Examples.</p>
|
<p>Use the <code><a href='mo_property.html'>mo_property</a>_*</code> functions to get properties based on the returned code, see Examples.</p>
|
||||||
<p>The algorithm uses data from the Catalogue of Life (see below) and from one other source (see <code><a href='microorganisms.html'>?microorganisms</a></code>).</p>
|
<p>The algorithm uses data from the Catalogue of Life (see below) and from one other source (see <code><a href='microorganisms.html'>microorganisms</a></code>).</p>
|
||||||
<p><strong>Intelligent rules</strong> <br />
|
<p><strong>Self-learning algorithm</strong> <br />
|
||||||
|
The <code>as.mo()</code> function gains experience from previously determined microorganism IDs and learns from it. This drastically improves both speed and reliability. Use <code>clear_mo_history()</code> to reset the algorithms. Only experience from your current <code>AMR</code> package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge.</p>
|
||||||
|
<p>Usually, any guess after the first try runs 80-95% faster than the first try.</p>
|
||||||
|
<p><strong>Intelligent rules</strong> <br />
|
||||||
This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order:</p><ul>
|
This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order:</p><ul>
|
||||||
<li><p>Valid MO codes and full names: it first searches in already valid MO code and known genus/species combinations</p></li>
|
<li><p>Valid MO codes and full names: it first searches in already valid MO code and known genus/species combinations</p></li>
|
||||||
<li><p>Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones (see <em>Microbial prevalence of pathogens in humans</em> below)</p></li>
|
<li><p>Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones (see <em>Microbial prevalence of pathogens in humans</em> below)</p></li>
|
||||||
@ -326,7 +331,7 @@ The algorithm can additionally use three different levels of uncertainty to gues
|
|||||||
</ul>
|
</ul>
|
||||||
<p>Use <code>mo_failures()</code> to get a vector with all values that could not be coerced to a valid value.</p>
|
<p>Use <code>mo_failures()</code> to get a vector with all values that could not be coerced to a valid value.</p>
|
||||||
<p>Use <code>mo_uncertainties()</code> to get a data.frame with all values that were coerced to a valid value, but with uncertainty.</p>
|
<p>Use <code>mo_uncertainties()</code> to get a data.frame with all values that were coerced to a valid value, but with uncertainty.</p>
|
||||||
<p>Use <code>mo_renamed()</code> to get a vector with all values that could be coerced based on an old, previously accepted taxonomic name.</p>
|
<p>Use <code>mo_renamed()</code> to get a data.frame with all values that could be coerced based on an old, previously accepted taxonomic name.</p>
|
||||||
<p><strong>Microbial prevalence of pathogens in humans</strong> <br />
|
<p><strong>Microbial prevalence of pathogens in humans</strong> <br />
|
||||||
The intelligent rules take into account microbial prevalence of pathogens in humans. It uses three groups and all (sub)species are in only one group. These groups are:</p><ul>
|
The intelligent rules take into account microbial prevalence of pathogens in humans. It uses three groups and all (sub)species are in only one group. These groups are:</p><ul>
|
||||||
<li><p>1 (most prevalent): class is Gammaproteobacteria <strong>or</strong> genus is one of: <em>Enterococcus</em>, <em>Staphylococcus</em>, <em>Streptococcus</em>.</p></li>
|
<li><p>1 (most prevalent): class is Gammaproteobacteria <strong>or</strong> genus is one of: <em>Enterococcus</em>, <em>Staphylococcus</em>, <em>Streptococcus</em>.</p></li>
|
||||||
@ -334,7 +339,7 @@ The intelligent rules take into account microbial prevalence of pathogens in hum
|
|||||||
<li><p>3 (least prevalent): all others.</p></li>
|
<li><p>3 (least prevalent): all others.</p></li>
|
||||||
</ul>
|
</ul>
|
||||||
<p>Group 1 contains all common Gram positives and Gram negatives, like all Enterobacteriaceae and e.g. <em>Pseudomonas</em> and <em>Legionella</em>.</p>
|
<p>Group 1 contains all common Gram positives and Gram negatives, like all Enterobacteriaceae and e.g. <em>Pseudomonas</em> and <em>Legionella</em>.</p>
|
||||||
<p>Group 2 probably contains less microbial pathogens; all other members of phyla that were found in humans in the Northern Netherlands between 2001 and 2018.</p>
|
<p>Group 2 contains probably less pathogenic microorganisms; all other members of phyla that were found in humans in the Northern Netherlands between 2001 and 2018.</p>
|
||||||
|
|
||||||
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>
|
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>
|
||||||
|
|
||||||
|
@ -78,7 +78,7 @@
|
|||||||
</button>
|
</button>
|
||||||
<span class="navbar-brand">
|
<span class="navbar-brand">
|
||||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9072</span>
|
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9073</span>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -292,7 +292,7 @@
|
|||||||
</tr><tr>
|
</tr><tr>
|
||||||
|
|
||||||
<td>
|
<td>
|
||||||
<p><code><a href="as.mo.html">as.mo()</a></code> <code><a href="as.mo.html">is.mo()</a></code> <code><a href="as.mo.html">mo_failures()</a></code> <code><a href="as.mo.html">mo_uncertainties()</a></code> <code><a href="as.mo.html">mo_renamed()</a></code> </p>
|
<p><code><a href="as.mo.html">as.mo()</a></code> <code><a href="as.mo.html">is.mo()</a></code> <code><a href="as.mo.html">mo_failures()</a></code> <code><a href="as.mo.html">mo_uncertainties()</a></code> <code><a href="as.mo.html">mo_renamed()</a></code> <code><a href="as.mo.html">clear_mo_history()</a></code> </p>
|
||||||
</td>
|
</td>
|
||||||
<td><p>Transform to microorganism ID</p></td>
|
<td><p>Transform to microorganism ID</p></td>
|
||||||
</tr><tr>
|
</tr><tr>
|
||||||
@ -384,9 +384,9 @@
|
|||||||
</tr><tr>
|
</tr><tr>
|
||||||
|
|
||||||
<td>
|
<td>
|
||||||
<p><code><a href="AMR-deprecated.html">as.atc()</a></code> <code><a href="AMR-deprecated.html">p.symbol()</a></code> </p>
|
<p><code><a href="p_symbol.html">p_symbol()</a></code> </p>
|
||||||
</td>
|
</td>
|
||||||
<td><p>Deprecated functions</p></td>
|
<td><p>Symbol of a p value</p></td>
|
||||||
</tr><tr>
|
</tr><tr>
|
||||||
|
|
||||||
<td>
|
<td>
|
||||||
@ -528,7 +528,7 @@
|
|||||||
</tr><tr>
|
</tr><tr>
|
||||||
|
|
||||||
<td>
|
<td>
|
||||||
<p><code><a href="like.html">like()</a></code> <code><a href="like.html">`%like%`</a></code> </p>
|
<p><code><a href="like.html">like()</a></code> <code><a href="like.html">`%like%`</a></code> <code><a href="like.html">`%like_case%`</a></code> </p>
|
||||||
</td>
|
</td>
|
||||||
<td><p>Pattern Matching</p></td>
|
<td><p>Pattern Matching</p></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
@ -47,7 +47,7 @@
|
|||||||
<script src="../extra.js"></script>
|
<script src="../extra.js"></script>
|
||||||
<meta property="og:title" content="Pattern Matching — like" />
|
<meta property="og:title" content="Pattern Matching — like" />
|
||||||
|
|
||||||
<meta property="og:description" content="Convenient wrapper around grep to match a pattern: a %like% b. It always returns a logical vector and is always case-insensitive. Also, pattern (b) can be as long as x (a) to compare items of each index in both vectors." />
|
<meta property="og:description" content="Convenient wrapper around grep to match a pattern: a %like% b. It always returns a logical vector and is always case-insensitive (use a %like_case% b for case-sensitive matching). Also, pattern (b) can be as long as x (a) to compare items of each index in both vectors, or can both have the same length to iterate over all cases." />
|
||||||
|
|
||||||
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png" />
|
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png" />
|
||||||
<meta name="twitter:card" content="summary" />
|
<meta name="twitter:card" content="summary" />
|
||||||
@ -80,7 +80,7 @@
|
|||||||
</button>
|
</button>
|
||||||
<span class="navbar-brand">
|
<span class="navbar-brand">
|
||||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9067</span>
|
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9073</span>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -230,13 +230,15 @@
|
|||||||
|
|
||||||
<div class="ref-description">
|
<div class="ref-description">
|
||||||
|
|
||||||
<p>Convenient wrapper around <code><a href='https://www.rdocumentation.org/packages/base/topics/grep'>grep</a></code> to match a pattern: <code>a %like% b</code>. It always returns a <code>logical</code> vector and is always case-insensitive. Also, <code>pattern</code> (<code>b</code>) can be as long as <code>x</code> (<code>a</code>) to compare items of each index in both vectors.</p>
|
<p>Convenient wrapper around <code><a href='https://www.rdocumentation.org/packages/base/topics/grep'>grep</a></code> to match a pattern: <code>a %like% b</code>. It always returns a <code>logical</code> vector and is always case-insensitive (use <code>a %like_case% b</code> for case-sensitive matching). Also, <code>pattern</code> (<code>b</code>) can be as long as <code>x</code> (<code>a</code>) to compare items of each index in both vectors, or can both have the same length to iterate over all cases.</p>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<pre class="usage"><span class='fu'>like</span>(<span class='no'>x</span>, <span class='no'>pattern</span>)
|
<pre class="usage"><span class='fu'>like</span>(<span class='no'>x</span>, <span class='no'>pattern</span>, <span class='kw'>ignore.case</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>)
|
||||||
|
|
||||||
<span class='no'>x</span> <span class='kw'>%like%</span> <span class='no'>pattern</span></pre>
|
<span class='no'>x</span> <span class='kw'>%like%</span> <span class='no'>pattern</span>
|
||||||
|
|
||||||
|
<span class='no'>x</span> <span class='kw'>%like_case%</span> <span class='no'>pattern</span></pre>
|
||||||
|
|
||||||
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
|
||||||
<table class="ref-arguments">
|
<table class="ref-arguments">
|
||||||
@ -257,6 +259,11 @@
|
|||||||
is used with a warning. Missing values are allowed except for
|
is used with a warning. Missing values are allowed except for
|
||||||
<code>regexpr</code> and <code>gregexpr</code>.</p></td>
|
<code>regexpr</code> and <code>gregexpr</code>.</p></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>ignore.case</th>
|
||||||
|
<td><p>if <code>FALSE</code>, the pattern matching is <em>case
|
||||||
|
sensitive</em> and if <code>TRUE</code>, case is ignored during matching.</p></td>
|
||||||
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>
|
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>
|
||||||
|
@ -80,7 +80,7 @@
|
|||||||
</button>
|
</button>
|
||||||
<span class="navbar-brand">
|
<span class="navbar-brand">
|
||||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9072</span>
|
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9073</span>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
1
inst/mo_history/mo_history.csv
Normal file
1
inst/mo_history/mo_history.csv
Normal file
@ -0,0 +1 @@
|
|||||||
|
"","x","mo","uncertainty_level","package_v"
|
|
19
man/as.mo.Rd
19
man/as.mo.Rd
@ -1,5 +1,5 @@
|
|||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/mo.R
|
% Please edit documentation in R/mo.R, R/mo_history.R
|
||||||
\name{as.mo}
|
\name{as.mo}
|
||||||
\alias{as.mo}
|
\alias{as.mo}
|
||||||
\alias{mo}
|
\alias{mo}
|
||||||
@ -7,6 +7,7 @@
|
|||||||
\alias{mo_failures}
|
\alias{mo_failures}
|
||||||
\alias{mo_uncertainties}
|
\alias{mo_uncertainties}
|
||||||
\alias{mo_renamed}
|
\alias{mo_renamed}
|
||||||
|
\alias{clear_mo_history}
|
||||||
\title{Transform to microorganism ID}
|
\title{Transform to microorganism ID}
|
||||||
\usage{
|
\usage{
|
||||||
as.mo(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
|
as.mo(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
|
||||||
@ -19,6 +20,8 @@ mo_failures()
|
|||||||
mo_uncertainties()
|
mo_uncertainties()
|
||||||
|
|
||||||
mo_renamed()
|
mo_renamed()
|
||||||
|
|
||||||
|
clear_mo_history(...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{x}{a character vector or a \code{data.frame} with one or two columns}
|
\item{x}{a character vector or a \code{data.frame} with one or two columns}
|
||||||
@ -45,7 +48,7 @@ Use this function to determine a valid microorganism ID (\code{mo}). Determinati
|
|||||||
}
|
}
|
||||||
\details{
|
\details{
|
||||||
\strong{General info} \cr
|
\strong{General info} \cr
|
||||||
A microbial ID from this package (class: \code{mo}) typically looks like these examples:\cr
|
A microorganism ID from this package (class: \code{mo}) typically looks like these examples:\cr
|
||||||
\preformatted{
|
\preformatted{
|
||||||
Code Full name
|
Code Full name
|
||||||
--------------- --------------------------------------
|
--------------- --------------------------------------
|
||||||
@ -65,7 +68,13 @@ Values that cannot be coered will be considered 'unknown' and will get the MO co
|
|||||||
|
|
||||||
Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples.
|
Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples.
|
||||||
|
|
||||||
The algorithm uses data from the Catalogue of Life (see below) and from one other source (see \code{?microorganisms}).
|
The algorithm uses data from the Catalogue of Life (see below) and from one other source (see \code{\link{microorganisms}}).
|
||||||
|
|
||||||
|
\strong{Self-learning algorithm} \cr
|
||||||
|
The \code{as.mo()} function gains experience from previously determined microorganism IDs and learns from it. This drastically improves both speed and reliability. Use \code{clear_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge.
|
||||||
|
|
||||||
|
Usually, any guess after the first try runs 80-95\% faster than the first try.
|
||||||
|
|
||||||
|
|
||||||
\strong{Intelligent rules} \cr
|
\strong{Intelligent rules} \cr
|
||||||
This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order:
|
This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order:
|
||||||
@ -105,7 +114,7 @@ Use \code{mo_failures()} to get a vector with all values that could not be coerc
|
|||||||
|
|
||||||
Use \code{mo_uncertainties()} to get a data.frame with all values that were coerced to a valid value, but with uncertainty.
|
Use \code{mo_uncertainties()} to get a data.frame with all values that were coerced to a valid value, but with uncertainty.
|
||||||
|
|
||||||
Use \code{mo_renamed()} to get a vector with all values that could be coerced based on an old, previously accepted taxonomic name.
|
Use \code{mo_renamed()} to get a data.frame with all values that could be coerced based on an old, previously accepted taxonomic name.
|
||||||
|
|
||||||
\strong{Microbial prevalence of pathogens in humans} \cr
|
\strong{Microbial prevalence of pathogens in humans} \cr
|
||||||
The intelligent rules take into account microbial prevalence of pathogens in humans. It uses three groups and all (sub)species are in only one group. These groups are:
|
The intelligent rules take into account microbial prevalence of pathogens in humans. It uses three groups and all (sub)species are in only one group. These groups are:
|
||||||
@ -117,7 +126,7 @@ The intelligent rules take into account microbial prevalence of pathogens in hum
|
|||||||
|
|
||||||
Group 1 contains all common Gram positives and Gram negatives, like all Enterobacteriaceae and e.g. \emph{Pseudomonas} and \emph{Legionella}.
|
Group 1 contains all common Gram positives and Gram negatives, like all Enterobacteriaceae and e.g. \emph{Pseudomonas} and \emph{Legionella}.
|
||||||
|
|
||||||
Group 2 probably contains less microbial pathogens; all other members of phyla that were found in humans in the Northern Netherlands between 2001 and 2018.
|
Group 2 contains probably less pathogenic microorganisms; all other members of phyla that were found in humans in the Northern Netherlands between 2001 and 2018.
|
||||||
}
|
}
|
||||||
\section{Source}{
|
\section{Source}{
|
||||||
|
|
||||||
|
10
man/like.Rd
10
man/like.Rd
@ -3,14 +3,17 @@
|
|||||||
\name{like}
|
\name{like}
|
||||||
\alias{like}
|
\alias{like}
|
||||||
\alias{\%like\%}
|
\alias{\%like\%}
|
||||||
|
\alias{\%like_case\%}
|
||||||
\title{Pattern Matching}
|
\title{Pattern Matching}
|
||||||
\source{
|
\source{
|
||||||
Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns. Also, if the regex fails the first time, it tries again with \code{perl = TRUE}.
|
Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default and let it support multiple patterns. Also, if the regex fails the first time, it tries again with \code{perl = TRUE}.
|
||||||
}
|
}
|
||||||
\usage{
|
\usage{
|
||||||
like(x, pattern)
|
like(x, pattern, ignore.case = TRUE)
|
||||||
|
|
||||||
x \%like\% pattern
|
x \%like\% pattern
|
||||||
|
|
||||||
|
x \%like_case\% pattern
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{x}{a character vector where matches are sought, or an
|
\item{x}{a character vector where matches are sought, or an
|
||||||
@ -24,12 +27,15 @@ x \%like\% pattern
|
|||||||
character vector of length 2 or more is supplied, the first element
|
character vector of length 2 or more is supplied, the first element
|
||||||
is used with a warning. Missing values are allowed except for
|
is used with a warning. Missing values are allowed except for
|
||||||
\code{regexpr} and \code{gregexpr}.}
|
\code{regexpr} and \code{gregexpr}.}
|
||||||
|
|
||||||
|
\item{ignore.case}{if \code{FALSE}, the pattern matching is \emph{case
|
||||||
|
sensitive} and if \code{TRUE}, case is ignored during matching.}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
A \code{logical} vector
|
A \code{logical} vector
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Convenient wrapper around \code{\link[base]{grep}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive. Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors.
|
Convenient wrapper around \code{\link[base]{grep}} to match a pattern: \code{a \%like\% b}. It always returns a \code{logical} vector and is always case-insensitive (use \code{a \%like_case\% b} for case-sensitive matching). Also, \code{pattern} (\code{b}) can be as long as \code{x} (\code{a}) to compare items of each index in both vectors, or can both have the same length to iterate over all cases.
|
||||||
}
|
}
|
||||||
\details{
|
\details{
|
||||||
Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).
|
Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).
|
||||||
|
@ -23,6 +23,8 @@ context("mo.R")
|
|||||||
|
|
||||||
test_that("as.mo works", {
|
test_that("as.mo works", {
|
||||||
|
|
||||||
|
clear_mo_history(force = TRUE)
|
||||||
|
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
MOs <- AMR::microorganisms %>% filter(!is.na(mo), nchar(mo) > 3)
|
MOs <- AMR::microorganisms %>% filter(!is.na(mo), nchar(mo) > 3)
|
||||||
expect_identical(as.character(MOs$mo), as.character(as.mo(MOs$mo)))
|
expect_identical(as.character(MOs$mo), as.character(as.mo(MOs$mo)))
|
||||||
|
@ -20,7 +20,7 @@ knitr::opts_chunk$set(
|
|||||||
comment = "#",
|
comment = "#",
|
||||||
fig.width = 7.5,
|
fig.width = 7.5,
|
||||||
fig.height = 4.5,
|
fig.height = 4.5,
|
||||||
dpi = 150
|
dpi = 75
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -110,26 +110,40 @@ That takes `r round(mean(T.islandicus$time, na.rm = TRUE) / mean(S.aureus$time,
|
|||||||
In the figure below, we compare *Escherichia coli* (which is very common) with *Prevotella brevis* (which is moderately common) and with *Thermus islandicus* (which is uncommon):
|
In the figure below, we compare *Escherichia coli* (which is very common) with *Prevotella brevis* (which is moderately common) and with *Thermus islandicus* (which is uncommon):
|
||||||
|
|
||||||
```{r, echo = FALSE}
|
```{r, echo = FALSE}
|
||||||
ggplot.bm(
|
# ggplot.bm(
|
||||||
microbenchmark(as.mo("Escherichia coli"),
|
# microbenchmark(as.mo("Escherichia coli"),
|
||||||
as.mo("E. coli"),
|
# as.mo("E. coli"),
|
||||||
times = 10), title = "Very common")
|
# times = 10), title = "Very common")
|
||||||
|
#
|
||||||
|
# ggplot.bm(
|
||||||
|
# microbenchmark(as.mo("Prevotella brevis"),
|
||||||
|
# as.mo("P. brevis"),
|
||||||
|
# times = 10), title = "Moderately common")
|
||||||
|
#
|
||||||
|
# ggplot.bm(
|
||||||
|
# microbenchmark(as.mo("Thermus islandicus"),
|
||||||
|
# as.mo("T. islandicus"),
|
||||||
|
# times = 10), title = "Uncommon")
|
||||||
|
|
||||||
ggplot.bm(
|
par(mar = c(5, 16, 4, 2))
|
||||||
microbenchmark(as.mo("Prevotella brevis"),
|
boxplot(microbenchmark(
|
||||||
as.mo("P. brevis"),
|
'as.mo("Thermus islandicus")' = as.mo("Thermus islandicus"),
|
||||||
times = 10), title = "Moderately common")
|
'as.mo("Prevotella brevis")' = as.mo("Prevotella brevis"),
|
||||||
|
'as.mo("Escherichia coli")' = as.mo("Escherichia coli"),
|
||||||
ggplot.bm(
|
'as.mo("T. islandicus")' = as.mo("T. islandicus"),
|
||||||
microbenchmark(as.mo("Thermus islandicus"),
|
'as.mo("P. brevis")' = as.mo("P. brevis"),
|
||||||
as.mo("T. islandicus"),
|
'as.mo("E. coli")' = as.mo("E. coli"),
|
||||||
times = 10), title = "Uncommon")
|
times = 10),
|
||||||
|
horizontal = TRUE, las = 1, unit = "s", log = FALSE,
|
||||||
|
xlab = "", ylab = "Time in seconds", ylim = c(0, 0.5),
|
||||||
|
main = "Benchmarks per prevalence")
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r, echo = FALSE, eval = FALSE}
|
In reality, the `as.mo()` function **learns from its own output to speed up subsequent determinations**. In the figure above, this effect was disabled to show the difference with the boxplot below, which reflects what happens when you use `as.mo()` yourself:
|
||||||
# In reality, the `as.mo()` functions **learns from its own output to speed up determinations for next times**. In above figure, this effect was disabled to show the difference with the boxplot below - when you would use `as.mo()` yourself:
|
|
||||||
|
|
||||||
clean_mo_history()
|
```{r, echo = FALSE}
|
||||||
|
|
||||||
|
clear_mo_history()
|
||||||
par(mar = c(5, 16, 4, 2))
|
par(mar = c(5, 16, 4, 2))
|
||||||
boxplot(microbenchmark(
|
boxplot(microbenchmark(
|
||||||
'as.mo("Thermus islandicus")' = as.mo("Thermus islandicus", force_mo_history = TRUE),
|
'as.mo("Thermus islandicus")' = as.mo("Thermus islandicus", force_mo_history = TRUE),
|
||||||
@ -142,10 +156,10 @@ boxplot(microbenchmark(
|
|||||||
horizontal = TRUE, las = 1, unit = "s", log = FALSE,
|
horizontal = TRUE, las = 1, unit = "s", log = FALSE,
|
||||||
xlab = "", ylab = "Time in seconds", ylim = c(0, 0.5),
|
xlab = "", ylab = "Time in seconds", ylim = c(0, 0.5),
|
||||||
main = "Benchmarks per prevalence")
|
main = "Benchmarks per prevalence")
|
||||||
|
|
||||||
# The highest outliers are the first times. All next determinations were done in only thousands of seconds. For now, learning only works per session. If R is closed or terminated, the algorithms reset. This will probably be resolved in a next version.
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The highest outliers are the first runs. All subsequent determinations were done in only thousandths of a second.
|
||||||
|
|
||||||
Uncommon microorganisms take a lot more time than common microorganisms. To relieve this pitfall and further improve performance, two important calculations take almost no time at all: **repetitive results** and **already precalculated results**.
|
Uncommon microorganisms take a lot more time than common microorganisms. To relieve this pitfall and further improve performance, two important calculations take almost no time at all: **repetitive results** and **already precalculated results**.
|
||||||
|
|
||||||
### Repetitive results
|
### Repetitive results
|
||||||
|
Loading…
Reference in New Issue
Block a user