From 84ed8c32bb67c1b7bc66b83ac568986889420d9d Mon Sep 17 00:00:00 2001 From: Matthijs Berends Date: Fri, 6 Jan 2023 19:21:04 +0100 Subject: [PATCH] documentation --- .github/prehooks/pre-commit | 14 ++++++------ DESCRIPTION | 2 +- NEWS.md | 2 +- R/aa_helper_functions.R | 2 +- R/eucast_rules.R | 2 +- R/join_microorganisms.R | 7 +++--- R/mo_matching_score.R | 4 ++-- R/mo_property.R | 14 +++++++----- R/mo_source.R | 4 ++-- man/as.mo.Rd | 2 +- man/mo_matching_score.Rd | 4 ++-- man/mo_property.Rd | 44 ++++++------------------------------- 12 files changed, 37 insertions(+), 64 deletions(-) diff --git a/.github/prehooks/pre-commit b/.github/prehooks/pre-commit index 76f833ce..458446bb 100755 --- a/.github/prehooks/pre-commit +++ b/.github/prehooks/pre-commit @@ -35,8 +35,8 @@ echo "Running pre-commit hook..." if command -v Rscript > /dev/null; then if [ "$(Rscript -e 'cat(all(c('"'pkgload'"', '"'devtools'"', '"'dplyr'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then Rscript -e "source('data-raw/_pre_commit_hook.R')" - currentpkg=`Rscript -e "cat(pkgload::pkg_name())"` - echo "-> Adding all files in 'data-raw' to this commit" + currentpkg=$(Rscript -e "cat(pkgload::pkg_name())") + echo "-> Adding files in 'data-raw' and 'man' to this commit" git add data-raw/* git add man/* git add R/sysdata.rda @@ -57,18 +57,18 @@ echo "Updating semantic versioning and date..." 
# get tags from remote, and remove tags not on remote: git fetch origin --prune --prune-tags --quiet -currenttagfull=`git describe --tags --abbrev=0` -currenttag=`git describe --tags --abbrev=0 | sed 's/v//'` +currenttagfull=$(git describe --tags --abbrev=0) +currenttag=$(git describe --tags --abbrev=0 | sed 's/v//') # assume main branch to be 'main' or 'master', pick the right name: -defaultbranch=`git branch | cut -c 3- | grep -E '^master$|^main$'` +defaultbranch=$(git branch | cut -c 3- | grep -E '^master$|^main$') if [ "$currenttag" = "" ]; then # there is no tag, so set tag to 0.0.1 and commit index to current count currenttag="0.0.1" - currentcommit=`git rev-list --count ${defaultbranch}` + currentcommit=$(git rev-list --count ${defaultbranch}) echo "- no git tags found, create one in format 'v(x).(y).(z)' - curently ${currentcommit} previous commits in ${defaultbranch}" else # there is a tag, so base version number on that - currentcommit=`git rev-list --count ${currenttagfull}..${defaultbranch}` + currentcommit=$(git rev-list --count ${currenttagfull}..${defaultbranch}) if (( "$currentcommit" == 0 )); then # tag is new, so this must become the version number currentversion="$currenttag" diff --git a/DESCRIPTION b/DESCRIPTION index 2dfff5bf..5fd29dc3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 1.8.2.9085 +Version: 1.8.2.9086 Date: 2023-01-06 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NEWS.md b/NEWS.md index e7ad5cc7..1ae7b161 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.8.2.9085 +# AMR 1.8.2.9086 *(this beta version will eventually become v2.0! 
We're happy to reach a new major milestone soon!)* diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index 469d7ee7..b41863ce 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -229,7 +229,7 @@ search_type_in_df <- function(x, type, info = TRUE) { # take first 'mo' column found <- colnames(x)[vapply(FUN.VALUE = logical(1), x, is.mo)] } else if ("mo" %in% colnames_formatted && - suppressWarnings(all(x$mo %in% c(NA, AMR::microorganisms$mo)))) { + suppressWarnings(all(x$mo %in% c(NA, AMR_env$MO_lookup$mo)))) { found <- "mo" } else if (any(colnames_formatted %like_case% "^(mo|microorganism|organism|bacteria|ba[ck]terie)s?$")) { found <- sort(colnames(x)[colnames_formatted %like_case% "^(mo|microorganism|organism|bacteria|ba[ck]terie)s?$"]) diff --git a/R/eucast_rules.R b/R/eucast_rules.R index 14045934..70f26fe1 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -1116,7 +1116,7 @@ edit_rsi <- function(x, error = function(e) { txt_error() stop(paste0( - "In row(s) ", paste(rows[1:min(length(rows), 10)], collapse = ","), + "In row(s) ", paste(rows[seq_len(min(length(rows), 10))], collapse = ","), ifelse(length(rows) > 10, "...", ""), " while writing value '", to, "' to column(s) `", paste(cols, collapse = "`, `"), diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R index d6b75fd0..da93fa50 100755 --- a/R/join_microorganisms.R +++ b/R/join_microorganisms.R @@ -165,10 +165,11 @@ join_microorganisms <- function(type, x, by, suffix, ...) { # otherwise use poorman, see R/aa_helper_pm_functions.R join_fn <- get(paste0("pm_", type), envir = asNamespace("AMR")) } + MO_df <- AMR_env$MO_lookup[, colnames(AMR::microorganisms), drop = FALSE] if (type %like% "full|left|right|inner") { - joined <- join_fn(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...) + joined <- join_fn(x = x, y = MO_df, by = by, suffix = suffix, ...) } else { - joined <- join_fn(x = x, y = AMR::microorganisms, by = by, ...) 
+ joined <- join_fn(x = x, y = MO_df, by = by, ...) } if ("join.mo" %in% colnames(joined)) { @@ -185,5 +186,5 @@ join_microorganisms <- function(type, x, by, suffix, ...) { warning_("in `", type, "_microorganisms()`: the newly joined data set contains ", nrow(joined) - nrow(x), " rows more than the number of rows of `x`.") } - as_original_data_class(joined, class(x.bak)) # will remove tibble groups + as_original_data_class(joined, class(x.bak)) # will remove tibble groups } diff --git a/R/mo_matching_score.R b/R/mo_matching_score.R index 49f85795..122a9943 100755 --- a/R/mo_matching_score.R +++ b/R/mo_matching_score.R @@ -30,7 +30,7 @@ #' Calculate the Matching Score for Microorganisms #' #' This algorithm is used by [as.mo()] and all the [`mo_*`][mo_property()] functions to determine the most probable match of taxonomic records based on user input. -#' @author Dr. Matthijs Berends +#' @author Dr. Matthijs Berends, 2018 #' @param x Any user input value(s) #' @param n A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms] #' @note This algorithm was originally described in: Berends MS *et al.* (2022). **AMR: An R Package for Working with Antimicrobial Resistance Data**. *Journal of Statistical Software*, 104(3), 1-31; \doi{10.18637/jss.v104.i03}. 
@@ -43,7 +43,7 @@ #' #' where: #' -#' * \ifelse{html}{\out{x is the user input;}}{\eqn{x} is the user input;} +#' * \eqn{x} is the user input; #' * \ifelse{html}{\out{n is a taxonomic name (genus, species, and subspecies);}}{\eqn{n} is a taxonomic name (genus, species, and subspecies);} #' * \ifelse{html}{\out{ln is the length of n;}}{l_n is the length of \eqn{n};} #' * \ifelse{html}{\out{lev is the Levenshtein distance function (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change x into n;}}{lev is the Levenshtein distance function (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};} diff --git a/R/mo_property.R b/R/mo_property.R index 192490fa..e63263b0 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -36,12 +36,13 @@ #' @param ... other arguments passed on to [as.mo()], such as 'minimum_matching_score', 'ignore_pattern', and 'remove_from_input' #' @param ab any (vector of) text that can be coerced to a valid antibiotic drug code with [as.ab()] #' @param open browse the URL using [`browseURL()`][utils::browseURL()] -#' @details All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that *Escherichia blattae* was renamed to *Shimwellia blattae* in 2010: -#' - `mo_name("Escherichia blattae")` will return `"Shimwellia blattae"` (with a note about the renaming) -#' - `mo_ref("Escherichia blattae", keep_synonyms = TRUE)` will return `"Burgess et al., 1973"` (without a note) -#' - `mo_ref("Shimwellia blattae", keep_synonyms = FALSE)` will return `"Priest et al., 2010"` (without a note) +#' @details All functions will, at default, **not** keep old taxonomic properties, as synonyms are automatically replaced with the current taxonomy. 
Take for example *Escherichia blattae*, which was renamed to *Shimwellia blattae* in 2010: +#' - `mo_genus("Escherichia blattae")` will return `"Shimwellia"` (with a note about the renaming) +#' - `mo_genus("Escherichia blattae", keep_synonyms = TRUE)` will return `"Escherichia"` (with a warning that the name is outdated) +#' - `mo_ref("Escherichia blattae")` will return `"Priest et al., 2010"` (with a note) +#' - `mo_ref("Escherichia blattae", keep_synonyms = TRUE)` will return `"Burgess et al., 1973"` (with a warning) #' -#' The short name - [mo_shortname()] - almost always returns the first character of the genus and the full species, like `"E. coli"`. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. In other words, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`. +#' The short name ([mo_shortname()]) returns the first character of the genus and the full species, such as `"E. coli"`, for species and subspecies. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. In other words, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`. 
#' #' Since the top-level of the taxonomy is sometimes referred to as 'kingdom' and sometimes as 'domain', the functions [mo_kingdom()] and [mo_domain()] return the exact same results. #' @@ -60,7 +61,8 @@ #' SNOMED codes ([mo_snomed()]) are from the version of `r documentation_date(TAXONOMY_VERSION$SNOMED$accessed_date)`. See *Source* and the [microorganisms] data set for more info. #' #' Old taxonomic names (so-called 'synonyms') can be retrieved with [mo_synonyms()], the current taxonomic name can be retrieved with [mo_current()]. Both functions return full names. -#' @inheritSection mo_matching_score Matching Score for Microorganisms +#' @section Matching Score for Microorganisms: +#' This function uses [as.mo()] internally, which uses an advanced algorithm to translate arbitrary user input to valid taxonomy using a so-called matching score. You can read about this public algorithm on the [MO matching score page][mo_matching_score()]. #' @inheritSection as.mo Source #' @rdname mo_property #' @name mo_property diff --git a/R/mo_source.R b/R/mo_source.R index 1f449890..c25f9012 100644 --- a/R/mo_source.R +++ b/R/mo_source.R @@ -288,9 +288,9 @@ check_validity_mo_source <- function(x, refer_to_name = "`reference_df`", stop_o return(FALSE) } } - if (!all(x$mo %in% c("", AMR::microorganisms$mo, AMR::microorganisms$fullname), na.rm = TRUE)) { + if (!all(x$mo %in% c("", AMR_env$MO_lookup$mo, AMR_env$MO_lookup$fullname), na.rm = TRUE)) { if (stop_on_error == TRUE) { - invalid <- x[which(!x$mo %in% c("", AMR::microorganisms$mo, AMR::microorganisms$fullname)), , drop = FALSE] + invalid <- x[which(!x$mo %in% c("", AMR_env$MO_lookup$mo, AMR_env$MO_lookup$fullname)), , drop = FALSE] if (nrow(invalid) > 1) { plural <- "s" } else { diff --git a/man/as.mo.Rd b/man/as.mo.Rd index 8dbcf2fa..7dd461a9 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -135,7 +135,7 @@ With ambiguous user input in \code{\link[=as.mo]{as.mo()}} and all the \code{\li where: \itemize{ -\item 
\ifelse{html}{\out{x is the user input;}}{\eqn{x} is the user input;} +\item \eqn{x} is the user input; \item \ifelse{html}{\out{n is a taxonomic name (genus, species, and subspecies);}}{\eqn{n} is a taxonomic name (genus, species, and subspecies);} \item \ifelse{html}{\out{ln is the length of n;}}{l_n is the length of \eqn{n};} \item \ifelse{html}{\out{lev is the Levenshtein distance function (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change x into n;}}{lev is the Levenshtein distance function (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};} diff --git a/man/mo_matching_score.Rd b/man/mo_matching_score.Rd index 34d1a489..1a33418a 100644 --- a/man/mo_matching_score.Rd +++ b/man/mo_matching_score.Rd @@ -27,7 +27,7 @@ With ambiguous user input in \code{\link[=as.mo]{as.mo()}} and all the \code{\li where: \itemize{ -\item \ifelse{html}{\out{x is the user input;}}{\eqn{x} is the user input;} +\item \eqn{x} is the user input; \item \ifelse{html}{\out{n is a taxonomic name (genus, species, and subspecies);}}{\eqn{n} is a taxonomic name (genus, species, and subspecies);} \item \ifelse{html}{\out{ln is the length of n;}}{l_n is the length of \eqn{n};} \item \ifelse{html}{\out{lev is the Levenshtein distance function (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change x into n;}}{lev is the Levenshtein distance function (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};} @@ -70,5 +70,5 @@ mo_matching_score( ) } \author{ -Dr. Matthijs Berends +Dr. Matthijs Berends, 2018 } diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 22738e84..5df9ff7e 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -294,14 +294,15 @@ mo_property( Use these functions to return a specific property of a microorganism based on the latest accepted taxonomy. 
All input values will be evaluated internally with \code{\link[=as.mo]{as.mo()}}, which makes it possible to use microbial abbreviations, codes and names as input. See \emph{Examples}. } \details{ -All functions will, at default, keep old taxonomic properties. Please refer to this example, knowing that \emph{Escherichia blattae} was renamed to \emph{Shimwellia blattae} in 2010: +All functions will, at default, \strong{not} keep old taxonomic properties, as synonyms are automatically replaced with the current taxonomy. Take for example \emph{Escherichia blattae}, which was renamed to \emph{Shimwellia blattae} in 2010: \itemize{ -\item \code{mo_name("Escherichia blattae")} will return \code{"Shimwellia blattae"} (with a note about the renaming) -\item \code{mo_ref("Escherichia blattae", keep_synonyms = TRUE)} will return \code{"Burgess et al., 1973"} (without a note) -\item \code{mo_ref("Shimwellia blattae", keep_synonyms = FALSE)} will return \code{"Priest et al., 2010"} (without a note) +\item \code{mo_genus("Escherichia blattae")} will return \code{"Shimwellia"} (with a note about the renaming) +\item \code{mo_genus("Escherichia blattae", keep_synonyms = TRUE)} will return \code{"Escherichia"} (with a warning that the name is outdated) +\item \code{mo_ref("Escherichia blattae")} will return \code{"Priest et al., 2010"} (with a note) +\item \code{mo_ref("Escherichia blattae", keep_synonyms = TRUE)} will return \code{"Burgess et al., 1973"} (with a warning) } -The short name - \code{\link[=mo_shortname]{mo_shortname()}} - almost always returns the first character of the genus and the full species, like \code{"E. coli"}. Exceptions are abbreviations of staphylococci (such as \emph{"CoNS"}, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as \emph{"GBS"}, Group B Streptococci). Please bear in mind that e.g. \emph{E. coli} could mean \emph{Escherichia coli} (kingdom of Bacteria) as well as \emph{Entamoeba coli} (kingdom of Protozoa). 
Returning to the full name will be done using \code{\link[=as.mo]{as.mo()}} internally, giving priority to bacteria and human pathogens, i.e. \code{"E. coli"} will be considered \emph{Escherichia coli}. In other words, \code{mo_fullname(mo_shortname("Entamoeba coli"))} returns \code{"Escherichia coli"}. +The short name (\code{\link[=mo_shortname]{mo_shortname()}}) returns the first character of the genus and the full species, such as \code{"E. coli"}, for species and subspecies. Exceptions are abbreviations of staphylococci (such as \emph{"CoNS"}, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as \emph{"GBS"}, Group B Streptococci). Please bear in mind that e.g. \emph{E. coli} could mean \emph{Escherichia coli} (kingdom of Bacteria) as well as \emph{Entamoeba coli} (kingdom of Protozoa). Returning to the full name will be done using \code{\link[=as.mo]{as.mo()}} internally, giving priority to bacteria and human pathogens, i.e. \code{"E. coli"} will be considered \emph{Escherichia coli}. In other words, \code{mo_fullname(mo_shortname("Entamoeba coli"))} returns \code{"Escherichia coli"}. Since the top-level of the taxonomy is sometimes referred to as 'kingdom' and sometimes as 'domain', the functions \code{\link[=mo_kingdom]{mo_kingdom()}} and \code{\link[=mo_domain]{mo_domain()}} return the exact same results. @@ -323,38 +324,7 @@ Old taxonomic names (so-called 'synonyms') can be retrieved with \code{\link[=mo } \section{Matching Score for Microorganisms}{ -With ambiguous user input in \code{\link[=as.mo]{as.mo()}} and all the \code{\link[=mo_property]{mo_*}} functions, the returned results are chosen based on their matching score using \code{\link[=mo_matching_score]{mo_matching_score()}}. 
This matching score \eqn{m}, is calculated as: - -\ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}} - -where: -\itemize{ -\item \ifelse{html}{\out{x is the user input;}}{\eqn{x} is the user input;} -\item \ifelse{html}{\out{n is a taxonomic name (genus, species, and subspecies);}}{\eqn{n} is a taxonomic name (genus, species, and subspecies);} -\item \ifelse{html}{\out{ln is the length of n;}}{l_n is the length of \eqn{n};} -\item \ifelse{html}{\out{lev is the Levenshtein distance function (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change x into n;}}{lev is the Levenshtein distance function (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};} -\item \ifelse{html}{\out{pn is the human pathogenic prevalence group of n, as described below;}}{p_n is the human pathogenic prevalence group of \eqn{n}, as described below;} -\item \ifelse{html}{\out{kn is the taxonomic kingdom of n, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.}}{l_n is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.} -} - -The grouping into human pathogenic prevalence (\eqn{p}) is based on recent work from Bartlett \emph{et al.} (2022, \doi{10.1099/mic.0.001269}) who extensively studied medical-scientific literature to categorise all bacterial species into these groups: -\itemize{ -\item \strong{Established}, if a taxonomic species has infected at least three persons in three or more references. These records have \code{prevalence = 1.0} in the \link{microorganisms} data set; -\item \strong{Putative}, if a taxonomic species has fewer than three known cases. 
These records have \code{prevalence = 1.25} in the \link{microorganisms} data set. -} - -Furthermore, -\itemize{ -\item Any genus present in the \strong{established} list also has \code{prevalence = 1.0} in the \link{microorganisms} data set; -\item Any other genus present in the \strong{putative} list has \code{prevalence = 1.25} in the \link{microorganisms} data set; -\item Any other species or subspecies of which the genus is present in the two aforementioned groups, has \code{prevalence = 1.5} in the \link{microorganisms} data set; -\item Any \emph{non-bacterial} genus, species or subspecies of which the genus is present in the following list, has \code{prevalence = 1.5} in the \link{microorganisms} data set: \emph{Absidia}, \emph{Acanthamoeba}, \emph{Acremonium}, \emph{Aedes}, \emph{Alternaria}, \emph{Amoeba}, \emph{Ancylostoma}, \emph{Angiostrongylus}, \emph{Anisakis}, \emph{Anopheles}, \emph{Apophysomyces}, \emph{Aspergillus}, \emph{Aureobasidium}, \emph{Basidiobolus}, \emph{Beauveria}, \emph{Blastocystis}, \emph{Blastomyces}, \emph{Candida}, \emph{Capillaria}, \emph{Chaetomium}, \emph{Chrysonilia}, \emph{Cladophialophora}, \emph{Cladosporium}, \emph{Conidiobolus}, \emph{Contracaecum}, \emph{Cordylobia}, \emph{Cryptococcus}, \emph{Curvularia}, \emph{Demodex}, \emph{Dermatobia}, \emph{Dientamoeba}, \emph{Diphyllobothrium}, \emph{Dirofilaria}, \emph{Echinostoma}, \emph{Entamoeba}, \emph{Enterobius}, \emph{Exophiala}, \emph{Exserohilum}, \emph{Fasciola}, \emph{Fonsecaea}, \emph{Fusarium}, \emph{Giardia}, \emph{Haloarcula}, \emph{Halobacterium}, \emph{Halococcus}, \emph{Hendersonula}, \emph{Heterophyes}, \emph{Histomonas}, \emph{Histoplasma}, \emph{Hymenolepis}, \emph{Hypomyces}, \emph{Hysterothylacium}, \emph{Leishmania}, \emph{Malassezia}, \emph{Malbranchea}, \emph{Metagonimus}, \emph{Meyerozyma}, \emph{Microsporidium}, \emph{Microsporum}, \emph{Mortierella}, \emph{Mucor}, \emph{Mycocentrospora}, \emph{Necator}, \emph{Nectria}, \emph{Ochroconis}, 
\emph{Oesophagostomum}, \emph{Oidiodendron}, \emph{Opisthorchis}, \emph{Pediculus}, \emph{Phlebotomus}, \emph{Phoma}, \emph{Pichia}, \emph{Piedraia}, \emph{Pithomyces}, \emph{Pityrosporum}, \emph{Pneumocystis}, \emph{Pseudallescheria}, \emph{Pseudoterranova}, \emph{Pulex}, \emph{Rhizomucor}, \emph{Rhizopus}, \emph{Rhodotorula}, \emph{Saccharomyces}, \emph{Sarcoptes}, \emph{Scolecobasidium}, \emph{Scopulariopsis}, \emph{Scytalidium}, \emph{Spirometra}, \emph{Sporobolomyces}, \emph{Stachybotrys}, \emph{Strongyloides}, \emph{Syngamus}, \emph{Taenia}, \emph{Toxocara}, \emph{Trichinella}, \emph{Trichobilharzia}, \emph{Trichoderma}, \emph{Trichomonas}, \emph{Trichophyton}, \emph{Trichosporon}, \emph{Trichostrongylus}, \emph{Trichuris}, \emph{Tritirachium}, \emph{Trombicula}, \emph{Trypanosoma}, \emph{Tunga} or \emph{Wuchereria}; -\item All other records have \code{prevalence = 2.0} in the \link{microorganisms} data set. -} - -When calculating the matching score, all characters in \eqn{x} and \eqn{n} are ignored that are other than A-Z, a-z, 0-9, spaces and parentheses. - -All matches are sorted descending on their matching score and for all user input values, the top match will be returned. This will lead to the effect that e.g., \code{"E. coli"} will return the microbial ID of \emph{Escherichia coli} (\eqn{m = 0.688}, a highly prevalent microorganism found in humans) and not \emph{Entamoeba coli} (\eqn{m = 0.159}, a less prevalent microorganism in humans), although the latter would alphabetically come first. +This function uses \code{\link[=as.mo]{as.mo()}} internally, which uses an advanced algorithm to translate arbitrary user input to valid taxonomy using a so-called matching score. You can read about this public algorithm on the \link[=mo_matching_score]{MO matching score page}. } \section{Source}{