(v1.3.0.9022) mo_matching_score(), poorman update, as.rsi() fix

2025-08-24 12:32:10 +02:00 · 2020-09-18 16:05:53 +02:00
parent 89401ede9f
commit 4e40e42011
138 changed files with 2923 additions and 1472 deletions
--- a/man/WHONET.Rd
+++ b/man/WHONET.Rd
@@ -5,7 +5,7 @@
 \alias{WHONET}
 \title{Data set with 500 isolates - WHONET example}
 \format{
-A \code{\link{data.frame}} with 500 observations and 53 variables:
+A \link{data.frame} with 500 observations and 53 variables:
 \itemize{
 \item \verb{Identification number}\cr ID of the sample
 \item \verb{Specimen number}\cr ID of the specimen
--- a/man/ab_from_text.Rd
+++ b/man/ab_from_text.Rd
@@ -27,7 +27,7 @@ ab_from_text(
 \item{...}{parameters passed on to \code{\link[=as.ab]{as.ab()}}}
 }
 \value{
-A \link{list}, or a \link{character} if \code{collapse} is not \code{NULL}
+A \link{list}, or a \link{character} if \code{collapse} is not \code{NULL}
 }
 \description{
 Use this function on e.g. clinical texts from health care records. It returns a \link{list} with all antimicrobial drugs, doses and forms of administration found in the texts.
--- a/man/ab_property.Rd
+++ b/man/ab_property.Rd
@@ -62,10 +62,10 @@ ab_property(x, property = "name", language = get_locale(), ...)
 }
 \value{
 \itemize{
-\item An \code{\link{integer}} in case of \code{\link[=ab_cid]{ab_cid()}}
-\item A named \code{\link{list}} in case of \code{\link[=ab_info]{ab_info()}} and multiple \code{\link[=ab_synonyms]{ab_synonyms()}}/\code{\link[=ab_tradenames]{ab_tradenames()}}
-\item A \code{\link{double}} in case of \code{\link[=ab_ddd]{ab_ddd()}}
-\item A \code{\link{character}} in all other cases
+\item An \link{integer} in case of \code{\link[=ab_cid]{ab_cid()}}
+\item A named \link{list} in case of \code{\link[=ab_info]{ab_info()}} and multiple \code{\link[=ab_synonyms]{ab_synonyms()}}/\code{\link[=ab_tradenames]{ab_tradenames()}}
+\item A \link{double} in case of \code{\link[=ab_ddd]{ab_ddd()}}
+\item A \link{character} in all other cases
 }
 }
 \description{
--- a/man/age_groups.Rd
+++ b/man/age_groups.Rd
@@ -76,7 +76,7 @@ example_isolates \%>\%
  filter(mo == as.mo("E. coli")) \%>\%
  group_by(age_group = age_groups(age)) \%>\%
  select(age_group, CIP) \%>\%
-  ggplot_rsi(x = "age_group")
+  ggplot_rsi(x = "age_group", minimum = 0)
 }
 }
 \seealso{
--- a/man/antibiotics.Rd
+++ b/man/antibiotics.Rd
@@ -6,7 +6,7 @@
 \alias{antivirals}
 \title{Data sets with 558 antimicrobials}
 \format{
-\subsection{For the \link{antibiotics} data set: a \code{\link{data.frame}} with 456 observations and 14 variables:}{
+\subsection{For the \link{antibiotics} data set: a \link{data.frame} with 456 observations and 14 variables:}{
 \itemize{
 \item \code{ab}\cr Antibiotic ID as used in this package (like \code{AMC}), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available
 \item \code{atc}\cr ATC code (Anatomical Therapeutic Chemical) as defined by the WHOCC, like \code{J01CR02}
@@ -25,7 +25,7 @@
 }
 }

-\subsection{For the \link{antivirals} data set: a \code{\link{data.frame}} with 102 observations and 9 variables:}{
+\subsection{For the \link{antivirals} data set: a \link{data.frame} with 102 observations and 9 variables:}{
 \itemize{
 \item \code{atc}\cr ATC code (Anatomical Therapeutic Chemical) as defined by the WHOCC
 \item \code{cid}\cr Compound ID as found in PubChem
--- a/man/as.ab.Rd
+++ b/man/as.ab.Rd
@@ -20,7 +20,7 @@ is.ab(x)
 \item{...}{arguments passed on to internal functions}
 }
 \value{
-Character (vector) with class \code{\link{ab}}. Unknown values will return \code{NA}.
+A \link{character} \link{vector} with additional class \code{\link{ab}}
 }
 \description{
 Use this function to determine the antibiotic code of one or more antibiotics. The data set \link{antibiotics} will be searched for abbreviations, official names and synonyms (brand names).
@@ -101,7 +101,7 @@ ab_name("eryt")       # "Erythromycin"
 }
 \seealso{
 \itemize{
-\item \link{antibiotics} for the dataframe that is being used to determine ATCs
+\item \link{antibiotics} for the \link{data.frame} that is being used to determine ATCs
 \item \code{\link[=ab_from_text]{ab_from_text()}} for a function to retrieve antimicrobial drugs from clinical text (from health care records)
 }
 }
--- a/man/as.disk.Rd
+++ b/man/as.disk.Rd
@@ -16,7 +16,7 @@ is.disk(x)
 \item{na.rm}{a logical indicating whether missing values should be removed}
 }
 \value{
-An \code{\link{integer}} with additional new class \code{\link{disk}}
+An \link{integer} with additional class \code{\link{disk}}
 }
 \description{
 This transforms a vector to a new class \code{\link{disk}}, which is a disk diffusion growth zone size (around an antibiotic disk) in millimetres between 6 and 50.
--- a/man/as.mic.Rd
+++ b/man/as.mic.Rd
@@ -4,7 +4,7 @@
 \alias{as.mic}
 \alias{mic}
 \alias{is.mic}
-\title{Transform input to minimum inhibitory concentrations}
+\title{Transform input to minimum inhibitory concentrations (MIC)}
 \usage{
 as.mic(x, na.rm = FALSE)

@@ -16,10 +16,10 @@ is.mic(x)
 \item{na.rm}{a logical indicating whether missing values should be removed}
 }
 \value{
-Ordered \code{\link{factor}} with new class \code{\link{mic}}
+Ordered \link{factor} with additional class \code{\link{mic}}
 }
 \description{
-This transforms a vector to a new class \code{\link{mic}}, which is an ordered \code{\link{factor}} with valid minimum inhibitory concentrations (MIC) as levels. Invalid MIC values will be translated as \code{NA} with a warning.
+This transforms a vector to a new class \code{\link{mic}}, which is an ordered \link{factor} with valid minimum inhibitory concentrations (MIC) as levels. Invalid MIC values will be translated as \code{NA} with a warning.
 }
 \details{
 To interpret MIC values as RSI values, use \code{\link[=as.rsi]{as.rsi()}} on MIC values. It supports guidelines from EUCAST and CLSI.
--- a/man/as.mo.Rd
+++ b/man/as.mo.Rd
@@ -29,7 +29,7 @@ mo_uncertainties()
 mo_renamed()
 }
 \arguments{
-\item{x}{a character vector or a \code{\link{data.frame}} with one or two columns}
+\item{x}{a character vector or a \link{data.frame} with one or two columns}

 \item{Becker}{a logical to indicate whether \emph{Staphylococci} should be categorised into coagulase-negative \emph{Staphylococci} ("CoNS") and coagulase-positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} (1,2). Note that this does not include species that were newly named after these publications, like \emph{S. caeli}.

@@ -41,7 +41,7 @@ This excludes \emph{Enterococci} at default (who are in group D), use \code{Lanc

 \item{allow_uncertain}{a number between \code{0} (or \code{"none"}) and \code{3} (or \code{"all"}), or \code{TRUE} (= \code{2}) or \code{FALSE} (= \code{0}) to indicate whether the input should be checked for less probable results, please see \emph{Details}}

-\item{reference_df}{a \code{\link{data.frame}} to be used for extra reference when translating \code{x} to a valid \code{\link{mo}}. See \code{\link[=set_mo_source]{set_mo_source()}} and \code{\link[=get_mo_source]{get_mo_source()}} to automate the usage of your own codes (e.g. used in your analysis or organisation).}
+\item{reference_df}{a \link{data.frame} to be used for extra reference when translating \code{x} to a valid \code{\link{mo}}. See \code{\link[=set_mo_source]{set_mo_source()}} and \code{\link[=get_mo_source]{get_mo_source()}} to automate the usage of your own codes (e.g. used in your analysis or organisation).}

 \item{ignore_pattern}{a regular expression (case-insensitive) of which all matches in \code{x} must return \code{NA}. This can be convenient to exclude known non-relevant input and can also be set with the option \code{AMR_ignore_pattern}, e.g. \code{options(AMR_ignore_pattern = "(not reported|contaminated flora)")}.}

@@ -50,7 +50,7 @@ This excludes \emph{Enterococci} at default (who are in group D), use \code{Lanc
 \item{...}{other parameters passed on to functions}
 }
 \value{
-A \code{\link{character}} \code{\link{vector}} with additional class \code{\link{mo}}
+A \link{character} \link{vector} with additional class \code{\link{mo}}
 }
 \description{
 Use this function to determine a valid microorganism ID (\code{\link{mo}}). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. Please see \emph{Examples}.
@@ -109,9 +109,9 @@ With the default setting (\code{allow_uncertain = TRUE}, level 2), below example

 There are three helper functions that can be run after using the \code{\link[=as.mo]{as.mo()}} function:
 \itemize{
-\item Use \code{\link[=mo_uncertainties]{mo_uncertainties()}} to get a \code{\link{data.frame}} that prints in a pretty format with all taxonomic names that were guessed. The output contains a score that is based on the human pathogenic prevalence and the \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance} between the user input and the full taxonomic name.
-\item Use \code{\link[=mo_failures]{mo_failures()}} to get a \code{\link{character}} \code{\link{vector}} with all values that could not be coerced to a valid value.
-\item Use \code{\link[=mo_renamed]{mo_renamed()}} to get a \code{\link{data.frame}} with all values that could be coerced based on old, previously accepted taxonomic names.
+\item Use \code{\link[=mo_uncertainties]{mo_uncertainties()}} to get a \link{data.frame} that prints in a pretty format with all taxonomic names that were guessed. The output contains the matching score for all matches (see \emph{Background on matching scores}).
+\item Use \code{\link[=mo_failures]{mo_failures()}} to get a \link{character} \link{vector} with all values that could not be coerced to a valid value.
+\item Use \code{\link[=mo_renamed]{mo_renamed()}} to get a \link{data.frame} with all values that could be coerced based on old, previously accepted taxonomic names.
 }
 }

@@ -125,6 +125,24 @@ Group 2 consists of all microorganisms where the taxonomic phylum is Proteobacte

 Group 3 (least prevalent microorganisms) consists of all other microorganisms. This group contains microorganisms most probably not found in humans.
 }
+
+\subsection{Background on matching scores}{
+
+With ambiguous user input, the returned results are chosen based on their matching score using \code{\link[=mo_matching_score]{mo_matching_score()}}. This matching score is based on four parameters:
+\enumerate{
+\item The prevalence \eqn{P} is categorised into group 1, 2 and 3 as stated above;
+\item A kingdom index \eqn{K} is set as follows: Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, and all others = 5;
+\item The level of uncertainty \eqn{U} needed to get to the result, as stated above (1 to 3);
+\item The \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance} \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as:
+}
+
+\deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F}
+
+The final matching score \eqn{M} is calculated as:
+\deqn{M = L' \times \frac{1}{P \times K} \times \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)}
+
+All matches are sorted descending on their matching score and for all user input values, the top match will be returned.
+}
 }
 \section{Source}{

@@ -220,7 +238,7 @@ df <- df \%>\%
 }
 }
 \seealso{
-\link{microorganisms} for the \code{\link{data.frame}} that is being used to determine ID's.
+\link{microorganisms} for the \link{data.frame} that is being used to determine IDs.

 The \code{\link[=mo_property]{mo_property()}} functions (like \code{\link[=mo_genus]{mo_genus()}}, \code{\link[=mo_gramstain]{mo_gramstain()}}) to get properties based on the returned code.
 }
--- a/man/atc_online.Rd
+++ b/man/atc_online.Rd
@@ -13,7 +13,8 @@ atc_online_property(
  atc_code,
  property,
  administration = "O",
-  url = "https://www.whocc.no/atc_ddd_index/?code=\%s&showdescription=no"
+  url = "https://www.whocc.no/atc_ddd_index/?code=\%s&showdescription=no",
+  url_vet = "https://www.whocc.no/atcvet/atcvet_index/?code=\%s&showdescription=no"
 )

 atc_online_groups(atc_code, ...)
@@ -27,12 +28,14 @@ atc_online_ddd(atc_code, ...)

 \item{administration}{type of administration when using \code{property = "Adm.R"}, see Details}

-\item{url}{url of website of the WHO. The sign \verb{\%s} can be used as a placeholder for ATC codes.}
+\item{url}{url of website of the WHOCC. The sign \verb{\%s} can be used as a placeholder for ATC codes.}
+
+\item{url_vet}{url of website of the WHOCC for veterinary medicine. The sign \verb{\%s} can be used as a placeholder for ATC_vet codes (that all start with "Q").}

 \item{...}{parameters to pass on to \code{atc_property}}
 }
 \description{
-Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic) like name, defined daily dose (DDD) or standard unit.
+Gets data from the WHOCC to determine properties of an ATC (e.g. an antibiotic), such as the name, defined daily dose (DDD) or standard unit.
 }
 \details{
 Options for parameter \code{administration}:
--- a/man/availability.Rd
+++ b/man/availability.Rd
@@ -7,18 +7,18 @@
 availability(tbl, width = NULL)
 }
 \arguments{
-\item{tbl}{a \code{\link{data.frame}} or \code{\link{list}}}
+\item{tbl}{a \link{data.frame} or \link{list}}

 \item{width}{number of characters to present the visual availability, defaults to filling the width of the console}
 }
 \value{
-\code{\link{data.frame}} with column names of \code{tbl} as row names
+\link{data.frame} with column names of \code{tbl} as row names
 }
 \description{
 Easy check for data availability of all columns in a data set. This makes it easy to get an idea of which antimicrobial combinations can be used for calculation with e.g. \code{\link[=susceptibility]{susceptibility()}} and \code{\link[=resistance]{resistance()}}.
 }
 \details{
-The function returns a \code{\link{data.frame}} with columns \code{"resistant"} and \code{"visual_resistance"}. The values in that columns are calculated with \code{\link[=resistance]{resistance()}}.
+The function returns a \link{data.frame} with columns \code{"resistant"} and \code{"visual_resistance"}. The values in those columns are calculated with \code{\link[=resistance]{resistance()}}.
 }
 \section{Stable lifecycle}{

--- a/man/bug_drug_combinations.Rd
+++ b/man/bug_drug_combinations.Rd
@@ -55,7 +55,7 @@ bug_drug_combinations(x, col_mo = NULL, FUN = mo_shortname, ...)
    decimal point.}
 }
 \value{
-The function \code{\link[=bug_drug_combinations]{bug_drug_combinations()}} returns a \code{\link{data.frame}} with columns "mo", "ab", "S", "I", "R" and "total".
+The function \code{\link[=bug_drug_combinations]{bug_drug_combinations()}} returns a \link{data.frame} with columns "mo", "ab", "S", "I", "R" and "total".
 }
 \description{
 Determine antimicrobial resistance (AMR) of all bug-drug combinations in your data set where at least 30 (default) isolates are available per species. Use \code{\link[=format]{format()}} on the result to prettify it to a publishable/printable format, see Examples.
--- a/man/catalogue_of_life_version.Rd
+++ b/man/catalogue_of_life_version.Rd
@@ -7,7 +7,7 @@
 catalogue_of_life_version()
 }
 \value{
-a \code{\link{list}}, which prints in pretty format
+a \link{list}, which prints in pretty format
 }
 \description{
 This function returns information about the included data from the Catalogue of Life.
--- a/man/count.Rd
+++ b/man/count.Rd
@@ -45,7 +45,7 @@ count_df(

 \item{only_all_tested}{(for combination therapies, i.e. using more than one variable for \code{...}): a logical to indicate that isolates must be tested for all antibiotics, see section \emph{Combination therapy} below}

-\item{data}{a \code{\link{data.frame}} containing columns with class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})}
+\item{data}{a \link{data.frame} containing columns with class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})}

 \item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}. Use a value}

@@ -56,7 +56,7 @@ count_df(
 \item{combine_IR}{a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. I+R (susceptible vs. non-susceptible). This is outdated, see parameter \code{combine_SI}.}
 }
 \value{
-An \code{\link{integer}}
+An \link{integer}
 }
 \description{
 These functions can be used to count resistant/susceptible microbial isolates. All functions support quasiquotation with pipes, can be used in \code{summarise()} from the \code{dplyr} package and also support grouped variables, please see \emph{Examples}.
--- a/man/eucast_rules.Rd
+++ b/man/eucast_rules.Rd
@@ -39,7 +39,7 @@ eucast_rules(
 \item{...}{column name of an antibiotic, please see section \emph{Antibiotics} below}
 }
 \value{
-The input of \code{x}, possibly with edited values of antibiotics. Or, if \code{verbose = TRUE}, a \code{\link{data.frame}} with all original and new values of the affected bug-drug combinations.
+The input of \code{x}, possibly with edited values of antibiotics. Or, if \code{verbose = TRUE}, a \link{data.frame} with all original and new values of the affected bug-drug combinations.
 }
 \description{
 Apply susceptibility rules as defined by the European Committee on Antimicrobial Susceptibility Testing (EUCAST, \url{http://eucast.org}), see \emph{Source}. This includes (1) expert rules and intrinsic resistance and (2) inferred resistance as defined in their breakpoint tables.
--- a/man/example_isolates.Rd
+++ b/man/example_isolates.Rd
@@ -5,7 +5,7 @@
 \alias{example_isolates}
 \title{Data set with 2,000 example isolates}
 \format{
-A \code{\link{data.frame}} with 2,000 observations and 49 variables:
+A \link{data.frame} with 2,000 observations and 49 variables:
 \itemize{
 \item \code{date}\cr date of receipt at the laboratory
 \item \code{hospital_id}\cr ID of the hospital, from A to D
--- a/man/example_isolates_unclean.Rd
+++ b/man/example_isolates_unclean.Rd
@@ -5,7 +5,7 @@
 \alias{example_isolates_unclean}
 \title{Data set with unclean data}
 \format{
-A \code{\link{data.frame}} with 3,000 observations and 8 variables:
+A \link{data.frame} with 3,000 observations and 8 variables:
 \itemize{
 \item \code{patient_id}\cr ID of the patient
 \item \code{date}\cr date of receipt at the laboratory
--- a/man/first_isolate.Rd
+++ b/man/first_isolate.Rd
@@ -50,7 +50,7 @@ filter_first_weighted_isolate(
 )
 }
 \arguments{
-\item{x}{a \code{\link{data.frame}} containing isolates.}
+\item{x}{a \link{data.frame} containing isolates.}

 \item{col_date}{column name of the result date (or date that it was received at the lab), defaults to the first column with a date class}

--- a/man/ggplot_rsi.Rd
+++ b/man/ggplot_rsi.Rd
@@ -21,6 +21,7 @@ ggplot_rsi(
  translate_ab = "name",
  combine_SI = TRUE,
  combine_IR = FALSE,
+  minimum = 30,
  language = get_locale(),
  nrow = NULL,
  colours = c(S = "#61a8ff", SI = "#61a8ff", I = "#61f7ff", IR = "#ff6961", R =
@@ -41,6 +42,7 @@ geom_rsi(
  x = c("antibiotic", "interpretation"),
  fill = "interpretation",
  translate_ab = "name",
+  minimum = 30,
  language = get_locale(),
  combine_SI = TRUE,
  combine_IR = FALSE,
@@ -62,6 +64,8 @@ labels_rsi_count(
  position = NULL,
  x = "antibiotic",
  translate_ab = "name",
+  minimum = 30,
+  language = get_locale(),
  combine_SI = TRUE,
  combine_IR = FALSE,
  datalabels.size = 3,
@@ -69,7 +73,7 @@ labels_rsi_count(
 )
 }
 \arguments{
-\item{data}{a \code{\link{data.frame}} with column(s) of class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})}
+\item{data}{a \link{data.frame} with column(s) of class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})}

 \item{position}{position adjustment of bars, either \code{"fill"}, \code{"stack"} or \code{"dodge"}}

@@ -89,6 +93,8 @@ labels_rsi_count(

 \item{combine_IR}{a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. I+R (susceptible vs. non-susceptible). This is outdated, see parameter \code{combine_SI}.}

+\item{minimum}{the minimum allowed number of available (tested) isolates. Any isolate count lower than \code{minimum} will return \code{NA} with a warning. The default number of \code{30} isolates is advised by the Clinical and Laboratory Standards Institute (CLSI) as best practice, see Source.}
+
 \item{language}{language of the returned text, defaults to system language (see \code{\link[=get_locale]{get_locale()}}) and can also be set with \code{getOption("AMR_locale")}. Use \code{language = NULL} or \code{language = ""} to prevent translation.}

 \item{nrow}{(when using \code{facet}) number of rows}
--- a/man/guess_ab_col.Rd
+++ b/man/guess_ab_col.Rd
@@ -7,7 +7,7 @@
 guess_ab_col(x = NULL, search_string = NULL, verbose = FALSE)
 }
 \arguments{
-\item{x}{a \code{\link{data.frame}}}
+\item{x}{a \link{data.frame}}

 \item{search_string}{a text to search \code{x} for, will be checked with \code{\link[=as.ab]{as.ab()}} if this value is not a column in \code{x}}

--- a/man/intrinsic_resistant.Rd
+++ b/man/intrinsic_resistant.Rd
@@ -5,7 +5,7 @@
 \alias{intrinsic_resistant}
 \title{Data set with bacterial intrinsic resistance}
 \format{
-A \code{\link{data.frame}} with 49,462 observations and 2 variables:
+A \link{data.frame} with 49,462 observations and 2 variables:
 \itemize{
 \item \code{microorganism}\cr Name of the microorganism
 \item \code{antibiotic}\cr Name of the antibiotic drug
--- a/man/join.Rd
+++ b/man/join.Rd
@@ -36,7 +36,7 @@ anti_join_microorganisms(x, by = NULL, ...)
 Join the data set \link{microorganisms} easily to an existing table or character vector.
 }
 \details{
-\strong{Note:} As opposed to the \code{join()} functions of \code{dplyr}, \code{\link{character}} vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix.
+\strong{Note:} As opposed to the \code{join()} functions of \code{dplyr}, \link{character} vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix.

 These functions rely on \code{\link[=merge]{merge()}}, a base R function to do joins.
 }
--- a/man/kurtosis.Rd
+++ b/man/kurtosis.Rd
@@ -16,7 +16,7 @@ kurtosis(x, na.rm = FALSE)
 \method{kurtosis}{data.frame}(x, na.rm = FALSE)
 }
 \arguments{
-\item{x}{a vector of values, a \code{\link{matrix}} or a \code{\link{data.frame}}}
+\item{x}{a vector of values, a \link{matrix} or a \link{data.frame}}

 \item{na.rm}{a logical value indicating whether \code{NA} values should be stripped before the computation proceeds.}
 }
--- a/man/like.Rd
+++ b/man/like.Rd
@@ -18,7 +18,7 @@ x \%like_case\% pattern
 \arguments{
 \item{x}{a character vector where matches are sought, or an object which can be coerced by \code{\link[=as.character]{as.character()}} to a character vector.}

-\item{pattern}{a character string containing a regular expression (or \code{\link{character}} string for \code{fixed = TRUE}) to be matched in the given character vector. Coerced by \code{\link[=as.character]{as.character()}} to a character string if possible.  If a \code{\link{character}} vector of length 2 or more is supplied, the first element is used with a warning.}
+\item{pattern}{a character string containing a regular expression (or \link{character} string for \code{fixed = TRUE}) to be matched in the given character vector. Coerced by \code{\link[=as.character]{as.character()}} to a character string if possible.  If a \link{character} vector of length 2 or more is supplied, the first element is used with a warning.}

 \item{ignore.case}{if \code{FALSE}, the pattern matching is \emph{case sensitive} and if \code{TRUE}, case is ignored during matching.}
 }
--- a/man/mdro.Rd
+++ b/man/mdro.Rd
@@ -59,13 +59,13 @@ eucast_exceptional_phenotypes(x, guideline = "EUCAST", ...)
 \value{
 \itemize{
 \item CMI 2012 paper - function \code{\link[=mdr_cmi2012]{mdr_cmi2012()}} or \code{\link[=mdro]{mdro()}}:\cr
-Ordered \code{\link{factor}} with levels \code{Negative} < \code{Multi-drug-resistant (MDR)} < \verb{Extensively drug-resistant (XDR)} < \code{Pandrug-resistant (PDR)}
+Ordered \link{factor} with levels \code{Negative} < \code{Multi-drug-resistant (MDR)} < \verb{Extensively drug-resistant (XDR)} < \code{Pandrug-resistant (PDR)}
 \item TB guideline - function \code{\link[=mdr_tb]{mdr_tb()}} or \code{\link[=mdro]{mdro(..., guideline = "TB")}}:\cr
-Ordered \code{\link{factor}} with levels \code{Negative} < \code{Mono-resistant} < \code{Poly-resistant} < \code{Multi-drug-resistant} < \verb{Extensively drug-resistant}
+Ordered \link{factor} with levels \code{Negative} < \code{Mono-resistant} < \code{Poly-resistant} < \code{Multi-drug-resistant} < \verb{Extensively drug-resistant}
 \item German guideline - function \code{\link[=mrgn]{mrgn()}} or \code{\link[=mdro]{mdro(..., guideline = "MRGN")}}:\cr
-Ordered \code{\link{factor}} with levels \code{Negative} < \verb{3MRGN} < \verb{4MRGN}
+Ordered \link{factor} with levels \code{Negative} < \verb{3MRGN} < \verb{4MRGN}
 \item Everything else:\cr
-Ordered \code{\link{factor}} with levels \code{Negative} < \verb{Positive, unconfirmed} < \code{Positive}. The value \code{"Positive, unconfirmed"} means that, according to the guideline, it is not entirely sure if the isolate is multi-drug resistant and this should be confirmed with additional (e.g. molecular) tests
+Ordered \link{factor} with levels \code{Negative} < \verb{Positive, unconfirmed} < \code{Positive}. The value \code{"Positive, unconfirmed"} means that, according to the guideline, it is not entirely sure if the isolate is multi-drug resistant and this should be confirmed with additional (e.g. molecular) tests
 }
 }
 \description{
--- a/man/microorganisms.Rd
+++ b/man/microorganisms.Rd
@@ -5,7 +5,7 @@
 \alias{microorganisms}
 \title{Data set with 67,151 microorganisms}
 \format{
-A \code{\link{data.frame}} with 67,151 observations and 16 variables:
+A \link{data.frame} with 67,151 observations and 16 variables:
 \itemize{
 \item \code{mo}\cr ID of microorganism as used by this package
 \item \code{fullname}\cr Full name, like \code{"Escherichia coli"}
--- a/man/microorganisms.codes.Rd
+++ b/man/microorganisms.codes.Rd
@@ -5,7 +5,7 @@
 \alias{microorganisms.codes}
 \title{Data set with 5,583 common microorganism codes}
 \format{
-A \code{\link{data.frame}} with 5,583 observations and 2 variables:
+A \link{data.frame} with 5,583 observations and 2 variables:
 \itemize{
 \item \code{code}\cr Commonly used code of a microorganism
 \item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set
--- a/man/microorganisms.old.Rd
+++ b/man/microorganisms.old.Rd
@@ -5,7 +5,7 @@
 \alias{microorganisms.old}
 \title{Data set with previously accepted taxonomic names}
 \format{
-A \code{\link{data.frame}} with 12,708 observations and 4 variables:
+A \link{data.frame} with 12,708 observations and 4 variables:
 \itemize{
 \item \code{fullname}\cr Old full taxonomic name of the microorganism
 \item \code{fullname_new}\cr New full taxonomic name of the microorganism
--- a/man/mo_matching_score.Rd
+++ b/man/mo_matching_score.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mo_matching_score.R
+\name{mo_matching_score}
+\alias{mo_matching_score}
+\title{Calculate the matching score for microorganisms}
+\usage{
+mo_matching_score(x, fullname, uncertainty = 1)
+}
+\arguments{
+\item{x}{Any user input value(s)}
+
+\item{fullname}{A full taxonomic name, that exists in \code{\link[=microorganisms]{microorganisms$fullname}}}
+
+\item{uncertainty}{The level of uncertainty set in \code{\link[=as.mo]{as.mo()}}, see \code{allow_uncertain} in that function (here, it defaults to 1, but is automatically determined in \code{\link[=as.mo]{as.mo()}} based on the number of transformations needed to get to a result)}
+}
+\description{
+This helper function is used by \code{\link[=as.mo]{as.mo()}} to determine the most probable match of taxonomic records, based on user input.
+}
+\details{
+The matching score is based on four parameters:
+\enumerate{
+\item A human pathogenic prevalence \eqn{P}, that is categorised into group 1, 2 and 3 (see \code{\link[=as.mo]{as.mo()}});
+\item A kingdom index \eqn{K} is set as follows: Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, and all others = 5;
+\item The level of uncertainty \eqn{U} that is needed to get to a result (1 to 3, see \code{\link[=as.mo]{as.mo()}});
+\item The \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance} \eqn{L} is the distance between the user input and all taxonomic full names, with the text length of the user input being the maximum distance. A modified version of the Levenshtein distance \eqn{L'} based on the text length of the full name \eqn{F} is calculated as:
+}
+
+\deqn{L' = F - \frac{0.5 \times L}{F}}{L' = F - (0.5 * L) / F}
+
+The final matching score \eqn{M} is calculated as:
+\deqn{M = L' \times \frac{1}{P \times K} \times \frac{1}{U}}{M = L' * (1 / (P * K)) * (1 / U)}
+}
+\examples{
+as.mo("E. coli")
+mo_uncertainties()
+}
--- a/man/mo_property.Rd
+++ b/man/mo_property.Rd
@@ -88,11 +88,11 @@ mo_property(x, property = "fullname", language = get_locale(), ...)
 }
 \value{
 \itemize{
-\item An \code{\link{integer}} in case of \code{\link[=mo_year]{mo_year()}}
-\item A \code{\link{list}} in case of \code{\link[=mo_taxonomy]{mo_taxonomy()}} and \code{\link[=mo_info]{mo_info()}}
-\item A named \code{\link{character}} in case of \code{\link[=mo_url]{mo_url()}}
-\item A \code{\link{double}} in case of \code{\link[=mo_snomed]{mo_snomed()}}
-\item A \code{\link{character}} in all other cases
+\item An \link{integer} in case of \code{\link[=mo_year]{mo_year()}}
+\item A \link{list} in case of \code{\link[=mo_taxonomy]{mo_taxonomy()}} and \code{\link[=mo_info]{mo_info()}}
+\item A named \link{character} in case of \code{\link[=mo_url]{mo_url()}}
+\item A \link{double} in case of \code{\link[=mo_snomed]{mo_snomed()}}
+\item A \link{character} in all other cases
 }
 }
 \description{
--- a/man/mo_source.Rd
+++ b/man/mo_source.Rd
@@ -19,11 +19,13 @@ These functions can be used to predefine your own reference to be used in \code{
 This is \strong{the fastest way} to have your organisation (or analysis) specific codes picked up and translated by this package.
 }
 \details{
-The reference file can be a text file seperated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you need to have the \code{readxl} package installed.
+The reference file can be a text file separated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you will need to have the \code{readxl} package installed.

-\code{\link[=set_mo_source]{set_mo_source()}} will check the file for validity: it must be a \code{\link{data.frame}}, must have a column named \code{"mo"} which contains values from \code{\link[=microorganisms]{microorganisms$mo}} and must have a reference column with your own defined values. If all tests pass, \code{\link[=set_mo_source]{set_mo_source()}} will read the file into R and export it to \code{"~/.mo_source.rds"}. This compressed data file will then be used at default for MO determination (function \code{\link[=as.mo]{as.mo()}} and consequently all \verb{mo_*} functions like \code{\link[=mo_genus]{mo_genus()}} and \code{\link[=mo_gramstain]{mo_gramstain()}}). The location of the original file will be saved as option with \code{options(mo_source = path)}. Its timestamp will be saved with \code{options(mo_source_datetime = ...)}.
+\code{\link[=set_mo_source]{set_mo_source()}} will check the file for validity: it must be a \link{data.frame}, must have a column named \code{"mo"} which contains values from \code{\link[=microorganisms]{microorganisms$mo}} and must have a reference column with your own defined values. If all tests pass, \code{\link[=set_mo_source]{set_mo_source()}} will read the file into R and export it to \code{"~/.mo_source.rds"} after the user \strong{specifically confirms and allows} that this file will be created. For this reason, this function only works in interactive sessions.

-\code{\link[=get_mo_source]{get_mo_source()}} will return the data set by reading \code{"~/.mo_source.rds"} with \code{\link[=readRDS]{readRDS()}}. If the original file has changed (the file defined with \code{path}), it will call \code{\link[=set_mo_source]{set_mo_source()}} to update the data file automatically.
+The created compressed data file \code{"~/.mo_source.rds"} will be used by default for MO determination (function \code{\link[=as.mo]{as.mo()}} and consequently all \verb{mo_*} functions like \code{\link[=mo_genus]{mo_genus()}} and \code{\link[=mo_gramstain]{mo_gramstain()}}). The location of the original file will be saved as an R option with \code{options(mo_source = path)}. Its timestamp will be saved with \code{options(mo_source_datetime = ...)}.
+
+The function \code{\link[=get_mo_source]{get_mo_source()}} will return the data set by reading \code{"~/.mo_source.rds"} with \code{\link[=readRDS]{readRDS()}}. If the original file has changed (which is determined by checking the aforementioned options \code{mo_source} and \code{mo_source_datetime}), it will call \code{\link[=set_mo_source]{set_mo_source()}} to update the data file automatically.

 Reading an Excel file (\code{.xlsx}) with only one row has a size of 8-9 kB. The compressed file created with \code{\link[=set_mo_source]{set_mo_source()}} will then have a size of 0.1 kB and can be read by \code{\link[=get_mo_source]{get_mo_source()}} in only a couple of microseconds (millionths of a second).
 }
--- a/man/proportion.Rd
+++ b/man/proportion.Rd
@@ -60,7 +60,7 @@ rsi_df(

 \item{only_all_tested}{(for combination therapies, i.e. using more than one variable for \code{...}): a logical to indicate that isolates must be tested for all antibiotics, see section \emph{Combination therapy} below}

-\item{data}{a \code{\link{data.frame}} containing columns with class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})}
+\item{data}{a \link{data.frame} containing columns with class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}})}

 \item{translate_ab}{a column name of the \link{antibiotics} data set to translate the antibiotic abbreviations to, using \code{\link[=ab_property]{ab_property()}}. Use a value}

@@ -71,7 +71,7 @@ rsi_df(
 \item{combine_IR}{a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. I+R (susceptible vs. non-susceptible). This is outdated, see parameter \code{combine_SI}.}
 }
 \value{
-A \code{\link{double}} or, when \code{as_percent = TRUE}, a \code{\link{character}}.
+A \link{double} or, when \code{as_percent = TRUE}, a \link{character}.
 }
 \description{
 These functions can be used to calculate the (co-)resistance or susceptibility of microbial isolates (i.e. percentage of S, SI, I, IR or R). All functions support quasiquotation with pipes, can be used in \code{summarise()} from the \code{dplyr} package and also support grouped variables, please see \emph{Examples}.
--- a/man/resistance_predict.Rd
+++ b/man/resistance_predict.Rd
@@ -47,7 +47,7 @@ ggplot_rsi_predict(
 )
 }
 \arguments{
-\item{x}{a \code{\link{data.frame}} containing isolates.}
+\item{x}{a \link{data.frame} containing isolates.}

 \item{col_ab}{column name of \code{x} containing antimicrobial interpretations (\code{"R"}, \code{"I"} and \code{"S"})}

@@ -76,7 +76,7 @@ ggplot_rsi_predict(
 \item{ribbon}{a logical to indicate whether a ribbon should be shown (default) or error bars}
 }
 \value{
-A \code{\link{data.frame}} with extra class \code{\link{resistance_predict}} with columns:
+A \link{data.frame} with extra class \code{\link{resistance_predict}} with columns:
 \itemize{
 \item \code{year}
 \item \code{value}, the same as \code{estimated} when \code{preserve_measurements = FALSE}, and a combination of \code{observed} and \code{estimated} otherwise
--- a/man/rsi_translation.Rd
+++ b/man/rsi_translation.Rd
@@ -5,7 +5,7 @@
 \alias{rsi_translation}
 \title{Data set for R/SI interpretation}
 \format{
-A \code{\link{data.frame}} with 18,650 observations and 10 variables:
+A \link{data.frame} with 18,650 observations and 10 variables:
 \itemize{
 \item \code{guideline}\cr Name of the guideline
 \item \code{method}\cr Either "MIC" or "DISK"
--- a/man/skewness.Rd
+++ b/man/skewness.Rd
@@ -16,7 +16,7 @@ skewness(x, na.rm = FALSE)
 \method{skewness}{data.frame}(x, na.rm = FALSE)
 }
 \arguments{
-\item{x}{a vector of values, a \code{\link{matrix}} or a \code{\link{data.frame}}}
+\item{x}{a vector of values, a \code{\link{matrix}} or a \link{data.frame}}

 \item{na.rm}{a logical value indicating whether \code{NA} values should be stripped before the computation proceeds.}
 }