AMR/reference/mo_matching_score.html

232 lines
21 KiB
HTML

<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta name="description" content="This algorithm is used by as.mo() and all the mo_* functions to determine the most probable match of taxonomic records based on user input."><title>Calculate the Matching Score for Microorganisms — mo_matching_score • AMR (for R)</title><!-- favicons --><link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png"><link rel="icon" type="image/png" sizes="32x32" href="../favicon-32x32.png"><link rel="apple-touch-icon" type="image/png" sizes="180x180" href="../apple-touch-icon.png"><link rel="apple-touch-icon" type="image/png" sizes="120x120" href="../apple-touch-icon-120x120.png"><link rel="apple-touch-icon" type="image/png" sizes="76x76" href="../apple-touch-icon-76x76.png"><link rel="apple-touch-icon" type="image/png" sizes="60x60" href="../apple-touch-icon-60x60.png"><script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><link href="../deps/bootstrap-5.1.3/bootstrap.min.css" rel="stylesheet"><script src="../deps/bootstrap-5.1.3/bootstrap.bundle.min.js"></script><link href="../deps/Fira_Code-0.4.4/font.css" rel="stylesheet"><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- bootstrap-toc --><script 
src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><link href="../extra.css" rel="stylesheet"><script src="../extra.js"></script><meta property="og:title" content="Calculate the Matching Score for Microorganisms — mo_matching_score"><meta property="og:description" content="This algorithm is used by as.mo() and all the mo_* functions to determine the most probable match of taxonomic records based on user input."><meta property="og:image" content="https://msberends.github.io/AMR/logo.svg"><meta name="twitter:card" content="summary_large_image"><meta name="twitter:creator" content="@msberends"><meta 
name="twitter:site" content="@msberends"><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]--></head><body>
<a href="#main" class="visually-hidden-focusable">Skip to contents</a>
<nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-primary"><div class="container">
<a class="navbar-brand me-2" href="../index.html">AMR (for R)</a>
<small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">1.8.2.9062</small>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar" class="collapse navbar-collapse ms-3">
<ul class="navbar-nav me-auto"><li class="nav-item">
<a class="nav-link" href="../index.html">
<span class="fa fa-home"></span>
Home
</a>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false" aria-haspopup="true" id="dropdown--how-to">
<span class="fa fa-question-circle"></span>
How to
</a>
<div class="dropdown-menu" aria-labelledby="dropdown--how-to">
<a class="dropdown-item" href="../articles/AMR.html">
<span class="fa fa-directions"></span>
Conduct AMR analysis
</a>
<a class="dropdown-item" href="../articles/resistance_predict.html">
<span class="fa fa-dice"></span>
Predict antimicrobial resistance
</a>
<a class="dropdown-item" href="../articles/datasets.html">
<span class="fa fa-database"></span>
Data sets for download / own use
</a>
<a class="dropdown-item" href="../articles/PCA.html">
<span class="fa fa-compress"></span>
Conduct principal component analysis for AMR
</a>
<a class="dropdown-item" href="../articles/MDR.html">
<span class="fa fa-skull-crossbones"></span>
Determine multi-drug resistance (MDR)
</a>
<a class="dropdown-item" href="../articles/WHONET.html">
<span class="fa fa-globe-americas"></span>
Work with WHONET data
</a>
<a class="dropdown-item" href="../articles/SPSS.html">
<span class="fa fa-file-upload"></span>
Import data from SPSS/SAS/Stata
</a>
<a class="dropdown-item" href="../articles/EUCAST.html">
<span class="fa fa-exchange-alt"></span>
Apply EUCAST rules
</a>
<a class="dropdown-item" href="../reference/mo_property.html">
<span class="fa fa-bug"></span>
Get properties of a microorganism
</a>
<a class="dropdown-item" href="../reference/ab_property.html">
<span class="fa fa-capsules"></span>
Get properties of an antibiotic
</a>
<a class="dropdown-item" href="../reference/av_property.html">
<span class="fa fa-capsules"></span>
Get properties of an antiviral agent
</a>
</div>
</li>
<li class="active nav-item">
<a class="nav-link" href="../reference/index.html">
<span class="fa fa-book-open"></span>
Manual
</a>
</li>
<li class="nav-item">
<a class="nav-link" href="../authors.html">
<span class="fa fa-users"></span>
Authors
</a>
</li>
<li class="nav-item">
<a class="nav-link" href="../news/index.html">
<span class="far fa far fa-newspaper"></span>
Changelog
</a>
</li>
</ul><form class="form-inline my-2 my-lg-0" role="search">
<input type="search" class="form-control me-sm-2" aria-label="Search" name="search-input" data-search-index="../search.json" id="search-input" placeholder="Search for" autocomplete="off"></form>
<ul class="navbar-nav"><li class="nav-item">
<a class="external-link nav-link" href="https://github.com/msberends/AMR">
<span class="fab fa fab fa-github"></span>
Source Code
</a>
</li>
</ul></div>
</div>
</nav><div class="container template-reference-topic">
<div class="row">
<main id="main" class="col-md-9"><div class="page-header">
<img src="../logo.svg" class="logo" alt=""><h1>Calculate the Matching Score for Microorganisms</h1>
<small class="dont-index">Source: <a href="https://github.com/msberends/AMR/blob/HEAD/R/mo_matching_score.R" class="external-link"><code>R/mo_matching_score.R</code></a></small>
<div class="d-none name"><code>mo_matching_score.Rd</code></div>
</div>
<div class="ref-description section level2">
<p>This algorithm is used by <code><a href="as.mo.html">as.mo()</a></code> and all the <code><a href="mo_property.html">mo_*</a></code> functions to determine the most probable match of taxonomic records based on user input.</p>
</div>
<div class="section level2">
<h2 id="ref-usage">Usage<a class="anchor" aria-label="anchor" href="#ref-usage"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span><span class="fu">mo_matching_score</span><span class="op">(</span><span class="va">x</span>, <span class="va">n</span><span class="op">)</span></span></code></pre></div>
</div>
<div class="section level2">
<h2 id="arguments">Arguments<a class="anchor" aria-label="anchor" href="#arguments"></a></h2>
<dl><dt>x</dt>
<dd><p>Any user input value(s)</p></dd>
<dt>n</dt>
<dd><p>A full taxonomic name, that exists in <code><a href="microorganisms.html">microorganisms$fullname</a></code></p></dd>
</dl></div>
<div class="section level2">
<h2 id="note">Note<a class="anchor" aria-label="anchor" href="#note"></a></h2>
<p>This algorithm was described in: Berends MS <em>et al.</em> (2022). <strong>AMR: An R Package for Working with Antimicrobial Resistance Data</strong>. <em>Journal of Statistical Software</em>, 104(3), 1-31; <a href="https://doi.org/10.18637/jss.v104.i03" class="external-link">doi:10.18637/jss.v104.i03</a>.</p>
</div>
<div class="section level2">
<h2 id="matching-score-for-microorganisms">Matching Score for Microorganisms<a class="anchor" aria-label="anchor" href="#matching-score-for-microorganisms"></a></h2>
<p>With ambiguous user input in <code><a href="as.mo.html">as.mo()</a></code> and all the <code><a href="mo_property.html">mo_*</a></code> functions, the returned results are chosen based on their matching score using <code>mo_matching_score()</code>. This matching score, \(m\), is calculated as:</p>
<p><img src="figures/mo_matching_score.png" width="300" alt="Formula defining the matching score m in terms of l_n, lev(x, n), p_n and k_n, which are explained below"></p>
<p>where:</p><ul><li><p><i>x</i> is the user input;</p></li>
<li><p><i>n</i> is a taxonomic name (genus, species, and subspecies);</p></li>
<li><p><i>l<sub>n</sub></i> is the length of <i>n</i>;</p></li>
<li><p><i>lev</i> is the <a href="https://en.wikipedia.org/wiki/Levenshtein_distance" class="external-link">Levenshtein distance function</a> (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change <i>x</i> into <i>n</i>;</p></li>
<li><p><i>p<sub>n</sub></i> is the human pathogenic prevalence group of <i>n</i>, as described below;</p></li>
<li><p><i>k<sub>n</sub></i> is the taxonomic kingdom of <i>n</i>, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.</p></li>
</ul><p>The grouping into human pathogenic prevalence (\(p\)) is based on experience from several microbiological laboratories in the Netherlands in conjunction with international reports on pathogen prevalence:</p>
<p><strong>Group 1</strong> (most prevalent microorganisms) consists of all microorganisms where the taxonomic class is Gammaproteobacteria or where the taxonomic genus is <em>Enterococcus</em>, <em>Staphylococcus</em> or <em>Streptococcus</em>. This group consequently contains all common Gram-negative bacteria, such as <em>Pseudomonas</em> and <em>Legionella</em>, and all species within the order Enterobacterales.</p>
<p><strong>Group 2</strong> consists of all microorganisms where the taxonomic phylum is Pseudomonadota (previously named Proteobacteria), Bacillota (previously named Firmicutes), Actinomycetota (previously named Actinobacteria) or Sarcomastigophora, or where the taxonomic genus is <em>Absidia</em>, <em>Acanthamoeba</em>, <em>Acholeplasma</em>, <em>Acremonium</em>, <em>Actinotignum</em>, <em>Aedes</em>, <em>Alistipes</em>, <em>Alloprevotella</em>, <em>Alternaria</em>, <em>Amoeba</em>, <em>Anaerosalibacter</em>, <em>Ancylostoma</em>, <em>Angiostrongylus</em>, <em>Anisakis</em>, <em>Anopheles</em>, <em>Apophysomyces</em>, <em>Arachnia</em>, <em>Aspergillus</em>, <em>Aureobasidium</em>, <em>Bacteroides</em>, <em>Basidiobolus</em>, <em>Beauveria</em>, <em>Bergeyella</em>, <em>Blastocystis</em>, <em>Blastomyces</em>, <em>Borrelia</em>, <em>Brachyspira</em>, <em>Branhamella</em>, <em>Butyricimonas</em>, <em>Candida</em>, <em>Capillaria</em>, <em>Capnocytophaga</em>, <em>Catabacter</em>, <em>Cetobacterium</em>, <em>Chaetomium</em>, <em>Chlamydia</em>, <em>Chlamydophila</em>, <em>Christensenella</em>, <em>Chryseobacterium</em>, <em>Chrysonilia</em>, <em>Cladophialophora</em>, <em>Cladosporium</em>, <em>Conidiobolus</em>, <em>Contracaecum</em>, <em>Cordylobia</em>, <em>Cryptococcus</em>, <em>Curvularia</em>, <em>Deinococcus</em>, <em>Demodex</em>, <em>Dermatobia</em>, <em>Dientamoeba</em>, <em>Diphyllobothrium</em>, <em>Dirofilaria</em>, <em>Dysgonomonas</em>, <em>Echinostoma</em>, <em>Elizabethkingia</em>, <em>Empedobacter</em>, <em>Entamoeba</em>, <em>Enterobius</em>, <em>Exophiala</em>, <em>Exserohilum</em>, <em>Fasciola</em>, <em>Flavobacterium</em>, <em>Fonsecaea</em>, <em>Fusarium</em>, <em>Fusobacterium</em>, <em>Giardia</em>, <em>Haloarcula</em>, <em>Halobacterium</em>, <em>Halococcus</em>, <em>Hendersonula</em>, <em>Heterophyes</em>, <em>Histomonas</em>, <em>Histoplasma</em>, <em>Hymenolepis</em>, <em>Hypomyces</em>, <em>Hysterothylacium</em>, <em>Leishmania</em>, 
<em>Lelliottia</em>, <em>Leptosphaeria</em>, <em>Leptotrichia</em>, <em>Lucilia</em>, <em>Lumbricus</em>, <em>Malassezia</em>, <em>Malbranchea</em>, <em>Metagonimus</em>, <em>Meyerozyma</em>, <em>Microsporidium</em>, <em>Microsporum</em>, <em>Mortierella</em>, <em>Mucor</em>, <em>Mycocentrospora</em>, <em>Mycoplasma</em>, <em>Myroides</em>, <em>Necator</em>, <em>Nectria</em>, <em>Ochroconis</em>, <em>Odoribacter</em>, <em>Oesophagostomum</em>, <em>Oidiodendron</em>, <em>Opisthorchis</em>, <em>Ornithobacterium</em>, <em>Parabacteroides</em>, <em>Pediculus</em>, <em>Pedobacter</em>, <em>Phlebotomus</em>, <em>Phocaeicola</em>, <em>Phocanema</em>, <em>Phoma</em>, <em>Pichia</em>, <em>Piedraia</em>, <em>Pithomyces</em>, <em>Pityrosporum</em>, <em>Pneumocystis</em>, <em>Porphyromonas</em>, <em>Prevotella</em>, <em>Pseudallescheria</em>, <em>Pseudoterranova</em>, <em>Pulex</em>, <em>Rhizomucor</em>, <em>Rhizopus</em>, <em>Rhodotorula</em>, <em>Riemerella</em>, <em>Saccharomyces</em>, <em>Sarcoptes</em>, <em>Scolecobasidium</em>, <em>Scopulariopsis</em>, <em>Scytalidium</em>, <em>Sphingobacterium</em>, <em>Spirometra</em>, <em>Spiroplasma</em>, <em>Sporobolomyces</em>, <em>Stachybotrys</em>, <em>Streptobacillus</em>, <em>Strongyloides</em>, <em>Syngamus</em>, <em>Taenia</em>, <em>Tannerella</em>, <em>Tenacibaculum</em>, <em>Terrimonas</em>, <em>Toxocara</em>, <em>Treponema</em>, <em>Trichinella</em>, <em>Trichobilharzia</em>, <em>Trichoderma</em>, <em>Trichomonas</em>, <em>Trichophyton</em>, <em>Trichosporon</em>, <em>Trichostrongylus</em>, <em>Trichuris</em>, <em>Tritirachium</em>, <em>Trombicula</em>, <em>Trypanosoma</em>, <em>Tunga</em>, <em>Ureaplasma</em>, <em>Victivallis</em>, <em>Wautersiella</em>, <em>Weeksella</em> or <em>Wuchereria</em>.</p>
<p><strong>Group 3</strong> consists of all other microorganisms.</p>
<p>All characters in \(x\) and \(n\) other than A-Z, a-z, 0-9, spaces and parentheses are ignored.</p>
<p>All matches are sorted in descending order of their matching score and, for each user input value, the top match is returned. As a result, e.g., <code>"E. coli"</code> will return the microbial ID of <em>Escherichia coli</em> (\(m = 0.688\), a highly prevalent microorganism found in humans) and not <em>Entamoeba coli</em> (\(m = 0.079\), a less prevalent microorganism in humans), although the latter would alphabetically come first.</p>
</div>
<div class="section level2">
<h2 id="reference-data-publicly-available">Reference Data Publicly Available<a class="anchor" aria-label="anchor" href="#reference-data-publicly-available"></a></h2>
<p>All data sets in this <code>AMR</code> package (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) are publicly and freely available for download in the following formats: R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. We also provide tab-separated plain text files that are machine-readable and suitable for input in any software program, such as laboratory information systems. Please visit <a href="https://msberends.github.io/AMR/articles/datasets.html">our website for the download links</a>. The actual files are of course available on <a href="https://github.com/msberends/AMR/tree/main/data-raw" class="external-link">our GitHub repository</a>.</p>
</div>
<div class="section level2">
<h2 id="author">Author<a class="anchor" aria-label="anchor" href="#author"></a></h2>
<p>Dr. Matthijs Berends</p>
</div>
<div class="section level2">
<h2 id="ref-examples">Examples<a class="anchor" aria-label="anchor" href="#ref-examples"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span class="r-in"><span><span class="fu"><a href="as.mo.html">as.mo</a></span><span class="op">(</span><span class="st">"E. coli"</span><span class="op">)</span></span></span>
<span class="r-out co"><span class="r-pr">#&gt;</span> Class 'mo'</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> [1] B_ESCHR_COLI</span>
<span class="r-in"><span><span class="fu"><a href="as.mo.html">mo_uncertainties</a></span><span class="op">(</span><span class="op">)</span></span></span>
<span class="r-out co"><span class="r-pr">#&gt;</span> Matching scores are based on the resemblance between the input and the full</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> taxonomic name, and the pathogenicity in humans. See ?mo_matching_score.</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> </span>
<span class="r-out co"><span class="r-pr">#&gt;</span> --------------------------------------------------------------------------------</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> "K. pneumoniae" -&gt; Klebsiella pneumoniae (B_KLBSL_PNMN, 0.786)</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> Based on input "K pneumoniae"</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> Also matched: Klebsiella pneumoniae ozaenae (0.707), Klebsiella pneumoniae</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> pneumoniae (0.688), Klebsiella pneumoniae rhinoscleromatis (0.658) and</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> Kroppenstedtia pulmonis (0.304)</span>
<span class="r-in"><span></span></span>
<span class="r-in"><span><span class="fu">mo_matching_score</span><span class="op">(</span></span></span>
<span class="r-in"><span> x <span class="op">=</span> <span class="st">"E. coli"</span>,</span></span>
<span class="r-in"><span> n <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"Escherichia coli"</span>, <span class="st">"Entamoeba coli"</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="op">)</span></span></span>
<span class="r-out co"><span class="r-pr">#&gt;</span> [1] 0.68750000 0.07936508</span>
</code></pre></div>
</div>
</main><aside class="col-md-3"><nav id="toc"><h2>On this page</h2>
</nav></aside></div>
<footer><div class="pkgdown-footer-left">
<p></p><p><code>AMR</code> (for R). Free and open-source, licensed under the <a target="_blank" href="https://github.com/msberends/AMR/blob/main/LICENSE" class="external-link">GNU General Public License version 2.0 (GPL-2)</a>.<br>Developed at the <a target="_blank" href="https://www.rug.nl" class="external-link">University of Groningen</a> and <a target="_blank" href="https://www.umcg.nl" class="external-link">University Medical Center Groningen</a> in The Netherlands.</p>
</div>
<div class="pkgdown-footer-right">
<p></p><p><a target="_blank" href="https://www.rug.nl" class="external-link"><img src="https://github.com/msberends/AMR/raw/main/pkgdown/logos/logo_rug.svg" alt="University of Groningen" style="max-width: 150px;"></a><a target="_blank" href="https://www.umcg.nl" class="external-link"><img src="https://github.com/msberends/AMR/raw/main/pkgdown/logos/logo_umcg.svg" alt="University Medical Center Groningen" style="max-width: 150px;"></a></p>
</div>
</footer></div>
</body></html>