mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 10:31:53 +02:00
(v0.9.0.9020) as.mo() improvement
This commit is contained in:
@ -41,7 +41,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.9.0.9019</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.9.0.9020</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@ -187,7 +187,7 @@
|
||||
<h1>Benchmarks</h1>
|
||||
<h4 class="author">Matthijs S. Berends</h4>
|
||||
|
||||
<h4 class="date">01 February 2020</h4>
|
||||
<h4 class="date">09 February 2020</h4>
|
||||
|
||||
|
||||
<div class="hidden name"><code>benchmarks.Rmd</code></div>
|
||||
@ -221,21 +221,21 @@
|
||||
<a class="sourceLine" id="cb2-16" data-line-number="16"> <span class="dt">times =</span> <span class="dv">10</span>)</a>
|
||||
<a class="sourceLine" id="cb2-17" data-line-number="17"><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(S.aureus, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">2</span>)</a>
|
||||
<a class="sourceLine" id="cb2-18" data-line-number="18"><span class="co"># Unit: milliseconds</span></a>
|
||||
<a class="sourceLine" id="cb2-19" data-line-number="19"><span class="co"># expr min lq mean median uq max</span></a>
|
||||
<a class="sourceLine" id="cb2-20" data-line-number="20"><span class="co"># as.mo("sau") 8.3 8.3 11.0 8.8 9.4 34.0</span></a>
|
||||
<a class="sourceLine" id="cb2-21" data-line-number="21"><span class="co"># as.mo("stau") 37.0 46.0 76.0 62.0 70.0 160.0</span></a>
|
||||
<a class="sourceLine" id="cb2-22" data-line-number="22"><span class="co"># as.mo("STAU") 37.0 38.0 48.0 44.0 61.0 65.0</span></a>
|
||||
<a class="sourceLine" id="cb2-23" data-line-number="23"><span class="co"># as.mo("staaur") 8.3 8.4 8.9 8.6 9.2 10.0</span></a>
|
||||
<a class="sourceLine" id="cb2-24" data-line-number="24"><span class="co"># as.mo("STAAUR") 8.2 8.2 8.5 8.5 8.6 9.3</span></a>
|
||||
<a class="sourceLine" id="cb2-25" data-line-number="25"><span class="co"># as.mo("S. aureus") 14.0 14.0 23.0 15.0 37.0 46.0</span></a>
|
||||
<a class="sourceLine" id="cb2-26" data-line-number="26"><span class="co"># as.mo("S aureus") 14.0 14.0 18.0 14.0 15.0 46.0</span></a>
|
||||
<a class="sourceLine" id="cb2-27" data-line-number="27"><span class="co"># as.mo("Staphylococcus aureus") 4.9 4.9 5.4 5.1 5.4 7.7</span></a>
|
||||
<a class="sourceLine" id="cb2-28" data-line-number="28"><span class="co"># as.mo("Staphylococcus aureus (MRSA)") 630.0 660.0 700.0 690.0 700.0 830.0</span></a>
|
||||
<a class="sourceLine" id="cb2-29" data-line-number="29"><span class="co"># as.mo("Sthafilokkockus aaureuz") 330.0 380.0 380.0 380.0 390.0 440.0</span></a>
|
||||
<a class="sourceLine" id="cb2-30" data-line-number="30"><span class="co"># as.mo("MRSA") 8.3 8.4 14.0 8.9 12.0 37.0</span></a>
|
||||
<a class="sourceLine" id="cb2-31" data-line-number="31"><span class="co"># as.mo("VISA") 23.0 25.0 35.0 31.0 49.0 53.0</span></a>
|
||||
<a class="sourceLine" id="cb2-32" data-line-number="32"><span class="co"># as.mo("VRSA") 24.0 25.0 65.0 26.0 51.0 270.0</span></a>
|
||||
<a class="sourceLine" id="cb2-33" data-line-number="33"><span class="co"># as.mo(22242419) 130.0 140.0 150.0 140.0 160.0 170.0</span></a>
|
||||
<a class="sourceLine" id="cb2-19" data-line-number="19"><span class="co"># expr min lq mean median uq max</span></a>
|
||||
<a class="sourceLine" id="cb2-20" data-line-number="20"><span class="co"># as.mo("sau") 8.0 8.3 14 8.5 9.2 38.0</span></a>
|
||||
<a class="sourceLine" id="cb2-21" data-line-number="21"><span class="co"># as.mo("stau") 36.0 37.0 42 38.0 43.0 64.0</span></a>
|
||||
<a class="sourceLine" id="cb2-22" data-line-number="22"><span class="co"># as.mo("STAU") 37.0 38.0 50 45.0 64.0 74.0</span></a>
|
||||
<a class="sourceLine" id="cb2-23" data-line-number="23"><span class="co"># as.mo("staaur") 7.9 8.3 12 8.9 9.3 37.0</span></a>
|
||||
<a class="sourceLine" id="cb2-24" data-line-number="24"><span class="co"># as.mo("STAAUR") 8.0 8.2 14 8.3 9.5 36.0</span></a>
|
||||
<a class="sourceLine" id="cb2-25" data-line-number="25"><span class="co"># as.mo("S. aureus") 14.0 14.0 22 15.0 37.0 40.0</span></a>
|
||||
<a class="sourceLine" id="cb2-26" data-line-number="26"><span class="co"># as.mo("S aureus") 13.0 14.0 16 14.0 15.0 36.0</span></a>
|
||||
<a class="sourceLine" id="cb2-27" data-line-number="27"><span class="co"># as.mo("Staphylococcus aureus") 4.6 4.8 5 4.9 5.3 5.4</span></a>
|
||||
<a class="sourceLine" id="cb2-28" data-line-number="28"><span class="co"># as.mo("Staphylococcus aureus (MRSA)") 630.0 650.0 730 690.0 710.0 1100.0</span></a>
|
||||
<a class="sourceLine" id="cb2-29" data-line-number="29"><span class="co"># as.mo("Sthafilokkockus aaureuz") 350.0 370.0 400 380.0 420.0 500.0</span></a>
|
||||
<a class="sourceLine" id="cb2-30" data-line-number="30"><span class="co"># as.mo("MRSA") 8.0 8.2 11 8.3 9.3 33.0</span></a>
|
||||
<a class="sourceLine" id="cb2-31" data-line-number="31"><span class="co"># as.mo("VISA") 24.0 25.0 46 36.0 49.0 140.0</span></a>
|
||||
<a class="sourceLine" id="cb2-32" data-line-number="32"><span class="co"># as.mo("VRSA") 23.0 25.0 37 37.0 48.0 49.0</span></a>
|
||||
<a class="sourceLine" id="cb2-33" data-line-number="33"><span class="co"># as.mo(22242419) 120.0 130.0 140 140.0 150.0 150.0</span></a>
|
||||
<a class="sourceLine" id="cb2-34" data-line-number="34"><span class="co"># neval</span></a>
|
||||
<a class="sourceLine" id="cb2-35" data-line-number="35"><span class="co"># 10</span></a>
|
||||
<a class="sourceLine" id="cb2-36" data-line-number="36"><span class="co"># 10</span></a>
|
||||
@ -262,18 +262,18 @@
|
||||
<a class="sourceLine" id="cb3-6" data-line-number="6"> <span class="dt">times =</span> <span class="dv">10</span>)</a>
|
||||
<a class="sourceLine" id="cb3-7" data-line-number="7"><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(M.semesiae, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">4</span>)</a>
|
||||
<a class="sourceLine" id="cb3-8" data-line-number="8"><span class="co"># Unit: milliseconds</span></a>
|
||||
<a class="sourceLine" id="cb3-9" data-line-number="9"><span class="co"># expr min lq mean median uq</span></a>
|
||||
<a class="sourceLine" id="cb3-10" data-line-number="10"><span class="co"># as.mo("metsem") 1444.000 1496.000 1539.00 1530.000 1573.000</span></a>
|
||||
<a class="sourceLine" id="cb3-11" data-line-number="11"><span class="co"># as.mo("METSEM") 1451.000 1478.000 1521.00 1505.000 1555.000</span></a>
|
||||
<a class="sourceLine" id="cb3-12" data-line-number="12"><span class="co"># as.mo("M. semesiae") 14.140 14.370 17.22 14.790 14.970</span></a>
|
||||
<a class="sourceLine" id="cb3-13" data-line-number="13"><span class="co"># as.mo("M. semesiae") 14.460 14.690 20.25 14.850 15.800</span></a>
|
||||
<a class="sourceLine" id="cb3-14" data-line-number="14"><span class="co"># as.mo("Methanosarcina semesiae") 5.014 5.383 11.53 5.566 5.831</span></a>
|
||||
<a class="sourceLine" id="cb3-9" data-line-number="9"><span class="co"># expr min lq mean median uq</span></a>
|
||||
<a class="sourceLine" id="cb3-10" data-line-number="10"><span class="co"># as.mo("metsem") 1412.000 1475.000 1514.00 1503.00 1553.000</span></a>
|
||||
<a class="sourceLine" id="cb3-11" data-line-number="11"><span class="co"># as.mo("METSEM") 1348.000 1448.000 1470.00 1471.00 1512.000</span></a>
|
||||
<a class="sourceLine" id="cb3-12" data-line-number="12"><span class="co"># as.mo("M. semesiae") 14.410 14.600 20.03 14.84 16.170</span></a>
|
||||
<a class="sourceLine" id="cb3-13" data-line-number="13"><span class="co"># as.mo("M. semesiae") 14.640 15.070 26.61 18.52 41.270</span></a>
|
||||
<a class="sourceLine" id="cb3-14" data-line-number="14"><span class="co"># as.mo("Methanosarcina semesiae") 5.289 5.508 11.34 5.63 5.837</span></a>
|
||||
<a class="sourceLine" id="cb3-15" data-line-number="15"><span class="co"># max neval</span></a>
|
||||
<a class="sourceLine" id="cb3-16" data-line-number="16"><span class="co"># 1628.00 10</span></a>
|
||||
<a class="sourceLine" id="cb3-17" data-line-number="17"><span class="co"># 1658.00 10</span></a>
|
||||
<a class="sourceLine" id="cb3-18" data-line-number="18"><span class="co"># 39.56 10</span></a>
|
||||
<a class="sourceLine" id="cb3-19" data-line-number="19"><span class="co"># 42.11 10</span></a>
|
||||
<a class="sourceLine" id="cb3-20" data-line-number="20"><span class="co"># 38.03 10</span></a></code></pre></div>
|
||||
<a class="sourceLine" id="cb3-16" data-line-number="16"><span class="co"># 1634.00 10</span></a>
|
||||
<a class="sourceLine" id="cb3-17" data-line-number="17"><span class="co"># 1554.00 10</span></a>
|
||||
<a class="sourceLine" id="cb3-18" data-line-number="18"><span class="co"># 41.27 10</span></a>
|
||||
<a class="sourceLine" id="cb3-19" data-line-number="19"><span class="co"># 45.83 10</span></a>
|
||||
<a class="sourceLine" id="cb3-20" data-line-number="20"><span class="co"># 35.94 10</span></a></code></pre></div>
|
||||
<p>That takes 5.6 times as much time on average. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like <em>Methanosarcina semesiae</em>) are always very fast and only take a few thousandths of a second to coerce - they are the most probable input from most data sets.</p>
|
||||
<p>In the figure below, we compare <em>Escherichia coli</em> (which is very common) with <em>Prevotella brevis</em> (which is moderately common) and with <em>Methanosarcina semesiae</em> (which is uncommon):</p>
|
||||
<p><img src="benchmarks_files/figure-html/unnamed-chunk-6-1.png" width="900"></p>
|
||||
@ -308,8 +308,8 @@
|
||||
<a class="sourceLine" id="cb4-24" data-line-number="24"><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a>
|
||||
<a class="sourceLine" id="cb4-25" data-line-number="25"><span class="co"># Unit: milliseconds</span></a>
|
||||
<a class="sourceLine" id="cb4-26" data-line-number="26"><span class="co"># expr min lq mean median uq max neval</span></a>
|
||||
<a class="sourceLine" id="cb4-27" data-line-number="27"><span class="co"># mo_name(x) 568 612 633 626 649 774 100</span></a></code></pre></div>
|
||||
<p>So transforming 500,000 values (!!) of 50 unique values only takes 0.63 seconds (626 ms). You only lose time on your unique input values.</p>
|
||||
<a class="sourceLine" id="cb4-27" data-line-number="27"><span class="co"># mo_name(x) 574 626 649 644 660 787 100</span></a></code></pre></div>
|
||||
<p>So transforming 500,000 values (!!) of 50 unique values only takes 0.64 seconds (643 ms). You only lose time on your unique input values.</p>
|
||||
</div>
|
||||
<div id="precalculated-results" class="section level3">
|
||||
<h3 class="hasAnchor">
|
||||
@ -321,11 +321,11 @@
|
||||
<a class="sourceLine" id="cb5-4" data-line-number="4"> <span class="dt">times =</span> <span class="dv">10</span>)</a>
|
||||
<a class="sourceLine" id="cb5-5" data-line-number="5"><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a>
|
||||
<a class="sourceLine" id="cb5-6" data-line-number="6"><span class="co"># Unit: milliseconds</span></a>
|
||||
<a class="sourceLine" id="cb5-7" data-line-number="7"><span class="co"># expr min lq mean median uq max neval</span></a>
|
||||
<a class="sourceLine" id="cb5-8" data-line-number="8"><span class="co"># A 6.310 6.380 6.720 6.46 6.740 8.79 10</span></a>
|
||||
<a class="sourceLine" id="cb5-9" data-line-number="9"><span class="co"># B 13.500 13.800 18.200 14.60 15.000 52.20 10</span></a>
|
||||
<a class="sourceLine" id="cb5-10" data-line-number="10"><span class="co"># C 0.815 0.839 0.886 0.86 0.899 1.13 10</span></a></code></pre></div>
|
||||
<p>So going from <code><a href="../reference/mo_property.html">mo_name("Staphylococcus aureus")</a></code> to <code>"Staphylococcus aureus"</code> takes 0.0009 seconds - it doesn’t even start calculating <em>if the result would be the same as the expected resulting value</em>. That goes for all helper functions:</p>
|
||||
<a class="sourceLine" id="cb5-7" data-line-number="7"><span class="co"># expr min lq mean median uq max neval</span></a>
|
||||
<a class="sourceLine" id="cb5-8" data-line-number="8"><span class="co"># A 6.370 6.460 9.890 6.53 6.900 39.400 10</span></a>
|
||||
<a class="sourceLine" id="cb5-9" data-line-number="9"><span class="co"># B 13.400 13.500 13.800 13.60 14.100 14.500 10</span></a>
|
||||
<a class="sourceLine" id="cb5-10" data-line-number="10"><span class="co"># C 0.795 0.825 0.851 0.84 0.849 0.973 10</span></a></code></pre></div>
|
||||
<p>So going from <code><a href="../reference/mo_property.html">mo_name("Staphylococcus aureus")</a></code> to <code>"Staphylococcus aureus"</code> takes 0.0008 seconds - it doesn’t even start calculating <em>if the result would be the same as the expected resulting value</em>. That goes for all helper functions:</p>
|
||||
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb6-1" data-line-number="1">run_it <-<span class="st"> </span><span class="kw"><a href="https://rdrr.io/pkg/microbenchmark/man/microbenchmark.html">microbenchmark</a></span>(<span class="dt">A =</span> <span class="kw"><a href="../reference/mo_property.html">mo_species</a></span>(<span class="st">"aureus"</span>),</a>
|
||||
<a class="sourceLine" id="cb6-2" data-line-number="2"> <span class="dt">B =</span> <span class="kw"><a href="../reference/mo_property.html">mo_genus</a></span>(<span class="st">"Staphylococcus"</span>),</a>
|
||||
<a class="sourceLine" id="cb6-3" data-line-number="3"> <span class="dt">C =</span> <span class="kw"><a href="../reference/mo_property.html">mo_name</a></span>(<span class="st">"Staphylococcus aureus"</span>),</a>
|
||||
@ -338,14 +338,14 @@
|
||||
<a class="sourceLine" id="cb6-10" data-line-number="10"><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a>
|
||||
<a class="sourceLine" id="cb6-11" data-line-number="11"><span class="co"># Unit: milliseconds</span></a>
|
||||
<a class="sourceLine" id="cb6-12" data-line-number="12"><span class="co"># expr min lq mean median uq max neval</span></a>
|
||||
<a class="sourceLine" id="cb6-13" data-line-number="13"><span class="co"># A 0.485 0.510 0.514 0.515 0.529 0.535 10</span></a>
|
||||
<a class="sourceLine" id="cb6-14" data-line-number="14"><span class="co"># B 0.492 0.510 0.538 0.535 0.565 0.588 10</span></a>
|
||||
<a class="sourceLine" id="cb6-15" data-line-number="15"><span class="co"># C 0.665 0.679 0.794 0.804 0.883 0.932 10</span></a>
|
||||
<a class="sourceLine" id="cb6-16" data-line-number="16"><span class="co"># D 0.512 0.530 0.547 0.547 0.562 0.584 10</span></a>
|
||||
<a class="sourceLine" id="cb6-17" data-line-number="17"><span class="co"># E 0.467 0.504 0.507 0.512 0.515 0.520 10</span></a>
|
||||
<a class="sourceLine" id="cb6-18" data-line-number="18"><span class="co"># F 0.472 0.492 0.511 0.515 0.524 0.557 10</span></a>
|
||||
<a class="sourceLine" id="cb6-19" data-line-number="19"><span class="co"># G 0.473 0.476 0.497 0.497 0.515 0.523 10</span></a>
|
||||
<a class="sourceLine" id="cb6-20" data-line-number="20"><span class="co"># H 0.470 0.499 0.507 0.507 0.524 0.553 10</span></a></code></pre></div>
|
||||
<a class="sourceLine" id="cb6-13" data-line-number="13"><span class="co"># A 0.451 0.485 0.488 0.489 0.495 0.518 10</span></a>
|
||||
<a class="sourceLine" id="cb6-14" data-line-number="14"><span class="co"># B 0.507 0.510 0.526 0.522 0.538 0.554 10</span></a>
|
||||
<a class="sourceLine" id="cb6-15" data-line-number="15"><span class="co"># C 0.732 0.742 0.769 0.781 0.786 0.807 10</span></a>
|
||||
<a class="sourceLine" id="cb6-16" data-line-number="16"><span class="co"># D 0.507 0.514 0.534 0.531 0.549 0.585 10</span></a>
|
||||
<a class="sourceLine" id="cb6-17" data-line-number="17"><span class="co"># E 0.469 0.486 0.492 0.489 0.499 0.532 10</span></a>
|
||||
<a class="sourceLine" id="cb6-18" data-line-number="18"><span class="co"># F 0.473 0.479 0.483 0.481 0.482 0.513 10</span></a>
|
||||
<a class="sourceLine" id="cb6-19" data-line-number="19"><span class="co"># G 0.466 0.469 0.481 0.480 0.486 0.517 10</span></a>
|
||||
<a class="sourceLine" id="cb6-20" data-line-number="20"><span class="co"># H 0.468 0.476 0.502 0.483 0.494 0.665 10</span></a></code></pre></div>
|
||||
<p>Of course, when running <code><a href="../reference/mo_property.html">mo_phylum("Firmicutes")</a></code> the function has zero knowledge about the actual microorganism, namely <em>S. aureus</em>. But since the result would be <code>"Firmicutes"</code> anyway, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.</p>
|
||||
</div>
|
||||
<div id="results-in-other-languages" class="section level3">
|
||||
@ -372,13 +372,13 @@
|
||||
<a class="sourceLine" id="cb7-18" data-line-number="18"><span class="kw"><a href="https://rdrr.io/r/base/print.html">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">4</span>)</a>
|
||||
<a class="sourceLine" id="cb7-19" data-line-number="19"><span class="co"># Unit: milliseconds</span></a>
|
||||
<a class="sourceLine" id="cb7-20" data-line-number="20"><span class="co"># expr min lq mean median uq max neval</span></a>
|
||||
<a class="sourceLine" id="cb7-21" data-line-number="21"><span class="co"># en 23.40 25.17 29.92 25.55 26.50 62.94 100</span></a>
|
||||
<a class="sourceLine" id="cb7-22" data-line-number="22"><span class="co"># de 24.69 26.72 31.90 27.14 29.34 159.90 100</span></a>
|
||||
<a class="sourceLine" id="cb7-23" data-line-number="23"><span class="co"># nl 31.03 32.69 40.33 33.21 39.65 73.05 100</span></a>
|
||||
<a class="sourceLine" id="cb7-24" data-line-number="24"><span class="co"># es 25.22 26.61 32.78 27.11 31.04 61.09 100</span></a>
|
||||
<a class="sourceLine" id="cb7-25" data-line-number="25"><span class="co"># it 24.68 26.52 30.28 26.99 27.93 61.03 100</span></a>
|
||||
<a class="sourceLine" id="cb7-26" data-line-number="26"><span class="co"># fr 24.97 26.48 29.79 26.86 27.88 59.57 100</span></a>
|
||||
<a class="sourceLine" id="cb7-27" data-line-number="27"><span class="co"># pt 25.19 26.65 33.03 27.15 28.42 161.00 100</span></a></code></pre></div>
|
||||
<a class="sourceLine" id="cb7-21" data-line-number="21"><span class="co"># en 24.62 25.46 32.22 25.95 27.72 146.70 100</span></a>
|
||||
<a class="sourceLine" id="cb7-22" data-line-number="22"><span class="co"># de 25.97 26.93 33.24 27.52 29.66 64.41 100</span></a>
|
||||
<a class="sourceLine" id="cb7-23" data-line-number="23"><span class="co"># nl 31.00 32.90 37.58 33.48 35.16 69.10 100</span></a>
|
||||
<a class="sourceLine" id="cb7-24" data-line-number="24"><span class="co"># es 25.86 26.98 32.09 27.47 28.42 67.33 100</span></a>
|
||||
<a class="sourceLine" id="cb7-25" data-line-number="25"><span class="co"># it 25.71 26.95 33.87 27.67 31.49 62.06 100</span></a>
|
||||
<a class="sourceLine" id="cb7-26" data-line-number="26"><span class="co"># fr 25.80 27.00 31.05 27.41 28.07 72.99 100</span></a>
|
||||
<a class="sourceLine" id="cb7-27" data-line-number="27"><span class="co"># pt 25.80 27.02 33.26 27.51 31.44 63.41 100</span></a></code></pre></div>
|
||||
<p>Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
Reference in New Issue
Block a user