(v1.5.0.9023) mo properties speed improvement

This commit is contained in:
dr. M.S. (Matthijs) Berends 2021-02-21 23:19:40 +01:00
parent 062c49fca1
commit 0fdabff1ba
18 changed files with 62 additions and 86 deletions

View File

@ -1,5 +1,5 @@
Package: AMR
Version: 1.5.0.9022
Version: 1.5.0.9023
Date: 2021-02-21
Title: Antimicrobial Resistance Data Analysis
Authors@R: c(

View File

@ -1,4 +1,4 @@
# AMR 1.5.0.9022
# AMR 1.5.0.9023
## <small>Last updated: 21 February 2021</small>
### New

View File

@ -26,6 +26,7 @@
globalVariables(c(".rowid",
"ab",
"ab_txt",
"affect_mo_name",
"angle",
"antibiotic",
"antibiotics",

15
R/mo.R
View File

@ -766,21 +766,24 @@ exec_as.mo <- function(x,
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB)
x[i] <- lookup(mo == toupper(gsub("g([abcdfghk])s",
"B_STRPT_GRP\\1",
x_backup_without_spp[i])), uncertainty = -1)
x_backup_without_spp[i],
perl = TRUE)), uncertainty = -1)
next
}
if (x_backup_without_spp[i] %like_case% "(streptococ|streptokok).* [abcdfghk]$") {
# Streptococci in different languages, like "estreptococos grupo B"
x[i] <- lookup(mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$",
"B_STRPT_GRP\\2",
x_backup_without_spp[i])), uncertainty = -1)
x_backup_without_spp[i],
perl = TRUE)), uncertainty = -1)
next
}
if (x_backup_without_spp[i] %like_case% "group [abcdfghk] (streptococ|streptokok|estreptococ)") {
# Streptococci in different languages, like "Group A Streptococci"
x[i] <- lookup(mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*",
"B_STRPT_GRP\\1",
x_backup_without_spp[i])), uncertainty = -1)
x_backup_without_spp[i],
perl = TRUE)), uncertainty = -1)
next
}
if (x_backup_without_spp[i] %like_case% "haemoly.*strep") {
@ -843,7 +846,7 @@ exec_as.mo <- function(x,
# Salmonella Group A to Z, just return S. species for now
x[i] <- lookup(genus == "Salmonella", uncertainty = -1)
next
} else if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup[i], ignore.case = FALSE) &
} else if (x_backup[i] %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" &
!x_backup[i] %like% "t[iy](ph|f)[iy]") {
# Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica
# except for S. typhi, S. paratyphi, S. typhimurium
@ -1108,7 +1111,7 @@ exec_as.mo <- function(x,
cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (3) look for genus only, part of name\n"))
}
if (nchar(g.x_backup_without_spp) > 4 & !b.x_trimmed %like_case% " ") {
if (!grepl("^[A-Z][a-z]+", b.x_trimmed, ignore.case = FALSE)) {
if (!b.x_trimmed %like_case% "^[A-Z][a-z]+") {
if (isTRUE(debug)) {
message("Running '", paste(b.x_trimmed, "species"), "'")
}
@ -1852,7 +1855,7 @@ print.mo_uncertainties <- function(x, ...) {
width = 0.98 * getOption("width")),
collapse = "")
# after strwrap, make taxonomic names italic
candidates <- gsub("([A-Za-z]+)", font_italic("\\1"), candidates)
candidates <- gsub("([A-Za-z]+)", font_italic("\\1"), candidates, perl = TRUE)
candidates <- gsub(paste(font_italic(c("Also", "matched"), collapse = NULL), collapse = " "),
"Also matched",
candidates, fixed = TRUE)

View File

@ -723,12 +723,6 @@ mo_validate <- function(x, property, language, ...) {
x <- exec_as.mo(x, property = property, initial_search = FALSE, language = language, ...)
} else if (!all(x %in% MO_lookup[, property, drop = TRUE])
| has_Becker_or_Lancefield) {
accepted_args <- names(as.list(args("as.mo")))
accepted_args <- accepted_args[!accepted_args %in% c("", "...", "x", "property")]
stop_if(!all(names(dots) %in% names(as.list(args("as.mo")))),
"invalid argument(s): ", vector_and(names(dots)[!names(dots) %in% names(as.list(args("as.mo")))], quotes = "'"),
".\nAccepted arguments are ", vector_and(accepted_args, quotes = "'"), ".",
call = FALSE)
x <- exec_as.mo(x, property = property, language = language, ...)
}

Binary file not shown.

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="https://msberends.github.io/AMR//index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9023</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9023</span>
</span>
</div>

View File

@ -39,7 +39,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9023</span>
</span>
</div>
@ -192,7 +192,6 @@
<div class="page-header toc-ignore">
<h1 data-toc-skip>Benchmarks</h1>
<h4 class="date">21 February 2021</h4>
<small class="dont-index">Source: <a href="https://github.com/msberends/AMR/blob/master/vignettes/benchmarks.Rmd"><code>vignettes/benchmarks.Rmd</code></a></small>
<div class="hidden name"><code>benchmarks.Rmd</code></div>
@ -226,42 +225,22 @@
<span class="fu"><a href="../reference/as.mo.html">as.mo</a></span><span class="op">(</span><span class="st">"VISA"</span><span class="op">)</span>, <span class="co"># Vancomycin Intermediate S. aureus</span>
<span class="fu"><a href="../reference/as.mo.html">as.mo</a></span><span class="op">(</span><span class="st">"VRSA"</span><span class="op">)</span>, <span class="co"># Vancomycin Resistant S. aureus</span>
times <span class="op">=</span> <span class="fl">10</span><span class="op">)</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="co"># [1] "^st.* au"</span>
<span class="fu"><a href="https://rdrr.io/r/base/print.html">print</a></span><span class="op">(</span><span class="va">S.aureus</span>, unit <span class="op">=</span> <span class="st">"ms"</span>, signif <span class="op">=</span> <span class="fl">2</span><span class="op">)</span>
<span class="co"># Unit: milliseconds</span>
<span class="co"># expr min lq mean median uq max neval</span>
<span class="co"># as.mo("sau") 12.0 12.0 13.0 14.0 14.0 16 10</span>
<span class="co"># as.mo("stau") 55.0 67.0 88.0 95.0 98.0 120 10</span>
<span class="co"># as.mo("STAU") 57.0 66.0 110.0 100.0 130.0 250 10</span>
<span class="co"># as.mo("staaur") 12.0 12.0 17.0 13.0 14.0 54 10</span>
<span class="co"># as.mo("STAAUR") 11.0 12.0 17.0 13.0 14.0 61 10</span>
<span class="co"># as.mo("S. aureus") 30.0 32.0 62.0 69.0 93.0 95 10</span>
<span class="co"># as.mo("S aureus") 29.0 33.0 43.0 37.0 53.0 71 10</span>
<span class="co"># as.mo("Staphylococcus aureus") 2.2 2.4 2.6 2.6 2.8 3 10</span>
<span class="co"># as.mo("Staphylococcus aureus (MRSA)") 270.0 290.0 330.0 310.0 380.0 430 10</span>
<span class="co"># as.mo("Sthafilokkockus aaureuz") 190.0 210.0 240.0 250.0 270.0 290 10</span>
<span class="co"># as.mo("MRSA") 12.0 12.0 14.0 13.0 15.0 16 10</span>
<span class="co"># as.mo("VISA") 21.0 22.0 29.0 24.0 25.0 79 10</span>
<span class="co"># as.mo("VRSA") 21.0 24.0 35.0 28.0 56.0 58 10</span></code></pre></div>
<span class="co"># as.mo("sau") 11.0 12.0 27.0 13.0 49.0 51 10</span>
<span class="co"># as.mo("stau") 53.0 57.0 76.0 74.0 93.0 100 10</span>
<span class="co"># as.mo("STAU") 53.0 54.0 69.0 56.0 58.0 190 10</span>
<span class="co"># as.mo("staaur") 11.0 12.0 21.0 13.0 42.0 44 10</span>
<span class="co"># as.mo("STAAUR") 11.0 12.0 16.0 13.0 14.0 48 10</span>
<span class="co"># as.mo("S. aureus") 27.0 27.0 38.0 32.0 35.0 75 10</span>
<span class="co"># as.mo("S aureus") 27.0 29.0 38.0 30.0 36.0 73 10</span>
<span class="co"># as.mo("Staphylococcus aureus") 3.1 3.2 6.9 3.5 3.7 38 10</span>
<span class="co"># as.mo("Staphylococcus aureus (MRSA)") 250.0 260.0 270.0 260.0 280.0 290 10</span>
<span class="co"># as.mo("Sthafilokkockus aaureuz") 160.0 200.0 200.0 200.0 210.0 230 10</span>
<span class="co"># as.mo("MRSA") 10.0 11.0 12.0 11.0 13.0 14 10</span>
<span class="co"># as.mo("VISA") 19.0 20.0 26.0 22.0 24.0 61 10</span>
<span class="co"># as.mo("VRSA") 19.0 20.0 24.0 21.0 22.0 56 10</span></code></pre></div>
<p><img src="benchmarks_files/figure-html/unnamed-chunk-4-1.png" width="562.5"></p>
<p>In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second. It is clear that accepted taxonomic names are extremely fast, but some variations can take up to 500-1000 times as much time.</p>
<p>To improve performance, two important calculations take almost no time at all: <strong>repetitive results</strong> and <strong>already precalculated results</strong>.</p>
@ -291,8 +270,8 @@
<span class="fu"><a href="https://rdrr.io/r/base/print.html">print</a></span><span class="op">(</span><span class="va">run_it</span>, unit <span class="op">=</span> <span class="st">"ms"</span>, signif <span class="op">=</span> <span class="fl">3</span><span class="op">)</span>
<span class="co"># Unit: milliseconds</span>
<span class="co"># expr min lq mean median uq max neval</span>
<span class="co"># mo_name(x) 125 144 182 171 186 298 10</span></code></pre></div>
<p>So getting official taxonomic names of 2,000,000 (!!) items consisting of 90 unique values only takes 0.171 seconds. You only lose time on your unique input values.</p>
<span class="co"># mo_name(x) 137 146 178 172 193 282 10</span></code></pre></div>
<p>So getting official taxonomic names of 2,000,000 (!!) items consisting of 90 unique values only takes 0.172 seconds. You only lose time on your unique input values.</p>
</div>
<div id="precalculated-results" class="section level3">
<h3 class="hasAnchor">
@ -305,11 +284,11 @@
times <span class="op">=</span> <span class="fl">10</span><span class="op">)</span>
<span class="fu"><a href="https://rdrr.io/r/base/print.html">print</a></span><span class="op">(</span><span class="va">run_it</span>, unit <span class="op">=</span> <span class="st">"ms"</span>, signif <span class="op">=</span> <span class="fl">3</span><span class="op">)</span>
<span class="co"># Unit: milliseconds</span>
<span class="co"># expr min lq mean median uq max neval</span>
<span class="co"># A 7.08 7.37 15.90 7.94 9.02 48.9 10</span>
<span class="co"># B 23.50 24.00 25.20 24.10 26.20 30.1 10</span>
<span class="co"># C 1.54 1.62 1.76 1.71 1.81 2.3 10</span></code></pre></div>
<p>So going from <code><a href="../reference/mo_property.html">mo_name("Staphylococcus aureus")</a></code> to <code>"Staphylococcus aureus"</code> takes 0.0017 seconds - it doesnt even start calculating <em>if the result would be the same as the expected resulting value</em>. That goes for all helper functions:</p>
<span class="co"># expr min lq mean median uq max neval</span>
<span class="co"># A 7.12 7.56 7.89 7.7 8.39 8.69 10</span>
<span class="co"># B 23.90 24.50 35.10 24.8 27.40 77.50 10</span>
<span class="co"># C 1.73 1.84 1.95 1.9 2.09 2.36 10</span></code></pre></div>
<p>So going from <code><a href="../reference/mo_property.html">mo_name("Staphylococcus aureus")</a></code> to <code>"Staphylococcus aureus"</code> takes 0.0019 seconds - it doesn't even start calculating <em>if the result would be the same as the expected resulting value</em>. That goes for all helper functions:</p>
<div class="sourceCode" id="cb5"><pre class="downlit sourceCode r">
<code class="sourceCode R"><span class="va">run_it</span> <span class="op">&lt;-</span> <span class="fu">microbenchmark</span><span class="op">(</span>A <span class="op">=</span> <span class="fu"><a href="../reference/mo_property.html">mo_species</a></span><span class="op">(</span><span class="st">"aureus"</span><span class="op">)</span>,
B <span class="op">=</span> <span class="fu"><a href="../reference/mo_property.html">mo_genus</a></span><span class="op">(</span><span class="st">"Staphylococcus"</span><span class="op">)</span>,
@ -322,15 +301,15 @@
times <span class="op">=</span> <span class="fl">10</span><span class="op">)</span>
<span class="fu"><a href="https://rdrr.io/r/base/print.html">print</a></span><span class="op">(</span><span class="va">run_it</span>, unit <span class="op">=</span> <span class="st">"ms"</span>, signif <span class="op">=</span> <span class="fl">3</span><span class="op">)</span>
<span class="co"># Unit: milliseconds</span>
<span class="co"># expr min lq mean median uq max neval</span>
<span class="co"># A 1.63 1.92 1.99 2.01 2.11 2.29 10</span>
<span class="co"># B 1.67 1.89 2.01 1.96 2.12 2.62 10</span>
<span class="co"># C 1.86 1.87 1.96 1.97 2.04 2.13 10</span>
<span class="co"># D 1.63 1.82 1.90 1.94 2.00 2.06 10</span>
<span class="co"># E 1.60 1.94 3.05 1.97 2.24 12.60 10</span>
<span class="co"># F 1.66 1.90 2.18 1.95 2.01 4.33 10</span>
<span class="co"># G 1.84 1.89 1.99 1.98 2.02 2.24 10</span>
<span class="co"># H 1.79 1.95 2.08 2.06 2.25 2.36 10</span></code></pre></div>
<span class="co"># expr min lq mean median uq max neval</span>
<span class="co"># A 1.54 1.57 1.73 1.69 1.88 2.00 10</span>
<span class="co"># B 1.51 1.52 1.75 1.70 1.86 2.41 10</span>
<span class="co"># C 1.55 1.60 1.70 1.70 1.81 1.86 10</span>
<span class="co"># D 1.55 1.59 1.71 1.66 1.83 1.94 10</span>
<span class="co"># E 1.50 1.55 1.68 1.61 1.78 2.10 10</span>
<span class="co"># F 1.51 1.62 1.75 1.79 1.82 2.02 10</span>
<span class="co"># G 1.52 1.53 1.68 1.60 1.80 2.02 10</span>
<span class="co"># H 1.53 1.54 1.63 1.60 1.67 1.84 10</span></code></pre></div>
<p>Of course, when running <code><a href="../reference/mo_property.html">mo_phylum("Firmicutes")</a></code> the function has zero knowledge about the actual microorganism, namely <em>S. aureus</em>. But since the result would be <code>"Firmicutes"</code> anyway, there is no point in calculating the result. And because this package knows all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.</p>
</div>
<div id="results-in-other-languages" class="section level3">
@ -358,13 +337,13 @@
<span class="fu"><a href="https://rdrr.io/r/base/print.html">print</a></span><span class="op">(</span><span class="va">run_it</span>, unit <span class="op">=</span> <span class="st">"ms"</span>, signif <span class="op">=</span> <span class="fl">4</span><span class="op">)</span>
<span class="co"># Unit: milliseconds</span>
<span class="co"># expr min lq mean median uq max neval</span>
<span class="co"># en 17.21 17.88 21.68 18.14 19.20 71.64 100</span>
<span class="co"># de 20.08 20.74 26.58 21.26 22.41 159.80 100</span>
<span class="co"># nl 24.88 25.81 31.01 26.32 27.03 74.57 100</span>
<span class="co"># es 19.91 20.80 26.33 21.28 22.60 80.34 100</span>
<span class="co"># it 19.96 20.63 25.21 21.20 22.25 76.35 100</span>
<span class="co"># fr 19.61 20.38 26.62 21.15 22.59 80.90 100</span>
<span class="co"># pt 19.87 20.58 27.65 20.92 23.22 80.73 100</span></code></pre></div>
<span class="co"># en 17.38 17.71 25.63 18.11 19.75 81.61 100</span>
<span class="co"># de 20.14 20.61 24.87 20.91 21.57 85.23 100</span>
<span class="co"># nl 25.02 25.46 28.40 25.83 26.58 78.47 100</span>
<span class="co"># es 19.90 20.41 24.86 20.77 21.78 81.38 100</span>
<span class="co"># it 20.01 20.44 24.40 20.80 21.57 76.08 100</span>
<span class="co"># fr 19.85 20.23 29.97 20.89 23.56 192.40 100</span>
<span class="co"># pt 19.90 20.26 26.53 20.75 22.56 85.69 100</span></code></pre></div>
<p>Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.</p>
</div>
</div>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 68 KiB

After

Width:  |  Height:  |  Size: 68 KiB

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9023</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9023</span>
</span>
</div>

View File

@ -43,7 +43,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9023</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9023</span>
</span>
</div>
@ -236,9 +236,9 @@
<small>Source: <a href='https://github.com/msberends/AMR/blob/master/NEWS.md'><code>NEWS.md</code></a></small>
</div>
<div id="amr-1509022" class="section level1">
<h1 class="page-header" data-toc-text="1.5.0.9022">
<a href="#amr-1509022" class="anchor"></a>AMR 1.5.0.9022<small> Unreleased </small>
<div id="amr-1509023" class="section level1">
<h1 class="page-header" data-toc-text="1.5.0.9023">
<a href="#amr-1509023" class="anchor"></a>AMR 1.5.0.9023<small> Unreleased </small>
</h1>
<div id="last-updated-21-february-2021" class="section level2">
<h2 class="hasAnchor">

View File

@ -12,7 +12,7 @@ articles:
datasets: datasets.html
resistance_predict: resistance_predict.html
welcome_to_AMR: welcome_to_AMR.html
last_built: 2021-02-21T21:55Z
last_built: 2021-02-21T22:18Z
urls:
reference: https://msberends.github.io/AMR//reference
article: https://msberends.github.io/AMR//articles

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9023</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9022</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.5.0.9023</span>
</span>
</div>

View File

@ -1,6 +1,5 @@
---
title: "Benchmarks"
date: '`r format(Sys.Date(), "%d %B %Y")`'
output:
rmarkdown::html_vignette:
toc: true
@ -20,7 +19,7 @@ knitr::opts_chunk$set(
fig.width = 7.5,
fig.height = 4.5,
dpi = 75
)
)
```
One of the most important features of this package is the complete microbial taxonomic database, supplied by the [Catalogue of Life](http://catalogueoflife.org). We created a function `as.mo()` that transforms any user input value to a valid microbial ID by using intelligent rules combined with the taxonomic tree of Catalogue of Life.