diff --git a/DESCRIPTION b/DESCRIPTION index ee7860ad..99727499 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 1.5.0.9022 +Version: 1.5.0.9023 Date: 2021-02-21 Title: Antimicrobial Resistance Data Analysis Authors@R: c( diff --git a/NEWS.md b/NEWS.md index 1e70ba86..063832d2 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.5.0.9022 +# AMR 1.5.0.9023 ## Last updated: 21 February 2021 ### New diff --git a/R/globals.R b/R/globals.R index b47f70d5..557a7a69 100755 --- a/R/globals.R +++ b/R/globals.R @@ -26,6 +26,7 @@ globalVariables(c(".rowid", "ab", "ab_txt", + "affect_mo_name", "angle", "antibiotic", "antibiotics", diff --git a/R/mo.R b/R/mo.R index 6dadfff8..2187c776 100755 --- a/R/mo.R +++ b/R/mo.R @@ -766,21 +766,24 @@ exec_as.mo <- function(x, # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) x[i] <- lookup(mo == toupper(gsub("g([abcdfghk])s", "B_STRPT_GRP\\1", - x_backup_without_spp[i])), uncertainty = -1) + x_backup_without_spp[i], + perl = TRUE)), uncertainty = -1) next } if (x_backup_without_spp[i] %like_case% "(streptococ|streptokok).* [abcdfghk]$") { # Streptococci in different languages, like "estreptococos grupo B" x[i] <- lookup(mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", "B_STRPT_GRP\\2", - x_backup_without_spp[i])), uncertainty = -1) + x_backup_without_spp[i], + perl = TRUE)), uncertainty = -1) next } if (x_backup_without_spp[i] %like_case% "group [abcdfghk] (streptococ|streptokok|estreptococ)") { # Streptococci in different languages, like "Group A Streptococci" x[i] <- lookup(mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GRP\\1", - x_backup_without_spp[i])), uncertainty = -1) + x_backup_without_spp[i], + perl = TRUE)), uncertainty = -1) next } if (x_backup_without_spp[i] %like_case% "haemoly.*strep") { @@ -843,7 +846,7 @@ exec_as.mo <- function(x, # Salmonella Group A to Z, just return S. 
species for now x[i] <- lookup(genus == "Salmonella", uncertainty = -1) next - } else if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup[i], ignore.case = FALSE) & + } else if (x_backup[i] %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & !x_backup[i] %like% "t[iy](ph|f)[iy]") { # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica # except for S. typhi, S. paratyphi, S. typhimurium @@ -1108,7 +1111,7 @@ exec_as.mo <- function(x, cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (3) look for genus only, part of name\n")) } if (nchar(g.x_backup_without_spp) > 4 & !b.x_trimmed %like_case% " ") { - if (!grepl("^[A-Z][a-z]+", b.x_trimmed, ignore.case = FALSE)) { + if (!b.x_trimmed %like_case% "^[A-Z][a-z]+") { if (isTRUE(debug)) { message("Running '", paste(b.x_trimmed, "species"), "'") } @@ -1852,7 +1855,7 @@ print.mo_uncertainties <- function(x, ...) { width = 0.98 * getOption("width")), collapse = "") # after strwrap, make taxonomic names italic - candidates <- gsub("([A-Za-z]+)", font_italic("\\1"), candidates) + candidates <- gsub("([A-Za-z]+)", font_italic("\\1"), candidates, perl = TRUE) candidates <- gsub(paste(font_italic(c("Also", "matched"), collapse = NULL), collapse = " "), "Also matched", candidates, fixed = TRUE) diff --git a/R/mo_property.R b/R/mo_property.R index e506e8eb..c6133f5b 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -723,12 +723,6 @@ mo_validate <- function(x, property, language, ...) { x <- exec_as.mo(x, property = property, initial_search = FALSE, language = language, ...) 
} else if (!all(x %in% MO_lookup[, property, drop = TRUE]) | has_Becker_or_Lancefield) { - accepted_args <- names(as.list(args("as.mo"))) - accepted_args <- accepted_args[!accepted_args %in% c("", "...", "x", "property")] - stop_if(!all(names(dots) %in% names(as.list(args("as.mo")))), - "invalid argument(s): ", vector_and(names(dots)[!names(dots) %in% names(as.list(args("as.mo")))], quotes = "'"), - ".\nAccepted arguments are ", vector_and(accepted_args, quotes = "'"), ".", - call = FALSE) x <- exec_as.mo(x, property = property, language = language, ...) } diff --git a/data-raw/AMR_latest.tar.gz b/data-raw/AMR_latest.tar.gz index 1fe5637c..58c5a1f4 100644 Binary files a/data-raw/AMR_latest.tar.gz and b/data-raw/AMR_latest.tar.gz differ diff --git a/docs/404.html b/docs/404.html index 406e4f83..0614e287 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9022 + 1.5.0.9023 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index df38b9f5..ea526df4 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9022 + 1.5.0.9023 diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html index 215fa0a6..fbfbfb39 100644 --- a/docs/articles/benchmarks.html +++ b/docs/articles/benchmarks.html @@ -39,7 +39,7 @@ AMR (for R) - 1.5.0.9021 + 1.5.0.9023 @@ -192,7 +192,6 @@ +# as.mo("sau") 11.0 12.0 27.0 13.0 49.0 51 10 +# as.mo("stau") 53.0 57.0 76.0 74.0 93.0 100 10 +# as.mo("STAU") 53.0 54.0 69.0 56.0 58.0 190 10 +# as.mo("staaur") 11.0 12.0 21.0 13.0 42.0 44 10 +# as.mo("STAAUR") 11.0 12.0 16.0 13.0 14.0 48 10 +# as.mo("S. 
aureus") 27.0 27.0 38.0 32.0 35.0 75 10 +# as.mo("S aureus") 27.0 29.0 38.0 30.0 36.0 73 10 +# as.mo("Staphylococcus aureus") 3.1 3.2 6.9 3.5 3.7 38 10 +# as.mo("Staphylococcus aureus (MRSA)") 250.0 260.0 270.0 260.0 280.0 290 10 +# as.mo("Sthafilokkockus aaureuz") 160.0 200.0 200.0 200.0 210.0 230 10 +# as.mo("MRSA") 10.0 11.0 12.0 11.0 13.0 14 10 +# as.mo("VISA") 19.0 20.0 26.0 22.0 24.0 61 10 +# as.mo("VRSA") 19.0 20.0 24.0 21.0 22.0 56 10

In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In the case of 100 milliseconds, this is only 10 input values per second. It is clear that accepted taxonomic names are extremely fast, but some variations can take up to 500-1000 times as much time.

To improve performance, two important calculations take almost no time at all: repetitive results and already precalculated results.

@@ -291,8 +270,8 @@ print(run_it, unit = "ms", signif = 3) # Unit: milliseconds # expr min lq mean median uq max neval -# mo_name(x) 125 144 182 171 186 298 10 -

So getting official taxonomic names of 2,000,000 (!!) items consisting of 90 unique values only takes 0.171 seconds. You only lose time on your unique input values.

+# mo_name(x) 137 146 178 172 193 282 10 +

So getting official taxonomic names of 2,000,000 (!!) items consisting of 90 unique values only takes 0.172 seconds. You only lose time on your unique input values.

@@ -305,11 +284,11 @@ times = 10) print(run_it, unit = "ms", signif = 3) # Unit: milliseconds -# expr min lq mean median uq max neval -# A 7.08 7.37 15.90 7.94 9.02 48.9 10 -# B 23.50 24.00 25.20 24.10 26.20 30.1 10 -# C 1.54 1.62 1.76 1.71 1.81 2.3 10

-

So going from mo_name("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0017 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

+# expr min lq mean median uq max neval +# A 7.12 7.56 7.89 7.7 8.39 8.69 10 +# B 23.90 24.50 35.10 24.8 27.40 77.50 10 +# C 1.73 1.84 1.95 1.9 2.09 2.36 10 +

So going from mo_name("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0019 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

 run_it <- microbenchmark(A = mo_species("aureus"),
                          B = mo_genus("Staphylococcus"),
@@ -322,15 +301,15 @@
                          times = 10)
 print(run_it, unit = "ms", signif = 3)
 # Unit: milliseconds
-#  expr  min   lq mean median   uq   max neval
-#     A 1.63 1.92 1.99   2.01 2.11  2.29    10
-#     B 1.67 1.89 2.01   1.96 2.12  2.62    10
-#     C 1.86 1.87 1.96   1.97 2.04  2.13    10
-#     D 1.63 1.82 1.90   1.94 2.00  2.06    10
-#     E 1.60 1.94 3.05   1.97 2.24 12.60    10
-#     F 1.66 1.90 2.18   1.95 2.01  4.33    10
-#     G 1.84 1.89 1.99   1.98 2.02  2.24    10
-#     H 1.79 1.95 2.08   2.06 2.25  2.36    10
+# expr min lq mean median uq max neval +# A 1.54 1.57 1.73 1.69 1.88 2.00 10 +# B 1.51 1.52 1.75 1.70 1.86 2.41 10 +# C 1.55 1.60 1.70 1.70 1.81 1.86 10 +# D 1.55 1.59 1.71 1.66 1.83 1.94 10 +# E 1.50 1.55 1.68 1.61 1.78 2.10 10 +# F 1.51 1.62 1.75 1.79 1.82 2.02 10 +# G 1.52 1.53 1.68 1.60 1.80 2.02 10 +# H 1.53 1.54 1.63 1.60 1.67 1.84 10

Of course, when running mo_phylum("Firmicutes") the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes" anyway, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.

@@ -358,13 +337,13 @@ print(run_it, unit = "ms", signif = 4) # Unit: milliseconds # expr min lq mean median uq max neval -# en 17.21 17.88 21.68 18.14 19.20 71.64 100 -# de 20.08 20.74 26.58 21.26 22.41 159.80 100 -# nl 24.88 25.81 31.01 26.32 27.03 74.57 100 -# es 19.91 20.80 26.33 21.28 22.60 80.34 100 -# it 19.96 20.63 25.21 21.20 22.25 76.35 100 -# fr 19.61 20.38 26.62 21.15 22.59 80.90 100 -# pt 19.87 20.58 27.65 20.92 23.22 80.73 100
+# en 17.38 17.71 25.63 18.11 19.75 81.61 100 +# de 20.14 20.61 24.87 20.91 21.57 85.23 100 +# nl 25.02 25.46 28.40 25.83 26.58 78.47 100 +# es 19.90 20.41 24.86 20.77 21.78 81.38 100 +# it 20.01 20.44 24.40 20.80 21.57 76.08 100 +# fr 19.85 20.23 29.97 20.89 23.56 192.40 100 +# pt 19.90 20.26 26.53 20.75 22.56 85.69 100

Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.

diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png index 16afa08a..c643f012 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 7db7b25e..ec8d95b2 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9022 + 1.5.0.9023 diff --git a/docs/authors.html b/docs/authors.html index 404d5bf5..29ad2cfc 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9022 + 1.5.0.9023 diff --git a/docs/index.html b/docs/index.html index c66e627b..d631d734 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.5.0.9022 + 1.5.0.9023 diff --git a/docs/news/index.html b/docs/news/index.html index 106b13c4..5388f298 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9022 + 1.5.0.9023 @@ -236,9 +236,9 @@ Source: NEWS.md -
-

-AMR 1.5.0.9022 Unreleased +
+

+AMR 1.5.0.9023 Unreleased

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 9ca0c9d3..929b6715 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -12,7 +12,7 @@ articles: datasets: datasets.html resistance_predict: resistance_predict.html welcome_to_AMR: welcome_to_AMR.html -last_built: 2021-02-21T21:55Z +last_built: 2021-02-21T22:18Z urls: reference: https://msberends.github.io/AMR//reference article: https://msberends.github.io/AMR//articles diff --git a/docs/reference/index.html b/docs/reference/index.html index 01eabc39..1bab2d20 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9022 + 1.5.0.9023

diff --git a/docs/survey.html b/docs/survey.html index 992f92b0..fe19f59c 100644 --- a/docs/survey.html +++ b/docs/survey.html @@ -81,7 +81,7 @@ AMR (for R) - 1.5.0.9022 + 1.5.0.9023
diff --git a/vignettes/benchmarks.Rmd b/vignettes/benchmarks.Rmd index 03e886aa..391b2ba1 100755 --- a/vignettes/benchmarks.Rmd +++ b/vignettes/benchmarks.Rmd @@ -1,6 +1,5 @@ --- title: "Benchmarks" -date: '`r format(Sys.Date(), "%d %B %Y")`' output: rmarkdown::html_vignette: toc: true @@ -20,7 +19,7 @@ knitr::opts_chunk$set( fig.width = 7.5, fig.height = 4.5, dpi = 75 -) +) ``` One of the most important features of this package is the complete microbial taxonomic database, supplied by the [Catalogue of Life](http://catalogueoflife.org). We created a function `as.mo()` that transforms any user input value to a valid microbial ID by using intelligent rules combined with the taxonomic tree of Catalogue of Life.