diff --git a/DESCRIPTION b/DESCRIPTION index b375bd7f..7c259842 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR Version: 0.5.0.9021 -Date: 2019-03-06 +Date: 2019-03-09 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/R/mo.R b/R/mo.R index 94123298..5cfc5b9b 100755 --- a/R/mo.R +++ b/R/mo.R @@ -122,6 +122,7 @@ #' @importFrom dplyr %>% pull left_join #' @examples #' # These examples all return "B_STPHY_AUR", the ID of S. aureus: +#' as.mo("sau") # WHONET code #' as.mo("stau") #' as.mo("STAU") #' as.mo("staaur") @@ -598,6 +599,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, } # TRY OTHER SOURCES ---- + # WHONET and other common LIS codes if (toupper(x_backup[i]) %in% AMR::microorganisms.codes[, 1]) { mo_found <- AMR::microorganisms.codes[toupper(x_backup[i]) == AMR::microorganisms.codes[, 1], "mo"][1L] if (length(mo_found) > 0) { @@ -606,6 +608,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, } } if (!is.null(reference_df)) { + # self-defined reference if (x_backup[i] %in% reference_df[, 1]) { ref_mo <- reference_df[reference_df[, 1] == x_backup[i], "mo"] if (ref_mo %in% microorganismsDT[, mo]) { @@ -617,6 +620,13 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, } } + # allow no codes less than 4 characters long, was already checked for WHONET above + if (nchar(x_trimmed[i]) < 4) { + x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] + failures <- c(failures, x_backup[i]) + next + } + check_per_prevalence <- function(data_to_check, a.x_backup, b.x_trimmed, diff --git a/R/mo_property.R b/R/mo_property.R index b3ece779..d8ad30f4 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -222,32 +222,32 @@ mo_genus <- function(x, language = get_locale(), ...) { #' @rdname mo_property #' @export -mo_family <- function(x, ...) { - mo_validate(x = x, property = "family", ...) +mo_family <- function(x, language = get_locale(), ...) 
{ + mo_translate(mo_validate(x = x, property = "family", ...), language = language) } #' @rdname mo_property #' @export -mo_order <- function(x, ...) { - mo_validate(x = x, property = "order", ...) +mo_order <- function(x, language = get_locale(), ...) { + mo_translate(mo_validate(x = x, property = "order", ...), language = language) } #' @rdname mo_property #' @export -mo_class <- function(x, ...) { - mo_validate(x = x, property = "class", ...) +mo_class <- function(x, language = get_locale(), ...) { + mo_translate(mo_validate(x = x, property = "class", ...), language = language) } #' @rdname mo_property #' @export -mo_phylum <- function(x, ...) { - mo_validate(x = x, property = "phylum", ...) +mo_phylum <- function(x, language = get_locale(), ...) { + mo_translate(mo_validate(x = x, property = "phylum", ...), language = language) } #' @rdname mo_property #' @export -mo_kingdom <- function(x, ...) { - mo_validate(x = x, property = "kingdom", ...) +mo_kingdom <- function(x, language = get_locale(), ...) { + mo_translate(mo_validate(x = x, property = "kingdom", ...), language = language) } #' @rdname mo_property @@ -306,16 +306,16 @@ mo_rank <- function(x, ...) { #' @rdname mo_property #' @export -mo_taxonomy <- function(x, ...) { +mo_taxonomy <- function(x, language = get_locale(), ...) { x <- AMR::as.mo(x, ...) 
- base::list(kingdom = mo_kingdom(x), - phylum = mo_phylum(x), - class = mo_class(x), - order = mo_order(x), - family = mo_family(x), - genus = mo_genus(x), - species = mo_species(x), - subspecies = mo_subspecies(x)) + base::list(kingdom = mo_kingdom(x, language = language), + phylum = mo_phylum(x, language = language), + class = mo_class(x, language = language), + order = mo_order(x, language = language), + family = mo_family(x, language = language), + genus = mo_genus(x, language = language), + species = mo_species(x, language = language), + subspecies = mo_subspecies(x, language = language)) } #' @rdname mo_property diff --git a/docs/articles/SPSS.html b/docs/articles/SPSS.html index 6ccf67a4..3461506f 100644 --- a/docs/articles/SPSS.html +++ b/docs/articles/SPSS.html @@ -192,7 +192,7 @@
SPSS.Rmd
benchmarks.Rmd
In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first one is a WHONET code) or common laboratory codes, or common full organism names like the last one. Full organism names are always preferred.
To achieve this speed, the as.mo
function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of Thermus islandicus (B_THERMS_ISL
), a bug probably never found before in humans:
T.islandicus <- microbenchmark(as.mo("theisl"),
@@ -236,12 +236,12 @@
print(T.islandicus, unit = "ms", signif = 3)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
-#> as.mo("theisl") 269.0 270.0 294.0 293.0 317 320 10
-#> as.mo("THEISL") 272.0 313.0 327.0 316.0 321 476 10
-#> as.mo("T. islandicus") 142.0 142.0 159.0 144.0 191 205 10
-#> as.mo("T. islandicus") 142.0 143.0 166.0 164.0 188 196 10
-#> as.mo("Thermus islandicus") 68.4 68.6 86.8 69.2 113 116 10
That takes 7.4 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are almost fast - these are the most probable input from most data sets.
+#> as.mo("theisl") 262.0 263.0 284.0 284.0 304 308 10 +#> as.mo("THEISL") 263.0 264.0 293.0 304.0 306 308 10 +#> as.mo("T. islandicus") 142.0 142.0 151.0 143.0 147 187 10 +#> as.mo("T. islandicus") 142.0 142.0 169.0 184.0 185 194 10 +#> as.mo("Thermus islandicus") 67.9 68.1 93.3 90.3 116 130 10 +That takes 7.8 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are almost fast - these are the most probable input from most data sets.
In the figure below, we compare Escherichia coli (which is very common) with Prevotella brevis (which is moderately common) and with Thermus islandicus (which is very uncommon):
par(mar = c(5, 16, 4, 2)) # set more space for left margin text (16)
@@ -287,8 +287,8 @@
print(run_it, unit = "ms", signif = 3)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
-#> mo_fullname(x) 688 757 800 758 919 921 10
So transforming 500,000 values (!!) of 50 unique values only takes 0.76 seconds (758 ms). You only lose time on your unique input values.
+#> mo_fullname(x) 734 810 840 817 860 973 10 +So transforming 500,000 values (!!) of 50 unique values only takes 0.82 seconds (817 ms). You only lose time on your unique input values.
So going from mo_fullname("Staphylococcus aureus")
to "Staphylococcus aureus"
takes 0.0006 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:
run_it <- microbenchmark(A = mo_species("aureus"),
B = mo_genus("Staphylococcus"),
@@ -317,14 +317,14 @@
print(run_it, unit = "ms", signif = 3)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
-#> A 0.321 0.434 0.481 0.501 0.519 0.665 10
-#> B 0.352 0.414 0.482 0.475 0.513 0.715 10
-#> C 0.394 0.648 0.670 0.679 0.770 0.839 10
-#> D 0.354 0.371 0.393 0.395 0.410 0.428 10
-#> E 0.286 0.353 0.370 0.369 0.399 0.443 10
-#> F 0.317 0.373 0.377 0.380 0.392 0.437 10
-#> G 0.272 0.307 0.352 0.348 0.387 0.431 10
-#> H 0.293 0.338 0.366 0.361 0.412 0.441 10
Of course, when running mo_phylum("Firmicutes")
the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes"
too, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.
Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.
diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png index d8aa9d5a..a6412a79 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 3faf0f12..30856766 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -366,6 +366,7 @@ Themo_property
functions (like # NOT RUN { # These examples all return "B_STPHY_AUR", the ID of S. aureus: +as.mo("sau") # WHONET code as.mo("stau") as.mo("STAU") as.mo("staaur") diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index 46f508c6..d91f524f 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -251,15 +251,15 @@ mo_genus(x, language = get_locale(), ...) -mo_family(x, ...) +mo_family(x, language = get_locale(), ...) -mo_order(x, ...) +mo_order(x, language = get_locale(), ...) -mo_class(x, ...) +mo_class(x, language = get_locale(), ...) -mo_phylum(x, ...) +mo_phylum(x, language = get_locale(), ...) -mo_kingdom(x, ...) +mo_kingdom(x, language = get_locale(), ...) mo_type(x, language = get_locale(), ...) @@ -273,7 +273,7 @@ mo_rank(x, ...) -mo_taxonomy(x, ...) +mo_taxonomy(x, language = get_locale(), ...) mo_url(x, open = FALSE, ...) diff --git a/man/as.mo.Rd b/man/as.mo.Rd index ca9bd668..a621d410 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -138,6 +138,7 @@ On our website \url{https://msberends.gitlab.io/AMR} you can find \href{https:// \examples{ # These examples all return "B_STPHY_AUR", the ID of S. aureus: +as.mo("sau") # WHONET code as.mo("stau") as.mo("STAU") as.mo("staaur") diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 085dbd3d..b595e32d 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -32,15 +32,15 @@ mo_species(x, language = get_locale(), ...) mo_genus(x, language = get_locale(), ...) -mo_family(x, ...) +mo_family(x, language = get_locale(), ...) -mo_order(x, ...) +mo_order(x, language = get_locale(), ...) -mo_class(x, ...) +mo_class(x, language = get_locale(), ...) -mo_phylum(x, ...) +mo_phylum(x, language = get_locale(), ...) -mo_kingdom(x, ...) +mo_kingdom(x, language = get_locale(), ...) mo_type(x, language = get_locale(), ...) @@ -54,7 +54,7 @@ mo_year(x, ...) mo_rank(x, ...) -mo_taxonomy(x, ...) +mo_taxonomy(x, language = get_locale(), ...) mo_url(x, open = FALSE, ...)