diff --git a/DESCRIPTION b/DESCRIPTION index b375bd7f..7c259842 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR Version: 0.5.0.9021 -Date: 2019-03-06 +Date: 2019-03-09 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/R/mo.R b/R/mo.R index 94123298..5cfc5b9b 100755 --- a/R/mo.R +++ b/R/mo.R @@ -122,6 +122,7 @@ #' @importFrom dplyr %>% pull left_join #' @examples #' # These examples all return "B_STPHY_AUR", the ID of S. aureus: +#' as.mo("sau") # WHONET code #' as.mo("stau") #' as.mo("STAU") #' as.mo("staaur") @@ -598,6 +599,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, } # TRY OTHER SOURCES ---- + # WHONET and other common LIS codes if (toupper(x_backup[i]) %in% AMR::microorganisms.codes[, 1]) { mo_found <- AMR::microorganisms.codes[toupper(x_backup[i]) == AMR::microorganisms.codes[, 1], "mo"][1L] if (length(mo_found) > 0) { @@ -606,6 +608,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, } } if (!is.null(reference_df)) { + # self-defined reference if (x_backup[i] %in% reference_df[, 1]) { ref_mo <- reference_df[reference_df[, 1] == x_backup[i], "mo"] if (ref_mo %in% microorganismsDT[, mo]) { @@ -617,6 +620,13 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, } } + # allow no codes less than 4 characters long, was already checked for WHONET above + if (nchar(x_trimmed[i]) < 4) { + x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] + failures <- c(failures, x_backup[i]) + next + } + check_per_prevalence <- function(data_to_check, a.x_backup, b.x_trimmed, diff --git a/R/mo_property.R b/R/mo_property.R index b3ece779..d8ad30f4 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -222,32 +222,32 @@ mo_genus <- function(x, language = get_locale(), ...) { #' @rdname mo_property #' @export -mo_family <- function(x, ...) { - mo_validate(x = x, property = "family", ...) +mo_family <- function(x, language = get_locale(), ...) 
{ + mo_translate(mo_validate(x = x, property = "family", ...), language = language) } #' @rdname mo_property #' @export -mo_order <- function(x, ...) { - mo_validate(x = x, property = "order", ...) +mo_order <- function(x, language = get_locale(), ...) { + mo_translate(mo_validate(x = x, property = "order", ...), language = language) } #' @rdname mo_property #' @export -mo_class <- function(x, ...) { - mo_validate(x = x, property = "class", ...) +mo_class <- function(x, language = get_locale(), ...) { + mo_translate(mo_validate(x = x, property = "class", ...), language = language) } #' @rdname mo_property #' @export -mo_phylum <- function(x, ...) { - mo_validate(x = x, property = "phylum", ...) +mo_phylum <- function(x, language = get_locale(), ...) { + mo_translate(mo_validate(x = x, property = "phylum", ...), language = language) } #' @rdname mo_property #' @export -mo_kingdom <- function(x, ...) { - mo_validate(x = x, property = "kingdom", ...) +mo_kingdom <- function(x, language = get_locale(), ...) { + mo_translate(mo_validate(x = x, property = "kingdom", ...), language = language) } #' @rdname mo_property @@ -306,16 +306,16 @@ mo_rank <- function(x, ...) { #' @rdname mo_property #' @export -mo_taxonomy <- function(x, ...) { +mo_taxonomy <- function(x, language = get_locale(), ...) { x <- AMR::as.mo(x, ...) 
- base::list(kingdom = mo_kingdom(x), - phylum = mo_phylum(x), - class = mo_class(x), - order = mo_order(x), - family = mo_family(x), - genus = mo_genus(x), - species = mo_species(x), - subspecies = mo_subspecies(x)) + base::list(kingdom = mo_kingdom(x, language = language), + phylum = mo_phylum(x, language = language), + class = mo_class(x, language = language), + order = mo_order(x, language = language), + family = mo_family(x, language = language), + genus = mo_genus(x, language = language), + species = mo_species(x, language = language), + subspecies = mo_subspecies(x, language = language)) } #' @rdname mo_property diff --git a/docs/articles/SPSS.html b/docs/articles/SPSS.html index 6ccf67a4..3461506f 100644 --- a/docs/articles/SPSS.html +++ b/docs/articles/SPSS.html @@ -192,7 +192,7 @@

How to import data from SPSS / SAS / Stata

Matthijs S. Berends

-

06 March 2019

+

09 March 2019

diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html index 9f5ef3fb..6848bcaa 100644 --- a/docs/articles/benchmarks.html +++ b/docs/articles/benchmarks.html @@ -192,7 +192,7 @@

Benchmarks

Matthijs S. Berends

-

06 March 2019

+

09 March 2019

@@ -217,14 +217,14 @@ times = 10) print(S.aureus, unit = "ms", signif = 3) #> Unit: milliseconds -#> expr min lq mean median uq max neval -#> as.mo("sau") 16.50 16.60 17.0 16.70 17.00 19.0 10 -#> as.mo("stau") 31.70 31.90 51.5 32.10 49.10 166.0 10 -#> as.mo("staaur") 16.60 16.70 21.8 16.80 17.00 65.7 10 -#> as.mo("STAAUR") 16.70 16.70 27.1 16.80 17.60 77.3 10 -#> as.mo("S. aureus") 24.60 24.70 30.2 25.30 31.80 57.8 10 -#> as.mo("S. aureus") 24.60 24.60 37.7 25.10 66.00 67.4 10 -#> as.mo("Staphylococcus aureus") 7.47 7.48 11.1 7.65 8.02 41.1 10 +#> expr min lq mean median uq max neval +#> as.mo("sau") 16.60 16.60 25.2 16.80 18.00 58.3 10 +#> as.mo("stau") 31.60 31.80 44.8 32.40 72.20 76.7 10 +#> as.mo("staaur") 16.60 16.60 26.4 16.70 17.30 71.9 10 +#> as.mo("STAAUR") 16.50 16.60 16.6 16.60 16.70 16.8 10 +#> as.mo("S. aureus") 24.50 24.60 29.0 24.70 25.00 66.6 10 +#> as.mo("S. aureus") 24.30 24.60 24.6 24.60 24.70 24.9 10 +#> as.mo("Staphylococcus aureus") 7.45 7.47 11.9 7.53 7.97 50.0 10

In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first one is a WHONET code) or common laboratory codes, or common full organism names like the last one. Full organism names are always preferred.

To achieve this speed, the as.mo function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined more slowly. See this example for the ID of Thermus islandicus (B_THERMS_ISL), a bug probably never found before in humans:

T.islandicus <- microbenchmark(as.mo("theisl"),
@@ -236,12 +236,12 @@
 print(T.islandicus, unit = "ms", signif = 3)
 #> Unit: milliseconds
 #>                         expr   min    lq  mean median  uq max neval
-#>              as.mo("theisl") 269.0 270.0 294.0  293.0 317 320    10
-#>              as.mo("THEISL") 272.0 313.0 327.0  316.0 321 476    10
-#>       as.mo("T. islandicus") 142.0 142.0 159.0  144.0 191 205    10
-#>      as.mo("T.  islandicus") 142.0 143.0 166.0  164.0 188 196    10
-#>  as.mo("Thermus islandicus")  68.4  68.6  86.8   69.2 113 116    10
-

That takes 7.4 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are almost fast - these are the most probable input from most data sets.

+#> as.mo("theisl") 262.0 263.0 284.0 284.0 304 308 10 +#> as.mo("THEISL") 263.0 264.0 293.0 304.0 306 308 10 +#> as.mo("T. islandicus") 142.0 142.0 151.0 143.0 147 187 10 +#> as.mo("T. islandicus") 142.0 142.0 169.0 184.0 185 194 10 +#> as.mo("Thermus islandicus") 67.9 68.1 93.3 90.3 116 130 10 +

That takes 7.8 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are still reasonably fast - these are the most probable input from most data sets.

In the figure below, we compare Escherichia coli (which is very common) with Prevotella brevis (which is moderately common) and with Thermus islandicus (which is very uncommon):

par(mar = c(5, 16, 4, 2)) # set more space for left margin text (16)
 
@@ -287,8 +287,8 @@
 print(run_it, unit = "ms", signif = 3)
 #> Unit: milliseconds
 #>            expr min  lq mean median  uq max neval
-#>  mo_fullname(x) 688 757  800    758 919 921    10
-

So transforming 500,000 values (!!) of 50 unique values only takes 0.76 seconds (758 ms). You only lose time on your unique input values.

+#> mo_fullname(x) 734 810 840 817 860 973 10 +

So transforming 500,000 values (!!) of 50 unique values only takes 0.82 seconds (817 ms). You only lose time on your unique input values.

@@ -301,9 +301,9 @@ print(run_it, unit = "ms", signif = 3) #> Unit: milliseconds #> expr min lq mean median uq max neval -#> A 11.200 11.300 11.900 12.200 12.300 12.600 10 -#> B 22.300 22.500 23.300 23.100 23.900 24.700 10 -#> C 0.339 0.519 0.612 0.564 0.768 0.776 10

+#> A 11.200 11.300 11.400 11.400 11.600 11.600 10 +#> B 22.200 22.400 26.800 22.600 22.800 63.700 10 +#> C 0.328 0.564 0.525 0.568 0.577 0.591 10

So going from mo_fullname("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0006 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

run_it <- microbenchmark(A = mo_species("aureus"),
                          B = mo_genus("Staphylococcus"),
@@ -317,14 +317,14 @@
 print(run_it, unit = "ms", signif = 3)
 #> Unit: milliseconds
 #>  expr   min    lq  mean median    uq   max neval
-#>     A 0.321 0.434 0.481  0.501 0.519 0.665    10
-#>     B 0.352 0.414 0.482  0.475 0.513 0.715    10
-#>     C 0.394 0.648 0.670  0.679 0.770 0.839    10
-#>     D 0.354 0.371 0.393  0.395 0.410 0.428    10
-#>     E 0.286 0.353 0.370  0.369 0.399 0.443    10
-#>     F 0.317 0.373 0.377  0.380 0.392 0.437    10
-#>     G 0.272 0.307 0.352  0.348 0.387 0.431    10
-#>     H 0.293 0.338 0.366  0.361 0.412 0.441    10
+#> A 0.318 0.376 0.414 0.419 0.449 0.537 10 +#> B 0.343 0.397 0.437 0.447 0.479 0.522 10 +#> C 0.325 0.380 0.486 0.482 0.554 0.703 10 +#> D 0.334 0.337 0.381 0.372 0.426 0.434 10 +#> E 0.304 0.322 0.356 0.335 0.393 0.460 10 +#> F 0.295 0.323 0.370 0.362 0.424 0.463 10 +#> G 0.296 0.321 0.362 0.348 0.387 0.470 10 +#> H 0.289 0.335 0.355 0.351 0.387 0.421 10

Of course, when running mo_phylum("Firmicutes") the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes" too, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.

@@ -351,13 +351,13 @@ print(run_it, unit = "ms", signif = 4) #> Unit: milliseconds #> expr min lq mean median uq max neval -#> en 14.97 15.49 15.46 15.56 15.60 15.65 10 -#> de 27.29 27.76 44.46 27.98 69.27 71.00 10 -#> nl 27.22 27.97 28.53 28.03 29.40 31.36 10 -#> es 27.45 27.96 32.24 28.00 28.08 69.99 10 -#> it 27.21 27.32 28.01 27.89 28.05 29.41 10 -#> fr 27.26 27.94 28.16 28.12 28.25 29.47 10 -#> pt 27.08 27.45 36.14 27.92 28.07 70.34 10
+#> en 15.57 15.87 24.21 20.71 37.98 38.78 10 +#> de 28.79 34.96 53.24 52.02 56.97 95.22 10 +#> nl 28.31 29.29 47.74 40.57 58.94 97.03 10 +#> es 28.97 30.43 42.78 34.70 51.47 72.11 10 +#> it 27.71 29.15 38.88 31.19 35.10 76.86 10 +#> fr 28.37 29.28 40.89 41.73 50.43 56.78 10 +#> pt 27.82 29.03 42.49 29.84 50.14 94.96 10

Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.

diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png index d8aa9d5a..a6412a79 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 3faf0f12..30856766 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -366,6 +366,7 @@ The mo_property functions (like Examples
# NOT RUN {
 # These examples all return "B_STPHY_AUR", the ID of S. aureus:
+as.mo("sau") # WHONET code
 as.mo("stau")
 as.mo("STAU")
 as.mo("staaur")
diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html
index 46f508c6..d91f524f 100644
--- a/docs/reference/mo_property.html
+++ b/docs/reference/mo_property.html
@@ -251,15 +251,15 @@
 
 mo_genus(x, language = get_locale(), ...)
 
-mo_family(x, ...)
+mo_family(x, language = get_locale(), ...)
 
-mo_order(x, ...)
+mo_order(x, language = get_locale(), ...)
 
-mo_class(x, ...)
+mo_class(x, language = get_locale(), ...)
 
-mo_phylum(x, ...)
+mo_phylum(x, language = get_locale(), ...)
 
-mo_kingdom(x, ...)
+mo_kingdom(x, language = get_locale(), ...)
 
 mo_type(x, language = get_locale(), ...)
 
@@ -273,7 +273,7 @@
 
 mo_rank(x, ...)
 
-mo_taxonomy(x, ...)
+mo_taxonomy(x, language = get_locale(), ...)
 
 mo_url(x, open = FALSE, ...)
 
diff --git a/man/as.mo.Rd b/man/as.mo.Rd
index ca9bd668..a621d410 100644
--- a/man/as.mo.Rd
+++ b/man/as.mo.Rd
@@ -138,6 +138,7 @@ On our website \url{https://msberends.gitlab.io/AMR} you can find \href{https://
 
 \examples{
 # These examples all return "B_STPHY_AUR", the ID of S. aureus:
+as.mo("sau") # WHONET code
 as.mo("stau")
 as.mo("STAU")
 as.mo("staaur")
diff --git a/man/mo_property.Rd b/man/mo_property.Rd
index 085dbd3d..b595e32d 100644
--- a/man/mo_property.Rd
+++ b/man/mo_property.Rd
@@ -32,15 +32,15 @@ mo_species(x, language = get_locale(), ...)
 
 mo_genus(x, language = get_locale(), ...)
 
-mo_family(x, ...)
+mo_family(x, language = get_locale(), ...)
 
-mo_order(x, ...)
+mo_order(x, language = get_locale(), ...)
 
-mo_class(x, ...)
+mo_class(x, language = get_locale(), ...)
 
-mo_phylum(x, ...)
+mo_phylum(x, language = get_locale(), ...)
 
-mo_kingdom(x, ...)
+mo_kingdom(x, language = get_locale(), ...)
 
 mo_type(x, language = get_locale(), ...)
 
@@ -54,7 +54,7 @@ mo_year(x, ...)
 
 mo_rank(x, ...)
 
-mo_taxonomy(x, ...)
+mo_taxonomy(x, language = get_locale(), ...)
 
 mo_url(x, open = FALSE, ...)