From c6c3088e9f27bebc3d25cc7a0bec07622627dd24 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Thu, 17 Jan 2019 12:08:04 +0100 Subject: [PATCH] select() fix for freq --- DESCRIPTION | 4 +- NAMESPACE | 2 + NEWS.md | 15 +- R/freq.R | 8 + R/globals.R | 35 ++-- README.md | 7 +- docs/LICENSE-text.html | 2 +- docs/articles/benchmarks.html | 239 +++++++++++++------------ docs/articles/index.html | 2 +- docs/authors.html | 2 +- docs/index.html | 32 ++-- docs/news/index.html | 226 ++++++++++++++++++----- docs/pkgdown.yml | 2 +- docs/reference/ITIS.html | 2 +- docs/reference/age.html | 2 +- docs/reference/age_groups.html | 2 +- docs/reference/as.mo.html | 2 +- docs/reference/eucast_rules.html | 2 +- docs/reference/first_isolate.html | 2 +- docs/reference/g.test.html | 2 +- docs/reference/guess_ab_col.html | 2 +- docs/reference/index.html | 2 +- docs/reference/key_antibiotics.html | 2 +- docs/reference/mdro.html | 2 +- docs/reference/microorganisms.html | 2 +- docs/reference/microorganisms.old.html | 2 +- docs/reference/mo_property.html | 2 +- docs/reference/resistance_predict.html | 2 +- index.md | 4 +- vignettes/benchmarks.Rmd | 1 + 30 files changed, 381 insertions(+), 230 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index aaea9a0d..0f63d641 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.5.0.9009 -Date: 2019-01-15 +Version: 0.5.0.9010 +Date: 2019-01-17 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 9aac9a76..70daebb9 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ S3method(print,mo) S3method(print,rsi) S3method(pull,atc) S3method(pull,mo) +S3method(select,frequency_tbl) S3method(skewness,data.frame) S3method(skewness,default) S3method(skewness,matrix) @@ -167,6 +168,7 @@ exportMethods(print.mo) exportMethods(print.rsi) exportMethods(pull.atc) exportMethods(pull.mo) +exportMethods(select.frequency_tbl) exportMethods(skewness) 
exportMethods(skewness.data.frame) exportMethods(skewness.default) diff --git a/NEWS.md b/NEWS.md index fa50d777..480f68b9 100755 --- a/NEWS.md +++ b/NEWS.md @@ -7,12 +7,12 @@ * Contains the complete manual of this package and all of its functions with an explanation of their parameters * Contains a comprehensive tutorial about how to conduct antimicrobial resistance analysis * Support for the upcoming [`dplyr`](https://dplyr.tidyverse.org) version 0.8.0 -* Function `guess_ab_col()` to find an antibiotic column in a table -* Function `mo_failures()` to review values that could not be coerced to a valid MO code, using `as.mo()`. This latter function will now only show a maximum of 25 uncoerced values. -* Function `mo_renamed()` to get a list of all returned values from `as.mo()` that have had taxonomic renaming -* Function `age()` to calculate the (patients) age in years -* Function `age_groups()` to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis per age group. -* Function `ggplot_rsi_predict()` as well as the base R `plot()` function can now be used for resistance prediction calculated with `resistance_predict()`: +* New function `guess_ab_col()` to find an antibiotic column in a table +* New function `mo_failures()` to review values that could not be coerced to a valid MO code, using `as.mo()`. This latter function will now only show a maximum of 25 uncoerced values. +* New function `mo_renamed()` to get a list of all returned values from `as.mo()` that have had taxonomic renaming +* New function `age()` to calculate the (patients) age in years +* New function `age_groups()` to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis per age group. 
+* New function `ggplot_rsi_predict()` as well as the base R `plot()` function can now be used for resistance prediction calculated with `resistance_predict()`: ```r x <- resistance_predict(septic_patients, col_ab = "amox") plot(x) @@ -66,7 +66,8 @@ * The new `big.mark` parameter will at default be `","` when `decimal.mark = "."` and `"."` otherwise * Fix for header text where all observations are `NA` * New parameter `droplevels` to exclude empty factor levels when input is a factor - * Factor levels will be in header when present in input data + * Factor levels will be in header when present in input data (maximum of 5) + * Fix for using `select()` on frequency tables * Function `scale_y_percent()` now contains the `limits` parameter * Automatic parameter filling for `mdro()`, `key_antibiotics()` and `eucast_rules()` * Updated examples for resistance prediction (`resistance_predict()` function) diff --git a/R/freq.R b/R/freq.R index a3f69bd8..0d6ee9eb 100755 --- a/R/freq.R +++ b/R/freq.R @@ -965,6 +965,14 @@ as.data.frame.frequency_tbl <- function(x, ...) { as.data.frame.data.frame(x, ...) } +#' @exportMethod select.frequency_tbl +#' @export +#' @importFrom dplyr select +#' @noRd +select.frequency_tbl <- function(.data, ...) { + select(as.data.frame(.data), ...) +} + #' @noRd #' @exportMethod as_tibble.frequency_tbl #' @export diff --git a/R/globals.R b/R/globals.R index ee8aa43a..7c4d538b 100755 --- a/R/globals.R +++ b/R/globals.R @@ -19,26 +19,15 @@ # Visit our website for more info: https://msberends.gitab.io/AMR. 
# # ==================================================================== # - - - globalVariables(c(".", - "atc", - "certe", - "official", - "trade_name", - "umcg", - 'se_min', - 'se_max', - 'labs', - 'transmute', - 'observed', "..property", "antibiotic", "Antibiotic", "antibiotics", + "atc", "authors", "Becker", + "certe", "cnt", "count", "count.x", @@ -59,38 +48,46 @@ globalVariables(c(".", "key_ab", "key_ab_lag", "key_ab_other", + "labs", "Lancefield", "lbl", "median", "mic", "microorganisms", - "microorganisms.old", - "microorganismsDT", - "microorganisms.prevDT", - "microorganisms.unprevDT", - "microorganisms.oldDT", "microorganisms.certe", + "microorganisms.old", + "microorganisms.oldDT", + "microorganisms.prevDT", "microorganisms.umcg", + "microorganisms.unprevDT", + "microorganismsDT", "mo", "mo.old", "more_than_episode_ago", "n", "name", "observations", + "observed", + "official", "other_pat_or_mo", "Pasted", "patient_id", "prevalence", "psae", "R", - "ref", "real_first_isolate", + "ref", "S", + "se_max", + "se_min", "septic_patients", "shortname", "species", + "trade_name", + "transmute", "tsn", "tsn_new", + "umcg", "value", "Value", "y", diff --git a/README.md b/README.md index 0cc75d1e..51fc7bf9 100755 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ # `AMR` (for R) -*NOTE: the original source code is on GitLab (https://gitlab.com/msberends/AMR), so you can report a bug at https://gitlab.com/msberends/AMR/issues. There is a mirror repository on GitHub (https://github.com/msberends/AMR). As the mirror process is automated by GitLab, both repositories always contain the latest changes.* +### Not a developer? Then please visit our website [https://msberends.gitlab.io/AMR](https://msberends.gitlab.io/AMR) to read about this package. 
+**It contains documentation about all of the included functions and also a comprehensive tutorial about how to conduct AMR analysis.** ## Development source -This is the **development source** of `AMR`, a free and open-source [R package](https://www.r-project.org) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial properties by using evidence-based methods. +*NOTE: the original source code is on GitLab (https://gitlab.com/msberends/AMR), so you can report a bug at https://gitlab.com/msberends/AMR/issues. There is a mirror repository on GitHub (https://github.com/msberends/AMR). As the mirror process is automated by GitLab, both repositories always contain the latest changes.* -**Not a developer? Then our website https://msberends.gitlab.io/AMR is probably a better place to read about this package.** It contains documentation about all of the included functions and also a comprehensive tutorial about how to conduct AMR analysis. +This is the **development source** of `AMR`, a free and open-source [R package](https://www.r-project.org) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial properties by using evidence-based methods. ## Authors Matthijs S. Berends 1,2,a, diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index bcf568b9..dd71ced5 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html index 8e87d706..25383803 100644 --- a/docs/articles/benchmarks.html +++ b/docs/articles/benchmarks.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 @@ -178,7 +178,7 @@

Benchmarks

Matthijs S. Berends

-

12 January 2019

+

17 January 2019

@@ -189,148 +189,149 @@

One of the most important features of this package is the complete microbial taxonomic database, supplied by ITIS (https://www.itis.gov). We created a function as.mo() that transforms any user input value to a valid microbial ID by using AI (Artificial Intelligence) and based on the taxonomic tree of ITIS.

Using the microbenchmark package, we can review the calculation performance of this function.

-
library(microbenchmark)
+
library(microbenchmark)
+library(AMR)

In the next test, we try to ‘coerce’ different input values for Staphylococcus aureus. The actual result is the same every time: it returns its MO code B_STPHY_AUR (B stands for Bacteria, the taxonomic kingdom).

But the calculation time differs a lot. Here, the AI effect can be reviewed best:

-
microbenchmark(A = as.mo("stau"),
-               B = as.mo("staaur"),
-               C = as.mo("S. aureus"),
-               D = as.mo("S.  aureus"),
-               E = as.mo("STAAUR"),
-               F = as.mo("Staphylococcus aureus"),
-               G = as.mo("B_STPHY_AUR"),
-               times = 10,
-               unit = "ms")
-# Unit: milliseconds
-#  expr       min        lq       mean     median        uq       max neval
-#     A 34.745551 34.798630 35.2596102 34.8994810 35.258325 38.067062    10
-#     B  7.095386  7.125348  7.2219948  7.1613865  7.240377  7.495857    10
-#     C 11.677114 11.733826 11.8304789 11.7715050 11.843756 12.317559    10
-#     D 11.694435 11.730054 11.9859313 11.8775585 12.206371 12.750016    10
-#     E  7.044402  7.117387  7.2271630  7.1923610  7.246104  7.742396    10
-#     F  6.642326  6.778446  6.8988042  6.8753165  6.923577  7.513945    10
-#     G  0.106788  0.131023  0.1351229  0.1357725  0.144014  0.146458    10
+
microbenchmark(A = as.mo("stau"),
+               B = as.mo("staaur"),
+               C = as.mo("S. aureus"),
+               D = as.mo("S.  aureus"),
+               E = as.mo("STAAUR"),
+               F = as.mo("Staphylococcus aureus"),
+               G = as.mo("B_STPHY_AUR"),
+               times = 10,
+               unit = "ms")
+# Unit: milliseconds
+#  expr       min        lq       mean     median        uq       max neval
+#     A 34.745551 34.798630 35.2596102 34.8994810 35.258325 38.067062    10
+#     B  7.095386  7.125348  7.2219948  7.1613865  7.240377  7.495857    10
+#     C 11.677114 11.733826 11.8304789 11.7715050 11.843756 12.317559    10
+#     D 11.694435 11.730054 11.9859313 11.8775585 12.206371 12.750016    10
+#     E  7.044402  7.117387  7.2271630  7.1923610  7.246104  7.742396    10
+#     F  6.642326  6.778446  6.8988042  6.8753165  6.923577  7.513945    10
+#     G  0.106788  0.131023  0.1351229  0.1357725  0.144014  0.146458    10

In the table above, all measurements are shown in milliseconds (thousandths of a second), tested on a quite regular Linux server from 2007 (Core 2 Duo 2.7 GHz, 2 GB DDR2 RAM). A value of 6.9 milliseconds means it will roughly determine 144 input values per second. In case of 39.2 milliseconds, this is only 26 input values per second. The more an input value resembles a full name (like C, D and F), the faster the result will be found. In case of G, the input is already a valid MO code, so it takes almost no time at all (0.0001 seconds on our server).

To achieve this speed, the as.mo function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined far more slowly. See this example for the ID of Burkholderia nodosa (B_BRKHL_NOD):

-
microbenchmark(A = as.mo("buno"),
-               B = as.mo("burnod"),
-               C = as.mo("B. nodosa"),
-               D = as.mo("B.  nodosa"),
-               E = as.mo("BURNOD"),
-               F = as.mo("Burkholderia nodosa"),
-               G = as.mo("B_BRKHL_NOD"),
-               times = 10,
-               unit = "ms")
-# Unit: milliseconds
-#  expr        min         lq        mean      median         uq        max neval
-#     A 124.175427 124.474837 125.8610536 125.3750560 126.160945 131.485994    10
-#     B 154.249713 155.364729 160.9077032 156.8738940 157.136183 197.315105    10
-#     C  66.066571  66.162393  66.5538611  66.4488130  66.698077  67.623404    10
-#     D  86.747693  86.918665  90.7831016  87.8149725  89.440982 116.767991    10
-#     E 154.863827 155.208563 162.6535954 158.4062465 168.593785 187.378088    10
-#     F  32.427028  32.638648  32.9929454  32.7860475  32.992813  34.674241    10
-#     G   0.213155   0.216578   0.2369226   0.2338985   0.253734   0.285581    10
+
microbenchmark(A = as.mo("buno"),
+               B = as.mo("burnod"),
+               C = as.mo("B. nodosa"),
+               D = as.mo("B.  nodosa"),
+               E = as.mo("BURNOD"),
+               F = as.mo("Burkholderia nodosa"),
+               G = as.mo("B_BRKHL_NOD"),
+               times = 10,
+               unit = "ms")
+# Unit: milliseconds
+#  expr        min         lq        mean      median         uq        max neval
+#     A 124.175427 124.474837 125.8610536 125.3750560 126.160945 131.485994    10
+#     B 154.249713 155.364729 160.9077032 156.8738940 157.136183 197.315105    10
+#     C  66.066571  66.162393  66.5538611  66.4488130  66.698077  67.623404    10
+#     D  86.747693  86.918665  90.7831016  87.8149725  89.440982 116.767991    10
+#     E 154.863827 155.208563 162.6535954 158.4062465 168.593785 187.378088    10
+#     F  32.427028  32.638648  32.9929454  32.7860475  32.992813  34.674241    10
+#     G   0.213155   0.216578   0.2369226   0.2338985   0.253734   0.285581    10

That takes up to 11 times as much time! A value of 158.4 milliseconds means it can only determine ~6 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance.

To relieve this pitfall and further improve performance, two important calculations take almost no time at all: repetitive results and already precalculated results.

Repetitive results

Repetitive results mean that unique values are present more than once. Unique values will only be calculated once by as.mo(). We will use mo_fullname() for this test - a helper function that returns the full microbial name (genus, species and possibly subspecies) and uses as.mo() internally.

-
library(dplyr)
-# take 500,000 random MO codes from the septic_patients data set
-x = septic_patients %>%
-  sample_n(500000, replace = TRUE) %>%
-  pull(mo)
-  
-# got the right length?
-length(x)
-# [1] 500000
-
-# and how many unique values do we have?
-n_distinct(x)
-# [1] 96
-
-# only 96, but distributed in 500,000 results. now let's see:
-microbenchmark(X = mo_fullname(x),
-               times = 10,
-               unit = "ms")
-# Unit: milliseconds
-#  expr      min       lq     mean   median       uq      max neval
-#     X 114.9342 117.1076 129.6448 120.2047 131.5005 168.6371    10
+

So transforming 500,000 values (!) of 96 unique values only takes 0.12 seconds (120 ms). You only lose time on your unique input values.

Results of a tenfold - 5,000,000 values:

-
# Unit: milliseconds
-#  expr      min       lq     mean   median       uq      max neval
-#     X 882.9045 901.3011 1001.677 940.3421 1168.088 1226.846    10
+

Even the full names of 5 Million values are calculated within a second.

Precalculated results

What about precalculated results? If the input is an already precalculated result of a helper function like mo_fullname(), it almost doesn’t take any time at all (see ‘C’ below):

-
microbenchmark(A = mo_fullname("B_STPHY_AUR"),
-               B = mo_fullname("S. aureus"),
-               C = mo_fullname("Staphylococcus aureus"),
-               times = 10,
-               unit = "ms")
-# Unit: milliseconds
-#  expr       min        lq       mean     median        uq       max neval
-#     A 11.364086 11.460537 11.5104799 11.4795330 11.524860 11.818263    10
-#     B 11.976454 12.012352 12.1704592 12.0853020 12.210004 12.881737    10
-#     C  0.095823  0.102528  0.1167754  0.1153785  0.132629  0.140661    10
+

So going from mo_fullname("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0001 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

-
microbenchmark(A = mo_species("aureus"),
-               B = mo_genus("Staphylococcus"),
-               C = mo_fullname("Staphylococcus aureus"),
-               D = mo_family("Staphylococcaceae"),
-               E = mo_order("Bacillales"),
-               F = mo_class("Bacilli"),
-               G = mo_phylum("Firmicutes"),
-               H = mo_subkingdom("Posibacteria"),
-               I = mo_kingdom("Bacteria"),
-               times = 10,
-               unit = "ms")
-# Unit: milliseconds
-#  expr      min       lq      mean    median       uq      max neval
-#     A 0.105181 0.121314 0.1478538 0.1465265 0.166711 0.211409    10
-#     B 0.132558 0.146388 0.1584278 0.1499835 0.164895 0.208477    10
-#     C 0.135492 0.160355 0.2341847 0.1884665 0.348857 0.395931    10
-#     D 0.109650 0.115727 0.1270481 0.1264130 0.128648 0.168317    10
-#     E 0.081574 0.096940 0.0992582 0.0980915 0.101479 0.120477    10
-#     F 0.081575 0.088489 0.0988463 0.0989650 0.103365 0.126482    10
-#     G 0.091981 0.095333 0.1043568 0.1001530 0.111327 0.129625    10
-#     H 0.092610 0.093169 0.1009135 0.0985455 0.101828 0.120406    10
-#     I 0.087371 0.091213 0.1069758 0.0941815 0.109302 0.192831    10
+

Of course, when running mo_phylum("Firmicutes") the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes" too, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known microorganisms (according to ITIS), it can just return the initial value immediately.

Results in other languages

When the system language is non-English and supported by this AMR package, some functions take a little while longer:

-
mo_fullname("CoNS", language = "en") # or just mo_fullname("CoNS") on an English system
-# "Coagulase Negative Staphylococcus (CoNS)"
-
-mo_fullname("CoNS", language = "fr") # or just mo_fullname("CoNS") on a French system
-# "Staphylococcus à coagulase négative (CoNS)"
-
-microbenchmark(en = mo_fullname("CoNS", language = "en"),
-               de = mo_fullname("CoNS", language = "de"),
-               nl = mo_fullname("CoNS", language = "nl"),
-               es = mo_fullname("CoNS", language = "es"),
-               it = mo_fullname("CoNS", language = "it"),
-               fr = mo_fullname("CoNS", language = "fr"),
-               pt = mo_fullname("CoNS", language = "pt"),
-               times = 10,
-               unit = "ms")
-# Unit: milliseconds
-#  expr       min       lq      mean    median        uq      max neval
-#    en  6.093583  6.51724  6.555105  6.562986  6.630663  6.99698   100
-#    de 13.934874 14.35137 16.891587 14.462210 14.764658 43.63956   100
-#    nl 13.900092 14.34729 15.943268 14.424565 14.581535 43.76283   100
-#    es 13.833813 14.34596 14.574783 14.439757 14.653994 17.49168   100
-#    it 13.811883 14.36621 15.179060 14.453515 14.812359 43.64284   100
-#    fr 13.798683 14.37019 16.344731 14.468775 14.697610 48.62923   100
-#    pt 13.789674 14.36244 15.706321 14.443772 14.679905 44.76701   100
+

Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.

diff --git a/docs/articles/index.html b/docs/articles/index.html index 329fd1bd..2babab18 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/authors.html b/docs/authors.html index eeddf4b3..e76cedf6 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/index.html b/docs/index.html index d5063da5..652d96ac 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 @@ -220,9 +220,9 @@

Get this package

-

This package is available on the official R network (CRAN). Install this package in R with:

- -

It will be downloaded and installed automatically.

+

This package is available on the official R network (CRAN), which has a peer-reviewed submission process. Install this package in R with:

+ +

It will be downloaded and installed automatically. For RStudio, click on menu Tools > Install Packages…, then type in “AMR” and press Install.

@@ -245,17 +245,17 @@ Overview of functions

The AMR package basically does four important things:

    -
  1. It cleanses existing data, by transforming it to reproducible and profound classes, making the most efficient use of R. These functions all use artificial intelligence to guess results that you would expect:
  2. -
+
  • +

    It cleanses existing data, by transforming it to reproducible and profound classes, making the most efficient use of R. These functions all use artificial intelligence to guess results that you would expect:

    • Use as.mo() to get an ID of a microorganism. The IDs are human readable for the trained eye - the ID of Klebsiella pneumoniae is “B_KLBSL_PNE” (B stands for Bacteria) and the ID of S. aureus is “B_STPHY_AUR”. The function takes almost any text as input that looks like the name or code of a microorganism like “E. coli”, “esco” or “esccol” and tries to find expected results using artificial intelligence (AI) on the included ITIS data set, consisting of almost 20,000 microorganisms. It is very fast, please see our benchmarks. Moreover, it can group Staphylococci into coagulase negative and positive (CoNS and CoPS, see source) and can categorise Streptococci into Lancefield groups (like beta-haemolytic Streptococcus Group B, source).
    • Use as.rsi() to transform values to valid antimicrobial results. It produces just S, I or R based on your input and warns about invalid values. Even values like “<=0.002; S” (combined MIC/RSI) will result in “S”.
    • Use as.mic() to cleanse your MIC values. It produces a so-called factor (called ordinal in SPSS) with valid MIC values as levels. A value like “<=0.002; S” (combined MIC/RSI) will result in “<=0.002”.
    • Use as.atc() to get the ATC code of an antibiotic as defined by the WHO. This package contains a database with most LIS codes, official names, DDDs and even trade names of antibiotics. For example, the values “Furabid”, “Furadantin”, “nitro” all return the ATC code of Nitrofurantoine.
    -
      -
    1. It enhances existing data and adds new data from data sets included in this package.
    2. -
    +
  • +
  • +

    It enhances existing data and adds new data from data sets included in this package.

    • Use eucast_rules() to apply EUCAST expert rules to isolates.
    • Use first_isolate() to identify the first isolates of every patient using guidelines from the CLSI (Clinical and Laboratory Standards Institute). @@ -267,9 +267,9 @@
    • The data set microorganisms contains the complete taxonomic tree of more than 18,000 microorganisms (bacteria, fungi/yeasts and protozoa). Furthermore, the colloquial name and Gram stain are available, which enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like mo_genus(), mo_family(), mo_gramstain() or even mo_phylum(). As they use as.mo() internally, they also use artificial intelligence. For example, mo_genus("MRSA") and mo_genus("S. aureus") will both return "Staphylococcus". They also come with support for German, Dutch, Spanish, Italian, French and Portuguese. These functions can be used to add new variables to your data.
    • The data set antibiotics contains the ATC code, LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains a total of 298 trade names. Use functions like ab_name() and ab_tradenames() to look up values. The ab_* functions use as.atc() internally so they support AI to guess your expected result. For example, ab_name("Fluclox"), ab_name("Floxapen") and ab_name("J01CF05") will all return "Flucloxacillin". These functions can again be used to add new variables to your data.
    -
      -
    1. It analyses the data with convenient functions that use well-known methods.
    2. -
    +
  • +
  • +

    It analyses the data with convenient functions that use well-known methods.

    -
      -
    1. It teaches the user how to use all the above actions.
    2. -
    +
  • +
  • +

    It teaches the user how to use all the above actions.

    • The package contains extensive help pages with many examples.
    • It also contains an example data set called septic_patients. This data set contains: @@ -290,6 +290,8 @@
  • + +

    diff --git a/docs/news/index.html b/docs/news/index.html index e1d1981c..391f189f 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010

    @@ -229,33 +229,53 @@

    Changed

    @@ -496,15 +604,21 @@ New

    + +
  • Determining bacterial ID: +
    • New functions as.bactid and is.bactid to transform/ look up microbial ID’s.
    • The existing function guess_bactid is now an alias of as.bactid
    • New Becker classification for Staphylococcus to categorise them into Coagulase Negative Staphylococci (CoNS) and Coagulase Positive Staphylococci (CoPS)
    • New Lancefield classification for Streptococcus to categorise them into Lancefield groups
    • +
    +
  • For convenience, new descriptive statistical functions kurtosis and skewness that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
  • Function g.test to perform the Χ2 distributed G-test, which use is the same as chisq.test
  • -
  • Function ratio to transform a vector of values to a preset ratio
  • +
  • +Function ratio to transform a vector of values to a preset ratio + +
  • Support for Addins menu in RStudio to quickly insert %in% or %like% (and give them keyboard shortcuts), or to view the datasets that come with this package
  • Function p.symbol to transform p values to their related symbols: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  • Functions clipboard_import and clipboard_export as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the clipr package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server)
  • -
  • New for frequency tables (function freq):
  • +
  • New for frequency tables (function freq): +
    • A vignette to explain its usage
    • Support for rsi (antimicrobial resistance) to use as input
    • Support for table to use as input: freq(table(x, y)) @@ -545,6 +669,8 @@
    • Header of frequency tables now also shows Mean Absolute Deviation (MAD) and Interquartile Range (IQR)
    • Possibility to globally set the default for the amount of items to print, with options(max.print.freq = n) where n is your preset value
    +
  • +

    @@ -566,21 +692,27 @@
  • Small improvements to the microorganisms dataset (especially for Salmonella) and the column bactid now has the new class "bactid"
  • -
  • Combined MIC/RSI values will now be coerced by the rsi and mic functions:
  • +
  • Combined MIC/RSI values will now be coerced by the rsi and mic functions: + +
  • Now possible to coerce MIC values with a space between operator and value, i.e. as.mic("<= 0.002") now works
  • Classes rsi and mic do not add the attribute package.version anymore
  • Added "groups" option for atc_property(..., property). It will return a vector of the ATC hierarchy as defined by the WHO. The new function atc_groups is a convenient wrapper around this.
  • Built-in host check for atc_property as it requires the host set by url to be responsive
  • Improved first_isolate algorithm to exclude isolates where bacteria ID or genus is unavailable
  • Fix for warning hybrid evaluation forced for row_number (924b62) from the dplyr package v0.7.5 and above
  • -
  • Support for empty values and for 1 or 2 columns as input for guess_bactid (now called as.bactid)
  • +
  • Support for empty values and for 1 or 2 columns as input for guess_bactid (now called as.bactid) +
    • So yourdata %>% select(genus, species) %>% as.bactid() now also works
    • +
    +
  • Other small fixes
  • @@ -588,11 +720,14 @@

    Other

    @@ -611,10 +746,13 @@
  • Function guess_bactid to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA
  • Function guess_atc to determine the ATC of an antibiotic based on name, trade name, or known abbreviations
  • Function freq to create frequency tables, with additional info in a header
  • -
  • Function MDRO to determine Multi Drug Resistant Organisms (MDRO) with support for country-specific guidelines.
  • +
  • Function MDRO to determine Multi Drug Resistant Organisms (MDRO) with support for country-specific guidelines. + +
  • New algorithm to determine weighted isolates, can now be "points" or "keyantibiotics", see ?first_isolate
  • New print format for tibbles and data.tables
  • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 3c396dbb..31227eab 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,4 +1,4 @@ -pandoc: 1.17.2 +pandoc: 2.3.1 pkgdown: 1.3.0 pkgdown_sha: ~ articles: diff --git a/docs/reference/ITIS.html b/docs/reference/ITIS.html index 43278681..f52848ae 100644 --- a/docs/reference/ITIS.html +++ b/docs/reference/ITIS.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/age.html b/docs/reference/age.html index b7d10a4c..9300ff81 100644 --- a/docs/reference/age.html +++ b/docs/reference/age.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/age_groups.html b/docs/reference/age_groups.html index 82a47bf0..a33091c3 100644 --- a/docs/reference/age_groups.html +++ b/docs/reference/age_groups.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 2097430a..15529a4f 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/eucast_rules.html b/docs/reference/eucast_rules.html index 062f79e9..f0a4078a 100644 --- a/docs/reference/eucast_rules.html +++ b/docs/reference/eucast_rules.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/first_isolate.html b/docs/reference/first_isolate.html index e100c72a..367cf059 100644 --- a/docs/reference/first_isolate.html +++ b/docs/reference/first_isolate.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/g.test.html b/docs/reference/g.test.html index e11fbfef..b3d44776 100644 --- a/docs/reference/g.test.html +++ b/docs/reference/g.test.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/guess_ab_col.html b/docs/reference/guess_ab_col.html index 6f22e4e9..2c99e0a2 100644 --- a/docs/reference/guess_ab_col.html +++ 
b/docs/reference/guess_ab_col.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/index.html b/docs/reference/index.html index 3b8334ee..3b6ae348 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/key_antibiotics.html b/docs/reference/key_antibiotics.html index ca20ed4e..85cb95f8 100644 --- a/docs/reference/key_antibiotics.html +++ b/docs/reference/key_antibiotics.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/mdro.html b/docs/reference/mdro.html index e532d5ff..0bccdc5d 100644 --- a/docs/reference/mdro.html +++ b/docs/reference/mdro.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index 0a1d0742..141ff016 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/microorganisms.old.html b/docs/reference/microorganisms.old.html index c4c2548c..6a8449e5 100644 --- a/docs/reference/microorganisms.old.html +++ b/docs/reference/microorganisms.old.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index 93e6eb5b..a7547f80 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/docs/reference/resistance_predict.html b/docs/reference/resistance_predict.html index 1f1eb990..b80b0b65 100644 --- a/docs/reference/resistance_predict.html +++ b/docs/reference/resistance_predict.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9009 + 0.5.0.9010 diff --git a/index.md b/index.md index eb9e1cf5..0f4a2496 100644 --- a/index.md +++ b/index.md @@ -45,13 +45,13 @@ Developers: ### Get this package -This package is available 
on the official R network (CRAN). Install this package in R with: +This package is available on the official R network (CRAN), which has a peer-reviewed submission process. Install this package in R with: ```r install.packages("AMR") ``` -It will be downloaded and installed automatically. +It will be downloaded and installed automatically. For RStudio, click on menu *Tools* > *Install Packages...*, then type in "AMR" and press Install. ### Get started diff --git a/vignettes/benchmarks.Rmd b/vignettes/benchmarks.Rmd index e2308414..cc408fb0 100755 --- a/vignettes/benchmarks.Rmd +++ b/vignettes/benchmarks.Rmd @@ -29,6 +29,7 @@ Using the `microbenchmark` package, we can review the calculation performance of ```r library(microbenchmark) +library(AMR) ``` In the next test, we try to 'coerce' different input values for *Staphylococcus aureus*. The actual result is the same every time: it returns its MO code `B_STPHY_AUR` (*B* stands for *Bacteria*, the taxonomic kingdom).