diff --git a/DESCRIPTION b/DESCRIPTION index aaea9a0d..0f63d641 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.5.0.9009 -Date: 2019-01-15 +Version: 0.5.0.9010 +Date: 2019-01-17 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 9aac9a76..70daebb9 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ S3method(print,mo) S3method(print,rsi) S3method(pull,atc) S3method(pull,mo) +S3method(select,frequency_tbl) S3method(skewness,data.frame) S3method(skewness,default) S3method(skewness,matrix) @@ -167,6 +168,7 @@ exportMethods(print.mo) exportMethods(print.rsi) exportMethods(pull.atc) exportMethods(pull.mo) +exportMethods(select.frequency_tbl) exportMethods(skewness) exportMethods(skewness.data.frame) exportMethods(skewness.default) diff --git a/NEWS.md b/NEWS.md index fa50d777..480f68b9 100755 --- a/NEWS.md +++ b/NEWS.md @@ -7,12 +7,12 @@ * Contains the complete manual of this package and all of its functions with an explanation of their parameters * Contains a comprehensive tutorial about how to conduct antimicrobial resistance analysis * Support for the upcoming [`dplyr`](https://dplyr.tidyverse.org) version 0.8.0 -* Function `guess_ab_col()` to find an antibiotic column in a table -* Function `mo_failures()` to review values that could not be coerced to a valid MO code, using `as.mo()`. This latter function will now only show a maximum of 25 uncoerced values. -* Function `mo_renamed()` to get a list of all returned values from `as.mo()` that have had taxonomic renaming -* Function `age()` to calculate the (patients) age in years -* Function `age_groups()` to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis per age group. 
-* Function `ggplot_rsi_predict()` as well as the base R `plot()` function can now be used for resistance prediction calculated with `resistance_predict()`: +* New function `guess_ab_col()` to find an antibiotic column in a table +* New function `mo_failures()` to review values that could not be coerced to a valid MO code, using `as.mo()`. This latter function will now only show a maximum of 25 uncoerced values. +* New function `mo_renamed()` to get a list of all returned values from `as.mo()` that have had taxonomic renaming +* New function `age()` to calculate the (patients) age in years +* New function `age_groups()` to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis per age group. +* New function `ggplot_rsi_predict()` as well as the base R `plot()` function can now be used for resistance prediction calculated with `resistance_predict()`: ```r x <- resistance_predict(septic_patients, col_ab = "amox") plot(x) @@ -66,7 +66,8 @@ * The new `big.mark` parameter will at default be `","` when `decimal.mark = "."` and `"."` otherwise * Fix for header text where all observations are `NA` * New parameter `droplevels` to exclude empty factor levels when input is a factor - * Factor levels will be in header when present in input data + * Factor levels will be in header when present in input data (maximum of 5) + * Fix for using `select()` on frequency tables * Function `scale_y_percent()` now contains the `limits` parameter * Automatic parameter filling for `mdro()`, `key_antibiotics()` and `eucast_rules()` * Updated examples for resistance prediction (`resistance_predict()` function) diff --git a/R/freq.R b/R/freq.R index a3f69bd8..0d6ee9eb 100755 --- a/R/freq.R +++ b/R/freq.R @@ -965,6 +965,14 @@ as.data.frame.frequency_tbl <- function(x, ...) { as.data.frame.data.frame(x, ...) 
} +#' @exportMethod select.frequency_tbl +#' @export +#' @importFrom dplyr select +#' @noRd +select.frequency_tbl <- function(.data, ...) { + select(as.data.frame(.data), ...) +} + #' @noRd #' @exportMethod as_tibble.frequency_tbl #' @export diff --git a/R/globals.R b/R/globals.R index ee8aa43a..7c4d538b 100755 --- a/R/globals.R +++ b/R/globals.R @@ -19,26 +19,15 @@ # Visit our website for more info: https://msberends.gitab.io/AMR. # # ==================================================================== # - - - globalVariables(c(".", - "atc", - "certe", - "official", - "trade_name", - "umcg", - 'se_min', - 'se_max', - 'labs', - 'transmute', - 'observed', "..property", "antibiotic", "Antibiotic", "antibiotics", + "atc", "authors", "Becker", + "certe", "cnt", "count", "count.x", @@ -59,38 +48,46 @@ globalVariables(c(".", "key_ab", "key_ab_lag", "key_ab_other", + "labs", "Lancefield", "lbl", "median", "mic", "microorganisms", - "microorganisms.old", - "microorganismsDT", - "microorganisms.prevDT", - "microorganisms.unprevDT", - "microorganisms.oldDT", "microorganisms.certe", + "microorganisms.old", + "microorganisms.oldDT", + "microorganisms.prevDT", "microorganisms.umcg", + "microorganisms.unprevDT", + "microorganismsDT", "mo", "mo.old", "more_than_episode_ago", "n", "name", "observations", + "observed", + "official", "other_pat_or_mo", "Pasted", "patient_id", "prevalence", "psae", "R", - "ref", "real_first_isolate", + "ref", "S", + "se_max", + "se_min", "septic_patients", "shortname", "species", + "trade_name", + "transmute", "tsn", "tsn_new", + "umcg", "value", "Value", "y", diff --git a/README.md b/README.md index 0cc75d1e..51fc7bf9 100755 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ # `AMR` (for R) -*NOTE: the original source code is on GitLab (https://gitlab.com/msberends/AMR), so you can report a bug at https://gitlab.com/msberends/AMR/issues. There is a mirror repository on GitHub (https://github.com/msberends/AMR). 
As the mirror process is automated by GitLab, both repositories always contain the latest changes.* +### Not a developer? Then please visit our website [https://msberends.gitlab.io/AMR](https://msberends.gitlab.io/AMR) to read about this package. +**It contains documentation about all of the included functions and also a comprehensive tutorial about how to conduct AMR analysis.** ## Development source -This is the **development source** of `AMR`, a free and open-source [R package](https://www.r-project.org) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial properties by using evidence-based methods. +*NOTE: the original source code is on GitLab (https://gitlab.com/msberends/AMR), so you can report a bug at https://gitlab.com/msberends/AMR/issues. There is a mirror repository on GitHub (https://github.com/msberends/AMR). As the mirror process is automated by GitLab, both repositories always contain the latest changes.* -**Not a developer? Then our website https://msberends.gitlab.io/AMR is probably a better place to read about this package.** It contains documentation about all of the included functions and also a comprehensive tutorial about how to conduct AMR analysis. +This is the **development source** of `AMR`, a free and open-source [R package](https://www.r-project.org) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial properties by using evidence-based methods. ## Authors Matthijs S. Berends 1,2,a, diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index bcf568b9..dd71ced5 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -78,7 +78,7 @@ diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html index 8e87d706..25383803 100644 --- a/docs/articles/benchmarks.html +++ b/docs/articles/benchmarks.html @@ -40,7 +40,7 @@ @@ -178,7 +178,7 @@
benchmarks.Rmd
One of the most important features of this package is the complete microbial taxonomic database, supplied by ITIS (https://www.itis.gov). We created a function as.mo()
that transforms any user input value to a valid microbial ID by using AI (Artificial Intelligence) and based on the taxonomic tree of ITIS.
Using the microbenchmark
package, we can review the calculation performance of this function.
library(microbenchmark)
In the next test, we try to ‘coerce’ different input values for Staphylococcus aureus. The actual result is the same every time: it returns its MO code B_STPHY_AUR
(B stands for Bacteria, the taxonomic kingdom).
But the calculation time differs a lot. Here, the AI effect can be reviewed best:
-microbenchmark(A = as.mo("stau"),
- B = as.mo("staaur"),
- C = as.mo("S. aureus"),
- D = as.mo("S. aureus"),
- E = as.mo("STAAUR"),
- F = as.mo("Staphylococcus aureus"),
- G = as.mo("B_STPHY_AUR"),
- times = 10,
- unit = "ms")
-# Unit: milliseconds
-# expr min lq mean median uq max neval
-# A 34.745551 34.798630 35.2596102 34.8994810 35.258325 38.067062 10
-# B 7.095386 7.125348 7.2219948 7.1613865 7.240377 7.495857 10
-# C 11.677114 11.733826 11.8304789 11.7715050 11.843756 12.317559 10
-# D 11.694435 11.730054 11.9859313 11.8775585 12.206371 12.750016 10
-# E 7.044402 7.117387 7.2271630 7.1923610 7.246104 7.742396 10
-# F 6.642326 6.778446 6.8988042 6.8753165 6.923577 7.513945 10
-# G 0.106788 0.131023 0.1351229 0.1357725 0.144014 0.146458 10
microbenchmark(A = as.mo("stau"),
+ B = as.mo("staaur"),
+ C = as.mo("S. aureus"),
+ D = as.mo("S. aureus"),
+ E = as.mo("STAAUR"),
+ F = as.mo("Staphylococcus aureus"),
+ G = as.mo("B_STPHY_AUR"),
+ times = 10,
+ unit = "ms")
+# Unit: milliseconds
+# expr min lq mean median uq max neval
+# A 34.745551 34.798630 35.2596102 34.8994810 35.258325 38.067062 10
+# B 7.095386 7.125348 7.2219948 7.1613865 7.240377 7.495857 10
+# C 11.677114 11.733826 11.8304789 11.7715050 11.843756 12.317559 10
+# D 11.694435 11.730054 11.9859313 11.8775585 12.206371 12.750016 10
+# E 7.044402 7.117387 7.2271630 7.1923610 7.246104 7.742396 10
+# F 6.642326 6.778446 6.8988042 6.8753165 6.923577 7.513945 10
+# G 0.106788 0.131023 0.1351229 0.1357725 0.144014 0.146458 10
In the table above, all measurements are shown in milliseconds (thousandths of a second), tested on a quite regular Linux server from 2007 (Core 2 Duo 2.7 GHz, 2 GB DDR2 RAM). A value of 6.9 milliseconds means it will roughly determine 144 input values per second. In case of 39.2 milliseconds, this is only 26 input values per second. The more an input value resembles a full name (like C, D and F), the faster the result will be found. In case of G, the input is already a valid MO code, so it takes almost no time at all (0.0001 seconds on our server).
To achieve this speed, the as.mo
function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined far more slowly. See this example for the ID of Burkholderia nodosa (B_BRKHL_NOD
):
microbenchmark(A = as.mo("buno"),
- B = as.mo("burnod"),
- C = as.mo("B. nodosa"),
- D = as.mo("B. nodosa"),
- E = as.mo("BURNOD"),
- F = as.mo("Burkholderia nodosa"),
- G = as.mo("B_BRKHL_NOD"),
- times = 10,
- unit = "ms")
-# Unit: milliseconds
-# expr min lq mean median uq max neval
-# A 124.175427 124.474837 125.8610536 125.3750560 126.160945 131.485994 10
-# B 154.249713 155.364729 160.9077032 156.8738940 157.136183 197.315105 10
-# C 66.066571 66.162393 66.5538611 66.4488130 66.698077 67.623404 10
-# D 86.747693 86.918665 90.7831016 87.8149725 89.440982 116.767991 10
-# E 154.863827 155.208563 162.6535954 158.4062465 168.593785 187.378088 10
-# F 32.427028 32.638648 32.9929454 32.7860475 32.992813 34.674241 10
-# G 0.213155 0.216578 0.2369226 0.2338985 0.253734 0.285581 10
microbenchmark(A = as.mo("buno"),
+ B = as.mo("burnod"),
+ C = as.mo("B. nodosa"),
+ D = as.mo("B. nodosa"),
+ E = as.mo("BURNOD"),
+ F = as.mo("Burkholderia nodosa"),
+ G = as.mo("B_BRKHL_NOD"),
+ times = 10,
+ unit = "ms")
+# Unit: milliseconds
+# expr min lq mean median uq max neval
+# A 124.175427 124.474837 125.8610536 125.3750560 126.160945 131.485994 10
+# B 154.249713 155.364729 160.9077032 156.8738940 157.136183 197.315105 10
+# C 66.066571 66.162393 66.5538611 66.4488130 66.698077 67.623404 10
+# D 86.747693 86.918665 90.7831016 87.8149725 89.440982 116.767991 10
+# E 154.863827 155.208563 162.6535954 158.4062465 168.593785 187.378088 10
+# F 32.427028 32.638648 32.9929454 32.7860475 32.992813 34.674241 10
+# G 0.213155 0.216578 0.2369226 0.2338985 0.253734 0.285581 10
That takes up to 11 times as much time! A value of 158.4 milliseconds means it can only determine ~6 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance.
To relieve this pitfall and further improve performance, two important calculations take almost no time at all: repetitive results and already precalculated results.
Repetitive results mean that unique values are present more than once. Unique values will only be calculated once by as.mo()
. We will use mo_fullname()
for this test - a helper function that returns the full microbial name (genus, species and possibly subspecies) and uses as.mo()
internally.
library(dplyr)
-# take 500,000 random MO codes from the septic_patients data set
-x = septic_patients %>%
- sample_n(500000, replace = TRUE) %>%
- pull(mo)
-
-# got the right length?
-length(x)
-# [1] 500000
-
-# and how many unique values do we have?
-n_distinct(x)
-# [1] 96
-
-# only 96, but distributed in 500,000 results. now let's see:
-microbenchmark(X = mo_fullname(x),
- times = 10,
- unit = "ms")
-# Unit: milliseconds
-# expr min lq mean median uq max neval
-# X 114.9342 117.1076 129.6448 120.2047 131.5005 168.6371 10
library(dplyr)
+# take 500,000 random MO codes from the septic_patients data set
+x = septic_patients %>%
+ sample_n(500000, replace = TRUE) %>%
+ pull(mo)
+
+# got the right length?
+length(x)
+# [1] 500000
+
+# and how many unique values do we have?
+n_distinct(x)
+# [1] 96
+
+# only 96, but distributed in 500,000 results. now let's see:
+microbenchmark(X = mo_fullname(x),
+ times = 10,
+ unit = "ms")
+# Unit: milliseconds
+# expr min lq mean median uq max neval
+# X 114.9342 117.1076 129.6448 120.2047 131.5005 168.6371 10
So transforming 500,000 values (!) of 96 unique values only takes 0.12 seconds (120 ms). You only lose time on your unique input values.
Results of a tenfold - 5,000,000 values:
-# Unit: milliseconds
-# expr min lq mean median uq max neval
-# X 882.9045 901.3011 1001.677 940.3421 1168.088 1226.846 10
# Unit: milliseconds
+# expr min lq mean median uq max neval
+# X 882.9045 901.3011 1001.677 940.3421 1168.088 1226.846 10
Even the full names of 5 million values are calculated within a second.
What about precalculated results? If the input is an already precalculated result of a helper function like mo_fullname()
, it almost doesn’t take any time at all (see ‘C’ below):
microbenchmark(A = mo_fullname("B_STPHY_AUR"),
- B = mo_fullname("S. aureus"),
- C = mo_fullname("Staphylococcus aureus"),
- times = 10,
- unit = "ms")
-# Unit: milliseconds
-# expr min lq mean median uq max neval
-# A 11.364086 11.460537 11.5104799 11.4795330 11.524860 11.818263 10
-# B 11.976454 12.012352 12.1704592 12.0853020 12.210004 12.881737 10
-# C 0.095823 0.102528 0.1167754 0.1153785 0.132629 0.140661 10
microbenchmark(A = mo_fullname("B_STPHY_AUR"),
+ B = mo_fullname("S. aureus"),
+ C = mo_fullname("Staphylococcus aureus"),
+ times = 10,
+ unit = "ms")
+# Unit: milliseconds
+# expr min lq mean median uq max neval
+# A 11.364086 11.460537 11.5104799 11.4795330 11.524860 11.818263 10
+# B 11.976454 12.012352 12.1704592 12.0853020 12.210004 12.881737 10
+# C 0.095823 0.102528 0.1167754 0.1153785 0.132629 0.140661 10
So going from mo_fullname("Staphylococcus aureus")
to "Staphylococcus aureus"
takes 0.0001 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:
microbenchmark(A = mo_species("aureus"),
- B = mo_genus("Staphylococcus"),
- C = mo_fullname("Staphylococcus aureus"),
- D = mo_family("Staphylococcaceae"),
- E = mo_order("Bacillales"),
- F = mo_class("Bacilli"),
- G = mo_phylum("Firmicutes"),
- H = mo_subkingdom("Posibacteria"),
- I = mo_kingdom("Bacteria"),
- times = 10,
- unit = "ms")
-# Unit: milliseconds
-# expr min lq mean median uq max neval
-# A 0.105181 0.121314 0.1478538 0.1465265 0.166711 0.211409 10
-# B 0.132558 0.146388 0.1584278 0.1499835 0.164895 0.208477 10
-# C 0.135492 0.160355 0.2341847 0.1884665 0.348857 0.395931 10
-# D 0.109650 0.115727 0.1270481 0.1264130 0.128648 0.168317 10
-# E 0.081574 0.096940 0.0992582 0.0980915 0.101479 0.120477 10
-# F 0.081575 0.088489 0.0988463 0.0989650 0.103365 0.126482 10
-# G 0.091981 0.095333 0.1043568 0.1001530 0.111327 0.129625 10
-# H 0.092610 0.093169 0.1009135 0.0985455 0.101828 0.120406 10
-# I 0.087371 0.091213 0.1069758 0.0941815 0.109302 0.192831 10
microbenchmark(A = mo_species("aureus"),
+ B = mo_genus("Staphylococcus"),
+ C = mo_fullname("Staphylococcus aureus"),
+ D = mo_family("Staphylococcaceae"),
+ E = mo_order("Bacillales"),
+ F = mo_class("Bacilli"),
+ G = mo_phylum("Firmicutes"),
+ H = mo_subkingdom("Posibacteria"),
+ I = mo_kingdom("Bacteria"),
+ times = 10,
+ unit = "ms")
+# Unit: milliseconds
+# expr min lq mean median uq max neval
+# A 0.105181 0.121314 0.1478538 0.1465265 0.166711 0.211409 10
+# B 0.132558 0.146388 0.1584278 0.1499835 0.164895 0.208477 10
+# C 0.135492 0.160355 0.2341847 0.1884665 0.348857 0.395931 10
+# D 0.109650 0.115727 0.1270481 0.1264130 0.128648 0.168317 10
+# E 0.081574 0.096940 0.0992582 0.0980915 0.101479 0.120477 10
+# F 0.081575 0.088489 0.0988463 0.0989650 0.103365 0.126482 10
+# G 0.091981 0.095333 0.1043568 0.1001530 0.111327 0.129625 10
+# H 0.092610 0.093169 0.1009135 0.0985455 0.101828 0.120406 10
+# I 0.087371 0.091213 0.1069758 0.0941815 0.109302 0.192831 10
Of course, when running mo_phylum("Firmicutes")
the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes"
too, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known microorganisms (according to ITIS), it can just return the initial value immediately.
When the system language is non-English and supported by this AMR
package, some functions take a little while longer:
mo_fullname("CoNS", language = "en") # or just mo_fullname("CoNS") on an English system
-# "Coagulase Negative Staphylococcus (CoNS)"
-
-mo_fullname("CoNS", language = "fr") # or just mo_fullname("CoNS") on a French system
-# "Staphylococcus à coagulase négative (CoNS)"
-
-microbenchmark(en = mo_fullname("CoNS", language = "en"),
- de = mo_fullname("CoNS", language = "de"),
- nl = mo_fullname("CoNS", language = "nl"),
- es = mo_fullname("CoNS", language = "es"),
- it = mo_fullname("CoNS", language = "it"),
- fr = mo_fullname("CoNS", language = "fr"),
- pt = mo_fullname("CoNS", language = "pt"),
- times = 10,
- unit = "ms")
-# Unit: milliseconds
-# expr min lq mean median uq max neval
-# en 6.093583 6.51724 6.555105 6.562986 6.630663 6.99698 100
-# de 13.934874 14.35137 16.891587 14.462210 14.764658 43.63956 100
-# nl 13.900092 14.34729 15.943268 14.424565 14.581535 43.76283 100
-# es 13.833813 14.34596 14.574783 14.439757 14.653994 17.49168 100
-# it 13.811883 14.36621 15.179060 14.453515 14.812359 43.64284 100
-# fr 13.798683 14.37019 16.344731 14.468775 14.697610 48.62923 100
-# pt 13.789674 14.36244 15.706321 14.443772 14.679905 44.76701 100
mo_fullname("CoNS", language = "en") # or just mo_fullname("CoNS") on an English system
+# "Coagulase Negative Staphylococcus (CoNS)"
+
+mo_fullname("CoNS", language = "fr") # or just mo_fullname("CoNS") on a French system
+# "Staphylococcus à coagulase négative (CoNS)"
+
+microbenchmark(en = mo_fullname("CoNS", language = "en"),
+ de = mo_fullname("CoNS", language = "de"),
+ nl = mo_fullname("CoNS", language = "nl"),
+ es = mo_fullname("CoNS", language = "es"),
+ it = mo_fullname("CoNS", language = "it"),
+ fr = mo_fullname("CoNS", language = "fr"),
+ pt = mo_fullname("CoNS", language = "pt"),
+ times = 10,
+ unit = "ms")
+# Unit: milliseconds
+# expr min lq mean median uq max neval
+# en 6.093583 6.51724 6.555105 6.562986 6.630663 6.99698 100
+# de 13.934874 14.35137 16.891587 14.462210 14.764658 43.63956 100
+# nl 13.900092 14.34729 15.943268 14.424565 14.581535 43.76283 100
+# es 13.833813 14.34596 14.574783 14.439757 14.653994 17.49168 100
+# it 13.811883 14.36621 15.179060 14.453515 14.812359 43.64284 100
+# fr 13.798683 14.37019 16.344731 14.468775 14.697610 48.62923 100
+# pt 13.789674 14.36244 15.706321 14.443772 14.679905 44.76701 100
Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.
This package is available on the official R network (CRAN). Install this package in R with:
-install.packages("AMR")
It will be downloaded and installed automatically.
+This package is available on the official R network (CRAN), which has a peer-reviewed submission process. Install this package in R with:
+ +It will be downloaded and installed automatically. For RStudio, click on menu Tools > Install Packages…, then type in “AMR” and press Install.
The AMR
package basically does four important things:
It cleanses existing data, by transforming it to reproducible and profound classes, making the most efficient use of R. These functions all use artificial intelligence to guess results that you would expect:
as.mo()
to get an ID of a microorganism. The IDs are human readable for the trained eye - the ID of Klebsiella pneumoniae is “B_KLBSL_PNE” (B stands for Bacteria) and the ID of S. aureus is “B_STPHY_AUR”. The function takes almost any text as input that looks like the name or code of a microorganism like “E. coli”, “esco” or “esccol” and tries to find expected results using artificial intelligence (AI) on the included ITIS data set, consisting of almost 20,000 microorganisms. It is very fast, please see our benchmarks. Moreover, it can group Staphylococci into coagulase negative and positive (CoNS and CoPS, see source) and can categorise Streptococci into Lancefield groups (like beta-haemolytic Streptococcus Group B, source).as.rsi()
to transform values to valid antimicrobial results. It produces just S, I or R based on your input and warns about invalid values. Even values like “<=0.002; S” (combined MIC/RSI) will result in “S”.as.mic()
to cleanse your MIC values. It produces a so-called factor (called ordinal in SPSS) with valid MIC values as levels. A value like “<=0.002; S” (combined MIC/RSI) will result in “<=0.002”.as.atc()
to get the ATC code of an antibiotic as defined by the WHO. This package contains a database with most LIS codes, official names, DDDs and even trade names of antibiotics. For example, the values “Furabid”, “Furadantin”, “nitro” all return the ATC code of Nitrofurantoine.It enhances existing data and adds new data from data sets included in this package.
eucast_rules()
to apply EUCAST expert rules to isolates.first_isolate()
to identify the first isolates of every patient using guidelines from the CLSI (Clinical and Laboratory Standards Institute).
@@ -267,9 +267,9 @@
microorganisms
contains the complete taxonomic tree of more than 18,000 microorganisms (bacteria, fungi/yeasts and protozoa). Furthermore, the colloquial name and Gram stain are available, which enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like mo_genus()
, mo_family()
, mo_gramstain()
or even mo_phylum()
. As they use as.mo()
internally, they also use artificial intelligence. For example, mo_genus("MRSA")
and mo_genus("S. aureus")
will both return "Staphylococcus"
. They also come with support for German, Dutch, Spanish, Italian, French and Portuguese. These functions can be used to add new variables to your data.antibiotics
contains the ATC code, LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains a total of 298 trade names. Use functions like ab_name()
and ab_tradenames()
to look up values. The ab_*
functions use as.atc()
internally so they support AI to guess your expected result. For example, ab_name("Fluclox")
, ab_name("Floxapen")
and ab_name("J01CF05")
will all return "Flucloxacillin"
. These functions can again be used to add new variables to your data.It analyses the data with convenient functions that use well-known methods.
portion_R()
, portion_IR()
, portion_I()
, portion_SI()
and portion_S()
functions. Similarly, the number of isolates can be determined with the count_R()
, count_IR()
, count_I()
, count_SI()
and count_S()
functions. All these functions can be used with the dplyr
package (e.g. in conjunction with summarise()
)geom_rsi()
, a function made for the ggplot2
packagekurtosis()
, skewness()
and create frequency tables with freq()
It teaches the user how to use all the above actions.
septic_patients
. This data set contains:
@@ -290,6 +290,8 @@
as.mo()
to identify an MO code.pkgdown
)pkgdown
)
+dplyr
version 0.8.0guess_ab_col()
to find an antibiotic column in a tablemo_failures()
to review values that could not be coerced to a valid MO code, using as.mo()
. This latter function will now only show a maximum of 25 uncoerced values.mo_renamed()
to get a list of all returned values from as.mo()
that have had taxonomic renamingage()
to calculate the (patients) age in yearsage_groups()
to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis per age group.ggplot_rsi_predict()
as well as the base R plot()
function can now be used for resistance prediction calculated with resistance_predict()
: r x <- resistance_predict(septic_patients, col_ab = "amox") plot(x) ggplot_rsi_predict(x)
+guess_ab_col()
to find an antibiotic column in a tablemo_failures()
to review values that could not be coerced to a valid MO code, using as.mo()
. This latter function will now only show a maximum of 25 uncoerced values.mo_renamed()
to get a list of all returned values from as.mo()
that have had taxonomic renamingage()
to calculate the (patients) age in yearsage_groups()
to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis per age group.New function ggplot_rsi_predict()
as well as the base R plot()
function can now be used for resistance prediction calculated with resistance_predict()
:
filter_first_isolate()
and filter_first_weighted_isolate()
to shorten and speed up filtering on data sets with antimicrobial results, e.g.: r septic_patients %>% filter_first_isolate(...) # or filter_first_isolate(septic_patients, ...)
is equal to: r septic_patients %>% mutate(only_firsts = first_isolate(septic_patients, ...)) %>% filter(only_firsts == TRUE) %>% select(-only_firsts)
+Functions filter_first_isolate()
and filter_first_weighted_isolate()
to shorten and speed up filtering on data sets with antimicrobial results, e.g.:
is equal to:
+New vignettes about how to conduct AMR analysis, predict antimicrobial resistance, use the G-test and more. These are also available (and even easier to read) on our website: https://msberends.gitlab.io/AMR.
eucast_rules()
:eucast_rules()
:
+guess_mo()
is now deprecated in favour of as.mo()
and will be removed in future versionsas.mo()
:as.mo()
:
+first_isolate()
:first_isolate()
:
+septic_patients
data set this yielded a difference of 0.15% more isolatescol_patientid
), when this parameter was left blankcol_keyantibiotics()
), when this parameter was left blankoutput_logical
, the function will now always return a logical valuefilter_specimen
to specimen_group
, although using filter_specimen
will still workportion
functions, that low counts can influence the outcome and that the portion
functions may camouflage this, since they only return the portion (albeit being dependent on the minimum
parameter)mo_taxonomy()
now contains the kingdom toois.rsi.eligible()
@@ -279,7 +304,8 @@
rsi
and mic
freq()
function):freq()
function):
+header
functionmo
to show unique count of families, genera and speciesdecimal.mark
setting, which just like format
defaults to getOption("OutDec")
@@ -288,7 +314,10 @@
NA
droplevels
to exclude empty factor levels when input is a factorselect()
on frequency tablesscale_y_percent()
now contains the limits
parametermdro()
, key_antibiotics()
and eucast_rules()
EUCAST_rules
was renamed to eucast_rules
, the old function still exists as a deprecated functioneucast_rules
function:eucast_rules
function:
+rules
to specify which rules should be applied (expert rules, breakpoints, others or all)verbose
which can be set to TRUE
to get very specific messages about which columns and rows were affectedseptic_patients
now reflects these changespipe
for piperacillin (J01CA12), also to the mdro
functionkingdom
to the microorganisms data set, and function mo_kingdom
to look up valuesas.mo
(and subsequently all mo_*
functions), as empty values will be ignored a priori
as.mo
will return NAas.mo
(and all mo_*
wrappers) now supports genus abbreviations with “species” attached r as.mo("E. species") # B_ESCHR mo_fullname("E. spp.") # "Escherichia species" as.mo("S. spp") # B_STPHY mo_fullname("S. species") # "Staphylococcus species"
+Function as.mo
(and all mo_*
wrappers) now supports genus abbreviations with “species” attached
combine_IR
(TRUE/FALSE) to functions portion_df
and count_df
, to indicate that all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)portion_*(..., as_percent = TRUE)
when minimal number of isolates would not be metportion_*
functions now throw a warning when the total number of available isolates is below parameter minimum
as.mo
, as.rsi
, as.mic
, as.atc
and freq
will not set package name as attribute anymorefreq()
:freq()
:
+Support for grouping variables, test with:
-septic_patients %>%
- group_by(hospital_id) %>%
- freq(gender)
Support for (un)selecting columns:
-septic_patients %>%
- freq(hospital_id) %>%
- select(-count, -cum_count) # only get item, percent, cum_percent
hms::is.hms
na
, to choose which character to print for empty valuesheader
to turn the header info off (default when markdown = TRUE
)title
to manually set the title of the frequency tablefirst_isolate
now tries to find columns to use as input when parameters are left blankmdro
)ggplot_rsi
and scale_y_percent
have breaks
parameteras.mo
:as.mo
:
+"CRS"
-> Stenotrophomonas maltophilia
"MSSE"
-> Staphylococcus epidermidis
join
functionsis.rsi.eligible
, now 15-20 times fasterg.test
, when sum(x)
is below 1000 or any of the expected values is below 5, Fisher’s Exact Test will be suggestedmicroorganisms
now contains all microbial taxonomic data from ITIS (kingdoms Bacteria, Fungi and Protozoa), the Integrated Taxonomy Information System, available via https://itis.gov. The data set now contains more than 18,000 microorganisms with all known bacteria, fungi and protozoa according ITIS with genus, species, subspecies, family, order, class, phylum and subkingdom. The new data set microorganisms.old
contains all previously known taxonomic names from those kingdoms.mo_property
:mo_property
:
+mo_phylum
, mo_class
, mo_order
, mo_family
, mo_genus
, mo_species
, mo_subspecies
mo_fullname
, mo_shortname
@@ -436,22 +480,52 @@
mo_ref
They also come with support for German, Dutch, French, Italian, Spanish and Portuguese: r mo_gramstain("E. coli") # [1] "Gram negative" mo_gramstain("E. coli", language = "de") # German # [1] "Gramnegativ" mo_gramstain("E. coli", language = "es") # Spanish # [1] "Gram negativo" mo_fullname("S. group A", language = "pt") # Portuguese # [1] "Streptococcus grupo A"
Furthermore, former taxonomic names will give a note about the current taxonomic name: r mo_gramstain("Esc blattae") # Note: 'Escherichia blattae' (Burgess et al., 1973) was renamed 'Shimwellia blattae' (Priest and Barker, 2010) # [1] "Gram negative"
They also come with support for German, Dutch, French, Italian, Spanish and Portuguese:
+mo_gramstain("E. coli")
+# [1] "Gram negative"
+mo_gramstain("E. coli", language = "de") # German
+# [1] "Gramnegativ"
+mo_gramstain("E. coli", language = "es") # Spanish
+# [1] "Gram negativo"
+mo_fullname("S. group A", language = "pt") # Portuguese
+# [1] "Streptococcus grupo A"
Furthermore, former taxonomic names will give a note about the current taxonomic name:
+ +count_R
, count_IR
, count_I
, count_SI
and count_S
to selectively count resistant or susceptible isolates
count_R
, count_IR
, count_I
, count_SI
and count_S
to selectively count resistant or susceptible isolatescount_df
(which works like portion_df
) to get all counts of S, I and R of a data set with antibiotic columns, with support for grouped variablesis.rsi.eligible
to check for columns that have valid antimicrobial results, but do not have the rsi
class yet. Transform the columns of your raw data with: data %>% mutate_if(is.rsi.eligible, as.rsi)
as.mo
and is.mo
as replacements for as.bactid
and is.bactid
(since the microorganisms
data set not only contains bacteria). These last two functions are deprecated and will be removed in a future release. The as.mo
function determines microbial IDs using Artificial Intelligence (AI): r as.mo("E. coli") # [1] B_ESCHR_COL as.mo("MRSA") # [1] B_STPHY_AUR as.mo("S group A") # [1] B_STRPTC_GRA
And with great speed too - on a quite regular Linux server from 2007 it takes us less than 0.02 seconds to transform 25,000 items: r thousands_of_E_colis <- rep("E. coli", 25000) microbenchmark::microbenchmark(as.mo(thousands_of_E_colis), unit = "s") # Unit: seconds # min median max neval # 0.01817717 0.01843957 0.03878077 100
+Functions as.mo
and is.mo
as replacements for as.bactid
and is.bactid
(since the microorganisms
data set not only contains bacteria). These last two functions are deprecated and will be removed in a future release. The as.mo
function determines microbial IDs using Artificial Intelligence (AI):
as.mo("E. coli")
+# [1] B_ESCHR_COL
+as.mo("MRSA")
+# [1] B_STPHY_AUR
+as.mo("S group A")
+# [1] B_STRPTC_GRA
And with great speed too - on a quite regular Linux server from 2007 it takes us less than 0.02 seconds to transform 25,000 items:
+reference_df
for as.mo
, so users can supply their own microbial IDs, name or codes as a reference tablebactid
to mo
, like:bactid
to mo
, like:
+EUCAST_rules
, first_isolate
and key_antibiotics
microorganisms
and septic_patients
labels_rsi_count
to print datalabels on a RSI ggplot2
modelFunctions as.atc
and is.atc
to transform/look up antibiotic ATC codes as defined by the WHO. The existing function guess_atc
is now an alias of as.atc
.
ab_property
and its aliases: ab_name
, ab_tradenames
, ab_certe
, ab_umcg
and ab_trivial_nl
@@ -466,7 +540,14 @@
Changed
antibiotics
data set: Terbinafine (D01BA02), Rifaximin (A07AA11) and Isoconazole (D01AC05)antibiotics
data set, it now contains 298 different trade names in total, e.g.: r ab_official("Bactroban") # [1] "Mupirocin" ab_name(c("Bactroban", "Amoxil", "Zithromax", "Floxapen")) # [1] "Mupirocin" "Amoxicillin" "Azithromycin" "Flucloxacillin" ab_atc(c("Bactroban", "Amoxil", "Zithromax", "Floxapen")) # [1] "R01AX06" "J01CA04" "J01FA10" "J01CF05"
+Added 163 trade names to the antibiotics
data set, it now contains 298 different trade names in total, e.g.:
first_isolate
, rows will be ignored when there’s no species availableratio
is now deprecated and will be removed in a future release, as it is not really the scope of this packageprevalence
column to the microorganisms
data setminimum
and as_percent
to portion_df
count_*
and portions_*
, and n_rsi
. This allows to check for more than 2 vectors or columns. ```r septic_patients %>% select(amox, cipr) %>% count_IR() # which is the same as: septic_patients %>% count_IR(amox, cipr)Support for quasiquotation in the functions series count_*
and portions_*
, and n_rsi
. This allows checking more than 2 vectors or columns.
ggplot_rsi
and geom_rsi
so they can cope with count_df
. The new fun
parameter has value portion_df
at default, but can be set to count_df
.ggplot_rsi
when the ggplot2
package was not loadedlabels_rsi_count
to ggplot_rsi
+geom_rsi
(and ggplot_rsi
) so you can set your own preferencesquote
to the freq
functiondiff
for frequency tablesfreq
) header of class character
+Support for types (classes) list and matrix for freq
For lists, subsetting is possible:
+ +septic_patients %>% portion_S(amcl) septic_patients %>% portion_S(amcl, gent) septic_patients %>% portion_S(amcl, gent, pita) * Edited `ggplot_rsi` and `geom_rsi` so they can cope with `count_df`. The new `fun` parameter has value `portion_df` at default, but can be set to `count_df`. * Fix for `ggplot_rsi` when the `ggplot2` package was not loaded * Added datalabels function `labels_rsi_count` to `ggplot_rsi` * Added possibility to set any parameter to `geom_rsi` (and `ggplot_rsi`) so you can set your own preferences * Fix for joins, where predefined suffixes would not be honoured * Added parameter `quote` to the `freq` function * Added generic function `diff` for frequency tables * Added longest and shortest character length in the frequency table (`freq`) header of class `character` * Support for types (classes) list and matrix for `freq`
r my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2)) freq(my_matrix) For lists, subsetting is possible:
r my_list = list(age = septic_patients$age, gender = septic_patients$gender) my_list %>% freq(age) my_list %>% freq(gender) ```
rsi_df
was removed in favour of new functions portion_R
, portion_IR
, portion_I
, portion_SI
and portion_S
to selectively calculate resistance or susceptibility. These functions are 20 to 30 times faster than the old rsi
function. The old function still works, but is deprecated.rsi_df
was removed in favour of new functions portion_R
, portion_IR
, portion_I
, portion_SI
and portion_S
to selectively calculate resistance or susceptibility. These functions are 20 to 30 times faster than the old rsi
function. The old function still works, but is deprecated.
+portion_df
to get all portions of S, I and R of a data set with antibiotic columns, with support for grouped variablesggplot2
+ggplot2
+geom_rsi
, facet_rsi
, scale_y_percent
, scale_rsi_colours
and theme_rsi
ggplot_rsi
to apply all above functions on a data set:
@@ -515,22 +629,32 @@
as.bactid
and is.bactid
to transform/ look up microbial ID’s.guess_bactid
is now an alias of as.bactid
kurtosis
and skewness
that are lacking in base R - they are generic functions and have support for vectors, data.frames and matricesg.test
to perform the Χ²-distributed G-test, whose usage is the same as chisq.test
ratio
to transform a vector of values to a preset ratioratio
to transform a vector of values to a preset ratioratio(c(10, 500, 10), ratio = "1:2:1")
would return 130, 260, 130
%in%
or %like%
(and give them keyboard shortcuts), or to view the datasets that come with this packagep.symbol
to transform p values to their related symbols: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
clipboard_import
and clipboard_export
as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the clipr
package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server)freq
):freq
):
+rsi
(antimicrobial resistance) to use as inputtable
to use as input: freq(table(x, y))
@@ -545,6 +669,8 @@
options(max.print.freq = n)
where n is your preset valuemicroorganisms
dataset (especially for Salmonella) and the column bactid
now has the new class "bactid"
rsi
and mic
functions:rsi
and mic
functions:
+as.rsi("<=0.002; S")
will return S
as.mic("<=0.002; S")
will return <=0.002
as.mic("<= 0.002")
now worksrsi
and mic
do not add the attribute package.version
anymore"groups"
option for atc_property(..., property)
. It will return a vector of the ATC hierarchy as defined by the WHO. The new function atc_groups
is a convenient wrapper around this.atc_property
as it requires the host set by url
to be responsivefirst_isolate
algorithm to exclude isolates where bacteria ID or genus is unavailable924b62
) from the dplyr
package v0.7.5 and aboveguess_bactid
(now called as.bactid
)guess_bactid
(now called as.bactid
)
+yourdata %>% select(genus, species) %>% as.bactid()
now also worksguess_bactid
to determine the ID of a microorganism based on genus/species or known abbreviations like MRSAguess_atc
to determine the ATC of an antibiotic based on name, trade name, or known abbreviationsfreq
to create frequency tables, with additional info in a headerMDRO
to determine Multi Drug Resistant Organisms (MDRO) with support for country-specific guidelines.MDRO
to determine Multi Drug Resistant Organisms (MDRO) with support for country-specific guidelines.
+BRMO
and MRGN
are wrappers for Dutch and German guidelines, respectively"points"
or "keyantibiotics"
, see ?first_isolate
tibble
s and data.table
s