diff --git a/.github/workflows/codecovr.yaml b/.github/workflows/codecovr.yaml
index 38a0f177..9871c13b 100644
--- a/.github/workflows/codecovr.yaml
+++ b/.github/workflows/codecovr.yaml
@@ -68,16 +68,28 @@ jobs:
as.data.frame(utils::installed.packages())[, "Version", drop = FALSE]
shell: Rscript {0}
+ # - name: Test coverage
+ # env:
+ # CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+ # run: |
+ # library(AMR)
+ # library(tinytest)
+ # library(covr)
+ # source_files <- list.files("R", pattern = ".R$", full.names = TRUE)
+ # test_files <- list.files("inst/tinytest", full.names = TRUE)
+ # cov <- file_coverage(source_files = source_files, test_files = test_files, parent_env = asNamespace("AMR"), line_exclusions = list("R/atc_online.R", "R/mo_source.R", "R/translate.R", "R/resistance_predict.R", "R/aa_helper_functions.R", "R/aa_helper_pm_functions.R", "R/zzz.R"))
+ # attr(cov, which = "package") <- list(path = ".") # until https://github.com/r-lib/covr/issues/478 is solved
+ # codecov(coverage = cov, quiet = FALSE)
+ # shell: Rscript {0}
+
- name: Test coverage
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+ R_RUN_TINYTEST: true
run: |
library(AMR)
library(tinytest)
- library(covr)
- source_files <- list.files("R", pattern = ".R$", full.names = TRUE)
- test_files <- list.files("inst/tinytest", full.names = TRUE)
- cov <- file_coverage(source_files = source_files, test_files = test_files, parent_env = asNamespace("AMR"), line_exclusions = list("R/atc_online.R", "R/mo_source.R", "R/translate.R", "R/resistance_predict.R", "R/aa_helper_functions.R", "R/aa_helper_pm_functions.R", "R/zzz.R"))
- attr(cov, which = "package") <- list(path = ".") # until https://github.com/r-lib/covr/issues/478 is solved
- codecov(coverage = cov, quiet = FALSE)
+ covr::codecov(line_exclusions = list("R/atc_online.R", "R/mo_source.R", "R/translate.R", "R/resistance_predict.R", "R/aa_helper_functions.R", "R/aa_helper_pm_functions.R", "R/zzz.R"))
shell: Rscript {0}
+
+
diff --git a/DESCRIPTION b/DESCRIPTION
index 3806104e..9187fe63 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
Package: AMR
-Version: 1.6.0.9065
+Version: 1.7.0
Date: 2021-05-24
Title: Antimicrobial Resistance Data Analysis
Authors@R: c(
diff --git a/NEWS.md b/NEWS.md
index af61cdf7..690d0526 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,5 @@
-# `AMR` 1.6.0.9065
-## Last updated: 24 May 2021
+# `AMR` 1.7.0
+
### Breaking change
* All antibiotic class selectors (such as `carbapenems()`, `aminoglycosides()`) can now be used for filtering as well, making all their accompanying `filter_*()` functions redundant (such as `filter_carbapenems()`, `filter_aminoglycosides()`). These functions are now deprecated and will be removed in a next release.
@@ -906,7 +906,7 @@ We've got a new website: [https://msberends.gitlab.io/AMR](https://msberends.git
* Emphasised in manual that penicillin is meant as benzylpenicillin (ATC [J01CE01](https://www.whocc.no/atc_ddd_index/?code=J01CE01))
* New info is returned when running this function, stating exactly what has been changed or added. Use `eucast_rules(..., verbose = TRUE)` to get a data set with all changes per bug and drug combination.
* Removed data sets `microorganisms.oldDT`, `microorganisms.prevDT`, `microorganisms.unprevDT` and `microorganismsDT` since they were no longer needed and only contained info already available in the `microorganisms` data set
-* Added 65 antibiotics to the `antibiotics` data set, from the [Pharmaceuticals Community Register](http://ec.europa.eu/health/documents/community-register/html/atc.htm) of the European Commission
+* Added 65 antibiotics to the `antibiotics` data set, from the [Pharmaceuticals Community Register](https://ec.europa.eu/health/documents/community-register/html/reg_hum_atc.htm) of the European Commission
* Removed columns `atc_group1_nl` and `atc_group2_nl` from the `antibiotics` data set
* Functions `atc_ddd()` and `atc_groups()` have been renamed `atc_online_ddd()` and `atc_online_groups()`. The old functions are deprecated and will be removed in a future version.
* Function `guess_mo()` is now deprecated in favour of `as.mo()` and will be removed in future versions
diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R
index 035cc239..17f2a05a 100755
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -192,7 +192,7 @@ search_type_in_df <- function(x, type, info = TRUE) {
}
# -- key antibiotics
- if (type == "keyantibiotics") {
+ if (type %in% c("keyantibiotics", "keyantimicrobials")) {
if (any(colnames(x) %like% "^key.*(ab|antibiotics|antimicrobials)")) {
found <- sort(colnames(x)[colnames(x) %like% "^key.*(ab|antibiotics|antimicrobials)"])[1]
}
@@ -865,7 +865,7 @@ unique_call_id <- function(entire_session = FALSE) {
# combination of environment ID (like "0x7fed4ee8c848")
# and highest system call
call <- paste0(deparse(sys.calls()[[1]]), collapse = "")
- if (call %like% "run_test_dir|test_all|tinytest|test_package|testthat") {
+ if (!interactive() || call %like% "run_test_dir|test_all|tinytest|test_package|testthat") {
# unit tests will keep the same call and environment - give them a unique ID
call <- paste0(sample(c(c(0:9), letters[1:6]), size = 64, replace = TRUE), collapse = "")
}
@@ -1122,7 +1122,7 @@ s3_register <- function(generic, class, method = NULL) {
# works exactly like round(), but rounds `round2(44.55, 1)` to 44.6 instead of 44.5
# and adds decimal zeroes until `digits` is reached when force_zero = TRUE
-round2 <- function(x, digits = 0, force_zero = TRUE) {
+round2 <- function(x, digits = 1, force_zero = TRUE) {
x <- as.double(x)
# https://stackoverflow.com/a/12688836/4575331
val <- (trunc((abs(x) * 10 ^ digits) + 0.5) / 10 ^ digits) * sign(x)
@@ -1174,7 +1174,7 @@ percentage <- function(x, digits = NULL, ...) {
# round right: percentage(0.4455) and format(as.percentage(0.4455), 1) should return "44.6%", not "44.5%"
x_formatted <- format(round2(as.double(x), digits = digits + 2) * 100,
scientific = FALSE,
- digits = digits,
+ digits = max(1, digits),
nsmall = digits,
...)
x_formatted <- paste0(x_formatted, "%")
diff --git a/R/ab.R b/R/ab.R
index 9297d3ab..c8a4fdcf 100755
--- a/R/ab.R
+++ b/R/ab.R
@@ -50,7 +50,7 @@
#'
#' WHONET 2019 software: \url{http://www.whonet.org/software.html}
#'
-#' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{http://ec.europa.eu/health/documents/community-register/html/atc.htm}
+#' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{https://ec.europa.eu/health/documents/community-register/html/reg_hum_atc.htm}
#' @aliases ab
#' @return A [character] [vector] with additional class [`ab`]
#' @seealso
diff --git a/R/data.R b/R/data.R
index 8d6f8473..05aaaa35 100755
--- a/R/data.R
+++ b/R/data.R
@@ -71,7 +71,7 @@
#'
#' WHONET 2019 software: Note: values on this page will change with every website update since they are based on randomly created values and the page was written in R Markdown. However, the methodology remains unchanged. This page was generated on 27 April 2021. Note: values on this page will change with every website update since they are based on randomly created values and the page was written in R Markdown. However, the methodology remains unchanged. This page was generated on 24 May 2021. As with many uses in R, we need some additional packages for AMR data analysis. Our package works closely together with the tidyverse packages We will also use the To start with patients, we need a unique list of patients. The The first 135 patient IDs are now male, the other 125 are female. Let’s pretend that our data consists of blood cultures isolates from between 1 January 2010 and 1 January 2018. This For this tutorial, we will uses four different microorganisms: Escherichia coli, Staphylococcus aureus, Streptococcus pneumoniae, and Klebsiella pneumoniae: Using the Using the The resulting data set contains 20,000 blood culture isolates. With the We also created a package dedicated to data cleaning and checking, called the For example, for the Frequency table Class: character So, we can draw at least two conclusions immediately. From a data scientists perspective, the data looks clean: only values The data is already quite clean, but we still need to transform some variables. The We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The Finally, we will apply EUCAST rules on our antimicrobial results. In Europe, most medical microbiological laboratories already apply these rules. Our package features their latest insights on intrinsic resistance and exceptional phenotypes. 
Moreover, the Because the amoxicillin (column Now that we have the microbial ID, we can add some taxonomic properties: (…) When preparing a cumulative antibiogram to guide clinical decisions about empirical antimicrobial therapy of initial infections, only the first isolate of a given species per patient, per analysis period (eg, one year) should be included, irrespective of body site, antimicrobial susceptibility profile, or other phenotypical characteristics (eg, biotype). The first isolate is easily identified, and cumulative antimicrobial susceptibility test data prepared using the first isolate are generally comparable to cumulative antimicrobial susceptibility test data calculated by other methods, providing duplicate isolates are excluded. This This The outcome of the function can easily be added to our data: So only 53.1% is suitable for resistance analysis! We can now filter on it with the For future use, the above two syntaxes can be shortened with the We made a slight twist to the CLSI algorithm, to take into account the antimicrobial susceptibility profile. Have a look at all E. coli isolates of patient C8, sorted on date: Only 3 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The If a column exists with a name like ‘key(…)ab’ the Instead of 3, now 3 isolates are flagged. In total, 53.1% of all isolates are marked ‘first weighted’ - 0% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline. As with So we end up with 10,622 isolates for analysis. We can remove unneeded columns: Now our data looks like: So only 53.5% is suitable for resistance analysis! 
We can now filter on it with the For future use, the above two syntaxes can be shortened: So we end up with 10,696 isolates for analysis. Now our data looks like: (omitted 27 entries, n = 56 [11.20%]) Frequency table Class: factor > ordered > rsi (numeric) An easy In the table above, all measurements are shown in milliseconds (thousands of seconds). A value of 5 milliseconds means it can determine 200 input values per second. It case of 200 milliseconds, this is only 5 input values per second. It is clear that accepted taxonomic names are extremely fast, but some variations are up to 200 times slower to determine. To improve performance, we implemented two important algorithms to save unnecessary calculations: repetitive results and already precalculated results. So getting official taxonomic names of 2,000,000 (!!) items consisting of 90 unique values only takes 0.228 seconds. That is 114 nanoseconds on average. You only lose time on your unique input values. So getting official taxonomic names of 2,000,000 (!!) items consisting of 90 unique values only takes 0.221 seconds. That is 110 nanoseconds on average. You only lose time on your unique input values. So going from So going from Of course, when running Currently supported non-English languages are German, Dutch, Spanish, Italian, French and Portuguese. As with many uses in R, we need some additional packages for AMR data analysis. Our package works closely together with the tidyverse packages Our Our package contains a function It is basically as easy as: The function will look for a date column itself if When running any of these commands, a summary of the regression model will be printed unless using This text is only a printed summary - the actual result (output) of the function is a The function This is the fastest way to plot the result. It automatically adds the right axes, error bars, titles, number of available observations and type of model. 
We also support the Resistance is not easily predicted; if we look at vancomycin resistance in Gram-positive bacteria, the spread (i.e. standard error) is enormous: Vancomycin resistance could be 100% in ten years, but might also stay around 0%. You can define the model with the For the vancomycin resistance in Gram-positive bacteria, a linear model might be more appropriate since no binomial distribution is to be expected based on the observed years: This seems more likely, doesn’t it? The model itself is also available from the object, as an This package contains all ~550 antibiotic, antimycotic and antiviral drugs and their Anatomical Therapeutic Chemical (ATC) codes, ATC groups and Defined Daily Dose (DDD, oral and IV) from the World Health Organization Collaborating Centre for Drug Statistics Methodology (WHOCC, https://www.whocc.no) and the Pharmaceuticals Community Register of the European Commission. This package contains all ~550 antibiotic, antimycotic and antiviral drugs and their Anatomical Therapeutic Chemical (ATC) codes, ATC groups and Defined Daily Dose (DDD, oral and IV) from the World Health Organization Collaborating Centre for Drug Statistics Methodology (WHOCC, https://www.whocc.no) and the Pharmaceuticals Community Register of the European Commission. NOTE: The WHOCC copyright does not allow use for commercial purposes, unlike any other info from this package. See https://www.whocc.no/copyright_disclaimer/. Read more about the data from WHOCC in our manual. Making this package independent of especially the tidyverse (e.g. packages Negative effects of this change are: This is important, because a value like Function Function Renamed data set speed improvement for microbial IDs fixed factor level names for R Markdown Support for tidyverse quasiquotation! 
Now you can create frequency tables of function outcomes: Header info is now available as a list, with the The argument Using Functions Frequency tables - Frequency tables - Support for grouping variables, test with: Support for (un)selecting columns: Check for Removed diacritics from all authors (columns Fix for Fix for Support for named vectors of class Support for named vectors of class AI improvements for For lists, subsetting is possible:
How to conduct AMR data analysis
Matthijs S. Berends
- 27 April 2021
+ 24 May 2021
Source: vignettes/AMR.Rmd
AMR.Rmd
Introduction
@@ -234,21 +233,21 @@
-
2021-04-27
+2021-05-24
abcd
Escherichia coli
S
S
-
2021-04-27
+2021-05-24
abcd
Escherichia coli
S
R
-
2021-04-27
+2021-05-24
efgh
Escherichia coli
R
@@ -261,14 +260,14 @@
Needed R packages
dplyr
and ggplot2
by RStudio. The tidyverse tremendously improves the way we conduct data science - it allows for a very natural way of writing syntaxes and creating beautiful plots in R.cleaner
package, that can be used for cleaning data and creating frequency tables.
-library(dplyr)
-library(ggplot2)
+
+
library(dplyr)
+library(ggplot2)
library(AMR)
library(cleaner)
# (if not yet installed, install with:)
-# install.packages(c("dplyr", "ggplot2", "AMR", "cleaner"))
Patients
LETTERS
object is available in R - it’s a vector with 26 characters: A
to Z
. The patients
object we just created is now a vector of length 260, with values (patient IDs) varying from A1
to Z10
. Now we also set the gender of our patients, by putting the ID and the gender in a table:
-patients_table <- data.frame(patient_id = patients,
+
+
patients_table <- data.frame(patient_id = patients,
gender = c(rep("M", 135),
- rep("F", 125)))
Dates
dates
object now contains all days in our date range.
Microorganisms
-bacteria <- c("Escherichia coli", "Staphylococcus aureus",
- "Streptococcus pneumoniae", "Klebsiella pneumoniae")
+
bacteria <- c("Escherichia coli", "Staphylococcus aureus",
+ "Streptococcus pneumoniae", "Klebsiella pneumoniae")
Put everything together
sample()
function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results, using the random_rsi()
function.
-sample_size <- 20000
+
+
sample_size <- 20000
data <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
patient_id = sample(patients, size = sample_size, replace = TRUE),
hospital = sample(c("Hospital A",
@@ -324,13 +323,13 @@
AMX = random_rsi(sample_size, prob_RSI = c(0.35, 0.60, 0.05)),
AMC = random_rsi(sample_size, prob_RSI = c(0.15, 0.75, 0.10)),
CIP = random_rsi(sample_size, prob_RSI = c(0.20, 0.80, 0.00)),
- GEN = random_rsi(sample_size, prob_RSI = c(0.08, 0.92, 0.00)))
left_join()
function from the dplyr
package, we can ‘map’ the gender to the patient ID using the patients_table
object we created earlier:
-data <- data %>% left_join(patients_table)
+
data <- data %>% left_join(patients_table)
head()
function we can preview the first 6 rows of this data set:
-head(data)
+
head(data)
@@ -420,8 +419,8 @@
Cleaning the data
date
@@ -345,32 +344,32 @@
-
-2016-05-26
-M10
-Hospital D
-Staphylococcus aureus
-S
-S
-R
-S
-M
-
-
+2014-05-22
-K4
+2010-04-11
+W8
Hospital B
Staphylococcus aureus
-R
+S
+S
+S
+S
+F
+
+
2013-06-11
+I7
+Hospital A
+Staphylococcus aureus
+S
S
S
S
M
-
2012-07-31
-I4
+2016-12-30
+C6
Hospital D
-Streptococcus pneumoniae
+Escherichia coli
R
S
S
@@ -378,37 +377,37 @@
M
-
2011-03-22
-V6
-Hospital C
+2010-03-16
+P5
+Hospital B
Staphylococcus aureus
-S
+R
S
R
S
F
-
2012-04-09
-G7
-Hospital B
+2014-03-27
+U2
+Hospital D
Escherichia coli
+S
I
S
S
-S
-M
+F
-
2015-08-25
-Z3
-Hospital C
-Staphylococcus aureus
+2011-01-31
+E5
+Hospital B
+Escherichia coli
S
S
+R
S
-S
-F
+M
cleaner
package. Its freq()
function can be used to create frequency tables.gender
variable:
-data %>% freq(gender)
+
data %>% freq(gender)
Length: 20,000
@@ -442,16 +441,16 @@ Longest: 1
1
M
-10,477
-52.39%
-10,477
-52.39%
+10,485
+52.43%
+10,485
+52.43%
@@ -459,27 +458,32 @@ Longest: 1
2
F
-9,523
-47.62%
+9,515
+47.58%
20,000
100.00%
M
and F
. From a researchers perspective: there are slightly more men. Nothing we didn’t already know.bacteria
column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate()
function of the dplyr
package makes this really easy:
+
+
as.rsi()
function ensures reliability and reproducibility in these kinds of variables. The is.rsi.eligible()
can check which columns are probably columns with R/SI test results. Using mutate()
and across()
, we can apply the transformation to the formal <rsi>
class:
+
is.rsi.eligible(data)
+# [1] FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE FALSE
+colnames(data)[is.rsi.eligible(data)]
+# [1] "AMX" "AMC" "CIP" "GEN"
+
data <- data %>%
- mutate(bacteria = as.mo(bacteria))
as.rsi()
function ensures reliability and reproducibility in these kinds of variables. The mutate_at()
will run the as.rsi()
function on defined variables:eucast_rules()
function can also apply additional rules, like forcing AMX
) and amoxicillin/clavulanic acid (column AMC
) in our data were generated randomly, some rows will undoubtedly contain AMX = S and AMC = R, which is technically impossible. The eucast_rules()
fixes this:
-data <- eucast_rules(data, col_mo = "bacteria", rules = "all")
+
data <- eucast_rules(data, col_mo = "bacteria", rules = "all")
Adding new variables
-data <- data %>%
+
+
data <- data %>%
mutate(gramstain = mo_gramstain(bacteria),
genus = mo_genus(bacteria),
- species = mo_species(bacteria))
First isolates
@@ -489,322 +493,47 @@ Longest: 1
-
M39-A4 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition. CLSI, 2014. Chapter 6.4AMR
package includes this methodology with the first_isolate()
function. It adopts the episode of a year (can be changed by user) and it starts counting days after every selected isolate. This new variable can easily be added to our data:
-data <- data %>%
- mutate(first = first_isolate())
+
AMR
package includes this methodology with the first_isolate()
function and is able to apply the four different methods as defined by Hindler et al. in 2007: phenotype-based, episode-based, patient-based, isolate-based. The right method depends on your goals and analysis, but the default phenotype-based method is in any case the method to properly correct for most duplicate isolates. This method also takes into account the antimicrobial susceptibility test results using all_microbials()
. Read more about the methods on the first_isolate()
page.
+
data <- data %>%
+ mutate(first = first_isolate(info = TRUE))
+# Determining first isolates using the 'phenotype-based' method and an
+# episode length of 365 days
# ℹ Using column 'bacteria' as input for `col_mo`.
# ℹ Using column 'date' as input for `col_date`.
-# ℹ Using column 'patient_id' as input for `col_patient_id`.
filter()
function, also from the dplyr
package:
-data_1st <- data %>%
- filter(first == TRUE)
filter_first_isolate()
function:
-data_1st <- data %>%
- filter_first_isolate()
-First weighted isolates
-
-
-
-
-
-isolate
-date
-patient_id
-bacteria
-AMX
-AMC
-CIP
-GEN
-first
-
-
-1
-2010-02-15
-C8
-B_ESCHR_COLI
-S
-S
-S
-S
-TRUE
-
-
-2
-2010-03-23
-C8
-B_ESCHR_COLI
-R
-S
-S
-S
-FALSE
-
-
-3
-2010-05-20
-C8
-B_ESCHR_COLI
-S
-S
-S
-S
-FALSE
-
-
-4
-2011-01-07
-C8
-B_ESCHR_COLI
-R
-S
-S
-S
-FALSE
-
-
-5
-2011-01-09
-C8
-B_ESCHR_COLI
-R
-S
-S
-R
-FALSE
-
-
-6
-2011-01-25
-C8
-B_ESCHR_COLI
-S
-S
-S
-S
-TRUE
-
-
-7
-2011-02-09
-C8
-B_ESCHR_COLI
-S
-S
-R
-S
-FALSE
-
-
-8
-2011-03-09
-C8
-B_ESCHR_COLI
-S
-S
-S
-S
-TRUE
-
-
-9
-2011-04-04
-C8
-B_ESCHR_COLI
-R
-S
-S
-S
-FALSE
-
-
-
-10
-2011-09-26
-C8
-B_ESCHR_COLI
-R
-I
-S
-S
-FALSE
-key_antibiotics()
function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.first_isolate()
function will automatically use it and determine the first weighted isolates. Mind the NOTEs in below output:
-data <- data %>%
- mutate(keyab = key_antibiotics()) %>%
- mutate(first_weighted = first_isolate())
-
-
-
-
-isolate
-date
-patient_id
-bacteria
-AMX
-AMC
-CIP
-GEN
-first
-first_weighted
-
-
-1
-2010-02-15
-C8
-B_ESCHR_COLI
-S
-S
-S
-S
-TRUE
-TRUE
-
-
-2
-2010-03-23
-C8
-B_ESCHR_COLI
-R
-S
-S
-S
-FALSE
-FALSE
-
-
-3
-2010-05-20
-C8
-B_ESCHR_COLI
-S
-S
-S
-S
-FALSE
-FALSE
-
-
-4
-2011-01-07
-C8
-B_ESCHR_COLI
-R
-S
-S
-S
-FALSE
-FALSE
-
-
-5
-2011-01-09
-C8
-B_ESCHR_COLI
-R
-S
-S
-R
-FALSE
-FALSE
-
-
-6
-2011-01-25
-C8
-B_ESCHR_COLI
-S
-S
-S
-S
-TRUE
-TRUE
-
-
-7
-2011-02-09
-C8
-B_ESCHR_COLI
-S
-S
-R
-S
-FALSE
-FALSE
-
-
-8
-2011-03-09
-C8
-B_ESCHR_COLI
-S
-S
-S
-S
-TRUE
-TRUE
-
-
-9
-2011-04-04
-C8
-B_ESCHR_COLI
-R
-S
-S
-S
-FALSE
-FALSE
-
-
-
-10
-2011-09-26
-C8
-B_ESCHR_COLI
-R
-I
-S
-S
-FALSE
-FALSE
-filter_first_isolate()
, there’s a shortcut for this new algorithm too:
-data_1st <- data %>%
- filter_first_weighted_isolate()
-head(data_1st)
filter()
function, also from the dplyr
package:
+
data_1st <- data %>%
+ filter(first == TRUE)
+
data_1st <- data %>%
+ filter_first_isolate()
+
head(data_1st)
-# our transformed antibiotic columns
+
+
# our transformed antibiotic columns
# amoxicillin/clavulanic acid (J01CR02) as an example
-data %>% freq(AMC_ND2)
Length: 500
@@ -392,11 +391,11 @@ Drug group: Beta-lactams/penicillins
A first glimpse at results
ggplot
will already give a lot of information, using the included ggplot_rsi()
function:
-data %>%
+
+
data %>%
group_by(Country) %>%
select(Country, AMP_ND2, AMC_ED20, CAZ_ED10, CIP_ED5) %>%
- ggplot_rsi(translate_ab = 'ab', facet = "Country", datalabels = FALSE)
Benchmarks
@@ -224,21 +224,34 @@
as.mo("MRSA"), # Methicillin Resistant S. aureus
as.mo("VISA"), # Vancomycin Intermediate S. aureus
times = 25)
-print(S.aureus, unit = "ms", signif = 2)
+print(S.aureus, unit = "ms", signif = 2)
# Unit: milliseconds
-# expr min lq mean median uq max neval
-# as.mo("sau") 9.6 10.0 12.0 10 11.0 42 25
-# as.mo("stau") 52.0 55.0 74.0 59 94.0 110 25
-# as.mo("STAU") 51.0 53.0 73.0 57 91.0 100 25
-# as.mo("staaur") 9.5 9.9 11.0 10 12.0 13 25
-# as.mo("STAAUR") 9.4 10.0 18.0 10 12.0 52 25
-# as.mo("S. aureus") 24.0 26.0 31.0 26 29.0 62 25
-# as.mo("S aureus") 25.0 25.0 42.0 29 62.0 68 25
-# as.mo("Staphylococcus aureus") 2.5 2.9 3.2 3 3.5 4 25
-# as.mo("Staphylococcus aureus (MRSA)") 240.0 240.0 260.0 250 260.0 390 25
-# as.mo("Sthafilokkockus aaureuz") 150.0 190.0 190.0 190 190.0 250 25
-# as.mo("MRSA") 8.7 10.0 15.0 11 12.0 49 25
-# as.mo("VISA") 17.0 19.0 25.0 21 22.0 57 25
@@ -289,13 +302,13 @@
B = mo_name("S. aureus"),
C = mo_name("Staphylococcus aureus"),
times = 10)
-print(run_it, unit = "ms", signif = 3)
+print(run_it, unit = "ms", signif = 3)
# Unit: milliseconds
# expr min lq mean median uq max neval
-# A 6.62 6.84 7.30 6.91 8.10 8.74 10
-# B 22.20 23.10 33.20 24.50 27.70 70.80 10
-# C 1.37 1.50 1.66 1.71 1.81 1.84 10
mo_name("Staphylococcus aureus")
to "Staphylococcus aureus"
takes 0.0017 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:mo_name("Staphylococcus aureus")
to "Staphylococcus aureus"
takes 0.0018 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:
run_it <- microbenchmark(A = mo_species("aureus"),
B = mo_genus("Staphylococcus"),
@@ -306,17 +319,17 @@
G = mo_phylum("Firmicutes"),
H = mo_kingdom("Bacteria"),
times = 10)
-print(run_it, unit = "ms", signif = 3)
+print(run_it, unit = "ms", signif = 3)
# Unit: milliseconds
# expr min lq mean median uq max neval
-# A 1.19 1.21 1.43 1.28 1.65 2.03 10
-# B 1.19 1.24 1.38 1.26 1.53 1.83 10
-# C 1.24 1.28 1.45 1.36 1.41 2.24 10
-# D 1.20 1.22 1.33 1.23 1.41 1.77 10
-# E 1.21 1.22 1.35 1.27 1.38 1.69 10
-# F 1.20 1.21 1.34 1.26 1.37 1.71 10
-# G 1.17 1.22 1.31 1.27 1.31 1.71 10
-# H 1.21 1.27 1.42 1.37 1.47 1.84 10
mo_phylum("Firmicutes")
the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes"
anyway, there is no point in calculating the result. And because this package contains all phyla of all known bacteria, it can just return the initial value immediately.
How to predict antimicrobial resistance
@@ -206,37 +205,37 @@
Needed R packages
dplyr
and ggplot2
by Dr Hadley Wickham. The tidyverse tremendously improves the way we conduct data science - it allows for a very natural way of writing syntaxes and creating beautiful plots in R.AMR
package depends on these packages and even extends their use and functions.
Prediction analysis
resistance_predict()
, which takes the same input as functions for other AMR data analysis. Based on a date column, it calculates cases per year and uses a regression model to predict antimicrobial resistance.# resistance prediction of piperacillin/tazobactam (TZP):
-resistance_predict(tbl = example_isolates, col_date = "date", col_ab = "TZP", model = "binomial")
-
-# or:
-example_isolates %>%
- resistance_predict(col_ab = "TZP",
- model "binomial")
-
-# to bind it to object 'predict_TZP' for example:
-predict_TZP <- example_isolates %>%
- resistance_predict(col_ab = "TZP",
- model = "binomial")
# resistance prediction of piperacillin/tazobactam (TZP):
+resistance_predict(tbl = example_isolates, col_date = "date", col_ab = "TZP", model = "binomial")
+
+# or:
+example_isolates %>%
+ resistance_predict(col_ab = "TZP",
+ model = "binomial")
+
+# to bind it to object 'predict_TZP' for example:
+predict_TZP <- example_isolates %>%
+ resistance_predict(col_ab = "TZP",
+ model = "binomial")
col_date
is not set.resistance_predict(..., info = FALSE)
.# ℹ Using column 'date' as input for `col_date`.
data.frame
containing for each year: the number of observations, the actual observed resistance, the estimated resistance and the standard error below and above the estimation:
-predict_TZP
+
+
predict_TZP
# year value se_min se_max observations observed estimated
# 1 2002 0.20000000 NA NA 15 0.20000000 0.05616378
# 2 2003 0.06250000 NA NA 32 0.06250000 0.06163839
@@ -267,30 +266,31 @@
# 27 2028 0.43730688 0.3418075 0.5328063 NA NA 0.43730688
# 28 2029 0.46175755 0.3597639 0.5637512 NA NA 0.46175755
# 29 2030 0.48639359 0.3782932 0.5944939 NA NA 0.48639359
-# 30 2031 0.51109592 0.3973697 0.6248221 NA NA 0.51109592
plot
is available in base R, and can be extended by other packages to make the output depend on the type of input. We extended its function to cope with resistance predictions:
-plot(predict_TZP)
+
plot(predict_TZP)
ggplot2
package with our custom function ggplot_rsi_predict()
to create more appealing plots:
-ggplot_rsi_predict(predict_TZP)
+
ggplot_rsi_predict(predict_TZP)
-
+
+
# choose for error bars instead of a ribbon
-ggplot_rsi_predict(predict_TZP, ribbon = FALSE)
Choosing the right model
-example_isolates %>%
+
+
example_isolates %>%
filter(mo_gramstain(mo, language = NULL) == "Gram-positive") %>%
resistance_predict(col_ab = "VAN", year_min = 2010, info = FALSE, model = "binomial") %>%
- ggplot_rsi_predict()
model
parameter. The model chosen above is a generalised linear regression model using a binomial distribution, assuming that a period of zero resistance was followed by a period of increasing resistance leading slowly to more and more resistance.
-example_isolates %>%
+
+
example_isolates %>%
filter(mo_gramstain(mo, language = NULL) == "Gram-positive") %>%
resistance_predict(col_ab = "VAN", year_min = 2010, info = FALSE, model = "linear") %>%
- ggplot_rsi_predict()
attribute
:
-model <- attributes(predict_TZP)$model
+
+
model <- attributes(predict_TZP)$model
summary(model)$family
#
@@ -350,7 +351,7 @@
summary(model)$coefficients
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -200.67944891 46.17315349 -4.346237 1.384932e-05
-# year 0.09883005 0.02295317 4.305725 1.664395e-05
-
@@ -1420,7 +1415,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
Welcome to the AMR package
diff --git a/docs/articles/welcome_to_AMR_files/header-attrs-2.8/header-attrs.js b/docs/articles/welcome_to_AMR_files/header-attrs-2.8/header-attrs.js
new file mode 100644
index 00000000..dd57d92e
--- /dev/null
+++ b/docs/articles/welcome_to_AMR_files/header-attrs-2.8/header-attrs.js
@@ -0,0 +1,12 @@
+// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
+// be compatible with the behavior of Pandoc < 2.8).
+document.addEventListener('DOMContentLoaded', function(e) {
+ var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
+ var i, h, a;
+ for (i = 0; i < hs.length; i++) {
+ h = hs[i];
+ if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
+ a = h.attributes;
+ while (a.length > 0) h.removeAttribute(a[0].name);
+ }
+});
diff --git a/docs/authors.html b/docs/authors.html
index 9899ed21..aa6ef6b2 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -81,7 +81,7 @@
AMR (for R)
- 1.6.0.9065
+ 1.7.0
@Article{,
title = {AMR - An R Package for Working with Antimicrobial Resistance Data},
author = {M S Berends and C F Luz and A W Friedrich and B N M Sinha and C J Albers and C Glasner},
- doi = {1.24720276528394e-05},
+ doi = {10.1101/810622},
journal = {Journal of Statistical Software},
pages = {Accepted for publication},
year = {2021},
diff --git a/docs/index.html b/docs/index.html
index 9bfe1b75..cf4f90c2 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -42,7 +42,7 @@
AMR (for R)
- 1.6.0.9065
+ 1.7.0
Antimicrobial reference data
-NEWS.md
- Unreleased
-AMR
1.6.0.9065
-Last updated: 24 May 2021
-
+
+ Unreleased
AMR
1.7.0
Breaking change
@@ -347,7 +343,6 @@
tinytest
package, instead of the testthat
package. The testthat
package unfortunately requires tons of dependencies that are also heavy and only usable for recent R versions, disallowing developers to test a package under any R 3.* version. On the contrary, the tinytest
package is very lightweight and dependency-free.
@@ -461,7 +456,7 @@
is.rsi.eligible()
now detects if the column name resembles an antibiotic name or code and now returns TRUE
immediately if the input contains any of the values “R”, “S” or “I”. This drastically improves speed, also for a lot of other functions that rely on automatic determination of antibiotic columns.
get_episode()
and is_new_episode()
now support less than a day as value for argument episode_days
(e.g., to include one patient/test per hour)ampc_cephalosporin_resistance
in eucast_rules()
now also applies to value “I” (not only “S”)print()
and summary()
on a Principal Components Analysis object (pca()
) now print additional group info if the original data was grouped using dplyr::group_by()
+print()
and summary()
on a Principal Components Analysis object (pca()
) now print additional group info if the original data was grouped using dplyr::group_by()
guess_ab_col()
. As this also internally improves the reliability of first_isolate()
and mdro()
, this might have a slight impact on the results of those functions.mo_name()
when used in other languages than Englishdplyr
and tidyr
) tremendously increases sustainability on the long term, since tidyverse functions change quite often. Good for users, but hard for package maintainers. Most of our functions are replaced with versions that only rely on base R, which keeps this package fully functional for many years to come, without requiring a lot of maintenance to keep up with other packages anymore. Another upside it that this package can now be used with all versions of R since R-3.0.0 (April 2013). Our package is being used in settings where the resources are very limited. Fewer dependencies on newer software is helpful for such settings.
-
freq()
that was borrowed from the cleaner
package was removed. Use cleaner::freq()
, or run library("cleaner")
before you use freq()
.freq()
that was borrowed from the cleaner
package was removed. Use cleaner::freq()
, or run library("cleaner")
before you use freq()
.Printing values of class mo
or rsi
in a tibble will no longer be in colour and printing rsi
in a tibble will show the class <ord>
, not <rsi>
anymore. This is purely a visual effect.All functions from the mo_*
family (like mo_name()
and mo_gramstain()
) are noticeably slower when running on hundreds of thousands of rows.mo
and ab
now both also inherit class character
, to support any data transformation. This change invalidates code that checks for class length == 1."testvalue"
could never be understood by e.g. mo_name()
, although the class would suggest a valid microbial code.freq()
has moved to a new package, clean
(CRAN link), since creating frequency tables actually does not fit the scope of this package. The freq()
function still works, since it is re-exported from the clean
package (which will be installed automatically upon updating this AMR
package).freq()
has moved to a new package, clean
(CRAN link), since creating frequency tables actually does not fit the scope of this package. The freq()
function still works, since it is re-exported from the clean
package (which will be installed automatically upon updating this AMR
package).septic_patients
to example_isolates
age()
function gained a new argument exact
to determine ages with decimalsguess_mo()
, guess_atc()
, EUCAST_rules()
, interpretive_reading()
, rsi()
freq()
):
+freq()
):
age_groups()
, to let groups of fives and tens end with 100+ instead of 120+freq()
for when all values are NA
+freq()
for when all values are NA
first_isolate()
for when dates are missingguess_ab_col()
@@ -1577,7 +1572,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
microorganisms.oldDT
, microorganisms.prevDT
, microorganisms.unprevDT
and microorganismsDT
since they were no longer needed and only contained info already available in the microorganisms
data setantibiotics
data set, from the Pharmaceuticals Community Register of the European Commissionantibiotics
data set, from the Pharmaceuticals Community Register of the European Commissionatc_group1_nl
and atc_group2_nl
from the antibiotics
data setatc_ddd()
and atc_groups()
have been renamed atc_online_ddd()
and atc_online_groups()
. The old functions are deprecated and will be removed in a future version.guess_mo()
is now deprecated in favour of as.mo()
and will be removed in future versionsfreq()
function):
+freq()
function):
header
functionheader
is now set to TRUE
at default, even for markdownportion_*
functions now throws a warning when total available isolate is below argument minimum
as.mo
, as.rsi
, as.mic
, as.atc
and freq
will not set package name as attribute anymorefreq()
:freq()
:
@@ -2038,13 +2033,13 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
septic_patients %>%
group_by(hospital_id) %>%
- freq(gender)
+ freq(gender)
hms::is.hms
microorganisms$ref
and microorganisms.old$ref
) to comply with CRAN policy to only allow ASCII charactersmo_property
not working properlyeucast_rules
where some Streptococci would become ceftazidime R in EUCAST rule 4.5mo
, useful for top_freq()
mo
, useful for top_freq()
ggplot_rsi
and scale_y_percent
have breaks
argumentas.mo
:
my_list = list(age = septic_patients$age, gender = septic_patients$gender)
-my_list %>% freq(age)
-my_list %>% freq(gender)
rsi
(antimicrobial resistance) to use as inputtable
to use as input: freq(table(x, y))
+table
to use as input: freq(table(x, y))
hist
and plot
to use a frequency table as input: hist(freq(df$age))
as.vector
, as.data.frame
, as_tibble
and format
freq(mydata, mycolumn)
is the same as mydata %>% freq(mycolumn)
+freq(mydata, mycolumn)
is the same as mydata %>% freq(mycolumn)
top_freq
function to return the top/below n items as vector
-This package contains all ~550 antibiotic, antimycotic and antiviral drugs and their Anatomical Therapeutic Chemical (ATC) codes, ATC groups and Defined Daily Dose (DDD) from the World Health Organization Collaborating Centre for Drug Statistics Methodology (WHOCC, https://www.whocc.no) and the Pharmaceuticals Community Register of the European Commission (http://ec.europa.eu/health/documents/community-register/html/atc.htm).
These have become the gold standard for international drug utilisation monitoring and research.
The WHOCC is located in Oslo at the Norwegian Institute of Public Health and funded by the Norwegian government. The European Commission is the executive of the European Union and promotes its general interest.
NOTE: The WHOCC copyright does not allow use for commercial purposes, unlike any other info from this package. See https://www.whocc.no/copyright_disclaimer/.
diff --git a/docs/reference/WHONET.html b/docs/reference/WHONET.html index 5d365603..a0272c41 100644 --- a/docs/reference/WHONET.html +++ b/docs/reference/WHONET.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9055 + 1.7.0 diff --git a/docs/reference/ab_from_text.html b/docs/reference/ab_from_text.html index b0bc2653..e8e0a5f6 100644 --- a/docs/reference/ab_from_text.html +++ b/docs/reference/ab_from_text.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9055 + 1.7.0 diff --git a/docs/reference/ab_property.html b/docs/reference/ab_property.html index ee4aa939..32c155e3 100644 --- a/docs/reference/ab_property.html +++ b/docs/reference/ab_property.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9055 + 1.7.0 @@ -332,7 +332,7 @@ The lifecycle of this function is stableWorld Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology: https://www.whocc.no/atc_ddd_index/WHONET 2019 software: http://www.whonet.org/software.html
-European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: http://ec.europa.eu/health/documents/community-register/html/atc.htm
+European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: https://ec.europa.eu/health/documents/community-register/html/reg_hum_atc.htm
World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology (WHOCC): https://www.whocc.no/atc_ddd_index/
WHONET 2019 software: http://www.whonet.org/software.html
-European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: http://ec.europa.eu/health/documents/community-register/html/atc.htm
+European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: https://ec.europa.eu/health/documents/community-register/html/reg_hum_atc.htm
Properties that are based on an ATC code are only available when an ATC is available. These properties are: atc_group1
, atc_group2
, oral_ddd
, oral_units
, iv_ddd
and iv_units
.
-This package contains all ~550 antibiotic, antimycotic and antiviral drugs and their Anatomical Therapeutic Chemical (ATC) codes, ATC groups and Defined Daily Dose (DDD) from the World Health Organization Collaborating Centre for Drug Statistics Methodology (WHOCC, https://www.whocc.no) and the Pharmaceuticals Community Register of the European Commission (http://ec.europa.eu/health/documents/community-register/html/atc.htm).
These have become the gold standard for international drug utilisation monitoring and research.
The WHOCC is located in Oslo at the Norwegian Institute of Public Health and funded by the Norwegian government. The European Commission is the executive of the European Union and promotes its general interest.
NOTE: The WHOCC copyright does not allow use for commercial purposes, unlike any other info from this package. See https://www.whocc.no/copyright_disclaimer/.
diff --git a/docs/reference/as.ab.html b/docs/reference/as.ab.html index 36d7e62d..0faca9b4 100644 --- a/docs/reference/as.ab.html +++ b/docs/reference/as.ab.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9063 + 1.7.0 @@ -288,7 +288,7 @@World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology: https://www.whocc.no/atc_ddd_index/
WHONET 2019 software: http://www.whonet.org/software.html
-European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: http://ec.europa.eu/health/documents/community-register/html/atc.htm
+European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: https://ec.europa.eu/health/documents/community-register/html/reg_hum_atc.htm
These have become the gold standard for international drug utilisation monitoring and research.
The WHOCC is located in Oslo at the Norwegian Institute of Public Health and funded by the Norwegian government. The European Commission is the executive of the European Union and promotes its general interest.
NOTE: The WHOCC copyright does not allow use for commercial purposes, unlike any other info from this package. See https://www.whocc.no/copyright_disclaimer/.
diff --git a/docs/reference/as.disk.html b/docs/reference/as.disk.html index bdb69bcb..1af44927 100644 --- a/docs/reference/as.disk.html +++ b/docs/reference/as.disk.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9055 + 1.7.0 diff --git a/docs/reference/as.mic.html b/docs/reference/as.mic.html index 94e09ffd..ef8e44c8 100644 --- a/docs/reference/as.mic.html +++ b/docs/reference/as.mic.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9055 + 1.7.0 diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 6ed38fb0..95122def 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9055 + 1.7.0 diff --git a/docs/reference/as.rsi.html b/docs/reference/as.rsi.html index 1b56d3e3..43b244e3 100644 --- a/docs/reference/as.rsi.html +++ b/docs/reference/as.rsi.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9063 + 1.7.0 diff --git a/docs/reference/atc_online.html b/docs/reference/atc_online.html index 6632c63e..ef8cf6e5 100644 --- a/docs/reference/atc_online.html +++ b/docs/reference/atc_online.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9064 + 1.7.0 diff --git a/docs/reference/availability.html b/docs/reference/availability.html index 45353ce4..0dd1d73c 100644 --- a/docs/reference/availability.html +++ b/docs/reference/availability.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9063 + 1.7.0 diff --git a/docs/reference/bug_drug_combinations.html b/docs/reference/bug_drug_combinations.html index 6425cdeb..c8184853 100644 --- a/docs/reference/bug_drug_combinations.html +++ b/docs/reference/bug_drug_combinations.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9055 + 1.7.0 diff --git a/docs/reference/catalogue_of_life.html b/docs/reference/catalogue_of_life.html index 2e8353f1..1d4a4254 100644 --- a/docs/reference/catalogue_of_life.html +++ b/docs/reference/catalogue_of_life.html @@ -82,7 +82,7 @@ AMR (for R) - 1.6.0.9011 + 1.7.0 @@ -90,14 +90,14 @@