diff --git a/.Rbuildignore b/.Rbuildignore index d20eb122..1d616f7b 100755 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -13,6 +13,7 @@ ^doc$ ^docs$ ^git.sh$ +^gitmerge.sh$ ^index\.md$ ^Meta$ ^packrat/ diff --git a/.gitignore b/.gitignore index 66e5baae..abc6fc9a 100755 --- a/.gitignore +++ b/.gitignore @@ -18,5 +18,6 @@ vignettes/*.R .Rprofile ^CRAN-RELEASE$ git.sh +gitmerge.sh packrat/lib*/ packrat/src/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e24afe1d..e7873c2a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -41,8 +41,8 @@ before_script: cache: key: "$CI_COMMIT_REF_SLUG" paths: - - /usr/local/lib/R/ - - /usr/lib/R/ + - /usr/local/lib/R/* + - /usr/lib/R/* R 3: stage: build @@ -55,6 +55,7 @@ R 3: - R CMD build . --no-build-vignettes --no-manual - PKG_FILE_NAME=$(ls -1t *.tar.gz | head -n 1) - R CMD check "${PKG_FILE_NAME}" --no-build-vignettes --no-manual --as-cran + - Rscript -e 'print(.libPaths())' artifacts: paths: - '*.Rcheck/*' diff --git a/R/globals.R b/R/globals.R index 9c131ca4..aa0d6ae2 100755 --- a/R/globals.R +++ b/R/globals.R @@ -20,6 +20,11 @@ # ==================================================================== # globalVariables(c(".", + "atc", + "certe", + "official", + "trade_name", + "umcg", "..property", "antibiotic", "Antibiotic", diff --git a/R/itis.R b/R/itis.R index 3b72285d..6b3e78fa 100644 --- a/R/itis.R +++ b/R/itis.R @@ -35,7 +35,8 @@ #' @examples #' # Get a note when a species was renamed #' mo_shortname("Chlamydia psittaci") -#' # Note: 'Chlamydia psittaci' (Page, 1968) was renamed 'Chlamydophila psittaci' (Everett et al., 1999) +#' # Note: 'Chlamydia psittaci' (Page, 1968) was renamed +#' # 'Chlamydophila psittaci' (Everett et al., 1999) #' # [1] "C. psittaci" #' #' # Get any property from the entire taxonomic tree for all included species diff --git a/docs/articles/AMR.html b/docs/articles/AMR.html index f585914e..8b3a9a1a 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -179,7 +179,7 @@ -

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 02 January 2019.

+

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 04 January 2019.

Introduction

@@ -195,21 +195,21 @@ -2019-01-02 +2019-01-04 abcd Escherichia coli S S -2019-01-02 +2019-01-04 abcd Escherichia coli S R -2019-01-02 +2019-01-04 efgh Escherichia coli R @@ -223,9 +223,9 @@ Needed R packages

As with many uses in R, we need some additional packages for AMR analysis. The most important one is dplyr, which tremendously improves the way we work with data - it allows for a very natural way of writing syntaxes in R. Another important dependency is ggplot2. This package can be used to create beautiful plots in R.

Our AMR package depends on these packages and even extends their use and functions.

-
library(dplyr)   # the data science package
-library(AMR)     # this package, to simplify and automate AMR analysis
-library(ggplot2) # for appealing plots
+
library(dplyr)   # the data science package
+library(AMR)     # this package, to simplify and automate AMR analysis
+library(ggplot2) # for appealing plots

@@ -236,51 +236,51 @@

Patients

To start with patients, we need a unique list of patients.

-
patients <- unlist(lapply(LETTERS, paste0, 1:10))
+
patients <- unlist(lapply(LETTERS, paste0, 1:10))

The LETTERS object is available in R - it’s a vector with 26 characters: A to Z. The patients object we just created is now a vector of length 260, with values (patient IDs) varying from A1 to Z10. Now we also set the gender of our patients, by putting the ID and the gender in a table:

-
patients_table <- data.frame(patient_id = patients,
-                             gender = c(rep("M", 135),
-                                        rep("F", 125)))
+
patients_table <- data.frame(patient_id = patients,
+                             gender = c(rep("M", 135),
+                                        rep("F", 125)))

The first 135 patient IDs are now male, the other 125 are female.

Dates

Let’s pretend that our data consists of blood culture isolates from 1 January 2010 until 1 January 2018.

-
dates <- seq(as.Date("2010-01-01"), as.Date("2018-01-01"), by = "day")
+
dates <- seq(as.Date("2010-01-01"), as.Date("2018-01-01"), by = "day")

This dates object now contains all days in our date range.

Microorganisms

For this tutorial, we will use four different microorganisms: Escherichia coli, Staphylococcus aureus, Streptococcus pneumoniae, and Klebsiella pneumoniae:

-
bacteria <- c("Escherichia coli", "Staphylococcus aureus",
-              "Streptococcus pneumoniae", "Klebsiella pneumoniae")
+
bacteria <- c("Escherichia coli", "Staphylococcus aureus",
+              "Streptococcus pneumoniae", "Klebsiella pneumoniae")

Other variables

For completeness, we can also add the hospital where the patient was admitted, and we need to define valid antimicrobial results for our randomisation:

-
hospitals <- c("Hospital A", "Hospital B", "Hospital C", "Hospital D")
-ab_interpretations <- c("S", "I", "R")
+
hospitals <- c("Hospital A", "Hospital B", "Hospital C", "Hospital D")
+ab_interpretations <- c("S", "I", "R")

Put everything together

-

Using the sample() function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the prob parameter.

-
data <- data.frame(date = sample(dates, 5000, replace = TRUE),
-                   patient_id = sample(patients, 5000, replace = TRUE),
-                   hospital = sample(hospitals, 5000, replace = TRUE, prob = c(0.30, 0.35, 0.15, 0.20)),
-                   bacteria = sample(bacteria, 5000, replace = TRUE, prob = c(0.50, 0.25, 0.15, 0.10)),
-                   amox = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.60, 0.05, 0.35)),
-                   amcl = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.75, 0.10, 0.15)),
-                   cipr = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.80, 0.00, 0.20)),
-                   gent = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.92, 0.00, 0.08))
-                   )
-

Using the left_join() function from the dplyr package, we can ‘map’ the gender to the patient ID using the patients_table object we created earlier:

-
data <- data %>% left_join(patients_table)
+

Using the sample() function, we can randomly select items from all objects we defined earlier. To let our fake data reflect reality a bit, we will also approximately define the probabilities of bacteria and the antibiotic results with the prob parameter.

+
data <- data.frame(date = sample(dates, 5000, replace = TRUE),
+                   patient_id = sample(patients, 5000, replace = TRUE),
+                   hospital = sample(hospitals, 5000, replace = TRUE, prob = c(0.30, 0.35, 0.15, 0.20)),
+                   bacteria = sample(bacteria, 5000, replace = TRUE, prob = c(0.50, 0.25, 0.15, 0.10)),
+                   amox = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.60, 0.05, 0.35)),
+                   amcl = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.75, 0.10, 0.15)),
+                   cipr = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.80, 0.00, 0.20)),
+                   gent = sample(ab_interpretations, 5000, replace = TRUE, prob = c(0.92, 0.00, 0.08))
+                   )
+

Using the left_join() function from the dplyr package, we can ‘map’ the gender to the patient ID using the patients_table object we created earlier:

+
data <- data %>% left_join(patients_table)

The resulting data set contains 5,000 blood culture isolates. With the head() function we can preview the first 6 values of this data set:

-
head(data)
+
head(data)
@@ -295,10 +295,10 @@ - - - - + + + + @@ -306,60 +306,60 @@ - - + + + - - + - - + + - - - + + + - - - - - + + + + + - - - - - - - - - - - - - - + + + - + + + + + + + + + + + +
date
2017-03-28D2Hospital BStaphylococcus aureus2014-02-20D5Hospital AEscherichia coli R S SM
2010-06-26O32010-06-17N4 Hospital A Escherichia coli S SR SSFM
2015-12-14M82017-08-02A6 Hospital B Staphylococcus aureusSSSRIR S M
2013-02-26C7Hospital BKlebsiella pneumoniaeS2012-11-25K7Hospital AEscherichia coliI S S S M
2012-11-01Y5Hospital DEscherichia coliSSRSF
2015-04-04K10Hospital A2012-06-24G9Hospital B Escherichia coli R SRS R M
2011-05-20T7Hospital CEscherichia coliSSSSF

Now, let’s start the cleaning and the analysis!

@@ -369,7 +369,7 @@

Cleaning the data

Use the frequency table function freq() to look specifically for unique values in any variable. For example, for the gender variable:

-
data %>% freq(gender) # this would be the same: freq(data$gender)
+
data %>% freq(gender) # this would be the same: freq(data$gender)
# Frequency table of `gender` 
 # Class:   factor (numeric)  
 # Levels:  F, M  
@@ -378,67 +378,67 @@
 # 
 #      Item    Count   Percent   Cum. Count   Cum. Percent
 # ---  -----  ------  --------  -----------  -------------
-# 1    M       2,549     51.0%        2,549          51.0%
-# 2    F       2,451     49.0%        5,000         100.0%
+# 1 M 2,636 52.7% 2,636 52.7% +# 2 F 2,364 47.3% 5,000 100.0%

So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values M and F. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.

-

The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

-
data <- data %>%
-  mutate(bacteria = as.mo(bacteria))
-

We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The as.rsi() function ensures reliability and reproducibility in these kind of variables. The mutate_at() will run the as.rsi() function on defined variables:

-
data <- data %>%
-  mutate_at(vars(amox:gent), as.rsi)
+

The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

+
data <- data %>%
+  mutate(bacteria = as.mo(bacteria))
+

We also want to transform the antibiotics, because in real life data we don’t know if they are really clean. The as.rsi() function ensures reliability and reproducibility in these kinds of variables. The mutate_at() will run the as.rsi() function on defined variables:

+
data <- data %>%
+  mutate_at(vars(amox:gent), as.rsi)

Finally, we will apply EUCAST rules on our antimicrobial results. In Europe, most medical microbiological laboratories already apply these rules. Our package features their latest insights on intrinsic resistance and exceptional phenotypes. Moreover, the eucast_rules() function can also apply additional rules, like forcing ampicillin = R when amoxicillin/clavulanic acid = R.

Because the amoxicillin (column amox) and amoxicillin/clavulanic acid (column amcl) in our data were generated randomly, some rows will undoubtedly contain amox = S and amcl = R, which is technically impossible. The eucast_rules() fixes this:

-
data <- eucast_rules(data, col_mo = "bacteria")
-# 
-# Rules by the European Committee on Antimicrobial Susceptibility Testing (EUCAST)
-# 
-# EUCAST Clinical Breakpoints (v8.1, 2018)
-# Enterobacteriales (Order) (no changes)
-# Staphylococcus (no changes)
-# Enterococcus (no changes)
-# Streptococcus groups A, B, C, G (no changes)
-# Streptococcus pneumoniae (364 changes)
-# Viridans group streptococci (no changes)
-# Haemophilus influenzae (no changes)
-# Moraxella catarrhalis (no changes)
-# Anaerobic Gram positives (no changes)
-# Anaerobic Gram negatives (no changes)
-# Pasteurella multocida (no changes)
-# Campylobacter jejuni and C. coli (no changes)
-# Aerococcus sanguinicola and A. urinae (no changes)
-# Kingella kingae (no changes)
-# 
-# EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
-# Table 1:  Intrinsic resistance in Enterobacteriaceae (303 changes)
-# Table 2:  Intrinsic resistance in non-fermentative Gram-negative bacteria (no changes)
-# Table 3:  Intrinsic resistance in other Gram-negative bacteria (no changes)
-# Table 4:  Intrinsic resistance in Gram-positive bacteria (641 changes)
-# Table 8:  Interpretive rules for B-lactam agents and Gram-positive cocci (no changes)
-# Table 9:  Interpretive rules for B-lactam agents and Gram-negative rods (no changes)
-# Table 10: Interpretive rules for B-lactam agents and other Gram-negative bacteria (no changes)
-# Table 11: Interpretive rules for macrolides, lincosamides, and streptogramins (no changes)
-# Table 12: Interpretive rules for aminoglycosides (no changes)
-# Table 13: Interpretive rules for quinolones (no changes)
-# 
-# Other rules
-# Non-EUCAST: ampicillin = R where amoxicillin/clav acid = R (403 changes)
-# Non-EUCAST: piperacillin = R where piperacillin/tazobactam = R (no changes)
-# Non-EUCAST: trimethoprim = R where trimethoprim/sulfa = R (no changes)
-# Non-EUCAST: amoxicillin/clav acid = S where ampicillin = S (231 changes)
-# Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no changes)
-# Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no changes)
-# 
-# => EUCAST rules affected 4,588 out of 5,000 rows -> changed 1,942 test results.
+
data <- eucast_rules(data, col_mo = "bacteria")
+# 
+# Rules by the European Committee on Antimicrobial Susceptibility Testing (EUCAST)
+# 
+# EUCAST Clinical Breakpoints (v8.1, 2018)
+# Enterobacteriales (Order) (no changes)
+# Staphylococcus (no changes)
+# Enterococcus (no changes)
+# Streptococcus groups A, B, C, G (no changes)
+# Streptococcus pneumoniae (no changes)
+# Viridans group streptococci (no changes)
+# Haemophilus influenzae (no changes)
+# Moraxella catarrhalis (no changes)
+# Anaerobic Gram positives (no changes)
+# Anaerobic Gram negatives (no changes)
+# Pasteurella multocida (no changes)
+# Campylobacter jejuni and C. coli (no changes)
+# Aerococcus sanguinicola and A. urinae (no changes)
+# Kingella kingae (no changes)
+# 
+# EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
+# Table 1:  Intrinsic resistance in Enterobacteriaceae (332 changes)
+# Table 2:  Intrinsic resistance in non-fermentative Gram-negative bacteria (no changes)
+# Table 3:  Intrinsic resistance in other Gram-negative bacteria (no changes)
+# Table 4:  Intrinsic resistance in Gram-positive bacteria (658 changes)
+# Table 8:  Interpretive rules for B-lactam agents and Gram-positive cocci (no changes)
+# Table 9:  Interpretive rules for B-lactam agents and Gram-negative rods (no changes)
+# Table 10: Interpretive rules for B-lactam agents and other Gram-negative bacteria (no changes)
+# Table 11: Interpretive rules for macrolides, lincosamides, and streptogramins (no changes)
+# Table 12: Interpretive rules for aminoglycosides (no changes)
+# Table 13: Interpretive rules for quinolones (no changes)
+# 
+# Other rules
+# Non-EUCAST: ampicillin = R where amoxicillin/clav acid = R (no changes)
+# Non-EUCAST: piperacillin = R where piperacillin/tazobactam = R (no changes)
+# Non-EUCAST: trimethoprim = R where trimethoprim/sulfa = R (no changes)
+# Non-EUCAST: amoxicillin/clav acid = S where ampicillin = S (no changes)
+# Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no changes)
+# Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no changes)
+# 
+# => EUCAST rules affected 1,828 out of 5,000 rows -> changed 990 test results.

Adding new variables

Now that we have the microbial ID, we can add some taxonomic properties:

-
data <- data %>% 
-  mutate(gramstain = mo_gramstain(bacteria),
-         genus = mo_genus(bacteria),
-         species = mo_species(bacteria))
+
data <- data %>% 
+  mutate(gramstain = mo_gramstain(bacteria),
+         genus = mo_genus(bacteria),
+         species = mo_species(bacteria))

First isolates

@@ -449,18 +449,18 @@

(…) When preparing a cumulative antibiogram to guide clinical decisions about empirical antimicrobial therapy of initial infections, only the first isolate of a given species per patient, per analysis period (eg, one year) should be included, irrespective of body site, antimicrobial susceptibility profile, or other phenotypical characteristics (eg, biotype). The first isolate is easily identified, and cumulative antimicrobial susceptibility test data prepared using the first isolate are generally comparable to cumulative antimicrobial susceptibility test data calculated by other methods, providing duplicate isolates are excluded.
M39-A4 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition. CLSI, 2014. Chapter 6.4

This AMR package includes this methodology with the first_isolate() function. It adopts the episode of a year (can be changed by user) and it starts counting days after every selected isolate. This new variable can easily be added to our data:

- -

So only 58.5% is suitable for resistance analysis! We can now filter on is with the filter() function, also from the dplyr package:

- +
data <- data %>% 
+  mutate(first = first_isolate(.))
+# NOTE: Using column `bacteria` as input for `col_mo`.
+# NOTE: Using column `date` as input for `col_date`.
+# NOTE: Using column `patient_id` as input for `col_patient_id`.
+# => Found 2,950 first isolates (59.0% of total)
+

So only 59% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

+
data_1st <- data %>% 
+  filter(first == TRUE)

For future use, the above two syntaxes can be shortened with the filter_first_isolate() function:

- +
data_1st <- data %>% 
+  filter_first_isolate()

@@ -481,52 +481,63 @@ 1 -2010-07-09 -W3 +2010-05-26 +D5 B_ESCHR_COL -S -S +R +R R S TRUE 2 -2010-07-22 -W3 +2010-06-05 +D5 B_ESCHR_COL -R -R +S +S S S FALSE 3 -2011-02-02 -W3 +2010-09-13 +D5 B_ESCHR_COL S S -R -R +S +S FALSE 4 -2012-01-18 -W3 +2010-12-19 +D5 B_ESCHR_COL +S R S S +FALSE + + +5 +2012-03-01 +D5 +B_ESCHR_COL +S +R +S S TRUE - -5 -2012-07-21 -W3 + +6 +2012-04-14 +D5 B_ESCHR_COL S S @@ -534,21 +545,10 @@ S FALSE - -6 -2013-04-19 -W3 -B_ESCHR_COL -R -S -S -S -TRUE - 7 -2013-09-01 -W3 +2013-01-04 +D5 B_ESCHR_COL S S @@ -558,53 +558,56 @@ 8 -2013-12-07 -W3 +2013-01-22 +D5 B_ESCHR_COL -S -S -S +I +I +R S FALSE 9 -2013-12-15 -W3 +2013-03-20 +D5 B_ESCHR_COL -R S S S -FALSE +S +TRUE 10 -2014-02-03 -W3 +2013-09-13 +D5 B_ESCHR_COL S S -R S +R FALSE

Only 3 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.

If a column exists with a name like ‘key(…)ab’ the first_isolate() function will automatically use it and determine the first weighted isolates. Mind the NOTEs in below output:

- +
data <- data %>% 
+  mutate(keyab = key_antibiotics(.)) %>% 
+  mutate(first_weighted = first_isolate(.))
+# NOTE: Using column `bacteria` as input for `col_mo`.
+#   amox   amcl   cipr   gent 
+# "amox" "amcl" "cipr" "gent" 
+# [1] "amox" "amcl" "cipr"
+#   amox   amcl   cipr   gent 
+# "amox" "amcl" "cipr" "gent"
+# NOTE: Using column `bacteria` as input for `col_mo`.
+# NOTE: Using column `date` as input for `col_date`.
+# NOTE: Using column `patient_id` as input for `col_patient_id`.
+# NOTE: Using column `keyab` as input for `col_keyantibiotics`. Use col_keyantibiotics = FALSE to prevent this.
+# [Criterion] Inclusion based on key antibiotics, ignoring I.
+# => Found 4,430 first weighted isolates (88.6% of total)
@@ -621,11 +624,11 @@ - - + + - - + + @@ -633,11 +636,11 @@ - - + + - - + + @@ -645,115 +648,114 @@ - - + + - - + + + - - - + + + - - + - - + + + - - + - - + + - - + + - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - + + - - + + - - + + - +
isolate
12010-07-09W32010-05-26D5 B_ESCHR_COLSSRR R S TRUE
22010-07-22W32010-06-05D5 B_ESCHR_COLRRSS S S FALSE
32011-02-02W32010-09-13D5 B_ESCHR_COL S SRRSSFALSE FALSETRUE
42012-01-18W32010-12-19D5 B_ESCHR_COLS R S SSTRUEFALSE TRUE
52012-07-21W32012-03-01D5 B_ESCHR_COL SR S SSFALSETRUE TRUE
62013-04-19W32012-04-14D5 B_ESCHR_COLR S S STRUESFALSE TRUE
72013-09-01W32013-01-04D5 B_ESCHR_COL S S S S FALSEFALSE
82013-01-22D5B_ESCHR_COLIIRSFALSE TRUE
82013-12-07W3B_ESCHR_COLSSSSFALSEFALSE
92013-12-15W32013-03-20D5 B_ESCHR_COLR S S SFALSESTRUE TRUE
102014-02-03W32013-09-13D5 B_ESCHR_COL S SR SR FALSE TRUE
-

Instead of 3, now 9 isolates are flagged. In total, 87.8% of all isolates are marked ‘first weighted’ - 146.3% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

+

Instead of 3, now 8 isolates are flagged. In total, 88.6% of all isolates are marked ‘first weighted’ - 50.2% more than when using the CLSI guideline (4,430 instead of 2,950 isolates). In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

As with filter_first_isolate(), there’s a shortcut for this new algorithm too:

- -

So we end up with 4,390 isolates for analysis.

+
data_1st <- data %>% 
+  filter_first_weighted_isolate()
+

So we end up with 4,430 isolates for analysis.

We can remove unneeded columns:

- +
data_1st <- data_1st %>% 
+  select(-c(first, keyab))

Now our data looks like:

-
head(data_1st)
+
head(data_1st)
- @@ -770,78 +772,73 @@ - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + + - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - + + + - + @@ -850,19 +847,18 @@ - - - - - - - - + + + + + + + - - - + + + @@ -875,12 +871,12 @@ Analysing the data

You might want to start by getting an idea of how the data is distributed. It’s an important start, because it also decides how you will continue your analysis. ## Dispersion of species To just get an idea how the species are distributed, create a frequency table with our freq() function. We created the genus and species column earlier based on the microbial ID. With paste(), we can concatenate them together.

The freq() function can be used like the base R language was intended:

-
freq(paste(data_1st$genus, data_1st$species))
+
freq(paste(data_1st$genus, data_1st$species))

Or it can be used the dplyr way, which is easier to read:

-
data_1st %>% freq(genus, species)
+
data_1st %>% freq(genus, species)

Frequency table of genus and species
Columns: 2
-Length: 4,390 (of which NA: 0 = 0.00%)
+Length: 4,430 (of which NA: 0 = 0.00%)
Unique: 4

Shortest: 16
Longest: 24

@@ -897,33 +893,33 @@ Longest: 24

- - - - + + + + - - - - + + + + - - + + - + - - - + + + @@ -932,12 +928,12 @@ Longest: 24

Resistance percentages

The functions portion_R, portion_RI, portion_I, portion_IS and portion_S can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own:

- -

Or can be used in conjuction with group_by() and summarise(), both from the dplyr package:

-
data_1st %>% 
-  group_by(hospital) %>% 
-  summarise(amoxicillin = portion_IR(amox))
+
data_1st %>% portion_IR(amox)
+# [1] 0.46614
+

Or they can be used in conjunction with group_by() and summarise(), both from the dplyr package:

+
data_1st %>% 
+  group_by(hospital) %>% 
+  summarise(amoxicillin = portion_IR(amox))
date patient_id hospital
12017-03-28D2Hospital BB_STPHY_AUR2014-02-20D5Hospital AB_ESCHR_COL R S S S MGram negativeEscherichiacoliTRUE
2010-06-17N4Hospital AB_ESCHR_COLSSRSMGram negativeEscherichiacoliTRUE
2017-08-02A6Hospital BB_STPHY_AURRIRSM Gram positive Staphylococcus aureus TRUE
22010-06-26O32012-11-25K7 Hospital A B_ESCHR_COLSSSSFGram negativeEscherichiacoliTRUE
42013-02-26C7Hospital BB_KLBSL_PNERI S S S M Gram negativeKlebsiellapneumoniaeTRUE
52012-11-01Y5Hospital DB_ESCHR_COLSSRSFGram negative Escherichia coli TRUE
62015-04-04K10Hospital A2012-06-24G9Hospital B B_ESCHR_COL R SRS R M Gram negativeTRUE
72012-02-22Z6Hospital BB_STPHY_AURRRR2011-05-20T7Hospital CB_ESCHR_COLSSS S FGram positiveStaphylococcusaureusGram negativeEscherichiacoli TRUE
1 Escherichia coli2,19650.0%2,19650.0%2,20649.8%2,20649.8%
2 Staphylococcus aureus1,14826.2%3,34476.2%1,09324.7%3,29974.5%
3 Streptococcus pneumoniae62214.2%66715.1% 3,96690.3%89.5%
4 Klebsiella pneumoniae4249.7%4,39046410.5%4,430 100.0%
@@ -946,27 +942,27 @@ Longest: 24

- + - + - + - +
hospital
Hospital A0.55740180.4605873
Hospital B0.54687500.4624277
Hospital C0.52815830.4853801
Hospital D0.54066440.4659218
-

Of course it would be very convenient to know the number of isolates responsible for the percentages. For that purpose the n_rsi() can be used, which works exactly like n_distinct() from the dplyr package. It counts all isolates available for every group (i.e. values S, I or R):

-
data_1st %>% 
-  group_by(hospital) %>% 
-  summarise(amoxicillin = portion_IR(amox),
-            available = n_rsi(amox))
+

Of course it would be very convenient to know the number of isolates responsible for the percentages. For that purpose the n_rsi() can be used, which works exactly like n_distinct() from the dplyr package. It counts all isolates available for every group (i.e. values S, I or R):

+
data_1st %>% 
+  group_by(hospital) %>% 
+  summarise(amoxicillin = portion_IR(amox),
+            available = n_rsi(amox))
@@ -976,32 +972,32 @@ Longest: 24

- - + + - - + + - - + + - - + +
hospital
Hospital A0.557401813240.46058731294
Hospital B0.546875015360.46242771557
Hospital C0.52815836570.4853801684
Hospital D0.54066448730.4659218895

These functions can also be used to get the portion of multiple antibiotics, to calculate co-resistance very easily:

-
data_1st %>% 
-  group_by(genus) %>% 
-  summarise(amoxicillin = portion_S(amcl),
-            gentamicin = portion_S(gent),
-            "amox + gent" = portion_S(amcl, gent))
+
data_1st %>% 
+  group_by(genus) %>% 
+  summarise(amoxicillin = portion_S(amcl),
+            gentamicin = portion_S(gent),
+            "amox + gent" = portion_S(amcl, gent))
@@ -1012,94 +1008,94 @@ Longest: 24

- - - + + + - - - + + + - - - + + + - + - +
genus
Escherichia0.79234970.91302370.98178510.73073440.91205800.9782412
Klebsiella0.76415090.91273580.98113210.75215520.91810340.9806034
Staphylococcus0.80400700.92247390.98344950.74199450.92680700.9798719
Streptococcus0.56430870.7256372 0.00000000.56430870.7256372

To make a transition to the next part, let’s see how this difference could be plotted:

-
data_1st %>% 
-  group_by(genus) %>% 
-  summarise("1. Amoxicillin" = portion_S(amcl),
-            "2. Gentamicin" = portion_S(gent),
-            "3. Amox + gent" = portion_S(amcl, gent)) %>% 
-  tidyr::gather("Antibiotic", "S", -genus) %>%
-  ggplot(aes(x = genus,
-             y = S,
-             fill = Antibiotic)) +
-  geom_col(position = "dodge2")
+
data_1st %>% 
+  group_by(genus) %>% 
+  summarise("1. Amoxicillin" = portion_S(amcl),
+            "2. Gentamicin" = portion_S(gent),
+            "3. Amox + gent" = portion_S(amcl, gent)) %>% 
+  tidyr::gather("Antibiotic", "S", -genus) %>%
+  ggplot(aes(x = genus,
+             y = S,
+             fill = Antibiotic)) +
+  geom_col(position = "dodge2")

Plots

To show results in plots, most R users would nowadays use the ggplot2 package. This package lets you create plots in layers. You can read more about it on their website. A quick example would look like these syntaxes:

-
ggplot(data = a_data_set,
-       mapping = aes(x = year,
-                     y = value)) +
-  geom_col() +
-  labs(title = "A title",
-       subtitle = "A subtitle",
-       x = "My X axis",
-       y = "My Y axis")
-
-ggplot(a_data_set,
-       aes(year, value) +
-  geom_bar()
+
ggplot(data = a_data_set,
+       mapping = aes(x = year,
+                     y = value)) +
+  geom_col() +
+  labs(title = "A title",
+       subtitle = "A subtitle",
+       x = "My X axis",
+       y = "My Y axis")
+
+ggplot(a_data_set,
+       aes(year, value) +
+  geom_bar()

The AMR package contains functions to extend this ggplot2 package, for example geom_rsi(). It automatically transforms data with count_df() or portion_df() and shows results in stacked bars. Its simplest and shortest example:

-
ggplot(data_1st) +
-  geom_rsi(translate_ab = FALSE)
+
ggplot(data_1st) +
+  geom_rsi(translate_ab = FALSE)

Omit the translate_ab = FALSE to have the antibiotic codes (amox, amcl, cipr, gent) translated to official WHO names (amoxicillin, amoxicillin and betalactamase inhibitor, ciprofloxacin, gentamicin).

If we group on e.g. the genus column and add some additional functions from our package, we can create this:

- +
# group the data on `genus`
+ggplot(data_1st %>% group_by(genus)) + 
+  # create bars with genus on x axis
+  # it looks for variables with class `rsi`,
+  # of which we have 4 (earlier created with `as.rsi`)
+  geom_rsi(x = "genus") + 
+  # split plots on antibiotic
+  facet_rsi(facet = "Antibiotic") +
+  # make R red, I yellow and S green
+  scale_rsi_colours() +
+  # show percentages on y axis
+  scale_y_percent(breaks = 0:4 * 25) +
+  # turn 90 degrees, make it bars instead of columns
+  coord_flip() +
+  # add labels
+  labs(title = "Resistance per genus and antibiotic", 
+       subtitle = "(this is fake data)") +
+  # and print genus in italic to follow our convention
+  # (is now y axis because we turned the plot)
+  theme(axis.text.y = element_text(face = "italic"))

To simplify this, we also created the ggplot_rsi() function, which combines almost all above functions:

- +
data_1st %>% 
+  group_by(genus) %>%
+  ggplot_rsi(x = "genus",
+             facet = "Antibiotic",
+             breaks = 0:4 * 25,
+             datalabels = FALSE) +
+  coord_flip()

@@ -1127,26 +1123,26 @@ Longest: 24

We can transform the data and apply the test in only a couple of lines:

-
septic_patients %>%
-  filter(hospital_id %in% c("A", "D")) %>% # filter on only hospitals A and D
-  select(hospital_id, fosf) %>%            # select the hospitals and fosfomycin
-  group_by(hospital_id) %>%                # group on the hospitals
-  count_df(combine_IR = TRUE) %>%          # count all isolates per group (hospital_id)
-  tidyr::spread(hospital_id, Value) %>%    # transform output so A and D are columns
-  select(A, D) %>%                         # and select these only
-  as.matrix() %>%                          # transform to good old matrix for fisher.test()
-  fisher.test()                            # do Fisher's Exact Test
-# 
-#   Fisher's Exact Test for Count Data
-# 
-# data:  .
-# p-value = 0.03104
-# alternative hypothesis: true odds ratio is not equal to 1
-# 95 percent confidence interval:
-#  1.054283 4.735995
-# sample estimates:
-# odds ratio 
-#   2.228006
+
septic_patients %>%
+  filter(hospital_id %in% c("A", "D")) %>% # filter on only hospitals A and D
+  select(hospital_id, fosf) %>%            # select the hospitals and fosfomycin
+  group_by(hospital_id) %>%                # group on the hospitals
+  count_df(combine_IR = TRUE) %>%          # count all isolates per group (hospital_id)
+  tidyr::spread(hospital_id, Value) %>%    # transform output so A and D are columns
+  select(A, D) %>%                         # and select these only
+  as.matrix() %>%                          # transform to good old matrix for fisher.test()
+  fisher.test()                            # do Fisher's Exact Test
+# 
+#   Fisher's Exact Test for Count Data
+# 
+# data:  .
+# p-value = 0.03104
+# alternative hypothesis: true odds ratio is not equal to 1
+# 95 percent confidence interval:
+#  1.054283 4.735995
+# sample estimates:
+# odds ratio 
+#   2.228006

As can be seen, the p value is 0.03, which means that the fosfomycin resistances found in hospital A and D are really different.

diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png index eb824d6f..6d329cc5 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png index 11267f63..6a313374 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png index 7cb52457..5ba457e2 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png index 1680a19e..0b8e3508 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ diff --git a/docs/articles/freq.html b/docs/articles/freq.html index 89d7a9cb..5fbbb15e 100644 --- a/docs/articles/freq.html +++ b/docs/articles/freq.html @@ -188,7 +188,7 @@

Frequencies of one variable

To only show and quickly review the content of one variable, you can just select this variable in various ways. Let’s say we want to get the frequencies of the gender variable of the septic_patients dataset:

-
septic_patients %>% freq(gender)
+
septic_patients %>% freq(gender)

Frequency table of gender

@@ -225,21 +225,21 @@ Frequencies of more than one variable

Multiple variables will be pasted into one variable to review individual cases, keeping a univariate frequency table.

For illustration, we could add some more variables to the septic_patients dataset to learn about bacterial properties:

-
my_patients <- septic_patients %>% left_join_microorganisms()
-# Joining, by = "mo"
+
my_patients <- septic_patients %>% left_join_microorganisms()
+# Joining, by = "mo"

Now all variables of the microorganisms dataset have been joined to the septic_patients dataset. The microorganisms dataset consists of the following variables:

-
colnames(microorganisms)
-#  [1] "mo"         "tsn"        "genus"      "species"    "subspecies"
-#  [6] "fullname"   "family"     "order"      "class"      "phylum"    
-# [11] "subkingdom" "kingdom"    "gramstain"  "prevalence" "ref"
+
colnames(microorganisms)
+#  [1] "mo"         "tsn"        "genus"      "species"    "subspecies"
+#  [6] "fullname"   "family"     "order"      "class"      "phylum"    
+# [11] "subkingdom" "kingdom"    "gramstain"  "prevalence" "ref"

If we compare the dimensions between the old and new dataset, we can see that these 14 variables were added:

-
dim(septic_patients)
-# [1] 2000   49
-dim(my_patients)
-# [1] 2000   63
+
dim(septic_patients)
+# [1] 2000   49
+dim(my_patients)
+# [1] 2000   63

So now the genus and species variables are available. A frequency table of these combined variables can be created like this:

-
my_patients %>%
-  freq(genus, species, nmax = 15)
+
my_patients %>%
+  freq(genus, species, nmax = 15)

Frequency table of genus and species

@@ -380,10 +380,10 @@ Frequencies of numeric values

Frequency tables can be created of any input.

In case of numeric values (like integers, doubles, etc.), additional descriptive statistics will be calculated and shown in the header:

-
# # get age distribution of unique patients
-septic_patients %>% 
-  distinct(patient_id, .keep_all = TRUE) %>% 
-  freq(age, nmax = 5, header = TRUE)
+
# # get age distribution of unique patients
+septic_patients %>% 
+  distinct(patient_id, .keep_all = TRUE) %>% 
+  freq(age, nmax = 5, header = TRUE)

Frequency table of age
Class: numeric
Length: 981 (of which NA: 0 = 0.00%)
@@ -461,8 +461,8 @@ Outliers: 15 (unique count: 12)

Frequencies of factors

To sort frequencies of factors on factor level instead of item count, use the sort.count parameter.

sort.count is TRUE by default. Compare this default behaviour…

-
septic_patients %>%
-  freq(hospital_id)
+
septic_patients %>%
+  freq(hospital_id)

Frequency table of hospital_id

@@ -509,8 +509,8 @@ Outliers: 15 (unique count: 12)

… with this, where items are now sorted on factor level:

-
septic_patients %>%
-  freq(hospital_id, sort.count = FALSE)
+
septic_patients %>%
+  freq(hospital_id, sort.count = FALSE)

Frequency table of hospital_id

@@ -557,8 +557,8 @@ Outliers: 15 (unique count: 12)

All classes will be printed in the header (the header parameter defaults to FALSE when using markdown, like in this document). Variables with the new rsi class of this AMR package are actually ordered factors and have three classes (look at Class in the header):

-
septic_patients %>%
-  freq(amox, header = TRUE)
+
septic_patients %>%
+  freq(amox, header = TRUE)

Frequency table of amox
Class: factor > ordered > rsi (numeric)
Levels: S < I < R
@@ -606,8 +606,8 @@ Unique: 3

Frequencies of dates

Frequencies of dates will show the oldest and newest date in the data, and the amount of days between them:

-
septic_patients %>%
-  freq(date, nmax = 5, header = TRUE)
+
septic_patients %>%
+  freq(date, nmax = 5, header = TRUE)

Frequency table of date
Class: Date (numeric)
Length: 2,000 (of which NA: 0 = 0.00%)
@@ -673,11 +673,11 @@ Median: 31 July 2009 (47.39%)

Assigning a frequency table to an object

A frequency table is actually a regular data.frame, with the exception that it contains an additional class.

-
my_df <- septic_patients %>% freq(age)
-class(my_df)
+
my_df <- septic_patients %>% freq(age)
+class(my_df)

[1] “frequency_tbl” “data.frame”

Because of this additional class, a frequency table prints like the examples above. But the object itself contains the complete table without a row limitation:

-
dim(my_df)
+
dim(my_df)

[1] 74 5

@@ -688,8 +688,8 @@ Median: 31 July 2009 (47.39%)

Parameter na.rm

With the na.rm parameter, you can include NA values in the frequency table. It defaults to TRUE, which leaves NA values out of the table, although their count is always shown in the header:

-
septic_patients %>%
-  freq(amox, na.rm = FALSE)
+
septic_patients %>%
+  freq(amox, na.rm = FALSE)

Frequency table of amox

@@ -741,8 +741,8 @@ Median: 31 July 2009 (47.39%)

Parameter row.names

The default frequency table shows row indices. To remove them, use row.names = FALSE:

-
septic_patients %>%
-  freq(hospital_id, row.names = FALSE)
+
septic_patients %>%
+  freq(hospital_id, row.names = FALSE)

Frequency table of hospital_id

@@ -789,8 +789,8 @@ Median: 31 July 2009 (47.39%)

Parameter markdown

The markdown parameter is TRUE by default in non-interactive sessions, like in reports created with R Markdown. This will always print all rows, unless nmax is set.

-
septic_patients %>%
-  freq(hospital_id, markdown = TRUE)
+
septic_patients %>%
+  freq(hospital_id, markdown = TRUE)

Frequency table of hospital_id

diff --git a/docs/index.html b/docs/index.html index ebd9129a..f787cd81 100644 --- a/docs/index.html +++ b/docs/index.html @@ -215,7 +215,7 @@

Get this package

This package is available on the official R network. Install this package in R with:

-
install.packages("AMR")
+
install.packages("AMR")

It will be downloaded and installed automatically.

@@ -231,17 +231,17 @@

All (sub)species from the taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available. This allows users to use authoritative taxonomic information for their data analysis on any microorganism, not only human pathogens. It also helps to quickly determine the Gram stain of bacteria, since all bacteria are classified into subkingdom Negibacteria or Posibacteria. ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists.

The AMR package basically does four important things:

    -
  1. -

    It cleanses existing data, by transforming it to reproducible and profound classes, making the most efficient use of R. These functions all use artificial intelligence to guess results that you would expect:

    +
  2. It cleanses existing data, by transforming it to reproducible and profound classes, making the most efficient use of R. These functions all use artificial intelligence to guess results that you would expect:
  3. +
- -
  • -

    It enhances existing data and adds new data from data sets included in this package.

    +
      +
    1. It enhances existing data and adds new data from data sets included in this package.
    2. +
    • Use eucast_rules() to apply EUCAST expert rules to isolates.
    • Use first_isolate() to identify the first isolates of every patient using guidelines from the CLSI (Clinical and Laboratory Standards Institute). @@ -253,9 +253,9 @@
    • The data set microorganisms contains the complete taxonomic tree of more than 18,000 microorganisms (bacteria, fungi/yeasts and protozoa). Furthermore, the colloquial name and Gram stain are available, which enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like mo_genus(), mo_family(), mo_gramstain() or even mo_phylum(). As they use as.mo() internally, they also use artificial intelligence. For example, mo_genus("MRSA") and mo_genus("S. aureus") will both return "Staphylococcus". They also come with support for German, Dutch, Spanish, Italian, French and Portuguese. These functions can be used to add new variables to your data.
    • The data set antibiotics contains the ATC code, LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains a total of 298 trade names. Use functions like ab_name() and ab_tradenames() to look up values. The ab_* functions use as.atc() internally so they support AI to guess your expected result. For example, ab_name("Fluclox"), ab_name("Floxapen") and ab_name("J01CF05") will all return "Flucloxacillin". These functions can again be used to add new variables to your data.
    -
  • -
  • -

    It analyses the data with convenient functions that use well-known methods.

    +
      +
    1. It analyses the data with convenient functions that use well-known methods.
    2. +
    -
  • -
  • -

    It teaches the user how to use all the above actions.

    +
      +
    1. It teaches the user how to use all the above actions.
    2. +
    • The package contains extensive help pages with many examples.
    • It also contains an example data set called septic_patients. This data set contains: @@ -276,8 +276,6 @@
  • - -

    diff --git a/docs/news/index.html b/docs/news/index.html index 2be082b4..f9eb0469 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -222,29 +222,18 @@
    @@ -252,8 +241,7 @@ Changed

    @@ -580,21 +481,15 @@ New

    - -
  • Determining bacterial ID: -
      +
    • Determining bacterial ID:
    • New functions as.bactid and is.bactid to transform/ look up microbial ID’s.
    • The existing function guess_bactid is now an alias of as.bactid
    • New Becker classification for Staphylococcus to categorise them into Coagulase Negative Staphylococci (CoNS) and Coagulase Positive Staphylococci (CoPS)
    • New Lancefield classification for Streptococcus to categorise them into Lancefield groups
    • -
    -
  • For convenience, new descriptive statistical functions kurtosis and skewness that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
  • Function g.test to perform the Χ2 distributed G-test, the use of which is the same as chisq.test
  • -
  • -Function ratio to transform a vector of values to a preset ratio - -
  • Support for Addins menu in RStudio to quickly insert %in% or %like% (and give them keyboard shortcuts), or to view the datasets that come with this package
  • Function p.symbol to transform p values to their related symbols: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  • Functions clipboard_import and clipboard_export as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the clipr package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server)
  • -
  • New for frequency tables (function freq): -
      +
    • New for frequency tables (function freq):
    • A vignette to explain its usage
    • Support for rsi (antimicrobial resistance) to use as input
    • Support for table to use as input: freq(table(x, y)) @@ -645,8 +530,6 @@
    • Header of frequency tables now also shows Mean Absolute Deviation (MAD) and Interquartile Range (IQR)
    • Possibility to globally set the default for the amount of items to print, with options(max.print.freq = n) where n is your preset value
    -
  • -

    @@ -668,27 +551,21 @@
  • Small improvements to the microorganisms dataset (especially for Salmonella) and the column bactid now has the new class "bactid"
  • -
  • Combined MIC/RSI values will now be coerced by the rsi and mic functions: - -
  • Now possible to coerce MIC values with a space between operator and value, i.e. as.mic("<= 0.002") now works
  • Classes rsi and mic do not add the attribute package.version anymore
  • Added "groups" option for atc_property(..., property). It will return a vector of the ATC hierarchy as defined by the WHO. The new function atc_groups is a convenient wrapper around this.
  • Built-in host check for atc_property as it requires the host set by url to be responsive
  • Improved first_isolate algorithm to exclude isolates where bacteria ID or genus is unavailable
  • Fix for warning hybrid evaluation forced for row_number (924b62) from the dplyr package v0.7.5 and above
  • -
  • Support for empty values and for 1 or 2 columns as input for guess_bactid (now called as.bactid) -
      +
    • Support for empty values and for 1 or 2 columns as input for guess_bactid (now called as.bactid)
    • So yourdata %>% select(genus, species) %>% as.bactid() now also works
    • -
    -
  • Other small fixes
  • @@ -696,14 +573,11 @@

    Other

    @@ -718,19 +592,16 @@
    + + + + + + + + + + + + + +
    tbl

    a data.frame

    col

    a character to look for

    verbose

    a logical to indicate whether additional info should be printed

    + +

    Read more on our website!

    + + +


    +On our website https://msberends.gitlab.io/AMR you can find a comprehensive tutorial about how to conduct AMR analysis and find the complete documentation of all functions, which reads a lot easier than in R.

    + + +
    + + + + + + + + + + + + + + diff --git a/docs/reference/itis.html b/docs/reference/itis.html index 3e2c0333..abe4b60b 100644 --- a/docs/reference/itis.html +++ b/docs/reference/itis.html @@ -240,7 +240,8 @@ On our website https://msberends.gitla
    # NOT RUN {
     # Get a note when a species was renamed
     mo_shortname("Chlamydia psittaci")
    -# Note: 'Chlamydia psittaci' (Page, 1968) was renamed 'Chlamydophila psittaci' (Everett et al., 1999)
    +# Note: 'Chlamydia psittaci' (Page, 1968) was renamed 
    +#       'Chlamydophila psittaci' (Everett et al., 1999)
     # [1] "C. psittaci"
     
     # Get any property from the entire taxonomic tree for all included species
    diff --git a/docs/reference/key_antibiotics.html b/docs/reference/key_antibiotics.html
    index 606c08c1..a65d91d2 100644
    --- a/docs/reference/key_antibiotics.html
    +++ b/docs/reference/key_antibiotics.html
    @@ -220,13 +220,17 @@
         
         
     
    -    
    key_antibiotics(tbl, col_mo = NULL, universal_1 = "amox",
    -  universal_2 = "amcl", universal_3 = "cfur", universal_4 = "pita",
    -  universal_5 = "cipr", universal_6 = "trsu", GramPos_1 = "vanc",
    -  GramPos_2 = "teic", GramPos_3 = "tetr", GramPos_4 = "eryt",
    -  GramPos_5 = "oxac", GramPos_6 = "rifa", GramNeg_1 = "gent",
    -  GramNeg_2 = "tobr", GramNeg_3 = "coli", GramNeg_4 = "cfot",
    -  GramNeg_5 = "cfta", GramNeg_6 = "mero", warnings = TRUE, ...)
    +    
    key_antibiotics(tbl, col_mo = NULL, universal_1 = guess_ab(tbl,
    +  "amox"), universal_2 = guess_ab(tbl, "amcl"),
    +  universal_3 = guess_ab(tbl, "cfur"), universal_4 = guess_ab(tbl,
    +  "pita"), universal_5 = guess_ab(tbl, "cipr"),
    +  universal_6 = guess_ab(tbl, "trsu"), GramPos_1 = guess_ab(tbl,
    +  "vanc"), GramPos_2 = guess_ab(tbl, "teic"), GramPos_3 = guess_ab(tbl,
    +  "tetr"), GramPos_4 = guess_ab(tbl, "eryt"), GramPos_5 = guess_ab(tbl,
    +  "oxac"), GramPos_6 = guess_ab(tbl, "rifa"), GramNeg_1 = guess_ab(tbl,
    +  "gent"), GramNeg_2 = guess_ab(tbl, "tobr"), GramNeg_3 = guess_ab(tbl,
    +  "coli"), GramNeg_4 = guess_ab(tbl, "cfot"), GramNeg_5 = guess_ab(tbl,
    +  "cfta"), GramNeg_6 = guess_ab(tbl, "mero"), warnings = TRUE, ...)
     
     key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"),
       ignore_I = TRUE, points_threshold = 2, info = FALSE)
    @@ -320,8 +324,8 @@ On our website https://msberends.gitla library(dplyr) # set key antibiotics to a new variable my_patients <- septic_patients %>% - mutate(keyab = key_antibiotics(.)) %>% - mutate( + mutate(keyab = key_antibiotics(.)) %>% + mutate( # now calculate first isolates first_regular = first_isolate(., col_keyantibiotics = FALSE), # and first WEIGHTED isolates diff --git a/docs/reference/like.html b/docs/reference/like.html index c58c5398..9037d65d 100644 --- a/docs/reference/like.html +++ b/docs/reference/like.html @@ -288,7 +288,7 @@ On our website https://msberends.gitla library(dplyr) septic_patients %>% left_join_microorganisms() %>% - filter(genus %like% '^ent') %>% + filter(genus %like% '^ent') %>% freq(genus, species) # }
    diff --git a/docs/reference/mdro.html b/docs/reference/mdro.html index 6d5619e7..f6844cd4 100644 --- a/docs/reference/mdro.html +++ b/docs/reference/mdro.html @@ -221,21 +221,26 @@
    mdro(tbl, country = NULL, col_mo = NULL, info = TRUE,
    -  amcl = "amcl", amik = "amik", amox = "amox", ampi = "ampi",
    -  azit = "azit", aztr = "aztr", cefa = "cefa", cfra = "cfra",
    -  cfep = "cfep", cfot = "cfot", cfox = "cfox", cfta = "cfta",
    -  cftr = "cftr", cfur = "cfur", chlo = "chlo", cipr = "cipr",
    -  clar = "clar", clin = "clin", clox = "clox", coli = "coli",
    -  czol = "czol", dapt = "dapt", doxy = "doxy", erta = "erta",
    -  eryt = "eryt", fosf = "fosf", fusi = "fusi", gent = "gent",
    -  imip = "imip", kana = "kana", levo = "levo", linc = "linc",
    -  line = "line", mero = "mero", metr = "metr", mino = "mino",
    -  moxi = "moxi", nali = "nali", neom = "neom", neti = "neti",
    -  nitr = "nitr", novo = "novo", norf = "norf", oflo = "oflo",
    -  peni = "peni", pipe = "pipe", pita = "pita", poly = "poly",
    -  qida = "qida", rifa = "rifa", roxi = "roxi", siso = "siso",
    -  teic = "teic", tetr = "tetr", tica = "tica", tige = "tige",
    -  tobr = "tobr", trim = "trim", trsu = "trsu", vanc = "vanc")
    +  amcl = guess_ab(), amik = guess_ab(), amox = guess_ab(),
    +  ampi = guess_ab(), azit = guess_ab(), aztr = guess_ab(),
    +  cefa = guess_ab(), cfra = guess_ab(), cfep = guess_ab(),
    +  cfot = guess_ab(), cfox = guess_ab(), cfta = guess_ab(),
    +  cftr = guess_ab(), cfur = guess_ab(), chlo = guess_ab(),
    +  cipr = guess_ab(), clar = guess_ab(), clin = guess_ab(),
    +  clox = guess_ab(), coli = guess_ab(), czol = guess_ab(),
    +  dapt = guess_ab(), doxy = guess_ab(), erta = guess_ab(),
    +  eryt = guess_ab(), fosf = guess_ab(), fusi = guess_ab(),
    +  gent = guess_ab(), imip = guess_ab(), kana = guess_ab(),
    +  levo = guess_ab(), linc = guess_ab(), line = guess_ab(),
    +  mero = guess_ab(), metr = guess_ab(), mino = guess_ab(),
    +  moxi = guess_ab(), nali = guess_ab(), neom = guess_ab(),
    +  neti = guess_ab(), nitr = guess_ab(), novo = guess_ab(),
    +  norf = guess_ab(), oflo = guess_ab(), peni = guess_ab(),
    +  pipe = guess_ab(), pita = guess_ab(), poly = guess_ab(),
    +  qida = guess_ab(), rifa = guess_ab(), roxi = guess_ab(),
    +  siso = guess_ab(), teic = guess_ab(), tetr = guess_ab(),
    +  tica = guess_ab(), tige = guess_ab(), tobr = guess_ab(),
    +  trim = guess_ab(), trsu = guess_ab(), vanc = guess_ab())
     
     brmo(..., country = "nl")
     
    @@ -519,7 +524,7 @@
         

    Antibiotics

    -

    To define antibiotics column names, input a text (case-insensitive) or use NULL to skip a column (e.g. tica = NULL). Non-existing columns will anyway be skipped with a warning.

    +

    To define antibiotics column names, leave the defaults as they are to determine them automatically with guess_ab, input a text (case-insensitive), or use NULL to skip a column (e.g. tica = NULL). Non-existing columns will be skipped with a warning anyway.

    Abbreviations of the columns containing antibiotics, in the form: abbreviation: generic name (ATC code)

    amcl: amoxicillin+clavulanic acid (J01CR02), amik: amikacin (J01GB06), @@ -596,7 +601,7 @@ On our website https://msberends.gitla library(dplyr) septic_patients %>% - mutate(EUCAST = mdro(.), + mutate(EUCAST = mdro(.), BRMO = brmo(.)) # }

    diff --git a/docs/reference/portion.html b/docs/reference/portion.html index e5ff8a10..f86ad334 100644 --- a/docs/reference/portion.html +++ b/docs/reference/portion.html @@ -333,17 +333,17 @@ On our website https://msberends.gitla septic_patients %>% portion_SI(amox) septic_patients %>% - group_by(hospital_id) %>% - summarise(p = portion_S(cipr), + group_by(hospital_id) %>% + summarise(p = portion_S(cipr), n = n_rsi(cipr)) # n_rsi works like n_distinct in dplyr septic_patients %>% - group_by(hospital_id) %>% - summarise(R = portion_R(cipr, as_percent = TRUE), + group_by(hospital_id) %>% + summarise(R = portion_R(cipr, as_percent = TRUE), I = portion_I(cipr, as_percent = TRUE), S = portion_S(cipr, as_percent = TRUE), n = n_rsi(cipr), # works like n_distinct in dplyr - total = n()) # NOT the amount of tested isolates! + total = n()) # NOT the amount of tested isolates! # Calculate co-resistance between amoxicillin/clav acid and gentamicin, # so we can see that combination therapy does a lot more than mono therapy: @@ -358,8 +358,8 @@ On our website https://msberends.gitla septic_patients %>% - group_by(hospital_id) %>% - summarise(cipro_p = portion_S(cipr, as_percent = TRUE), + group_by(hospital_id) %>% + summarise(cipro_p = portion_S(cipr, as_percent = TRUE), cipro_n = count_all(cipr), genta_p = portion_S(gent, as_percent = TRUE), genta_n = count_all(gent), @@ -368,22 +368,22 @@ On our website https://msberends.gitla # Get portions S/I/R immediately of all rsi columns septic_patients %>% - select(amox, cipr) %>% + select(amox, cipr) %>% portion_df(translate = FALSE) # It also supports grouping variables septic_patients %>% - select(hospital_id, amox, cipr) %>% - group_by(hospital_id) %>% + select(hospital_id, amox, cipr) %>% + group_by(hospital_id) %>% portion_df(translate = FALSE) # }# NOT RUN { # calculate current empiric combination therapy of Helicobacter gastritis: my_table %>% - filter(first_isolate == TRUE, + filter(first_isolate == TRUE, genus == 
"Helicobacter") %>% - summarise(p = portion_S(amox, metr), # amoxicillin with metronidazole + summarise(p = portion_S(amox, metr), # amoxicillin with metronidazole n = count_all(amox, metr)) # }
    diff --git a/docs/reference/resistance_predict.html b/docs/reference/resistance_predict.html index 2fc7b71a..57407280 100644 --- a/docs/reference/resistance_predict.html +++ b/docs/reference/resistance_predict.html @@ -311,7 +311,7 @@ On our website https://msberends.gitla # or use dplyr so you can actually read it: library(dplyr) tbl %>% - filter(first_isolate == TRUE, + filter(first_isolate == TRUE, genus == "Haemophilus") %>% resistance_predict(amcl, date) # }# NOT RUN { @@ -322,9 +322,9 @@ On our website https://msberends.gitla # get bacteria properties like genus and species left_join_microorganisms("mo") %>% # calculate first isolates - mutate(first_isolate = first_isolate(.)) %>% + mutate(first_isolate = first_isolate(.)) %>% # filter on first E. coli isolates - filter(genus == "Escherichia", + filter(genus == "Escherichia", species == "coli", first_isolate == TRUE) %>% # predict resistance of cefotaxime for next years @@ -338,27 +338,27 @@ On our website https://msberends.gitla if (!require(ggplot2)) { data <- septic_patients %>% - filter(mo == as.mo("E. coli")) %>% + filter(mo == as.mo("E. coli")) %>% resistance_predict(col_ab = "amox", col_date = "date", info = FALSE, minimum = 15) - ggplot(data, - aes(x = year)) + - geom_col(aes(y = value), + ggplot(data, + aes(x = year)) + + geom_col(aes(y = value), fill = "grey75") + - geom_errorbar(aes(ymin = se_min, + geom_errorbar(aes(ymin = se_min, ymax = se_max), colour = "grey50") + - scale_y_continuous(limits = c(0, 1), + scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.1), labels = paste0(seq(0, 100, 10), "%")) + - labs(title = expression(paste("Forecast of amoxicillin resistance in ", + labs(title = expression(paste("Forecast of amoxicillin resistance in ", italic("E. 
coli"))), y = "%IR", x = "Year") + - theme_minimal(base_size = 13) + theme_minimal(base_size = 13) } # } diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 505867c8..6aec9f24 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -60,6 +60,9 @@ https://msberends.gitlab.io/AMR/reference/ggplot_rsi.html + + https://msberends.gitlab.io/AMR/reference/guess_ab.html + https://msberends.gitlab.io/AMR/reference/itis.html diff --git a/man/itis.Rd b/man/itis.Rd index edfd94c5..15aa9e06 100644 --- a/man/itis.Rd +++ b/man/itis.Rd @@ -25,7 +25,8 @@ On our website \url{https://msberends.gitlab.io/AMR} you can find \href{https:// \examples{ # Get a note when a species was renamed mo_shortname("Chlamydia psittaci") -# Note: 'Chlamydia psittaci' (Page, 1968) was renamed 'Chlamydophila psittaci' (Everett et al., 1999) +# Note: 'Chlamydia psittaci' (Page, 1968) was renamed +# 'Chlamydophila psittaci' (Everett et al., 1999) # [1] "C. psittaci" # Get any property from the entire taxonomic tree for all included species