diff --git a/DESCRIPTION b/DESCRIPTION index 137b5ee2e..7bb150cfe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.5.0.9018 -Date: 2019-02-25 +Version: 0.5.0.9019 +Date: 2019-02-26 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NEWS.md b/NEWS.md index 79a22b84a..971c292ab 100755 --- a/NEWS.md +++ b/NEWS.md @@ -12,13 +12,11 @@ We've got a new website: [https://msberends.gitlab.io/AMR](https://msberends.git * **BREAKING**: removed deprecated functions, parameters and references to 'bactid'. Use `as.mo()` to identify an MO code. * Catalogue of Life as a new taxonomic source for data about microorganisms, which also contains all ITIS data we used previously. The `microorganisms` data set now contains: * All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria, Protozoa and Viruses - * All ~3,000 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales and Schizosaccharomycetales. - - The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant (sub)species are covered (like all species of *Aspergillus*, *Candida*, *Pneumocystis*, *Saccharomyces* and *Trichophyton*). 
+ * All ~3,000 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales and Schizosaccharomycetales (covering at least like all species of *Aspergillus*, *Candida*, *Pneumocystis*, *Saccharomyces* and *Trichophyton*) * All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed * The responsible author(s) and year of scientific publication - This data is updated annually - check the included version with `catalogue_of_life_version()`. + This data is updated annually - check the included version with the new function `catalogue_of_life_version()`. * Due to this change, some `mo` codes changed (e.g. *Streptococcus* changed from `B_STRPTC` to `B_STRPT`). A translation table is used internally to support older microorganism IDs, so users will not notice this difference. * New function `mo_rank()` for the taxonomic rank (genus, species, infraspecies, etc.) * New function `mo_url()` to get the URL to the Catalogue of Life @@ -86,7 +84,7 @@ We've got a new website: [https://msberends.gitlab.io/AMR](https://msberends.git * Better handling for *Salmonellae* * Understanding of highly virulent *E. coli* strains like EIEC, EPEC and STEC * There will be looked for uncertain results at default - these results will be returned with an informative warning - * Manual now contains more info about the algorithms + * Manual (help page) now contains more info about the algorithms * Progress bar will be shown when it takes more than 3 seconds to get results * Support for formatted console text * Console will return the percentage of uncoercable input diff --git a/R/catalogue_of_life.R b/R/catalogue_of_life.R index 89fa65076..09763da64 100755 --- a/R/catalogue_of_life.R +++ b/R/catalogue_of_life.R @@ -71,3 +71,24 @@ #' mo_fullname("C. 
elegans") #' # [1] "Chroococcus limneticus elegans" # Because a microorganism was found NULL + +#' Version info of included Catalogue of Life +#' +#' This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year. +#' @seealso \code{\link{microorganisms}} +#' @inheritSection catalogue_of_life Catalogue of Life +#' @inheritSection AMR Read more on our website! +#' @export +#' @examples +#' library(dplyr) +#' microorganisms %>% freq(kingdom) +#' microorganisms %>% group_by(kingdom) %>% freq(phylum, nmax = NULL) +catalogue_of_life_version <- function() { + # see the `catalogue_of_life` list in R/data.R + list(version = catalogue_of_life$version, + url = catalogue_of_life$url, + # annual release always somewhere in March + is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")), + n_species = nrow(AMR::microorganisms), + n_synonyms = nrow(AMR::microorganisms.old)) +} diff --git a/R/data.R b/R/data.R index 45d43fcac..8ff7db5fa 100755 --- a/R/data.R +++ b/R/data.R @@ -168,20 +168,6 @@ catalogue_of_life <- list( url = "http://www.catalogueoflife.org/annual-checklist/2018" ) -#' Version info of included Catalogue of Life -#' @seealso \code{\link{microorganisms}} -#' @inheritSection catalogue_of_life Catalogue of Life -#' @inheritSection AMR Read more on our website! 
-#' @export -catalogue_of_life_version <- function() { - list(version = catalogue_of_life$version, - url = catalogue_of_life$url, - # annual release always somewhere in March - is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")), - no_of_species = nrow(AMR::microorganisms), - no_of_synonyms = nrow(AMR::microorganisms.old)) -} - #' Data set with previously accepted taxonomic names #' #' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by \code{\link{as.mo}}. diff --git a/R/mo.R b/R/mo.R index dd419d062..4faa3b9f4 100755 --- a/R/mo.R +++ b/R/mo.R @@ -166,7 +166,12 @@ #' mutate(mo = as.mo(paste(genus, species))) #' } as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, reference_df = get_mo_source()) { - if (all(x %in% AMR::microorganisms$fullname) + if (all(x %in% AMR::microorganisms$mo) + & isFALSE(Becker) + & isFALSE(Lancefield) + & is.null(reference_df)) { + y <- x + } else if (all(x %in% AMR::microorganisms$fullname) & isFALSE(Becker) & isFALSE(Lancefield) & is.null(reference_df)) { @@ -179,12 +184,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, if (any(is.na(y))) { y[is.na(y)] <- microorganismsDT[prevalence == 3][data.table(fullname = x[is.na(y)]), on = "fullname", "mo"][[1]] } - return(y) + } else { + # will be checked for mo class in validation and uses exec_as.mo internally if necessary + y <- mo_validate(x = x, property = "mo", + Becker = Becker, Lancefield = Lancefield, + allow_uncertain = allow_uncertain, reference_df = reference_df) } - # will be checked for mo class in validation - mo_validate(x = x, property = "mo", - Becker = Becker, Lancefield = Lancefield, - allow_uncertain = allow_uncertain, reference_df = reference_df) + structure(.Data = y, class = "mo") } #' @rdname as.mo @@ -891,6 +897,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = 
FALSE, stringsAsFactors = FALSE) df_input <- data.frame(input = as.character(x_input), stringsAsFactors = FALSE) + x <- df_input %>% left_join(df_found, by = "input") %>% diff --git a/R/mo_property.R b/R/mo_property.R index 0d3dc9403..f25e5eb27 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -132,7 +132,7 @@ #' mo_taxonomy("E. coli") mo_fullname <- function(x, language = get_locale(), ...) { x <- mo_validate(x = x, property = "fullname", ...) - mo_translate(x, language = language) + translate(x, language = language) } #' @rdname mo_property @@ -148,46 +148,64 @@ mo_shortname <- function(x, language = get_locale(), ...) { if (is.null(Lancefield)) { Lancefield <- FALSE } - if (Becker %in% c(TRUE, "all") | Lancefield == TRUE) { - res1 <- AMR::as.mo(x, Becker = FALSE, Lancefield = FALSE, reference_df = dots$reference_df) - res2 <- suppressWarnings(AMR::as.mo(res1, ...)) - res2_fullname <- mo_fullname(res2, language = language) - res2_fullname[res2_fullname %like% " \\(CoNS\\)"] <- "CoNS" - res2_fullname[res2_fullname %like% " \\(CoPS\\)"] <- "CoPS" - res2_fullname[res2_fullname %like% " \\(KNS\\)"] <- "KNS" - res2_fullname[res2_fullname %like% " \\(KPS\\)"] <- "KPS" - res2_fullname[res2_fullname %like% " \\(CNS\\)"] <- "CNS" - res2_fullname[res2_fullname %like% " \\(CPS\\)"] <- "CPS" - res2_fullname <- gsub("Streptococcus (group|Gruppe|gruppe|groep|grupo|gruppo|groupe) (.)", - "G\\2S", - res2_fullname) # turn "Streptococcus group A" and "Streptococcus grupo A" to "GAS" - res2_fullname_vector <- res2_fullname[res2_fullname == mo_fullname(res1)] - res2_fullname[res2_fullname == mo_fullname(res1)] <- paste0(substr(mo_genus(res2_fullname_vector), 1, 1), - ". ", - suppressWarnings(mo_species(res2_fullname_vector))) - if (sum(res1 == res2, na.rm = TRUE) > 0) { - res1[res1 == res2] <- paste0(substr(mo_genus(res1[res1 == res2]), 1, 1), - ". 
", - suppressWarnings(mo_species(res1[res1 == res2]))) - } - res1[res1 != res2] <- res2_fullname - result <- as.character(res1) - } else { - x <- AMR::as.mo(x, ...) - suppressWarnings( - result <- data.frame(mo = x) %>% - left_join(AMR::microorganisms, by = "mo") %>% - mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>% - pull(shortname) - ) + + shorten <- function(x) { + # easiest: no transformations needed + x <- mo_fullname(x, language = "en") + # shorten for the ones that have a space: shorten first word and write out second word + shorten_these <- x %like% " " & !x %like% "Streptococcus group " + x[shorten_these] <- paste0(substr(x[shorten_these], 1, 1), + ". ", + x[shorten_these] %>% + strsplit(" ", fixed = TRUE) %>% + unlist() %>% + .[2]) + x } - mo_translate(result, language = language) + + if (isFALSE(Becker) & isFALSE(Lancefield)) { + result <- shorten(x) + + } else { + # get result without transformations + res1 <- AMR::as.mo(x, Becker = FALSE, Lancefield = FALSE, reference_df = dots$reference_df) + # and result with transformations + res2 <- suppressWarnings(AMR::as.mo(res1, ...)) + if (res1 == res2 + & !res1 %like% "^B_STRPT_GR") { + result <- shorten(x) + } else { + res2_fullname <- mo_fullname(res2, language = language) + res2_fullname[res2_fullname %like% " \\(CoNS\\)"] <- "CoNS" + res2_fullname[res2_fullname %like% " \\(CoPS\\)"] <- "CoPS" + res2_fullname[res2_fullname %like% " \\(KNS\\)"] <- "KNS" + res2_fullname[res2_fullname %like% " \\(KPS\\)"] <- "KPS" + res2_fullname[res2_fullname %like% " \\(CNS\\)"] <- "CNS" + res2_fullname[res2_fullname %like% " \\(CPS\\)"] <- "CPS" + res2_fullname <- gsub("Streptococcus (group|Gruppe|gruppe|groep|grupo|gruppo|groupe) (.)", + "G\\2S", + res2_fullname) # turn "Streptococcus group A" and "Streptococcus grupo A" to "GAS" + res2_fullname_vector <- res2_fullname[res2_fullname == mo_fullname(res1)] + res2_fullname[res2_fullname == 
mo_fullname(res1)] <- paste0(substr(mo_genus(res2_fullname_vector), 1, 1), + ". ", + suppressWarnings(mo_species(res2_fullname_vector))) + if (sum(res1 == res2, na.rm = TRUE) > 0) { + res1[res1 == res2] <- paste0(substr(mo_genus(res1[res1 == res2]), 1, 1), + ". ", + suppressWarnings(mo_species(res1[res1 == res2]))) + } + res1[res1 != res2] <- res2_fullname + result <- as.character(res1) + } + } + + translate(result, language = language) } #' @rdname mo_property #' @export mo_subspecies <- function(x, language = get_locale(), ...) { - mo_translate(mo_validate(x = x, property = "subspecies", ...), language = language) + translate(validate(x = x, property = "subspecies", ...), language = language) } #' @rdname mo_property diff --git a/data/microorganisms.rda b/data/microorganisms.rda index b1cd62666..ad0de3bae 100755 Binary files a/data/microorganisms.rda and b/data/microorganisms.rda differ diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 8c8e0abf0..054820559 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/articles/AMR.html b/docs/articles/AMR.html index 178e6d5c9..0c3fafb8a 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -192,7 +192,7 @@

How to conduct AMR analysis

Matthijs S. Berends

-

25 February 2019

+

26 February 2019

@@ -201,7 +201,7 @@ -

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 25 February 2019.

+

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 26 February 2019.

Introduction

@@ -217,21 +217,21 @@ -2019-02-25 +2019-02-26 abcd Escherichia coli S S -2019-02-25 +2019-02-26 abcd Escherichia coli S R -2019-02-25 +2019-02-26 efgh Escherichia coli R @@ -327,42 +327,42 @@ -2011-05-27 -C5 -Hospital B +2015-04-06 +E7 +Hospital C Escherichia coli R +I S -S -S +R M -2010-08-22 -Q6 +2016-10-23 +S6 +Hospital B +Streptococcus pneumoniae +S +S +S +S +F + + +2010-02-02 +O1 Hospital D Escherichia coli S S -S -S -F - - -2017-03-11 -V9 -Hospital A -Klebsiella pneumoniae -S -S -S +R S F -2012-12-25 -A1 -Hospital B +2014-03-12 +H4 +Hospital A Escherichia coli S S @@ -371,22 +371,22 @@ M -2011-09-17 -I5 +2011-11-01 +X1 Hospital B Escherichia coli -S R S S -M +R +F -2017-05-19 -O7 -Hospital C -Streptococcus pneumoniae -S +2016-12-10 +W4 +Hospital B +Escherichia coli +R S S S @@ -411,8 +411,8 @@ #> #> Item Count Percent Cum. Count Cum. Percent #> --- ----- ------- -------- ----------- ------------- -#> 1 M 10,437 52.2% 10,437 52.2% -#> 2 F 9,563 47.8% 20,000 100.0% +#> 1 M 10,479 52.4% 10,479 52.4% +#> 2 F 9,521 47.6% 20,000 100.0%

So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values M and F. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.

The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

data <- data %>%
@@ -443,10 +443,10 @@
 #> Kingella kingae (no changes)
 #> 
 #> EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
-#> Table 1:  Intrinsic resistance in Enterobacteriaceae (1294 changes)
+#> Table 1:  Intrinsic resistance in Enterobacteriaceae (1324 changes)
 #> Table 2:  Intrinsic resistance in non-fermentative Gram-negative bacteria (no changes)
 #> Table 3:  Intrinsic resistance in other Gram-negative bacteria (no changes)
-#> Table 4:  Intrinsic resistance in Gram-positive bacteria (2675 changes)
+#> Table 4:  Intrinsic resistance in Gram-positive bacteria (2776 changes)
 #> Table 8:  Interpretive rules for B-lactam agents and Gram-positive cocci (no changes)
 #> Table 9:  Interpretive rules for B-lactam agents and Gram-negative rods (no changes)
 #> Table 10: Interpretive rules for B-lactam agents and other Gram-negative bacteria (no changes)
@@ -462,9 +462,9 @@
 #> Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no changes)
 #> Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no changes)
 #> 
-#> => EUCAST rules affected 7,390 out of 20,000 rows
+#> => EUCAST rules affected 7,376 out of 20,000 rows
 #>    -> added 0 test results
-#>    -> changed 3,969 test results (0 to S; 0 to I; 3,969 to R)
+#> -> changed 4,100 test results (0 to S; 0 to I; 4,100 to R)

@@ -489,7 +489,7 @@ #> NOTE: Using column `bacteria` as input for `col_mo`. #> NOTE: Using column `date` as input for `col_date`. #> NOTE: Using column `patient_id` as input for `col_patient_id`. -#> => Found 5,670 first isolates (28.4% of total)

+#> => Found 5,678 first isolates (28.4% of total)

So only 28.4% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

data_1st <- data %>% 
   filter(first == TRUE)
@@ -516,43 +516,43 @@ 1 -2010-03-10 -F7 +2010-01-25 +B9 B_ESCHR_COL -S -S R +R +S S TRUE 2 -2010-03-12 -F7 +2010-03-01 +B9 B_ESCHR_COL I -I S +R S FALSE 3 -2010-06-18 -F7 +2010-06-15 +B9 B_ESCHR_COL +R S -I S S FALSE 4 -2010-10-16 -F7 +2010-07-08 +B9 B_ESCHR_COL -S +R S S S @@ -560,8 +560,8 @@ 5 -2010-11-12 -F7 +2010-07-20 +B9 B_ESCHR_COL S S @@ -571,19 +571,19 @@ 6 -2010-11-24 -F7 +2010-09-18 +B9 B_ESCHR_COL +R S -I S S FALSE 7 -2011-02-24 -F7 +2010-09-21 +B9 B_ESCHR_COL S S @@ -593,21 +593,21 @@ 8 -2011-03-30 -F7 +2010-11-24 +B9 B_ESCHR_COL +S R S S -S -TRUE +FALSE 9 -2011-08-09 -F7 +2010-12-08 +B9 B_ESCHR_COL -S +R R S S @@ -615,18 +615,18 @@ 10 -2011-08-14 -F7 +2011-01-19 +B9 B_ESCHR_COL S -S +I S S FALSE -

Only 2 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.

+

Only 1 isolate is marked as ‘first’ according to the CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.

If a column exists with a name like ‘key(…)ab’ the first_isolate() function will automatically use it and determine the first weighted isolates. Mind the NOTEs in below output:

data <- data %>% 
   mutate(keyab = key_antibiotics(.)) %>% 
@@ -637,7 +637,7 @@
 #> NOTE: Using column `patient_id` as input for `col_patient_id`.
 #> NOTE: Using column `keyab` as input for `col_keyantibiotics`. Use col_keyantibiotics  = FALSE to prevent this.
 #> [Criterion] Inclusion based on key antibiotics, ignoring I.
-#> => Found 15,887 first weighted isolates (79.4% of total)
+#> => Found 15,822 first weighted isolates (79.1% of total) @@ -654,46 +654,46 @@ - - + + - - + + - - + + - + - - + + + - - + - - + + - + @@ -702,58 +702,58 @@ - - + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - + @@ -762,11 +762,11 @@ - - + + - + @@ -774,11 +774,11 @@
isolate
12010-03-10F72010-01-25B9 B_ESCHR_COLSS RRS S TRUE TRUE
22010-03-12F72010-03-01B9 B_ESCHR_COL II SR S FALSE TRUE
32010-06-18F72010-06-15B9 B_ESCHR_COLR SI S S FALSEFALSETRUE
42010-10-16F72010-07-08B9 B_ESCHR_COLSR S S S
52010-11-12F72010-07-20B9 B_ESCHR_COL S S S S FALSEFALSETRUE
62010-11-24F7B_ESCHR_COLSISSFALSEFALSE
72011-02-24F7B_ESCHR_COLSSSSFALSEFALSE
82011-03-30F72010-09-18B9 B_ESCHR_COL R S S SFALSE TRUE
72010-09-21B9B_ESCHR_COLSSSSFALSETRUE
82010-11-24B9B_ESCHR_COLSRSSFALSE TRUE
92011-08-09F72010-12-08B9 B_ESCHR_COLSR R S S
102011-08-14F72011-01-19B9 B_ESCHR_COL SSI S S FALSE
-

Instead of 2, now 5 isolates are flagged. In total, 79.4% of all isolates are marked ‘first weighted’ - 51.1% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

+

Instead of 1, now 9 isolates are flagged. In total, 79.1% of all isolates are marked ‘first weighted’ - 50.7% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

As with filter_first_isolate(), there’s a shortcut for this new algorithm too:

data_1st <- data %>% 
   filter_first_weighted_isolate()
-

So we end up with 15,887 isolates for analysis.

+

So we end up with 15,822 isolates for analysis.

We can remove unneeded columns:

data_1st <- data_1st %>% 
   select(-c(first, keyab))
@@ -804,14 +804,14 @@ 1 -2011-05-27 -C5 -Hospital B +2015-04-06 +E7 +Hospital C B_ESCHR_COL R +I S -S -S +R M Gram negative Escherichia @@ -820,14 +820,46 @@ 2 -2010-08-22 -Q6 +2016-10-23 +S6 +Hospital B +B_STRPT_PNE +S +S +S +R +F +Gram positive +Streptococcus +pneumoniae +TRUE + + +3 +2010-02-02 +O1 Hospital D B_ESCHR_COL S S +R +S +F +Gram negative +Escherichia +coli +TRUE + + +5 +2011-11-01 +X1 +Hospital B +B_ESCHR_COL +R S S +R F Gram negative Escherichia @@ -836,27 +868,11 @@ 6 -2017-05-19 -O7 -Hospital C -B_STRPT_PNE -S -S -S -R -F -Gram positive -Streptococcus -pneumoniae -TRUE - - -8 -2011-01-19 -Q7 +2016-12-10 +W4 Hospital B B_ESCHR_COL -S +R S S S @@ -866,36 +882,20 @@ coli TRUE - -9 -2014-06-24 -S9 -Hospital B -B_STRPT_PNE -R -R -S -R -F -Gram positive -Streptococcus -pneumoniae -TRUE - -10 -2011-10-01 -O10 -Hospital B -B_STPHY_AUR -R +7 +2015-07-07 +P8 +Hospital D +B_ESCHR_COL S R S +S F -Gram positive -Staphylococcus -aureus +Gram negative +Escherichia +coli TRUE @@ -915,9 +915,9 @@
freq(paste(data_1st$genus, data_1st$species))

Or it can be used the dplyr way, which is easier to read:

data_1st %>% freq(genus, species)
-

Frequency table of genus and species from a data.frame (15,887 x 13)

+

Frequency table of genus and species from a data.frame (15,822 x 13)

Columns: 2
-Length: 15,887 (of which NA: 0 = 0.00%)
+Length: 15,822 (of which NA: 0 = 0.00%)
Unique: 4

Shortest: 16
Longest: 24

@@ -934,33 +934,33 @@ Longest: 24

1 Escherichia coli -7,900 -49.7% -7,900 -49.7% +7,838 +49.5% +7,838 +49.5% 2 Staphylococcus aureus -3,951 -24.9% -11,851 +3,965 +25.1% +11,803 74.6% 3 Streptococcus pneumoniae -2,413 -15.2% -14,264 -89.8% +2,457 +15.5% +14,260 +90.1% 4 Klebsiella pneumoniae -1,623 -10.2% -15,887 +1,562 +9.9% +15,822 100.0% @@ -971,7 +971,7 @@ Longest: 24

Resistance percentages

The functions portion_R, portion_IR, portion_I, portion_SI and portion_S can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own:

data_1st %>% portion_IR(amox)
-#> [1] 0.4763014
+#> [1] 0.4722538

Or they can be used in conjunction with group_by() and summarise(), both from the dplyr package:

data_1st %>% 
   group_by(hospital) %>% 
@@ -984,19 +984,19 @@ Longest: 24

Hospital A -0.4774125 +0.4692014 Hospital B -0.4762754 +0.4694061 Hospital C -0.4630957 +0.4845361 Hospital D -0.4845070 +0.4727669 @@ -1014,23 +1014,23 @@ Longest: 24

Hospital A -0.4774125 -4715 +0.4692014 +4708 Hospital B -0.4762754 -5606 +0.4694061 +5573 Hospital C -0.4630957 -2371 +0.4845361 +2328 Hospital D -0.4845070 -3195 +0.4727669 +3213 @@ -1050,27 +1050,27 @@ Longest: 24

Escherichia -0.7267089 -0.9002532 -0.9720253 +0.7269712 +0.9050778 +0.9744833 Klebsiella -0.7233518 -0.8983364 -0.9741220 +0.7349552 +0.8988476 +0.9763124 Staphylococcus -0.7296887 -0.9248292 -0.9787396 +0.7263556 +0.9235813 +0.9793190 Streptococcus -0.7318690 +0.7391127 0.0000000 -0.7318690 +0.7391127 diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png index 82f756043..13b0d9f9c 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png index fb5561c1d..38decffc3 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png index d8a26ca37..65da5ffc3 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png index b1b196370..d03665cbd 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ diff --git a/docs/articles/EUCAST.html b/docs/articles/EUCAST.html index 00358ab38..c0c90e4e1 100644 --- a/docs/articles/EUCAST.html +++ b/docs/articles/EUCAST.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019
@@ -192,7 +192,7 @@

How to apply EUCAST rules

Matthijs S. Berends

-

25 February 2019

+

26 February 2019

diff --git a/docs/articles/G_test.html b/docs/articles/G_test.html index fab4163e5..d0cb97635 100644 --- a/docs/articles/G_test.html +++ b/docs/articles/G_test.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -192,7 +192,7 @@

How to use the G-test

Matthijs S. Berends

-

25 February 2019

+

26 February 2019

diff --git a/docs/articles/SPSS.html b/docs/articles/SPSS.html index 09c41dc60..48307f1d9 100644 --- a/docs/articles/SPSS.html +++ b/docs/articles/SPSS.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/articles/WHONET.html b/docs/articles/WHONET.html index f2fbb6dc7..364015860 100644 --- a/docs/articles/WHONET.html +++ b/docs/articles/WHONET.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -192,7 +192,7 @@

How to work with WHONET data

Matthijs S. Berends

-

25 February 2019

+

26 February 2019

diff --git a/docs/articles/atc_property.html b/docs/articles/atc_property.html index 10c2d65a4..ff8088b32 100644 --- a/docs/articles/atc_property.html +++ b/docs/articles/atc_property.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -192,7 +192,7 @@

How to get properties of an antibiotic

Matthijs S. Berends

-

25 February 2019

+

26 February 2019

diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png index fabe6abcf..ab59c7dba 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 6645d5b52..8641eb003 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/authors.html b/docs/authors.html index 391ebb8a1..c39fc2b4c 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/index.html b/docs/index.html index e4c42d8a5..55733410c 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/AMR-deprecated.html b/docs/reference/AMR-deprecated.html index 61dff2d8a..66bada1f0 100644 --- a/docs/reference/AMR-deprecated.html +++ b/docs/reference/AMR-deprecated.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/AMR.html b/docs/reference/AMR.html index db0a739ad..c634b4435 100644 --- a/docs/reference/AMR.html +++ b/docs/reference/AMR.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/WHOCC.html b/docs/reference/WHOCC.html index a8cfa70f1..06c540475 100644 --- a/docs/reference/WHOCC.html +++ b/docs/reference/WHOCC.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/WHONET.html b/docs/reference/WHONET.html index 97e891f53..4b55fbad9 100644 --- a/docs/reference/WHONET.html +++ b/docs/reference/WHONET.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/abname.html b/docs/reference/abname.html index ebb643123..ec256fd72 100644 --- a/docs/reference/abname.html +++ 
b/docs/reference/abname.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/age.html b/docs/reference/age.html index 20a427fa3..969e0d09a 100644 --- a/docs/reference/age.html +++ b/docs/reference/age.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/age_groups.html b/docs/reference/age_groups.html index d7f253a83..4aec10c54 100644 --- a/docs/reference/age_groups.html +++ b/docs/reference/age_groups.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/antibiotics.html b/docs/reference/antibiotics.html index b5687ad8e..46d80e729 100644 --- a/docs/reference/antibiotics.html +++ b/docs/reference/antibiotics.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/as.atc.html b/docs/reference/as.atc.html index 9ac2f6199..e25742144 100644 --- a/docs/reference/as.atc.html +++ b/docs/reference/as.atc.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/as.mic.html b/docs/reference/as.mic.html index 76e4b9187..b705333b5 100644 --- a/docs/reference/as.mic.html +++ b/docs/reference/as.mic.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 982f5f614..f224e8628 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/as.rsi.html b/docs/reference/as.rsi.html index 45ac519c6..eb6d619d6 100644 --- a/docs/reference/as.rsi.html +++ b/docs/reference/as.rsi.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/atc_online.html b/docs/reference/atc_online.html index 968ab87a3..2df54cc9a 100644 --- a/docs/reference/atc_online.html +++ b/docs/reference/atc_online.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/atc_property.html 
b/docs/reference/atc_property.html index 40034769e..678f6634e 100644 --- a/docs/reference/atc_property.html +++ b/docs/reference/atc_property.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/availability.html b/docs/reference/availability.html index affa0a25d..27309eefe 100644 --- a/docs/reference/availability.html +++ b/docs/reference/availability.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/catalogue_of_life.html b/docs/reference/catalogue_of_life.html index 493bc4de0..3437f6245 100644 --- a/docs/reference/catalogue_of_life.html +++ b/docs/reference/catalogue_of_life.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/catalogue_of_life_version.html b/docs/reference/catalogue_of_life_version.html index 810eaf0bf..bab36e174 100644 --- a/docs/reference/catalogue_of_life_version.html +++ b/docs/reference/catalogue_of_life_version.html @@ -47,7 +47,7 @@ - + @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -237,7 +237,7 @@
-

Version info of included Catalogue of Life

+

This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year.

@@ -268,6 +268,12 @@ This package contains the complete taxonomic tree of almost all microorganisms f

microorganisms

+

Examples

+
# NOT RUN {
+library(dplyr)
+microorganisms %>% freq(kingdom)
+microorganisms %>% group_by(kingdom) %>% freq(phylum, nmax = NULL)
+# }
diff --git a/docs/reference/count.html b/docs/reference/count.html index 7e6895d69..e822e5a83 100644 --- a/docs/reference/count.html +++ b/docs/reference/count.html @@ -81,7 +81,7 @@ count_R and count_IR can be used to count resistant isolates, count_S and count_ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/eucast_rules.html b/docs/reference/eucast_rules.html index 0018acc88..58f28a24a 100644 --- a/docs/reference/eucast_rules.html +++ b/docs/reference/eucast_rules.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/first_isolate.html b/docs/reference/first_isolate.html index 462f6069a..163cc5558 100644 --- a/docs/reference/first_isolate.html +++ b/docs/reference/first_isolate.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/freq.html b/docs/reference/freq.html index 7c2c4748e..cec35cfb2 100644 --- a/docs/reference/freq.html +++ b/docs/reference/freq.html @@ -81,7 +81,7 @@ top_freq can be used to get the top/bottom n items of a frequency table, with co AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/g.test.html b/docs/reference/g.test.html index f57baf1bd..4307c17c4 100644 --- a/docs/reference/g.test.html +++ b/docs/reference/g.test.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/get_locale.html b/docs/reference/get_locale.html index 7baea359e..ae890fa49 100644 --- a/docs/reference/get_locale.html +++ b/docs/reference/get_locale.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/ggplot_rsi.html b/docs/reference/ggplot_rsi.html index fce27feb0..b3705ad21 100644 --- a/docs/reference/ggplot_rsi.html +++ b/docs/reference/ggplot_rsi.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/guess_ab_col.html b/docs/reference/guess_ab_col.html index 8fa8e4a6e..71d550465 100644 --- a/docs/reference/guess_ab_col.html +++ 
b/docs/reference/guess_ab_col.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/index.html b/docs/reference/index.html index a4673d17a..59649e543 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/join.html b/docs/reference/join.html index 2a8644231..ecd57502d 100644 --- a/docs/reference/join.html +++ b/docs/reference/join.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/key_antibiotics.html b/docs/reference/key_antibiotics.html index 858be0179..96e2200f5 100644 --- a/docs/reference/key_antibiotics.html +++ b/docs/reference/key_antibiotics.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/kurtosis.html b/docs/reference/kurtosis.html index 3aa692ba5..595d1c1f2 100644 --- a/docs/reference/kurtosis.html +++ b/docs/reference/kurtosis.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/like.html b/docs/reference/like.html index f6135b1fb..6bca2172b 100644 --- a/docs/reference/like.html +++ b/docs/reference/like.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/mdro.html b/docs/reference/mdro.html index 89e04653b..abe6ba928 100644 --- a/docs/reference/mdro.html +++ b/docs/reference/mdro.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/microorganisms.codes.html b/docs/reference/microorganisms.codes.html index 70b51b939..5d7de0a8a 100644 --- a/docs/reference/microorganisms.codes.html +++ b/docs/reference/microorganisms.codes.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index 546afd18a..733669262 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff 
--git a/docs/reference/microorganisms.old.html b/docs/reference/microorganisms.old.html index f28ed60a2..bd787fa5b 100644 --- a/docs/reference/microorganisms.old.html +++ b/docs/reference/microorganisms.old.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index b6f525ca2..e4b13fe7f 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/mo_source.html b/docs/reference/mo_source.html index af0ac9cc6..90c98efbc 100644 --- a/docs/reference/mo_source.html +++ b/docs/reference/mo_source.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/p.symbol.html b/docs/reference/p.symbol.html index 4c2550f13..10c07b8d0 100644 --- a/docs/reference/p.symbol.html +++ b/docs/reference/p.symbol.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/portion.html b/docs/reference/portion.html index 25f4214b0..28e9bf9e0 100644 --- a/docs/reference/portion.html +++ b/docs/reference/portion.html @@ -81,7 +81,7 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/read.4D.html b/docs/reference/read.4D.html index 5298404d0..ae9df9d36 100644 --- a/docs/reference/read.4D.html +++ b/docs/reference/read.4D.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/resistance_predict.html b/docs/reference/resistance_predict.html index 3e2d62ee9..a0c4268d0 100644 --- a/docs/reference/resistance_predict.html +++ b/docs/reference/resistance_predict.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/rsi.html b/docs/reference/rsi.html index 4df754627..8b8725c05 100644 --- a/docs/reference/rsi.html +++ b/docs/reference/rsi.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 
0.5.0.9019 diff --git a/docs/reference/septic_patients.html b/docs/reference/septic_patients.html index e2c105811..4125bbb11 100644 --- a/docs/reference/septic_patients.html +++ b/docs/reference/septic_patients.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/skewness.html b/docs/reference/skewness.html index eb86b6e23..a179fb9dc 100644 --- a/docs/reference/skewness.html +++ b/docs/reference/skewness.html @@ -81,7 +81,7 @@ When negative: the left tail is longer; the mass of the distribution is concentr AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/man/catalogue_of_life_version.Rd b/man/catalogue_of_life_version.Rd index 3e9627839..a6e14dedf 100644 --- a/man/catalogue_of_life_version.Rd +++ b/man/catalogue_of_life_version.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R +% Please edit documentation in R/catalogue_of_life.R \name{catalogue_of_life_version} \alias{catalogue_of_life_version} \title{Version info of included Catalogue of Life} @@ -7,7 +7,7 @@ catalogue_of_life_version() } \description{ -Version info of included Catalogue of Life +This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year. } \section{Catalogue of Life}{ @@ -33,6 +33,11 @@ The syntax used to transform the original data to a cleansed R format, can be fo On our website \url{https://msberends.gitlab.io/AMR} you can find \href{https://msberends.gitlab.io/AMR/articles/AMR.html}{a comprehensive tutorial} about how to conduct AMR analysis, the \href{https://msberends.gitlab.io/AMR/reference}{complete documentation of all functions} (which reads a lot easier than here in R) and \href{https://msberends.gitlab.io/AMR/articles/WHONET.html}{an example analysis using WHONET data}. 
} +\examples{ +library(dplyr) +microorganisms \%>\% freq(kingdom) +microorganisms \%>\% group_by(kingdom) \%>\% freq(phylum, nmax = NULL) +} \seealso{ \code{\link{microorganisms}} } diff --git a/reproduction_of_microorganisms.R b/reproduction_of_microorganisms.R index 3722ca099..8db0197a3 100644 --- a/reproduction_of_microorganisms.R +++ b/reproduction_of_microorganisms.R @@ -116,6 +116,7 @@ MOs <- MOs %>% # only old names of species that are in MOs: MOs.old <- MOs.old %>% filter(col_id_new %in% MOs$col_id) +# add abbreviations so we can easily know which ones are which ones MOs <- MOs %>% group_by(kingdom) %>% # abbreviations may be same for genera between kingdoms, @@ -147,7 +148,12 @@ MOs <- MOs %>% abbr_species, abbr_subspecies, sep = "_")))) %>% - mutate(mo = ifelse(duplicated(.$mo), paste0(mo, "1"), mo)) %>% + mutate(mo = ifelse(duplicated(.$mo), + paste0(mo, "1"), + mo), + fullname = ifelse(fullname == "", + trimws(paste(genus, species, subspecies)), + fullname)) %>% select(mo, everything(), -abbr_genus, -abbr_species, -abbr_subspecies)