diff --git a/DESCRIPTION b/DESCRIPTION index 137b5ee2..7bb150cf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.5.0.9018 -Date: 2019-02-25 +Version: 0.5.0.9019 +Date: 2019-02-26 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NEWS.md b/NEWS.md index 79a22b84..971c292a 100755 --- a/NEWS.md +++ b/NEWS.md @@ -12,13 +12,11 @@ We've got a new website: [https://msberends.gitlab.io/AMR](https://msberends.git * **BREAKING**: removed deprecated functions, parameters and references to 'bactid'. Use `as.mo()` to identify an MO code. * Catalogue of Life as a new taxonomic source for data about microorganisms, which also contains all ITIS data we used previously. The `microorganisms` data set now contains: * All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria, Protozoa and Viruses - * All ~3,000 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales and Schizosaccharomycetales. - - The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant (sub)species are covered (like all species of *Aspergillus*, *Candida*, *Pneumocystis*, *Saccharomyces* and *Trichophyton*). 
+ * All ~3,000 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales and Schizosaccharomycetales (covering at least like all species of *Aspergillus*, *Candida*, *Pneumocystis*, *Saccharomyces* and *Trichophyton*) * All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed * The responsible author(s) and year of scientific publication - This data is updated annually - check the included version with `catalogue_of_life_version()`. + This data is updated annually - check the included version with the new function `catalogue_of_life_version()`. * Due to this change, some `mo` codes changed (e.g. *Streptococcus* changed from `B_STRPTC` to `B_STRPT`). A translation table is used internally to support older microorganism IDs, so users will not notice this difference. * New function `mo_rank()` for the taxonomic rank (genus, species, infraspecies, etc.) * New function `mo_url()` to get the URL to the Catalogue of Life @@ -86,7 +84,7 @@ We've got a new website: [https://msberends.gitlab.io/AMR](https://msberends.git * Better handling for *Salmonellae* * Understanding of highly virulent *E. coli* strains like EIEC, EPEC and STEC * There will be looked for uncertain results at default - these results will be returned with an informative warning - * Manual now contains more info about the algorithms + * Manual (help page) now contains more info about the algorithms * Progress bar will be shown when it takes more than 3 seconds to get results * Support for formatted console text * Console will return the percentage of uncoercable input diff --git a/R/catalogue_of_life.R b/R/catalogue_of_life.R index 89fa6507..09763da6 100755 --- a/R/catalogue_of_life.R +++ b/R/catalogue_of_life.R @@ -71,3 +71,24 @@ #' mo_fullname("C. 
elegans") #' # [1] "Chroococcus limneticus elegans" # Because a microorganism was found NULL + +#' Version info of included Catalogue of Life +#' +#' This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year. +#' @seealso \code{\link{microorganisms}} +#' @inheritSection catalogue_of_life Catalogue of Life +#' @inheritSection AMR Read more on our website! +#' @export +#' @examples +#' library(dplyr) +#' microorganisms %>% freq(kingdom) +#' microorganisms %>% group_by(kingdom) %>% freq(phylum, nmax = NULL) +catalogue_of_life_version <- function() { + # see the `catalogue_of_life` list in R/data.R + list(version = catalogue_of_life$version, + url = catalogue_of_life$url, + # annual release always somewhere in March + is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")), + n_species = nrow(AMR::microorganisms), + n_synonyms = nrow(AMR::microorganisms.old)) +} diff --git a/R/data.R b/R/data.R index 45d43fca..8ff7db5f 100755 --- a/R/data.R +++ b/R/data.R @@ -168,20 +168,6 @@ catalogue_of_life <- list( url = "http://www.catalogueoflife.org/annual-checklist/2018" ) -#' Version info of included Catalogue of Life -#' @seealso \code{\link{microorganisms}} -#' @inheritSection catalogue_of_life Catalogue of Life -#' @inheritSection AMR Read more on our website! 
-#' @export -catalogue_of_life_version <- function() { - list(version = catalogue_of_life$version, - url = catalogue_of_life$url, - # annual release always somewhere in March - is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")), - no_of_species = nrow(AMR::microorganisms), - no_of_synonyms = nrow(AMR::microorganisms.old)) -} - #' Data set with previously accepted taxonomic names #' #' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by \code{\link{as.mo}}. diff --git a/R/mo.R b/R/mo.R index dd419d06..4faa3b9f 100755 --- a/R/mo.R +++ b/R/mo.R @@ -166,7 +166,12 @@ #' mutate(mo = as.mo(paste(genus, species))) #' } as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, reference_df = get_mo_source()) { - if (all(x %in% AMR::microorganisms$fullname) + if (all(x %in% AMR::microorganisms$mo) + & isFALSE(Becker) + & isFALSE(Lancefield) + & is.null(reference_df)) { + y <- x + } else if (all(x %in% AMR::microorganisms$fullname) & isFALSE(Becker) & isFALSE(Lancefield) & is.null(reference_df)) { @@ -179,12 +184,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, if (any(is.na(y))) { y[is.na(y)] <- microorganismsDT[prevalence == 3][data.table(fullname = x[is.na(y)]), on = "fullname", "mo"][[1]] } - return(y) + } else { + # will be checked for mo class in validation and uses exec_as.mo internally if necessary + y <- mo_validate(x = x, property = "mo", + Becker = Becker, Lancefield = Lancefield, + allow_uncertain = allow_uncertain, reference_df = reference_df) } - # will be checked for mo class in validation - mo_validate(x = x, property = "mo", - Becker = Becker, Lancefield = Lancefield, - allow_uncertain = allow_uncertain, reference_df = reference_df) + structure(.Data = y, class = "mo") } #' @rdname as.mo @@ -891,6 +897,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = 
FALSE, stringsAsFactors = FALSE) df_input <- data.frame(input = as.character(x_input), stringsAsFactors = FALSE) + x <- df_input %>% left_join(df_found, by = "input") %>% diff --git a/R/mo_property.R b/R/mo_property.R index 0d3dc940..f25e5eb2 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -132,7 +132,7 @@ #' mo_taxonomy("E. coli") mo_fullname <- function(x, language = get_locale(), ...) { x <- mo_validate(x = x, property = "fullname", ...) - mo_translate(x, language = language) + translate(x, language = language) } #' @rdname mo_property @@ -148,46 +148,64 @@ mo_shortname <- function(x, language = get_locale(), ...) { if (is.null(Lancefield)) { Lancefield <- FALSE } - if (Becker %in% c(TRUE, "all") | Lancefield == TRUE) { - res1 <- AMR::as.mo(x, Becker = FALSE, Lancefield = FALSE, reference_df = dots$reference_df) - res2 <- suppressWarnings(AMR::as.mo(res1, ...)) - res2_fullname <- mo_fullname(res2, language = language) - res2_fullname[res2_fullname %like% " \\(CoNS\\)"] <- "CoNS" - res2_fullname[res2_fullname %like% " \\(CoPS\\)"] <- "CoPS" - res2_fullname[res2_fullname %like% " \\(KNS\\)"] <- "KNS" - res2_fullname[res2_fullname %like% " \\(KPS\\)"] <- "KPS" - res2_fullname[res2_fullname %like% " \\(CNS\\)"] <- "CNS" - res2_fullname[res2_fullname %like% " \\(CPS\\)"] <- "CPS" - res2_fullname <- gsub("Streptococcus (group|Gruppe|gruppe|groep|grupo|gruppo|groupe) (.)", - "G\\2S", - res2_fullname) # turn "Streptococcus group A" and "Streptococcus grupo A" to "GAS" - res2_fullname_vector <- res2_fullname[res2_fullname == mo_fullname(res1)] - res2_fullname[res2_fullname == mo_fullname(res1)] <- paste0(substr(mo_genus(res2_fullname_vector), 1, 1), - ". ", - suppressWarnings(mo_species(res2_fullname_vector))) - if (sum(res1 == res2, na.rm = TRUE) > 0) { - res1[res1 == res2] <- paste0(substr(mo_genus(res1[res1 == res2]), 1, 1), - ". 
", - suppressWarnings(mo_species(res1[res1 == res2]))) - } - res1[res1 != res2] <- res2_fullname - result <- as.character(res1) - } else { - x <- AMR::as.mo(x, ...) - suppressWarnings( - result <- data.frame(mo = x) %>% - left_join(AMR::microorganisms, by = "mo") %>% - mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>% - pull(shortname) - ) + + shorten <- function(x) { + # easiest: no transformations needed + x <- mo_fullname(x, language = "en") + # shorten for the ones that have a space: shorten first word and write out second word + shorten_these <- x %like% " " & !x %like% "Streptococcus group " + x[shorten_these] <- paste0(substr(x[shorten_these], 1, 1), + ". ", + x[shorten_these] %>% + strsplit(" ", fixed = TRUE) %>% + unlist() %>% + .[2]) + x } - mo_translate(result, language = language) + + if (isFALSE(Becker) & isFALSE(Lancefield)) { + result <- shorten(x) + + } else { + # get result without transformations + res1 <- AMR::as.mo(x, Becker = FALSE, Lancefield = FALSE, reference_df = dots$reference_df) + # and result with transformations + res2 <- suppressWarnings(AMR::as.mo(res1, ...)) + if (res1 == res2 + & !res1 %like% "^B_STRPT_GR") { + result <- shorten(x) + } else { + res2_fullname <- mo_fullname(res2, language = language) + res2_fullname[res2_fullname %like% " \\(CoNS\\)"] <- "CoNS" + res2_fullname[res2_fullname %like% " \\(CoPS\\)"] <- "CoPS" + res2_fullname[res2_fullname %like% " \\(KNS\\)"] <- "KNS" + res2_fullname[res2_fullname %like% " \\(KPS\\)"] <- "KPS" + res2_fullname[res2_fullname %like% " \\(CNS\\)"] <- "CNS" + res2_fullname[res2_fullname %like% " \\(CPS\\)"] <- "CPS" + res2_fullname <- gsub("Streptococcus (group|Gruppe|gruppe|groep|grupo|gruppo|groupe) (.)", + "G\\2S", + res2_fullname) # turn "Streptococcus group A" and "Streptococcus grupo A" to "GAS" + res2_fullname_vector <- res2_fullname[res2_fullname == mo_fullname(res1)] + res2_fullname[res2_fullname == 
mo_fullname(res1)] <- paste0(substr(mo_genus(res2_fullname_vector), 1, 1), + ". ", + suppressWarnings(mo_species(res2_fullname_vector))) + if (sum(res1 == res2, na.rm = TRUE) > 0) { + res1[res1 == res2] <- paste0(substr(mo_genus(res1[res1 == res2]), 1, 1), + ". ", + suppressWarnings(mo_species(res1[res1 == res2]))) + } + res1[res1 != res2] <- res2_fullname + result <- as.character(res1) + } + } + + translate(result, language = language) } #' @rdname mo_property #' @export mo_subspecies <- function(x, language = get_locale(), ...) { - mo_translate(mo_validate(x = x, property = "subspecies", ...), language = language) + translate(validate(x = x, property = "subspecies", ...), language = language) } #' @rdname mo_property diff --git a/data/microorganisms.rda b/data/microorganisms.rda index b1cd6266..ad0de3ba 100755 Binary files a/data/microorganisms.rda and b/data/microorganisms.rda differ diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 8c8e0abf..05482055 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/articles/AMR.html b/docs/articles/AMR.html index 178e6d5c..0c3fafb8 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -192,7 +192,7 @@

How to conduct AMR analysis

Matthijs S. Berends

-

25 February 2019

+

26 February 2019

@@ -201,7 +201,7 @@ -

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 25 February 2019.

+

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 26 February 2019.

Introduction

@@ -217,21 +217,21 @@ -2019-02-25 +2019-02-26 abcd Escherichia coli S S -2019-02-25 +2019-02-26 abcd Escherichia coli S R -2019-02-25 +2019-02-26 efgh Escherichia coli R @@ -327,42 +327,42 @@ -2011-05-27 -C5 -Hospital B +2015-04-06 +E7 +Hospital C Escherichia coli R +I S -S -S +R M -2010-08-22 -Q6 +2016-10-23 +S6 +Hospital B +Streptococcus pneumoniae +S +S +S +S +F + + +2010-02-02 +O1 Hospital D Escherichia coli S S -S -S -F - - -2017-03-11 -V9 -Hospital A -Klebsiella pneumoniae -S -S -S +R S F -2012-12-25 -A1 -Hospital B +2014-03-12 +H4 +Hospital A Escherichia coli S S @@ -371,22 +371,22 @@ M -2011-09-17 -I5 +2011-11-01 +X1 Hospital B Escherichia coli -S R S S -M +R +F -2017-05-19 -O7 -Hospital C -Streptococcus pneumoniae -S +2016-12-10 +W4 +Hospital B +Escherichia coli +R S S S @@ -411,8 +411,8 @@ #> #> Item Count Percent Cum. Count Cum. Percent #> --- ----- ------- -------- ----------- ------------- -#> 1 M 10,437 52.2% 10,437 52.2% -#> 2 F 9,563 47.8% 20,000 100.0% +#> 1 M 10,479 52.4% 10,479 52.4% +#> 2 F 9,521 47.6% 20,000 100.0%

So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values M and F. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.

The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

data <- data %>%
@@ -443,10 +443,10 @@
 #> Kingella kingae (no changes)
 #> 
 #> EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
-#> Table 1:  Intrinsic resistance in Enterobacteriaceae (1294 changes)
+#> Table 1:  Intrinsic resistance in Enterobacteriaceae (1324 changes)
 #> Table 2:  Intrinsic resistance in non-fermentative Gram-negative bacteria (no changes)
 #> Table 3:  Intrinsic resistance in other Gram-negative bacteria (no changes)
-#> Table 4:  Intrinsic resistance in Gram-positive bacteria (2675 changes)
+#> Table 4:  Intrinsic resistance in Gram-positive bacteria (2776 changes)
 #> Table 8:  Interpretive rules for B-lactam agents and Gram-positive cocci (no changes)
 #> Table 9:  Interpretive rules for B-lactam agents and Gram-negative rods (no changes)
 #> Table 10: Interpretive rules for B-lactam agents and other Gram-negative bacteria (no changes)
@@ -462,9 +462,9 @@
 #> Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no changes)
 #> Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no changes)
 #> 
-#> => EUCAST rules affected 7,390 out of 20,000 rows
+#> => EUCAST rules affected 7,376 out of 20,000 rows
 #>    -> added 0 test results
-#>    -> changed 3,969 test results (0 to S; 0 to I; 3,969 to R)
+#> -> changed 4,100 test results (0 to S; 0 to I; 4,100 to R)

@@ -489,7 +489,7 @@ #> NOTE: Using column `bacteria` as input for `col_mo`. #> NOTE: Using column `date` as input for `col_date`. #> NOTE: Using column `patient_id` as input for `col_patient_id`. -#> => Found 5,670 first isolates (28.4% of total)

+#> => Found 5,678 first isolates (28.4% of total)

So only 28.4% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

data_1st <- data %>% 
   filter(first == TRUE)
@@ -516,43 +516,43 @@ 1 -2010-03-10 -F7 +2010-01-25 +B9 B_ESCHR_COL -S -S R +R +S S TRUE 2 -2010-03-12 -F7 +2010-03-01 +B9 B_ESCHR_COL I -I S +R S FALSE 3 -2010-06-18 -F7 +2010-06-15 +B9 B_ESCHR_COL +R S -I S S FALSE 4 -2010-10-16 -F7 +2010-07-08 +B9 B_ESCHR_COL -S +R S S S @@ -560,8 +560,8 @@ 5 -2010-11-12 -F7 +2010-07-20 +B9 B_ESCHR_COL S S @@ -571,19 +571,19 @@ 6 -2010-11-24 -F7 +2010-09-18 +B9 B_ESCHR_COL +R S -I S S FALSE 7 -2011-02-24 -F7 +2010-09-21 +B9 B_ESCHR_COL S S @@ -593,21 +593,21 @@ 8 -2011-03-30 -F7 +2010-11-24 +B9 B_ESCHR_COL +S R S S -S -TRUE +FALSE 9 -2011-08-09 -F7 +2010-12-08 +B9 B_ESCHR_COL -S +R R S S @@ -615,18 +615,18 @@ 10 -2011-08-14 -F7 +2011-01-19 +B9 B_ESCHR_COL S -S +I S S FALSE -

Only 2 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.

+

Only 1 isolate is marked as ‘first’ according to the CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad-spectrum ones, 6 narrow-spectrum ones for Gram negatives and 6 narrow-spectrum ones for Gram positives. These can be defined by the user.

If a column exists with a name like ‘key(…)ab’, the first_isolate() function will automatically use it and determine the first weighted isolates. Mind the NOTEs in the output below:

data <- data %>% 
   mutate(keyab = key_antibiotics(.)) %>% 
@@ -637,7 +637,7 @@
 #> NOTE: Using column `patient_id` as input for `col_patient_id`.
 #> NOTE: Using column `keyab` as input for `col_keyantibiotics`. Use col_keyantibiotics  = FALSE to prevent this.
 #> [Criterion] Inclusion based on key antibiotics, ignoring I.
-#> => Found 15,887 first weighted isolates (79.4% of total)
+#> => Found 15,822 first weighted isolates (79.1% of total) @@ -654,46 +654,46 @@ - - + + - - + + - - + + - + - - + + + - - + - - + + - + @@ -702,58 +702,58 @@ - - + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - + @@ -762,11 +762,11 @@ - - + + - + @@ -774,11 +774,11 @@
isolate
12010-03-10F72010-01-25B9 B_ESCHR_COLSS RRS S TRUE TRUE
22010-03-12F72010-03-01B9 B_ESCHR_COL II SR S FALSE TRUE
32010-06-18F72010-06-15B9 B_ESCHR_COLR SI S S FALSEFALSETRUE
42010-10-16F72010-07-08B9 B_ESCHR_COLSR S S S
52010-11-12F72010-07-20B9 B_ESCHR_COL S S S S FALSEFALSETRUE
62010-11-24F7B_ESCHR_COLSISSFALSEFALSE
72011-02-24F7B_ESCHR_COLSSSSFALSEFALSE
82011-03-30F72010-09-18B9 B_ESCHR_COL R S S SFALSE TRUE
72010-09-21B9B_ESCHR_COLSSSSFALSETRUE
82010-11-24B9B_ESCHR_COLSRSSFALSE TRUE
92011-08-09F72010-12-08B9 B_ESCHR_COLSR R S S
102011-08-14F72011-01-19B9 B_ESCHR_COL SSI S S FALSE
-

Instead of 2, now 5 isolates are flagged. In total, 79.4% of all isolates are marked ‘first weighted’ - 51.1% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

+

Instead of 1, now 9 isolates are flagged. In total, 79.1% of all isolates are marked ‘first weighted’ - 50.7% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

As with filter_first_isolate(), there’s a shortcut for this new algorithm too:

data_1st <- data %>% 
   filter_first_weighted_isolate()
-

So we end up with 15,887 isolates for analysis.

+

So we end up with 15,822 isolates for analysis.

We can remove unneeded columns:

data_1st <- data_1st %>% 
   select(-c(first, keyab))
@@ -804,14 +804,14 @@ 1 -2011-05-27 -C5 -Hospital B +2015-04-06 +E7 +Hospital C B_ESCHR_COL R +I S -S -S +R M Gram negative Escherichia @@ -820,14 +820,46 @@ 2 -2010-08-22 -Q6 +2016-10-23 +S6 +Hospital B +B_STRPT_PNE +S +S +S +R +F +Gram positive +Streptococcus +pneumoniae +TRUE + + +3 +2010-02-02 +O1 Hospital D B_ESCHR_COL S S +R +S +F +Gram negative +Escherichia +coli +TRUE + + +5 +2011-11-01 +X1 +Hospital B +B_ESCHR_COL +R S S +R F Gram negative Escherichia @@ -836,27 +868,11 @@ 6 -2017-05-19 -O7 -Hospital C -B_STRPT_PNE -S -S -S -R -F -Gram positive -Streptococcus -pneumoniae -TRUE - - -8 -2011-01-19 -Q7 +2016-12-10 +W4 Hospital B B_ESCHR_COL -S +R S S S @@ -866,36 +882,20 @@ coli TRUE - -9 -2014-06-24 -S9 -Hospital B -B_STRPT_PNE -R -R -S -R -F -Gram positive -Streptococcus -pneumoniae -TRUE - -10 -2011-10-01 -O10 -Hospital B -B_STPHY_AUR -R +7 +2015-07-07 +P8 +Hospital D +B_ESCHR_COL S R S +S F -Gram positive -Staphylococcus -aureus +Gram negative +Escherichia +coli TRUE @@ -915,9 +915,9 @@
freq(paste(data_1st$genus, data_1st$species))

Or it can be used the dplyr way, which is easier to read:

data_1st %>% freq(genus, species)
-

Frequency table of genus and species from a data.frame (15,887 x 13)

+

Frequency table of genus and species from a data.frame (15,822 x 13)

Columns: 2
-Length: 15,887 (of which NA: 0 = 0.00%)
+Length: 15,822 (of which NA: 0 = 0.00%)
Unique: 4

Shortest: 16
Longest: 24

@@ -934,33 +934,33 @@ Longest: 24

1 Escherichia coli -7,900 -49.7% -7,900 -49.7% +7,838 +49.5% +7,838 +49.5% 2 Staphylococcus aureus -3,951 -24.9% -11,851 +3,965 +25.1% +11,803 74.6% 3 Streptococcus pneumoniae -2,413 -15.2% -14,264 -89.8% +2,457 +15.5% +14,260 +90.1% 4 Klebsiella pneumoniae -1,623 -10.2% -15,887 +1,562 +9.9% +15,822 100.0% @@ -971,7 +971,7 @@ Longest: 24

Resistance percentages

The functions portion_R, portion_IR, portion_I, portion_SI and portion_S can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own:

data_1st %>% portion_IR(amox)
-#> [1] 0.4763014
+#> [1] 0.4722538

Or they can be used in conjunction with group_by() and summarise(), both from the dplyr package:

data_1st %>% 
   group_by(hospital) %>% 
@@ -984,19 +984,19 @@ Longest: 24

Hospital A -0.4774125 +0.4692014 Hospital B -0.4762754 +0.4694061 Hospital C -0.4630957 +0.4845361 Hospital D -0.4845070 +0.4727669 @@ -1014,23 +1014,23 @@ Longest: 24

Hospital A -0.4774125 -4715 +0.4692014 +4708 Hospital B -0.4762754 -5606 +0.4694061 +5573 Hospital C -0.4630957 -2371 +0.4845361 +2328 Hospital D -0.4845070 -3195 +0.4727669 +3213 @@ -1050,27 +1050,27 @@ Longest: 24

Escherichia -0.7267089 -0.9002532 -0.9720253 +0.7269712 +0.9050778 +0.9744833 Klebsiella -0.7233518 -0.8983364 -0.9741220 +0.7349552 +0.8988476 +0.9763124 Staphylococcus -0.7296887 -0.9248292 -0.9787396 +0.7263556 +0.9235813 +0.9793190 Streptococcus -0.7318690 +0.7391127 0.0000000 -0.7318690 +0.7391127 diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png index 82f75604..13b0d9f9 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png index fb5561c1..38decffc 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png index d8a26ca3..65da5ffc 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png index b1b19637..d03665cb 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ diff --git a/docs/articles/EUCAST.html b/docs/articles/EUCAST.html index 00358ab3..c0c90e4e 100644 --- a/docs/articles/EUCAST.html +++ b/docs/articles/EUCAST.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019
@@ -192,7 +192,7 @@

How to apply EUCAST rules

Matthijs S. Berends

-

25 February 2019

+

26 February 2019

diff --git a/docs/articles/G_test.html b/docs/articles/G_test.html index fab4163e..d0cb9763 100644 --- a/docs/articles/G_test.html +++ b/docs/articles/G_test.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -192,7 +192,7 @@

How to use the G-test

Matthijs S. Berends

-

25 February 2019

+

26 February 2019

diff --git a/docs/articles/SPSS.html b/docs/articles/SPSS.html index 09c41dc6..48307f1d 100644 --- a/docs/articles/SPSS.html +++ b/docs/articles/SPSS.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/articles/WHONET.html b/docs/articles/WHONET.html index f2fbb6dc..36401586 100644 --- a/docs/articles/WHONET.html +++ b/docs/articles/WHONET.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -192,7 +192,7 @@

How to work with WHONET data

Matthijs S. Berends

-

25 February 2019

+

26 February 2019

diff --git a/docs/articles/atc_property.html b/docs/articles/atc_property.html index 10c2d65a..ff8088b3 100644 --- a/docs/articles/atc_property.html +++ b/docs/articles/atc_property.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -192,7 +192,7 @@

How to get properties of an antibiotic

Matthijs S. Berends

-

25 February 2019

+

26 February 2019

diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png index fabe6abc..ab59c7db 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 6645d5b5..8641eb00 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/authors.html b/docs/authors.html index 391ebb8a..c39fc2b4 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/index.html b/docs/index.html index e4c42d8a..55733410 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/AMR-deprecated.html b/docs/reference/AMR-deprecated.html index 61dff2d8..66bada1f 100644 --- a/docs/reference/AMR-deprecated.html +++ b/docs/reference/AMR-deprecated.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/AMR.html b/docs/reference/AMR.html index db0a739a..c634b443 100644 --- a/docs/reference/AMR.html +++ b/docs/reference/AMR.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/WHOCC.html b/docs/reference/WHOCC.html index a8cfa70f..06c54047 100644 --- a/docs/reference/WHOCC.html +++ b/docs/reference/WHOCC.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/WHONET.html b/docs/reference/WHONET.html index 97e891f5..4b55fbad 100644 --- a/docs/reference/WHONET.html +++ b/docs/reference/WHONET.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/abname.html b/docs/reference/abname.html index ebb64312..ec256fd7 100644 --- a/docs/reference/abname.html +++ b/docs/reference/abname.html @@ -80,7 
+80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/age.html b/docs/reference/age.html index 20a427fa..969e0d09 100644 --- a/docs/reference/age.html +++ b/docs/reference/age.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/age_groups.html b/docs/reference/age_groups.html index d7f253a8..4aec10c5 100644 --- a/docs/reference/age_groups.html +++ b/docs/reference/age_groups.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/antibiotics.html b/docs/reference/antibiotics.html index b5687ad8..46d80e72 100644 --- a/docs/reference/antibiotics.html +++ b/docs/reference/antibiotics.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/as.atc.html b/docs/reference/as.atc.html index 9ac2f619..e2574214 100644 --- a/docs/reference/as.atc.html +++ b/docs/reference/as.atc.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/as.mic.html b/docs/reference/as.mic.html index 76e4b918..b705333b 100644 --- a/docs/reference/as.mic.html +++ b/docs/reference/as.mic.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 982f5f61..f224e862 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/as.rsi.html b/docs/reference/as.rsi.html index 45ac519c..eb6d619d 100644 --- a/docs/reference/as.rsi.html +++ b/docs/reference/as.rsi.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/atc_online.html b/docs/reference/atc_online.html index 968ab87a..2df54cc9 100644 --- a/docs/reference/atc_online.html +++ b/docs/reference/atc_online.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/atc_property.html b/docs/reference/atc_property.html index 40034769..678f6634 100644 --- 
a/docs/reference/atc_property.html +++ b/docs/reference/atc_property.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/availability.html b/docs/reference/availability.html index affa0a25..27309eef 100644 --- a/docs/reference/availability.html +++ b/docs/reference/availability.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/catalogue_of_life.html b/docs/reference/catalogue_of_life.html index 493bc4de..3437f624 100644 --- a/docs/reference/catalogue_of_life.html +++ b/docs/reference/catalogue_of_life.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/catalogue_of_life_version.html b/docs/reference/catalogue_of_life_version.html index 810eaf0b..bab36e17 100644 --- a/docs/reference/catalogue_of_life_version.html +++ b/docs/reference/catalogue_of_life_version.html @@ -47,7 +47,7 @@ - + @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -237,7 +237,7 @@
-

Version info of included Catalogue of Life

+

This function returns a list with info about the included data from the Catalogue of Life. It also shows whether the included version is the latest annual release. The Catalogue of Life publishes its annual release in March each year.

@@ -268,6 +268,12 @@ This package contains the complete taxonomic tree of almost all microorganisms f

microorganisms

+

Examples

+
# NOT RUN {
+library(dplyr)
+microorganisms %>% freq(kingdom)
+microorganisms %>% group_by(kingdom) %>% freq(phylum, nmax = NULL)
+# }
diff --git a/docs/reference/count.html b/docs/reference/count.html index 7e6895d6..e822e5a8 100644 --- a/docs/reference/count.html +++ b/docs/reference/count.html @@ -81,7 +81,7 @@ count_R and count_IR can be used to count resistant isolates, count_S and count_ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/eucast_rules.html b/docs/reference/eucast_rules.html index 0018acc8..58f28a24 100644 --- a/docs/reference/eucast_rules.html +++ b/docs/reference/eucast_rules.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/first_isolate.html b/docs/reference/first_isolate.html index 462f6069..163cc555 100644 --- a/docs/reference/first_isolate.html +++ b/docs/reference/first_isolate.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/freq.html b/docs/reference/freq.html index 7c2c4748..cec35cfb 100644 --- a/docs/reference/freq.html +++ b/docs/reference/freq.html @@ -81,7 +81,7 @@ top_freq can be used to get the top/bottom n items of a frequency table, with co AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/g.test.html b/docs/reference/g.test.html index f57baf1b..4307c17c 100644 --- a/docs/reference/g.test.html +++ b/docs/reference/g.test.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/get_locale.html b/docs/reference/get_locale.html index 7baea359..ae890fa4 100644 --- a/docs/reference/get_locale.html +++ b/docs/reference/get_locale.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/ggplot_rsi.html b/docs/reference/ggplot_rsi.html index fce27feb..b3705ad2 100644 --- a/docs/reference/ggplot_rsi.html +++ b/docs/reference/ggplot_rsi.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/guess_ab_col.html b/docs/reference/guess_ab_col.html index 8fa8e4a6..71d55046 100644 --- a/docs/reference/guess_ab_col.html +++ b/docs/reference/guess_ab_col.html @@ -80,7 
+80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/index.html b/docs/reference/index.html index a4673d17..59649e54 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/join.html b/docs/reference/join.html index 2a864423..ecd57502 100644 --- a/docs/reference/join.html +++ b/docs/reference/join.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/key_antibiotics.html b/docs/reference/key_antibiotics.html index 858be017..96e2200f 100644 --- a/docs/reference/key_antibiotics.html +++ b/docs/reference/key_antibiotics.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/kurtosis.html b/docs/reference/kurtosis.html index 3aa692ba..595d1c1f 100644 --- a/docs/reference/kurtosis.html +++ b/docs/reference/kurtosis.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/like.html b/docs/reference/like.html index f6135b1f..6bca2172 100644 --- a/docs/reference/like.html +++ b/docs/reference/like.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/mdro.html b/docs/reference/mdro.html index 89e04653..abe6ba92 100644 --- a/docs/reference/mdro.html +++ b/docs/reference/mdro.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/microorganisms.codes.html b/docs/reference/microorganisms.codes.html index 70b51b93..5d7de0a8 100644 --- a/docs/reference/microorganisms.codes.html +++ b/docs/reference/microorganisms.codes.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index 546afd18..73366926 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/microorganisms.old.html 
b/docs/reference/microorganisms.old.html index f28ed60a..bd787fa5 100644 --- a/docs/reference/microorganisms.old.html +++ b/docs/reference/microorganisms.old.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index b6f525ca..e4b13fe7 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/mo_source.html b/docs/reference/mo_source.html index af0ac9cc..90c98efb 100644 --- a/docs/reference/mo_source.html +++ b/docs/reference/mo_source.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/p.symbol.html b/docs/reference/p.symbol.html index 4c2550f1..10c07b8d 100644 --- a/docs/reference/p.symbol.html +++ b/docs/reference/p.symbol.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/portion.html b/docs/reference/portion.html index 25f4214b..28e9bf9e 100644 --- a/docs/reference/portion.html +++ b/docs/reference/portion.html @@ -81,7 +81,7 @@ portion_R and portion_IR can be used to calculate resistance, portion_S and port AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/read.4D.html b/docs/reference/read.4D.html index 5298404d..ae9df9d3 100644 --- a/docs/reference/read.4D.html +++ b/docs/reference/read.4D.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/resistance_predict.html b/docs/reference/resistance_predict.html index 3e2d62ee..a0c4268d 100644 --- a/docs/reference/resistance_predict.html +++ b/docs/reference/resistance_predict.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/rsi.html b/docs/reference/rsi.html index 4df75462..8b8725c0 100644 --- a/docs/reference/rsi.html +++ b/docs/reference/rsi.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/septic_patients.html 
b/docs/reference/septic_patients.html index e2c10581..4125bbb1 100644 --- a/docs/reference/septic_patients.html +++ b/docs/reference/septic_patients.html @@ -80,7 +80,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/docs/reference/skewness.html b/docs/reference/skewness.html index eb86b6e2..a179fb9d 100644 --- a/docs/reference/skewness.html +++ b/docs/reference/skewness.html @@ -81,7 +81,7 @@ When negative: the left tail is longer; the mass of the distribution is concentr AMR (for R) - 0.5.0.9018 + 0.5.0.9019 diff --git a/man/catalogue_of_life_version.Rd b/man/catalogue_of_life_version.Rd index 3e962783..a6e14ded 100644 --- a/man/catalogue_of_life_version.Rd +++ b/man/catalogue_of_life_version.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R +% Please edit documentation in R/catalogue_of_life.R \name{catalogue_of_life_version} \alias{catalogue_of_life_version} \title{Version info of included Catalogue of Life} @@ -7,7 +7,7 @@ catalogue_of_life_version() } \description{ -Version info of included Catalogue of Life +This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year. } \section{Catalogue of Life}{ @@ -33,6 +33,11 @@ The syntax used to transform the original data to a cleansed R format, can be fo On our website \url{https://msberends.gitlab.io/AMR} you can find \href{https://msberends.gitlab.io/AMR/articles/AMR.html}{a comprehensive tutorial} about how to conduct AMR analysis, the \href{https://msberends.gitlab.io/AMR/reference}{complete documentation of all functions} (which reads a lot easier than here in R) and \href{https://msberends.gitlab.io/AMR/articles/WHONET.html}{an example analysis using WHONET data}. 
} +\examples{ +library(dplyr) +microorganisms \%>\% freq(kingdom) +microorganisms \%>\% group_by(kingdom) \%>\% freq(phylum, nmax = NULL) +} \seealso{ \code{\link{microorganisms}} } diff --git a/reproduction_of_microorganisms.R b/reproduction_of_microorganisms.R index 3722ca09..8db0197a 100644 --- a/reproduction_of_microorganisms.R +++ b/reproduction_of_microorganisms.R @@ -116,6 +116,7 @@ MOs <- MOs %>% # only old names of species that are in MOs: MOs.old <- MOs.old %>% filter(col_id_new %in% MOs$col_id) +# add abbreviations so we can easily know which ones are which ones MOs <- MOs %>% group_by(kingdom) %>% # abbreviations may be same for genera between kingdoms, @@ -147,7 +148,12 @@ MOs <- MOs %>% abbr_species, abbr_subspecies, sep = "_")))) %>% - mutate(mo = ifelse(duplicated(.$mo), paste0(mo, "1"), mo)) %>% + mutate(mo = ifelse(duplicated(.$mo), + paste0(mo, "1"), + mo), + fullname = ifelse(fullname == "", # empty full name: rebuild it from genus, species and subspecies + trimws(paste(genus, species, subspecies)), + fullname)) %>% select(mo, everything(), -abbr_genus, -abbr_species, -abbr_subspecies)