1
0
mirror of https://github.com/msberends/AMR.git synced 2024-12-27 07:26:11 +01:00
AMR/R/catalogue_of_life.R

137 lines
9.0 KiB
R
Raw Normal View History

2019-02-20 00:04:48 +01:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
# https://gitlab.com/msberends/AMR #
# #
# LICENCE #
# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# This R package was created for academic research and was publicly #
# released in the hope that it will be useful, but it comes WITHOUT #
# ANY WARRANTY OR LIABILITY. #
2019-04-05 18:47:39 +02:00
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
2019-02-20 00:04:48 +01:00
# ==================================================================== #
#' The Catalogue of Life
#'
#' This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life.
#' @section Catalogue of Life:
2019-02-28 13:56:28 +01:00
#' \if{html}{\figure{logo_col.png}{options: height=40px style=margin-bottom:5px} \cr}
2019-04-07 22:40:02 +02:00
#' This package contains the complete taxonomic tree of almost all microorganisms (~65,000 species) from the authoritative and comprehensive Catalogue of Life (\url{http://www.catalogueoflife.org}). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.
2019-02-20 00:04:48 +01:00
#'
2019-02-28 13:56:28 +01:00
#' \link[=catalogue_of_life]{Click here} for more information about the included taxa. The Catalogue of Life releases updates annually; check which version was included in this package with \code{\link{catalogue_of_life_version}()}.
#' @section Included taxa:
2019-02-20 00:04:48 +01:00
#' Included are:
#' \itemize{
2019-03-18 14:29:41 +01:00
#' \item{All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria and Protozoa}
2019-02-28 13:56:28 +01:00
#' \item{All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of \emph{Aspergillus}, \emph{Candida}, \emph{Cryptococcus}, \emph{Histoplasma}, \emph{Pneumocystis}, \emph{Saccharomyces} and \emph{Trichophyton}).}
#' \item{All ~2,000 (sub)species from ~100 other relevant genera, from the kingdoms of Animalia and Plantae (like \emph{Strongyloides} and \emph{Taenia})}
2019-04-05 18:47:39 +02:00
#' \item{All ~21,000 previously accepted names of included (sub)species that have been taxonomically renamed}
2019-02-20 00:04:48 +01:00
#' \item{The complete taxonomic tree of all included (sub)species: from kingdom to subspecies}
#' \item{The responsible author(s) and year of scientific publication}
#' }
#'
#' The Catalogue of Life (\url{http://www.catalogueoflife.org}) is the most comprehensive and authoritative global index of species currently available. It holds essential information on the names, relationships and distributions of over 1.6 million species. The Catalogue of Life is used to support the major biodiversity and conservation information services such as the Global Biodiversity Information Facility (GBIF), Encyclopedia of Life (EoL) and the International Union for Conservation of Nature Red List. It is recognised by the Convention on Biological Diversity as a significant component of the Global Taxonomy Initiative and a contribution to Target 1 of the Global Strategy for Plant Conservation.
#'
#' The syntax used to transform the original data to a cleansed R format can be found here: \url{https://gitlab.com/msberends/AMR/blob/master/reproduction_of_microorganisms.R}.
#' @inheritSection AMR Read more on our website!
#' @name catalogue_of_life
#' @rdname catalogue_of_life
2019-03-18 14:29:41 +01:00
#' @seealso Data set \code{\link{microorganisms}} for the actual data. \cr
#' Function \code{\link{as.mo}()} to use the data for intelligent determination of microorganisms.
2019-02-20 00:04:48 +01:00
#' @examples
#' # Get version info of included data set
#' catalogue_of_life_version()
#'
#'
#' # Get a note when a species was renamed
#' mo_shortname("Chlamydia psittaci")
#' # Note: 'Chlamydia psittaci' (Page, 1968) was renamed
#' # 'Chlamydophila psittaci' (Everett et al., 1999)
#' # [1] "C. psittaci"
#'
#' # Get any property from the entire taxonomic tree for all included species
#' mo_class("E. coli")
#' # [1] "Gammaproteobacteria"
#'
#' mo_family("E. coli")
#' # [1] "Enterobacteriaceae"
#'
#' mo_gramstain("E. coli") # based on kingdom and phylum, see ?mo_gramstain
#' # [1] "Gram negative"
#'
#' mo_ref("E. coli")
#' # [1] "Castellani et al., 1919"
#'
#' # Do not get mistaken - the package only includes microorganisms
#' mo_phylum("C. elegans")
#' # [1] "Cyanobacteria" # Bacteria?!
#' mo_fullname("C. elegans")
#' # [1] "Chroococcus limneticus elegans" # Because a microorganism was found
NULL # no object is defined here; this NULL only anchors the preceding roxygen block (help topic `catalogue_of_life`)
2019-02-26 12:33:26 +01:00
#' Version info of included Catalogue of Life
#'
2019-03-18 14:29:41 +01:00
#' This function returns information about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year.
2019-02-26 12:33:26 +01:00
#' @seealso \code{\link{microorganisms}}
2019-03-26 14:24:03 +01:00
#' @details The list item \code{...$catalogue_of_life$is_latest_annual_release} is based on the system date.
2019-03-18 14:29:41 +01:00
#'
#' For DSMZ, see \code{?microorganisms}.
2019-03-26 14:24:03 +01:00
#' @return a \code{list}, which prints in pretty format
2019-02-26 12:33:26 +01:00
#' @inheritSection catalogue_of_life Catalogue of Life
#' @inheritSection AMR Read more on our website!
2019-03-18 14:29:41 +01:00
#' @importFrom crayon bold underline
#' @importFrom dplyr filter
2019-02-26 12:33:26 +01:00
#' @export
#' @examples
#' library(dplyr)
#' microorganisms %>% freq(kingdom)
#' microorganisms %>% group_by(kingdom) %>% freq(phylum, nmax = NULL)
catalogue_of_life_version <- function() {
  # Collects version details and row counts of the included taxonomic data.
  # The metadata comes from the `catalogue_of_life` list (see R/data.R); the
  # counts come from the `microorganisms` and `microorganisms.old` data sets.
  meta <- catalogue_of_life
  taxa <- AMR::microorganisms

  # The annual release is assumed to appear somewhere in May, so every date
  # before 1 June of the year after the included release counts as "latest".
  # NOTE(review): the roxygen description mentions March instead - confirm.
  next_release_cutoff <- as.Date(paste0(meta$year + 1, "-06-01"))

  # Catalogue of Life: fill the "{year}" placeholder in version text and URL
  col_info <- list(
    version = gsub("{year}", meta$year, meta$version, fixed = TRUE),
    url = gsub("{year}", meta$year, meta$url_CoL, fixed = TRUE),
    is_latest_annual_release = Sys.Date() < next_release_cutoff,
    n = nrow(filter(taxa, source == "CoL"))
  )

  # DSMZ: fixed version label, URL and year-month taken from the metadata
  dsmz_info <- list(
    version = "Prokaryotic Nomenclature Up-to-Date from DSMZ",
    url = meta$url_DSMZ,
    yearmonth = meta$yearmonth_DSMZ,
    n = nrow(filter(taxa, source == "DSMZ"))
  )

  # Overall totals: current names and previously accepted names
  totals <- list(
    n_total_species = nrow(taxa),
    n_total_synonyms = nrow(AMR::microorganisms.old)
  )

  overview <- list(
    catalogue_of_life = col_info,
    deutsche_sammlung_von_mikroorganismen_und_zellkulturen = dsmz_info,
    total_included = totals
  )
  # the extra class makes print() dispatch to print.catalogue_of_life_version()
  class(overview) <- c("catalogue_of_life_version", "list")
  overview
}
#' @exportMethod print.catalogue_of_life_version
#' @export
#' @noRd
print.catalogue_of_life_version <- function(x, ...) {
  # Pretty-prints the overview returned by catalogue_of_life_version().
  #
  # x:   list with the elements `catalogue_of_life`,
  #      `deutsche_sammlung_von_mikroorganismen_und_zellkulturen` and
  #      `total_included`, whose fields are read below
  # ...: accepted for compatibility with the print() generic; not used
  #
  # Returns `x` invisibly, as print methods are expected to do, so the object
  # can still be assigned or passed on after printing.
  col_info <- x$catalogue_of_life
  dsmz_info <- x$deutsche_sammlung_von_mikroorganismen_und_zellkulturen
  totals <- x$total_included

  # all counts are shown with a thousands separator
  fmt_n <- function(n) format(n, big.mark = ",")

  # scalar flag, so plain if/else instead of ifelse(); isTRUE() makes a
  # missing flag read as "not the latest" rather than printing "NA"
  negation <- if (isTRUE(col_info$is_latest_annual_release)) "" else "not "

  cat(paste0(
    bold("Included in this AMR package are:\n\n"),
    underline(col_info$version), "\n",
    "  Available at: ", col_info$url, "\n",
    "  Number of included species: ", fmt_n(col_info$n), "\n",
    "  (based on your system time, this is most likely ", negation,
    "the latest annual release)\n\n",
    underline(paste0(dsmz_info$version, " (", dsmz_info$yearmonth, ")")), "\n",
    "  Available at: ", dsmz_info$url, "\n",
    "  Number of included species: ", fmt_n(dsmz_info$n), "\n\n",
    "=> Total number of species included:  ", fmt_n(totals$n_total_species), "\n",
    "=> Total number of synonyms included: ", fmt_n(totals$n_total_synonyms), "\n\n",
    "See for more info ?microorganisms and ?catalogue_of_life.\n"
  ))

  invisible(x)
}