1
0
mirror of https://github.com/msberends/AMR.git synced 2024-12-26 09:26:13 +01:00

DSMZ data

This commit is contained in:
dr. M.S. (Matthijs) Berends 2019-03-18 14:29:41 +01:00
parent b561de462a
commit c6a12266e7
38 changed files with 575 additions and 363 deletions

View File

@ -1,6 +1,6 @@
Package: AMR Package: AMR
Version: 0.5.0.9023 Version: 0.5.0.9024
Date: 2019-03-15 Date: 2019-03-18
Title: Antimicrobial Resistance Analysis Title: Antimicrobial Resistance Analysis
Authors@R: c( Authors@R: c(
person( person(

View File

@ -220,6 +220,7 @@ importFrom(crayon,magenta)
importFrom(crayon,red) importFrom(crayon,red)
importFrom(crayon,silver) importFrom(crayon,silver)
importFrom(crayon,strip_style) importFrom(crayon,strip_style)
importFrom(crayon,underline)
importFrom(crayon,yellow) importFrom(crayon,yellow)
importFrom(data.table,as.data.table) importFrom(data.table,as.data.table)
importFrom(data.table,data.table) importFrom(data.table,data.table)

View File

@ -30,7 +30,7 @@
#' @section Included taxa: #' @section Included taxa:
#' Included are: #' Included are:
#' \itemize{ #' \itemize{
#' \item{All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria, Protozoa and Viruses} #' \item{All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria and Protozoa}
#' \item{All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of \emph{Aspergillus}, \emph{Candida}, \emph{Cryptococcus}, \emph{Histoplasma}, \emph{Pneumocystis}, \emph{Saccharomyces} and \emph{Trichophyton}).} #' \item{All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of \emph{Aspergillus}, \emph{Candida}, \emph{Cryptococcus}, \emph{Histoplasma}, \emph{Pneumocystis}, \emph{Saccharomyces} and \emph{Trichophyton}).}
#' \item{All ~2,000 (sub)species from ~100 other relevant genera, from the kingdoms of Animalia and Plantae (like \emph{Strongyloides} and \emph{Taenia})} #' \item{All ~2,000 (sub)species from ~100 other relevant genera, from the kingdoms of Animalia and Plantae (like \emph{Strongyloides} and \emph{Taenia})}
#' \item{All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed} #' \item{All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed}
@ -44,6 +44,8 @@
#' @inheritSection AMR Read more on our website! #' @inheritSection AMR Read more on our website!
#' @name catalogue_of_life #' @name catalogue_of_life
#' @rdname catalogue_of_life #' @rdname catalogue_of_life
#' @seealso Data set \code{\link{microorganisms}} for the actual data. \cr
#' Function \code{\link{as.mo}()} to use the data for intelligent determination of microorganisms.
#' @examples #' @examples
#' # Get version info of included data set #' # Get version info of included data set
#' catalogue_of_life_version() #' catalogue_of_life_version()
@ -77,11 +79,16 @@ NULL
#' Version info of included Catalogue of Life #' Version info of included Catalogue of Life
#' #'
#' This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year. #' This function returns information about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year.
#' @seealso \code{\link{microorganisms}} #' @seealso \code{\link{microorganisms}}
#' @details The list item \code{is_latest_annual_release} is based on the system date. #' @details The list item \code{is_latest_annual_release} is based on the system date.
#'
#' For DSMZ, see \code{?microorganisms}.
#' @return a \code{list}, invisibly
#' @inheritSection catalogue_of_life Catalogue of Life #' @inheritSection catalogue_of_life Catalogue of Life
#' @inheritSection AMR Read more on our website! #' @inheritSection AMR Read more on our website!
#' @importFrom crayon bold underline
#' @importFrom dplyr filter
#' @export #' @export
#' @examples #' @examples
#' library(dplyr) #' library(dplyr)
@ -89,10 +96,34 @@ NULL
#' microorganisms %>% group_by(kingdom) %>% freq(phylum, nmax = NULL) #' microorganisms %>% group_by(kingdom) %>% freq(phylum, nmax = NULL)
catalogue_of_life_version <- function() { catalogue_of_life_version <- function() {
# see the `catalogue_of_life` list in R/data.R # see the `catalogue_of_life` list in R/data.R
list(version = catalogue_of_life$version, lst <- list(catalogue_of_life =
url = catalogue_of_life$url, list(version = gsub("{year}", catalogue_of_life$year, catalogue_of_life$version, fixed = TRUE),
# annual release always somewhere in March url = gsub("{year}", catalogue_of_life$year, catalogue_of_life$url_CoL, fixed = TRUE),
is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")), # annual release always somewhere in March, so before April is TRUE, FALSE otherwise
n_species = nrow(AMR::microorganisms), is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")),
n_synonyms = nrow(AMR::microorganisms.old)) n = nrow(filter(AMR::microorganisms, source == "CoL"))),
deutsche_sammlung_von_mikroorganismen_und_zellkulturen =
list(version = "Prokaryotic Nomenclature Up-to-Date from DSMZ",
url = catalogue_of_life$url_DSMZ,
yearmonth = catalogue_of_life$yearmonth_DSMZ,
n = nrow(filter(AMR::microorganisms, source == "DSMZ"))),
total_included =
list(
n_total_species = nrow(AMR::microorganisms),
n_total_synonyms = nrow(AMR::microorganisms.old)))
cat(paste0(bold("Included in this package are:\n\n"),
underline(lst$catalogue_of_life$version), "\n",
" Available at: ", lst$catalogue_of_life$url, "\n",
" Number of included species: ", format(lst$catalogue_of_life$n, big.mark = ","), "\n",
" (based on your system time, this is most likely ", ifelse(lst$catalogue_of_life$is_latest_annual_release, "", "not "), "the latest annual release)\n\n",
underline(paste0(lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$version, " (",
lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$yearmonth, ")")), "\n",
" Available at: ", lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$url, "\n",
" Number of included species: ", format(lst$deutsche_sammlung_von_mikroorganismen_und_zellkulturen$n, big.mark = ","), "\n\n",
"Total number of species included: ", format(lst$total_included$n_total_species, big.mark = ","), "\n",
"Total number of synonyms included: ", format(lst$total_included$n_total_synonyms, big.mark = ","), "\n\n",
"See for more info ?microorganisms and ?catalogue_of_life.\n"))
return(base::invisible(lst))
} }

View File

@ -130,11 +130,11 @@
# #
"antibiotics" "antibiotics"
#' Data set with ~60,000 microorganisms #' Data set with ~65,000 microorganisms
#' #'
#' A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using \code{\link{as.mo}}. #' A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using \code{\link{as.mo}}.
#' @inheritSection catalogue_of_life Catalogue of Life #' @inheritSection catalogue_of_life Catalogue of Life
#' @format A \code{\link{data.frame}} with 59,985 observations and 15 variables: #' @format A \code{\link{data.frame}} with 65,629 observations and 16 variables:
#' \describe{ #' \describe{
#' \item{\code{mo}}{ID of microorganism as used by this package} #' \item{\code{mo}}{ID of microorganism as used by this package}
#' \item{\code{col_id}}{Catalogue of Life ID} #' \item{\code{col_id}}{Catalogue of Life ID}
@ -150,30 +150,40 @@
#' \item{\code{rank}}{Taxonomic rank of the microorganism, like \code{"species"} or \code{"genus"}} #' \item{\code{rank}}{Taxonomic rank of the microorganism, like \code{"species"} or \code{"genus"}}
#' \item{\code{ref}}{Author(s) and year of concerning scientific publication} #' \item{\code{ref}}{Author(s) and year of concerning scientific publication}
#' \item{\code{species_id}}{ID of the species as used by the Catalogue of Life} #' \item{\code{species_id}}{ID of the species as used by the Catalogue of Life}
#' \item{\code{source}}{Either \code{"CoL"}, \code{"DSMZ"} (see source) or "manually added"}
#' \item{\code{prevalence}}{Prevalence of the microorganism, see \code{?as.mo}} #' \item{\code{prevalence}}{Prevalence of the microorganism, see \code{?as.mo}}
#' } #' }
#' @source Catalogue of Life: Annual Checklist (public online database), \url{www.catalogueoflife.org}.
#' @details Manually added were: #' @details Manually added were:
#' \itemize{ #' \itemize{
#' \item{9 species of \emph{Streptococcus} (beta haemolytic groups A, B, C, D, F, G, H, K and unspecified)} #' \item{9 species of \emph{Streptococcus} (beta haemolytic groups A, B, C, D, F, G, H, K and unspecified)}
#' \item{2 species of \emph{Staphylococcus} (coagulase-negative [CoNS] and coagulase-positive [CoPS])} #' \item{2 species of \emph{Staphylococcus} (coagulase-negative [CoNS] and coagulase-positive [CoPS])}
#' \item{2 other undefined (unknown Gram negatives and unknown Gram positives)} #' \item{3 other undefined (unknown, unknown Gram negatives and unknown Gram positives)}
#' \item{8,830 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) that are not in the Catalogue of Life}
#' } #' }
#' @section About the records from DSMZ (see source):
#' Names of prokaryotes are defined as being validly published by the International Code of Nomenclature of Bacteria. Validly published are all names which are included in the Approved Lists of Bacterial Names and the names subsequently published in the International Journal of Systematic Bacteriology (IJSB) and, from January 2000, in the International Journal of Systematic and Evolutionary Microbiology (IJSEM) as original articles or in the validation lists.
#'
#' From: \url{https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads/readme.html}
#' @source Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}).
#'
#' Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, \url{http://www.dsmz.de/bacterial-diversity/prokaryotic-nomenclature-up-to-date} (check included version with \code{\link{catalogue_of_life_version}()}).
#' @inheritSection AMR Read more on our website! #' @inheritSection AMR Read more on our website!
#' @seealso \code{\link{as.mo}}, \code{\link{mo_property}}, \code{\link{microorganisms.codes}} #' @seealso \code{\link{as.mo}}, \code{\link{mo_property}}, \code{\link{microorganisms.codes}}
"microorganisms" "microorganisms"
catalogue_of_life <- list( catalogue_of_life <- list(
year = 2018, year = 2018,
version = "Catalogue of Life: 2018 Annual Checklist", version = "Catalogue of Life: {year} Annual Checklist",
url = "http://www.catalogueoflife.org/annual-checklist/2018" url_CoL = "http://www.catalogueoflife.org/annual-checklist/{year}/",
url_DSMZ = "https://www.dsmz.de/microorganisms/pnu/bacterial_nomenclature_info_mm.php",
yearmonth_DSMZ = "February 2019"
) )
#' Data set with previously accepted taxonomic names #' Data set with previously accepted taxonomic names
#' #'
#' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by \code{\link{as.mo}}. #' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by \code{\link{as.mo}}.
#' @inheritSection catalogue_of_life Catalogue of Life #' @inheritSection catalogue_of_life Catalogue of Life
#' @format A \code{\link{data.frame}} with 17,069 observations and 4 variables: #' @format A \code{\link{data.frame}} with 16,911 observations and 4 variables:
#' \describe{ #' \describe{
#' \item{\code{col_id}}{Catalogue of Life ID} #' \item{\code{col_id}}{Catalogue of Life ID}
#' \item{\code{tsn_new}}{New Catalogue of Life ID} #' \item{\code{tsn_new}}{New Catalogue of Life ID}

View File

@ -80,6 +80,7 @@ globalVariables(c(".",
"phylum", "phylum",
"prevalence", "prevalence",
"prevalent", "prevalent",
"property",
"psae", "psae",
"R", "R",
"real_first_isolate", "real_first_isolate",

View File

@ -150,7 +150,7 @@ mdro <- function(tbl,
} else if (guideline$country$code == 'nl') { } else if (guideline$country$code == 'nl') {
guideline$country$name <- 'The Netherlands' guideline$country$name <- 'The Netherlands'
guideline$name <- 'WIP-Richtlijn BRMO' guideline$name <- 'WIP-Richtlijn BRMO'
guideline$version <- 'Revision of December 2017' guideline$version <- 'Revision as of December 2017'
guideline$source <- 'https://www.rivm.nl/Documenten_en_publicaties/Professioneel_Praktisch/Richtlijnen/Infectieziekten/WIP_Richtlijnen/WIP_Richtlijnen/Ziekenhuizen/WIP_richtlijn_BRMO_Bijzonder_Resistente_Micro_Organismen_ZKH' guideline$source <- 'https://www.rivm.nl/Documenten_en_publicaties/Professioneel_Praktisch/Richtlijnen/Infectieziekten/WIP_Richtlijnen/WIP_Richtlijnen/Ziekenhuizen/WIP_richtlijn_BRMO_Bijzonder_Resistente_Micro_Organismen_ZKH'
# add here more countries like this: # add here more countries like this:
# } else if (country$code == 'xx') { # } else if (country$code == 'xx') {

253
R/mo.R
View File

@ -21,9 +21,9 @@
#' Transform to microorganism ID #' Transform to microorganism ID
#' #'
#' Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea, Viruses, and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. Please see Examples. #' Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. Please see Examples.
#' @param x a character vector or a \code{data.frame} with one or two columns #' @param x a character vector or a \code{data.frame} with one or two columns
#' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. #' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. Note that this does not include species that were newly named after this publication.
#' #'
#' This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS". #' This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".
#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. #' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L.
@ -50,13 +50,15 @@
#' | | ----> species, a 3-4 letter acronym #' | | ----> species, a 3-4 letter acronym
#' | ----> genus, a 5-7 letter acronym, mostly without vowels #' | ----> genus, a 5-7 letter acronym, mostly without vowels
#' ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista), #' ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista),
#' F (Fungi), P (Protozoa), PL (Plantae) or V (Viruses) #' F (Fungi), P (Protozoa) or PL (Plantae)
#' } #' }
#' #'
#' Values that cannot be coerced will be considered 'unknown' and have an MO code \code{UNKNOWN}. #' Values that cannot be coerced will be considered 'unknown' and have an MO code \code{UNKNOWN}.
#' #'
#' Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples. #' Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples.
#' #'
#' The algorithm uses data from the Catalogue of Life (see below) and from one other source (see \code{?microorganisms}).
#'
#' \strong{Self-learning algorithm} \cr #' \strong{Self-learning algorithm} \cr
#' The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}. #' The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}.
#' #'
@ -65,7 +67,7 @@
#' \itemize{ #' \itemize{
#' \item{Valid MO codes and full names: it first searches in already valid MO code and known genus/species combinations} #' \item{Valid MO codes and full names: it first searches in already valid MO code and known genus/species combinations}
#' \item{Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones (see \emph{Microbial prevalence of pathogens in humans} below)} #' \item{Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones (see \emph{Microbial prevalence of pathogens in humans} below)}
#' \item{Taxonomic kingdom: it first searches in Bacteria/Chromista, then Fungi, then Protozoa, then Viruses} #' \item{Taxonomic kingdom: it first searches in Bacteria/Chromista, then Fungi, then Protozoa}
#' \item{Breakdown of input values: from here it starts to break down input values to find possible matches} #' \item{Breakdown of input values: from here it starts to break down input values to find possible matches}
#' } #' }
#' #'
@ -82,7 +84,6 @@
#' \itemize{ #' \itemize{
#' \item{(uncertainty level 1): It tries to look for only matching genera} #' \item{(uncertainty level 1): It tries to look for only matching genera}
#' \item{(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names} #' \item{(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names}
#' \item{(uncertainty level 1): It tries to look for some manual changes which are not (yet) published to the Catalogue of Life (like \emph{Propionibacterium} being \emph{Cutibacterium})}
#' \item{(uncertainty level 2): It strips off values between brackets and the brackets themselves, and re-evaluates the input with all previous rules} #' \item{(uncertainty level 2): It strips off values between brackets and the brackets themselves, and re-evaluates the input with all previous rules}
#' \item{(uncertainty level 2): It strips off words from the end one by one and re-evaluates the input with all previous rules} #' \item{(uncertainty level 2): It strips off words from the end one by one and re-evaluates the input with all previous rules}
#' \item{(uncertainty level 3): It strips off words from the start one by one and re-evaluates the input with all previous rules} #' \item{(uncertainty level 3): It strips off words from the start one by one and re-evaluates the input with all previous rules}
@ -144,6 +145,12 @@
#' as.mo("VISA") # Vancomycin Intermediate S. aureus #' as.mo("VISA") # Vancomycin Intermediate S. aureus
#' as.mo("VRSA") # Vancomycin Resistant S. aureus #' as.mo("VRSA") # Vancomycin Resistant S. aureus
#' #'
#' # Dyslexia is no problem - these all work:
#' as.mo("Ureaplasma urealyticum")
#' as.mo("Ureaplasma urealyticus")
#' as.mo("Ureaplasmium urealytica")
#' as.mo("Ureaplazma urealitycium")
#'
#' as.mo("Streptococcus group A") #' as.mo("Streptococcus group A")
#' as.mo("GAS") # Group A Streptococci #' as.mo("GAS") # Group A Streptococci
#' as.mo("GBS") # Group B Streptococci #' as.mo("GBS") # Group B Streptococci
@ -154,13 +161,9 @@
#' as.mo("S. pyogenes") # will remain species: B_STRPT_PYO #' as.mo("S. pyogenes") # will remain species: B_STRPT_PYO
#' as.mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPT_GRA #' as.mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPT_GRA
#' #'
#' # Use mo_* functions to get a specific property based on `mo` #' # All mo_* functions use as.mo() internally too (see ?mo_property):
#' Ecoli <- as.mo("E. coli") # returns `B_ESCHR_COL`
#' mo_genus(Ecoli) # returns "Escherichia"
#' mo_gramstain(Ecoli) # returns "Gram negative"
#' # but it uses as.mo internally too, so you could also just use:
#' mo_genus("E. coli") # returns "Escherichia" #' mo_genus("E. coli") # returns "Escherichia"
#' #' mo_gramstain("E. coli") # returns "Gram negative"
#' #'
#' \dontrun{ #' \dontrun{
#' df$mo <- as.mo(df$microorganism_name) #' df$mo <- as.mo(df$microorganism_name)
@ -246,13 +249,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
# save them to history # save them to history
set_mo_history(x, y, force = isTRUE(list(...)$force_mo_history)) set_mo_history(x, y, force = isTRUE(list(...)$force_mo_history))
} else { } else {
# will be checked for mo class in validation and uses exec_as.mo internally if necessary # will be checked for mo class in validation and uses exec_as.mo internally if necessary
y <- mo_validate(x = x, property = "mo", y <- mo_validate(x = x, property = "mo",
Becker = Becker, Lancefield = Lancefield, Becker = Becker, Lancefield = Lancefield,
allow_uncertain = allow_uncertain, reference_df = reference_df, allow_uncertain = allow_uncertain, reference_df = reference_df,
force_mo_history = isTRUE(list(...)$force_mo_history)) force_mo_history = isTRUE(list(...)$force_mo_history))
} }
structure(.Data = y, class = "mo") structure(.Data = y, class = "mo")
@ -270,6 +273,7 @@ is.mo <- function(x) {
# param property a column name of AMR::microorganisms # param property a column name of AMR::microorganisms
# param initial_search logical - is FALSE when coming from uncertain tries, which uses exec_as.mo internally too # param initial_search logical - is FALSE when coming from uncertain tries, which uses exec_as.mo internally too
# param force_mo_history logical - whether found result must be saved with set_mo_history (default FALSE on non-interactive sessions) # param force_mo_history logical - whether found result must be saved with set_mo_history (default FALSE on non-interactive sessions)
# param debug logical - show different lookup texts while searching
exec_as.mo <- function(x, exec_as.mo <- function(x,
Becker = FALSE, Becker = FALSE,
Lancefield = FALSE, Lancefield = FALSE,
@ -277,7 +281,8 @@ exec_as.mo <- function(x,
reference_df = get_mo_source(), reference_df = get_mo_source(),
property = "mo", property = "mo",
initial_search = TRUE, initial_search = TRUE,
force_mo_history = FALSE) { force_mo_history = FALSE,
debug = FALSE) {
if (!"AMR" %in% base::.packages()) { if (!"AMR" %in% base::.packages()) {
library("AMR") library("AMR")
@ -336,6 +341,7 @@ exec_as.mo <- function(x,
& !identical(x, "") & !identical(x, "")
& !identical(x, "xxx") & !identical(x, "xxx")
& !identical(x, "con")] & !identical(x, "con")]
x_input_backup <- x
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life) # conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) { if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) {
@ -455,6 +461,9 @@ exec_as.mo <- function(x,
x <- gsub("(ph|f|v)+", "(ph|f|v)+", x, ignore.case = TRUE) x <- gsub("(ph|f|v)+", "(ph|f|v)+", x, ignore.case = TRUE)
x <- gsub("(th|t)+", "(th|t)+", x, ignore.case = TRUE) x <- gsub("(th|t)+", "(th|t)+", x, ignore.case = TRUE)
x <- gsub("a+", "a+", x, ignore.case = TRUE) x <- gsub("a+", "a+", x, ignore.case = TRUE)
# allow any ending of -um, -us, -ium, -ius and -a (needs perl = TRUE for the negative lookahead):
x <- gsub("(um|u\\[sz\\]\\+|\\[iy\\]\\+um|\\[iy\\]\\+u\\[sz\\]\\+|a\\+)(?![a-z[])",
"(um|us|ium|ius|a)", x, ignore.case = TRUE, perl = TRUE)
x <- gsub("e+", "e+", x, ignore.case = TRUE) x <- gsub("e+", "e+", x, ignore.case = TRUE)
x <- gsub("o+", "o+", x, ignore.case = TRUE) x <- gsub("o+", "o+", x, ignore.case = TRUE)
@ -474,16 +483,18 @@ exec_as.mo <- function(x,
x_withspaces_end_only <- paste0(x_withspaces, '$') x_withspaces_end_only <- paste0(x_withspaces, '$')
x_withspaces_start_end <- paste0('^', x_withspaces, '$') x_withspaces_start_end <- paste0('^', x_withspaces, '$')
# cat(paste0('x "', x, '"\n')) if (debug == TRUE) {
# cat(paste0('x_species "', x_species, '"\n')) cat(paste0('x "', x, '"\n'))
# cat(paste0('x_withspaces_start_only "', x_withspaces_start_only, '"\n')) cat(paste0('x_species "', x_species, '"\n'))
# cat(paste0('x_withspaces_end_only "', x_withspaces_end_only, '"\n')) cat(paste0('x_withspaces_start_only "', x_withspaces_start_only, '"\n'))
# cat(paste0('x_withspaces_start_end "', x_withspaces_start_end, '"\n')) cat(paste0('x_withspaces_end_only "', x_withspaces_end_only, '"\n'))
# cat(paste0('x_backup "', x_backup, '"\n')) cat(paste0('x_withspaces_start_end "', x_withspaces_start_end, '"\n'))
# cat(paste0('x_backup_without_spp "', x_backup_without_spp, '"\n')) cat(paste0('x_backup "', x_backup, '"\n'))
# cat(paste0('x_trimmed "', x_trimmed, '"\n')) cat(paste0('x_backup_without_spp "', x_backup_without_spp, '"\n'))
# cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n')) cat(paste0('x_trimmed "', x_trimmed, '"\n'))
# cat(paste0('x_trimmed_without_group "', x_trimmed_without_group, '"\n')) cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n'))
cat(paste0('x_trimmed_without_group "', x_trimmed_without_group, '"\n'))
}
progress <- progress_estimated(n = length(x), min_time = 3) progress <- progress_estimated(n = length(x), min_time = 3)
@ -509,13 +520,13 @@ exec_as.mo <- function(x,
# most probable: is exact match in fullname # most probable: is exact match in fullname
if (length(found) > 0) { if (length(found) > 0) {
x[i] <- found[1L] x[i] <- found[1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
if (any(x_backup_without_spp[i] %in% c(NA, "", "xxx", "con"))) { if (any(tolower(x_backup_without_spp[i]) %in% c(NA, "", "xxx", "con", "na", "nan"))) {
x[i] <- NA_character_ x[i] <- NA_character_
next next
} }
@ -523,8 +534,8 @@ exec_as.mo <- function(x,
if (tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) { if (tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) {
# empty and nonsense values, ignore without warning # empty and nonsense values, ignore without warning
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -540,8 +551,8 @@ exec_as.mo <- function(x,
# return first genus that begins with x_trimmed, e.g. when "E. spp." # return first genus that begins with x_trimmed, e.g. when "E. spp."
if (length(found) > 0) { if (length(found) > 0) {
x[i] <- found[1L] x[i] <- found[1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -549,9 +560,9 @@ exec_as.mo <- function(x,
} }
# fewer than 3 chars and not looked for species, add as failure # fewer than 3 chars and not looked for species, add as failure
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
failures <- c(failures, x_backup[i]) if (initial_search == TRUE) {
if (property == "mo" & initial_search == TRUE) { failures <- c(failures, x_backup[i])
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -559,9 +570,9 @@ exec_as.mo <- function(x,
if (x_backup_without_spp[i] %like% "virus") { if (x_backup_without_spp[i] %like% "virus") {
# there is no fullname like virus, so don't try to coerce it # there is no fullname like virus, so don't try to coerce it
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
failures <- c(failures, x_backup[i]) if (initial_search == TRUE) {
if (property == "mo" & initial_search == TRUE) { failures <- c(failures, x_backup[i])
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -570,38 +581,38 @@ exec_as.mo <- function(x,
if (!is.na(x_trimmed[i])) { if (!is.na(x_trimmed[i])) {
if (toupper(x_backup_without_spp[i]) %in% c('MRSA', 'MSSA', 'VISA', 'VRSA')) { if (toupper(x_backup_without_spp[i]) %in% c('MRSA', 'MSSA', 'VISA', 'VRSA')) {
x[i] <- microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_STPHY_AUR', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
if (toupper(x_backup_without_spp[i]) %in% c('MRSE', 'MSSE')) { if (toupper(x_backup_without_spp[i]) %in% c('MRSE', 'MSSE')) {
x[i] <- microorganismsDT[mo == 'B_STPHY_EPI', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_STPHY_EPI', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
if (toupper(x_backup_without_spp[i]) == "VRE" if (toupper(x_backup_without_spp[i]) == "VRE"
| x_backup_without_spp[i] %like% '(enterococci|enterokok|enterococo)[a-z]*?$') { | x_backup_without_spp[i] %like% '(enterococci|enterokok|enterococo)[a-z]*?$') {
x[i] <- microorganismsDT[mo == 'B_ENTRC', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_ENTRC', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
if (toupper(x_backup_without_spp[i]) %in% c("EHEC", "EPEC", "EIEC", "STEC", "ATEC")) { if (toupper(x_backup_without_spp[i]) %in% c("EHEC", "EPEC", "EIEC", "STEC", "ATEC")) {
x[i] <- microorganismsDT[mo == 'B_ESCHR_COL', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_ESCHR_COL', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
if (toupper(x_backup_without_spp[i]) == 'MRPA') { if (toupper(x_backup_without_spp[i]) == 'MRPA') {
# multi resistant P. aeruginosa # multi resistant P. aeruginosa
x[i] <- microorganismsDT[mo == 'B_PSDMN_AER', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_PSDMN_AER', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -609,40 +620,40 @@ exec_as.mo <- function(x,
| toupper(x_backup_without_spp[i]) == 'CRSM') { | toupper(x_backup_without_spp[i]) == 'CRSM') {
# co-trim resistant S. maltophilia # co-trim resistant S. maltophilia
x[i] <- microorganismsDT[mo == 'B_STNTR_MAL', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_STNTR_MAL', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
if (toupper(x_backup_without_spp[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) { if (toupper(x_backup_without_spp[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) {
# peni I, peni R, vanco I, vanco R: S. pneumoniae # peni I, peni R, vanco I, vanco R: S. pneumoniae
x[i] <- microorganismsDT[mo == 'B_STRPT_PNE', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_STRPT_PNE', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
if (x_backup_without_spp[i] %like% '^G[ABCDFGHK]S$') { if (x_backup_without_spp[i] %like% '^G[ABCDFGHK]S$') {
# Streptococci, like GBS = Group B Streptococci (B_STRPT_GRB) # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRB)
x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L] x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
if (x_backup_without_spp[i] %like% '(streptococ|streptokok).* [ABCDFGHK]$') { if (x_backup_without_spp[i] %like% '(streptococ|streptokok).* [ABCDFGHK]$') {
# Streptococci in different languages, like "estreptococos grupo B" # Streptococci in different languages, like "estreptococos grupo B"
x[i] <- microorganismsDT[mo == gsub(".*(streptococ|streptokok|estreptococ).* ([ABCDFGHK])$", "B_STRPT_GR\\2", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L] x[i] <- microorganismsDT[mo == gsub(".*(streptococ|streptokok|estreptococ).* ([ABCDFGHK])$", "B_STRPT_GR\\2", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
if (x_backup_without_spp[i] %like% 'group [ABCDFGHK] (streptococ|streptokok|estreptococ)') { if (x_backup_without_spp[i] %like% 'group [ABCDFGHK] (streptococ|streptokok|estreptococ)') {
# Streptococci in different languages, like "Group A Streptococci" # Streptococci in different languages, like "Group A Streptococci"
x[i] <- microorganismsDT[mo == gsub(".*group ([ABCDFGHK]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L] x[i] <- microorganismsDT[mo == gsub(".*group ([ABCDFGHK]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GR\\1", x_backup_without_spp[i], ignore.case = TRUE), ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -652,8 +663,8 @@ exec_as.mo <- function(x,
| x_backup_without_spp[i] %like% '[ck]o?ns[^a-z]?$') { | x_backup_without_spp[i] %like% '[ck]o?ns[^a-z]?$') {
# coerce S. coagulase negative # coerce S. coagulase negative
x[i] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -662,8 +673,8 @@ exec_as.mo <- function(x,
| x_backup_without_spp[i] %like% '[ck]o?ps[^a-z]?$') { | x_backup_without_spp[i] %like% '[ck]o?ps[^a-z]?$') {
# coerce S. coagulase positive # coerce S. coagulase positive
x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -672,8 +683,8 @@ exec_as.mo <- function(x,
| x_trimmed[i] %like% 'gram[ -]?neg.*') { | x_trimmed[i] %like% 'gram[ -]?neg.*') {
# coerce Gram negatives # coerce Gram negatives
x[i] <- microorganismsDT[mo == 'B_GRAMN', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_GRAMN', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -682,8 +693,8 @@ exec_as.mo <- function(x,
| x_trimmed[i] %like% 'gram[ -]?pos.*') { | x_trimmed[i] %like% 'gram[ -]?pos.*') {
# coerce Gram positives # coerce Gram positives
x[i] <- microorganismsDT[mo == 'B_GRAMP', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_GRAMP', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -691,8 +702,8 @@ exec_as.mo <- function(x,
if (x_backup_without_spp[i] %like% "Salmonella group") { if (x_backup_without_spp[i] %like% "Salmonella group") {
# Salmonella Group A to Z, just return S. species for now # Salmonella Group A to Z, just return S. species for now
x[i] <- microorganismsDT[mo == 'B_SLMNL', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_SLMNL', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
options(mo_renamed = c(getOption("mo_renamed"), options(mo_renamed = c(getOption("mo_renamed"),
magenta(paste0("Note: ", magenta(paste0("Note: ",
@ -703,8 +714,8 @@ exec_as.mo <- function(x,
} else { } else {
# Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica
x[i] <- microorganismsDT[mo == 'B_SLMNL_ENT', ..property][[1]][1L] x[i] <- microorganismsDT[mo == 'B_SLMNL_ENT', ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
options(mo_renamed = c(getOption("mo_renamed"), options(mo_renamed = c(getOption("mo_renamed"),
magenta(paste0("Note: ", magenta(paste0("Note: ",
@ -723,8 +734,8 @@ exec_as.mo <- function(x,
found <- microorganismsDT[fullname_lower %in% tolower(c(x_species[i], x_trimmed_species[i])), ..property][[1]] found <- microorganismsDT[fullname_lower %in% tolower(c(x_species[i], x_trimmed_species[i])), ..property][[1]]
if (length(found) > 0) { if (length(found) > 0) {
x[i] <- found[1L] x[i] <- found[1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -732,8 +743,8 @@ exec_as.mo <- function(x,
found <- microorganismsDT[fullname_lower %like% paste0("^", unregex(x_backup_without_spp[i]), "[a-z]+"), ..property][[1]] found <- microorganismsDT[fullname_lower %like% paste0("^", unregex(x_backup_without_spp[i]), "[a-z]+"), ..property][[1]]
if (length(found) > 0) { if (length(found) > 0) {
x[i] <- found[1L] x[i] <- found[1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -747,8 +758,8 @@ exec_as.mo <- function(x,
mo_found <- AMR::microorganisms.codes[toupper(x_backup[i]) == AMR::microorganisms.codes[, 1], "mo"][1L] mo_found <- AMR::microorganisms.codes[toupper(x_backup[i]) == AMR::microorganisms.codes[, 1], "mo"][1L]
if (length(mo_found) > 0) { if (length(mo_found) > 0) {
x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L] x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L]
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -769,9 +780,9 @@ exec_as.mo <- function(x,
# allow no codes less than 4 characters long, was already checked for WHONET above # allow no codes less than 4 characters long, was already checked for WHONET above
if (nchar(x_backup_without_spp[i]) < 4) { if (nchar(x_backup_without_spp[i]) < 4) {
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
failures <- c(failures, x_backup[i]) if (initial_search == TRUE) {
if (property == "mo" & initial_search == TRUE) { failures <- c(failures, x_backup[i])
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -790,11 +801,6 @@ exec_as.mo <- function(x,
if (length(found) > 0) { if (length(found) > 0) {
return(found[1L]) return(found[1L])
} }
found <- data_to_check[fullname_lower %like% b.x_trimmed
| fullname_lower %like% c.x_trimmed_without_group, ..property][[1]]
if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
return(found[1L])
}
# try any match keeping spaces ---- # try any match keeping spaces ----
found <- data_to_check[fullname %like% d.x_withspaces_start_end, ..property][[1]] found <- data_to_check[fullname %like% d.x_withspaces_start_end, ..property][[1]]
@ -818,6 +824,14 @@ exec_as.mo <- function(x,
return(found[1L]) return(found[1L])
} }
# try a trimmed version
found <- data_to_check[fullname_lower %like% b.x_trimmed
| fullname_lower %like% c.x_trimmed_without_group, ..property][[1]]
if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
return(found[1L])
}
# try splitting of characters in the middle and then find ID ---- # try splitting of characters in the middle and then find ID ----
# only when text length is 6 or lower # only when text length is 6 or lower
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
@ -854,8 +868,8 @@ exec_as.mo <- function(x,
f.x_withspaces_end_only = x_withspaces_end_only[i], f.x_withspaces_end_only = x_withspaces_end_only[i],
g.x_backup_without_spp = x_backup_without_spp[i]) g.x_backup_without_spp = x_backup_without_spp[i])
if (!empty_result(x[i])) { if (!empty_result(x[i])) {
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -869,8 +883,8 @@ exec_as.mo <- function(x,
f.x_withspaces_end_only = x_withspaces_end_only[i], f.x_withspaces_end_only = x_withspaces_end_only[i],
g.x_backup_without_spp = x_backup_without_spp[i]) g.x_backup_without_spp = x_backup_without_spp[i])
if (!empty_result(x[i])) { if (!empty_result(x[i])) {
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -884,8 +898,8 @@ exec_as.mo <- function(x,
f.x_withspaces_end_only = x_withspaces_end_only[i], f.x_withspaces_end_only = x_withspaces_end_only[i],
g.x_backup_without_spp = x_backup_without_spp[i]) g.x_backup_without_spp = x_backup_without_spp[i])
if (!empty_result(x[i])) { if (!empty_result(x[i])) {
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -910,8 +924,8 @@ exec_as.mo <- function(x,
ref_old = found[1, ref], ref_old = found[1, ref],
ref_new = microorganismsDT[col_id == found[1, col_id_new], ref], ref_new = microorganismsDT[col_id == found[1, col_id_new], ref],
mo = microorganismsDT[col_id == found[1, col_id_new], mo]) mo = microorganismsDT[col_id == found[1, col_id_new], mo])
if (property == "mo" & initial_search == TRUE) { if (initial_search == TRUE) {
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
next next
} }
@ -954,19 +968,6 @@ exec_as.mo <- function(x,
mo = paste("CoL", found[1, col_id]))) mo = paste("CoL", found[1, col_id])))
return(x) return(x)
} }
# (2) not yet implemented taxonomic changes in Catalogue of Life ----
found <- suppressMessages(suppressWarnings(exec_as.mo(TEMPORARY_TAXONOMY(b.x_trimmed), initial_search = FALSE, allow_uncertain = FALSE)))
if (!empty_result(found)) {
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
uncertainties <<- rbind(uncertainties,
data.frame(uncertainty = 1,
input = a.x_backup,
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
mo = found_result[1L]))
return(found[1L])
}
} }
if (allow_uncertain >= 2) { if (allow_uncertain >= 2) {
@ -1074,17 +1075,17 @@ exec_as.mo <- function(x,
next next
} }
# not found ---- # no results found: make them UNKNOWN ----
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
failures <- c(failures, x_backup[i]) if (initial_search == TRUE) {
if (property == "mo" & initial_search == TRUE) { failures <- c(failures, x_backup[i])
set_mo_history(x_backup[i], x[i], force = force_mo_history) set_mo_history(x_backup[i], get_mo_code(x[i], property), force = force_mo_history)
} }
} }
} }
# handling failures ---- # handling failures ----
failures <- x_input[x == "UNKNOWN"] # failures[!failures %in% c(NA, NULL, NaN)] failures <- failures[!failures %in% c(NA, NULL, NaN)]
if (length(failures) > 0 & initial_search == TRUE) { if (length(failures) > 0 & initial_search == TRUE) {
options(mo_failures = sort(unique(failures))) options(mo_failures = sort(unique(failures)))
plural <- c("value", "it", "was") plural <- c("value", "it", "was")
@ -1172,7 +1173,6 @@ exec_as.mo <- function(x,
x[x == microorganismsDT[mo == 'B_STRPT_SAL', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRK', ..property][[1]][1L] x[x == microorganismsDT[mo == 'B_STRPT_SAL', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRK', ..property][[1]][1L]
} }
# Wrap up ---------------------------------------------------------------- # Wrap up ----------------------------------------------------------------
# comply to x, which is also unique and without empty values # comply to x, which is also unique and without empty values
@ -1189,10 +1189,12 @@ exec_as.mo <- function(x,
df_input <- data.frame(input = as.character(x_input), df_input <- data.frame(input = as.character(x_input),
stringsAsFactors = FALSE) stringsAsFactors = FALSE)
x <- df_input %>% suppressWarnings(
left_join(df_found, x <- df_input %>%
by = "input") %>% left_join(df_found,
pull(found) by = "input") %>%
pull(found)
)
if (property == "mo") { if (property == "mo") {
class(x) <- "mo" class(x) <- "mo"
@ -1217,11 +1219,6 @@ empty_result <- function(x) {
all(x %in% c(NA, "UNKNOWN")) all(x %in% c(NA, "UNKNOWN"))
} }
# Rewrite genus names that are not yet handled by the reference data:
# in every element of `x` that matches 'Cutibacterium' (as decided by
# `%like%`), the text 'Cutibacterium' is replaced with 'Propionibacterium'.
# Elements that do not match are returned untouched.
#
# x: character vector of names to rewrite
# returns: `x` with the substitutions applied, same length and order
TEMPORARY_TAXONOMY <- function(x) {
  # compute the match mask once and reuse it for both selection and assignment
  is_cutibacterium <- x %like% 'Cutibacterium'
  x[is_cutibacterium] <- gsub('Cutibacterium', 'Propionibacterium', x[is_cutibacterium])
  x
}
#' @importFrom crayon italic #' @importFrom crayon italic
was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "") { was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "") {
if (!is.na(ref_old)) { if (!is.na(ref_old)) {
@ -1368,3 +1365,11 @@ nr2char <- function(x) {
unregex <- function(x) { unregex <- function(x) {
gsub("[^a-zA-Z0-9 -]", "", x) gsub("[^a-zA-Z0-9 -]", "", x)
} }
# Translate a looked-up value back to microorganism code(s).
#
# x:        value(s) previously retrieved for `property`
# property: name of the column of AMR::microorganisms that `x` came from
#
# returns:
#   * when `property` is "mo": the unique values of `x` (they already are codes)
#   * otherwise: the `mo` column of every row of AMR::microorganisms whose
#     `property` column holds one of the values in `x`. One code is returned
#     per matching row, in table order, so the result can be longer than `x`
#     when the property value is shared by several rows.
get_mo_code <- function(x, property) {
  if (property == "mo") {
    return(unique(x))
  }
  # reverse lookup: find the rows carrying this property value, then take their codes
  matching_rows <- base::which(AMR::microorganisms[, property] %in% x)
  AMR::microorganisms[matching_rows, ]$mo
}

View File

@ -20,15 +20,21 @@
# ==================================================================== # # ==================================================================== #
# print successful as.mo coercions to file, not uncertain ones # print successful as.mo coercions to file, not uncertain ones
#' @importFrom dplyr distinct #' @importFrom dplyr %>% distinct filter
set_mo_history <- function(x, mo, force = FALSE) { set_mo_history <- function(x, mo, force = FALSE) {
file_location <- base::path.expand('~/.Rhistory_mo') file_location <- base::path.expand('~/.Rhistory_mo')
if (base::interactive() | force == TRUE) { if (base::interactive() | force == TRUE) {
mo_hist <- read_mo_history(force = force) mo_hist <- read_mo_history(force = force)
df <- distinct(data.frame(x, mo, stringsAsFactors = FALSE), x, .keep_all = TRUE) df <- data.frame(x, mo, stringsAsFactors = FALSE) %>%
x <- df$x distinct(x, .keep_all = TRUE) %>%
filter(!is.na(x) & !is.na(mo))
if (nrow(df) == 0) {
return(base::invisible())
}
x <- toupper(df$x)
mo <- df$mo mo <- df$mo
for (i in 1:length(x)) { for (i in 1:length(x)) {
# save package version too, as both the as.mo() algorithm and the reference data set may change
if (NROW(mo_hist[base::which(mo_hist$x == x[i] & mo_hist$package_version == utils::packageVersion("AMR")),]) == 0) { if (NROW(mo_hist[base::which(mo_hist$x == x[i] & mo_hist$package_version == utils::packageVersion("AMR")),]) == 0) {
base::write(x = c(x[i], mo[i], base::as.character(utils::packageVersion("AMR"))), base::write(x = c(x[i], mo[i], base::as.character(utils::packageVersion("AMR"))),
file = file_location, file = file_location,
@ -46,7 +52,7 @@ get_mo_history <- function(x, force = FALSE) {
if (base::is.null(file_read)) { if (base::is.null(file_read)) {
NA NA
} else { } else {
data.frame(x, stringsAsFactors = FALSE) %>% data.frame(x = toupper(x), stringsAsFactors = FALSE) %>%
left_join(file_read, by = "x") %>% left_join(file_read, by = "x") %>%
pull(mo) pull(mo)
} }

View File

@ -26,7 +26,7 @@
#' @param property one of the column names of one of the \code{\link{microorganisms}} data set or \code{"shortname"} #' @param property one of the column names of one of the \code{\link{microorganisms}} data set or \code{"shortname"}
#' @param language language of the returned text, defaults to system language (see \code{\link{get_locale}}) and can also be set with \code{\link{getOption}("AMR_locale")}. Use \code{language = NULL} or \code{language = ""} to prevent translation. #' @param language language of the returned text, defaults to system language (see \code{\link{get_locale}}) and can also be set with \code{\link{getOption}("AMR_locale")}. Use \code{language = NULL} or \code{language = ""} to prevent translation.
#' @param ... other parameters passed on to \code{\link{as.mo}} #' @param ... other parameters passed on to \code{\link{as.mo}}
#' @param open browse the URL using \code{\link[utils]{browseURL}} #' @param open browse the URL using \code{\link[utils]{browseURL}()}
#' @details All functions will return the most recently known taxonomic property according to the Catalogue of Life, except for \code{mo_ref}, \code{mo_authors} and \code{mo_year}. This leads to the following results: #' @details All functions will return the most recently known taxonomic property according to the Catalogue of Life, except for \code{mo_ref}, \code{mo_authors} and \code{mo_year}. This leads to the following results:
#' \itemize{ #' \itemize{
#' \item{\code{mo_fullname("Chlamydia psittaci")} will return \code{"Chlamydophila psittaci"} (with a warning about the renaming)} #' \item{\code{mo_fullname("Chlamydia psittaci")} will return \code{"Chlamydophila psittaci"} (with a warning about the renaming)}
@ -34,9 +34,9 @@
#' \item{\code{mo_ref("Chlamydophila psittaci")} will return \code{"Everett et al., 1999"} (without a warning)} #' \item{\code{mo_ref("Chlamydophila psittaci")} will return \code{"Everett et al., 1999"} (without a warning)}
#' } #' }
#' #'
#' The Gram stain - \code{mo_gramstain()} - will be determined on the taxonomic kingdom and phylum. According to Cavalier-Smith (2002) who defined subkingdoms Negibacteria and Posibacteria, only these phyla are Posibacteria: Actinobacteria, Chloroflexi, Firmicutes and Tenericutes (ref: \url{https://itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=956097}). These bacteria are considered Gram positive - all other bacteria are considered Gram negative. Species outside the kingdom of Bacteria will return a value \code{NA}. #' The Gram stain - \code{mo_gramstain()} - will be determined on the taxonomic kingdom and phylum. According to Cavalier-Smith (2002) who defined subkingdoms Negibacteria and Posibacteria, only these phyla are Posibacteria: Actinobacteria, Chloroflexi, Firmicutes and Tenericutes. These bacteria are considered Gram positive - all other bacteria are considered Gram negative. Species outside the kingdom of Bacteria will return a value \code{NA}.
#' #'
#' The function \code{mo_url()} will return the direct URL to the species in the Catalogue of Life. #' The function \code{mo_url()} will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species.
#' @inheritSection get_locale Supported languages #' @inheritSection get_locale Supported languages
#' @inheritSection catalogue_of_life Catalogue of Life #' @inheritSection catalogue_of_life Catalogue of Life
#' @inheritSection as.mo Source #' @inheritSection as.mo Source
@ -99,7 +99,7 @@
#' #'
#' # Becker classification, see ?as.mo #' # Becker classification, see ?as.mo
#' mo_fullname("S. epi") # "Staphylococcus epidermidis" #' mo_fullname("S. epi") # "Staphylococcus epidermidis"
#' mo_fullname("S. epi", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)" #' mo_fullname("S. epi", Becker = TRUE) # "Coagulase-negative Staphylococcus (CoNS)"
#' mo_shortname("S. epi") # "S. epidermidis" #' mo_shortname("S. epi") # "S. epidermidis"
#' mo_shortname("S. epi", Becker = TRUE) # "CoNS" #' mo_shortname("S. epi", Becker = TRUE) # "CoNS"
#' #'
@ -320,14 +320,24 @@ mo_taxonomy <- function(x, language = get_locale(), ...) {
#' @rdname mo_property #' @rdname mo_property
#' @importFrom utils browseURL #' @importFrom utils browseURL
#' @importFrom dplyr %>% left_join select mutate case_when
#' @export #' @export
mo_url <- function(x, open = FALSE, ...) { mo_url <- function(x, open = FALSE, ...) {
u <- mo_validate(x = x, property = "species_id", ...) mo <- AMR::as.mo(x = x, ... = ...)
u[u != ""] <- paste0(catalogue_of_life$url, "/details/species/id/", u) df <- data.frame(mo, stringsAsFactors = FALSE) %>%
names(u) <- mo_fullname(x = x, ... = ...) left_join(select(AMR::microorganisms, mo, source, species_id), by = "mo") %>%
mutate(url = case_when(source == "CoL" ~
paste0(gsub("{year}", catalogue_of_life$year, catalogue_of_life$url_CoL, fixed = TRUE), "details/species/id/", species_id),
source == "DSMZ" ~
paste0(catalogue_of_life$url_DSMZ, "?bnu_no=", species_id, "#", species_id),
TRUE ~
NA_character_))
u <- df$url
names(u) <- mo_fullname(mo)
if (open == TRUE) { if (open == TRUE) {
if (length(u) > 1) { if (length(u) > 1) {
warning("only the first URL will be opened, as `browseURL` only suports one string.") warning("only the first URL will be opened, as `browseURL()` only suports one string.")
} }
browseURL(u[1L]) browseURL(u[1L])
} }
@ -364,7 +374,7 @@ mo_translate <- function(x, language) {
} }
x_tobetranslated <- grepl(x = x, x_tobetranslated <- grepl(x = x,
pattern = "(Coagulase Negative Staphylococcus|Coagulase Positive Staphylococcus|Beta-haemolytic Streptococcus|unknown Gram negatives|unknown Gram positives|unknown name|unknown kingdom|unknown phylum|unknown class|unknown order|unknown family|unknown genus|unknown species|unknown subspecies|unknown rank|CoNS|CoPS|Gram negative|Gram positive|Bacteria|Fungi|Protozoa|biogroup|biotype|vegetative|group|Group)") pattern = "(Coagulase-negative Staphylococcus|Coagulase-positive Staphylococcus|Beta-haemolytic Streptococcus|unknown Gram negatives|unknown Gram positives|unknown name|unknown kingdom|unknown phylum|unknown class|unknown order|unknown family|unknown genus|unknown species|unknown subspecies|unknown rank|CoNS|CoPS|Gram negative|Gram positive|Bacteria|Fungi|Protozoa|biogroup|biotype|vegetative|group|Group)")
if (sum(x_tobetranslated, na.rm = TRUE) == 0) { if (sum(x_tobetranslated, na.rm = TRUE) == 0) {
return(x) return(x)
@ -374,8 +384,8 @@ mo_translate <- function(x, language) {
x[x_tobetranslated] <- case_when( x[x_tobetranslated] <- case_when(
# German # German
language == "de" ~ x[x_tobetranslated] %>% language == "de" ~ x[x_tobetranslated] %>%
gsub("Coagulase Negative Staphylococcus","Koagulase-negative Staphylococcus", ., fixed = TRUE) %>% gsub("Coagulase-negative Staphylococcus","Koagulase-negative Staphylococcus", ., fixed = TRUE) %>%
gsub("Coagulase Positive Staphylococcus","Koagulase-positive Staphylococcus", ., fixed = TRUE) %>% gsub("Coagulase-positive Staphylococcus","Koagulase-positive Staphylococcus", ., fixed = TRUE) %>%
gsub("Beta-haemolytic Streptococcus", "Beta-h\u00e4molytischer Streptococcus", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Beta-h\u00e4molytischer Streptococcus", ., fixed = TRUE) %>%
gsub("unknown Gram negatives", "unbekannte Gramnegativen", ., fixed = TRUE) %>% gsub("unknown Gram negatives", "unbekannte Gramnegativen", ., fixed = TRUE) %>%
gsub("unknown Gram positives", "unbekannte Grampositiven", ., fixed = TRUE) %>% gsub("unknown Gram positives", "unbekannte Grampositiven", ., fixed = TRUE) %>%
@ -405,8 +415,8 @@ mo_translate <- function(x, language) {
# Dutch # Dutch
language == "nl" ~ x[x_tobetranslated] %>% language == "nl" ~ x[x_tobetranslated] %>%
gsub("Coagulase Negative Staphylococcus","Coagulase-negatieve Staphylococcus", ., fixed = TRUE) %>% gsub("Coagulase-negative Staphylococcus","Coagulase-negatieve Staphylococcus", ., fixed = TRUE) %>%
gsub("Coagulase Positive Staphylococcus","Coagulase-positieve Staphylococcus", ., fixed = TRUE) %>% gsub("Coagulase-positive Staphylococcus","Coagulase-positieve Staphylococcus", ., fixed = TRUE) %>%
gsub("Beta-haemolytic Streptococcus", "Beta-hemolytische Streptococcus", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Beta-hemolytische Streptococcus", ., fixed = TRUE) %>%
gsub("unknown Gram negatives", "onbekende Gram-negatieven", ., fixed = TRUE) %>% gsub("unknown Gram negatives", "onbekende Gram-negatieven", ., fixed = TRUE) %>%
gsub("unknown Gram positives", "onbekende Gram-positieven", ., fixed = TRUE) %>% gsub("unknown Gram positives", "onbekende Gram-positieven", ., fixed = TRUE) %>%
@ -436,8 +446,8 @@ mo_translate <- function(x, language) {
# Spanish # Spanish
language == "es" ~ x[x_tobetranslated] %>% language == "es" ~ x[x_tobetranslated] %>%
gsub("Coagulase Negative Staphylococcus","Staphylococcus coagulasa negativo", ., fixed = TRUE) %>% gsub("Coagulase-negative Staphylococcus","Staphylococcus coagulasa negativo", ., fixed = TRUE) %>%
gsub("Coagulase Positive Staphylococcus","Staphylococcus coagulasa positivo", ., fixed = TRUE) %>% gsub("Coagulase-positive Staphylococcus","Staphylococcus coagulasa positivo", ., fixed = TRUE) %>%
gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>%
gsub("unknown Gram negatives", "Gram negativos desconocidos", ., fixed = TRUE) %>% gsub("unknown Gram negatives", "Gram negativos desconocidos", ., fixed = TRUE) %>%
gsub("unknown Gram positives", "Gram positivos desconocidos", ., fixed = TRUE) %>% gsub("unknown Gram positives", "Gram positivos desconocidos", ., fixed = TRUE) %>%
@ -465,8 +475,8 @@ mo_translate <- function(x, language) {
# Italian # Italian
language == "it" ~ x[x_tobetranslated] %>% language == "it" ~ x[x_tobetranslated] %>%
gsub("Coagulase Negative Staphylococcus","Staphylococcus negativo coagulasi", ., fixed = TRUE) %>% gsub("Coagulase-negative Staphylococcus","Staphylococcus negativo coagulasi", ., fixed = TRUE) %>%
gsub("Coagulase Positive Staphylococcus","Staphylococcus positivo coagulasi", ., fixed = TRUE) %>% gsub("Coagulase-positive Staphylococcus","Staphylococcus positivo coagulasi", ., fixed = TRUE) %>%
gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-emolitico", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-emolitico", ., fixed = TRUE) %>%
gsub("unknown Gram negatives", "Gram negativi sconosciuti", ., fixed = TRUE) %>% gsub("unknown Gram negatives", "Gram negativi sconosciuti", ., fixed = TRUE) %>%
gsub("unknown Gram positives", "Gram positivi sconosciuti", ., fixed = TRUE) %>% gsub("unknown Gram positives", "Gram positivi sconosciuti", ., fixed = TRUE) %>%
@ -493,8 +503,8 @@ mo_translate <- function(x, language) {
# French # French
language == "fr" ~ x[x_tobetranslated] %>% language == "fr" ~ x[x_tobetranslated] %>%
gsub("Coagulase Negative Staphylococcus","Staphylococcus \u00e0 coagulase n\u00e9gative", ., fixed = TRUE) %>% gsub("Coagulase-negative Staphylococcus","Staphylococcus \u00e0 coagulase n\u00e9gative", ., fixed = TRUE) %>%
gsub("Coagulase Positive Staphylococcus","Staphylococcus \u00e0 coagulase positif", ., fixed = TRUE) %>% gsub("Coagulase-positive Staphylococcus","Staphylococcus \u00e0 coagulase positif", ., fixed = TRUE) %>%
gsub("Beta-haemolytic Streptococcus", "Streptococcus B\u00eata-h\u00e9molytique", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Streptococcus B\u00eata-h\u00e9molytique", ., fixed = TRUE) %>%
gsub("unknown Gram negatives", "Gram n\u00e9gatifs inconnus", ., fixed = TRUE) %>% gsub("unknown Gram negatives", "Gram n\u00e9gatifs inconnus", ., fixed = TRUE) %>%
gsub("unknown Gram positives", "Gram positifs inconnus", ., fixed = TRUE) %>% gsub("unknown Gram positives", "Gram positifs inconnus", ., fixed = TRUE) %>%
@ -522,8 +532,8 @@ mo_translate <- function(x, language) {
# Portuguese # Portuguese
language == "pt" ~ x[x_tobetranslated] %>% language == "pt" ~ x[x_tobetranslated] %>%
gsub("Coagulase Negative Staphylococcus","Staphylococcus coagulase negativo", ., fixed = TRUE) %>% gsub("Coagulase-negative Staphylococcus","Staphylococcus coagulase negativo", ., fixed = TRUE) %>%
gsub("Coagulase Positive Staphylococcus","Staphylococcus coagulase positivo", ., fixed = TRUE) %>% gsub("Coagulase-positive Staphylococcus","Staphylococcus coagulase positivo", ., fixed = TRUE) %>%
gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>%
gsub("unknown Gram negatives", "Gram negativos desconhecidos", ., fixed = TRUE) %>% gsub("unknown Gram negatives", "Gram negativos desconhecidos", ., fixed = TRUE) %>%
gsub("unknown Gram positives", "Gram positivos desconhecidos", ., fixed = TRUE) %>% gsub("unknown Gram positives", "Gram positivos desconhecidos", ., fixed = TRUE) %>%
@ -550,7 +560,6 @@ mo_translate <- function(x, language) {
iconv(to = "UTF-8")) iconv(to = "UTF-8"))
x x
} }
mo_validate <- function(x, property, ...) { mo_validate <- function(x, property, ...) {

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -78,7 +78,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a> <a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>

View File

@ -40,7 +40,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>
@ -192,7 +192,7 @@
<h1>Benchmarks</h1> <h1>Benchmarks</h1>
<h4 class="author">Matthijs S. Berends</h4> <h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">15 March 2019</h4> <h4 class="date">18 March 2019</h4>
<div class="hidden name"><code>benchmarks.Rmd</code></div> <div class="hidden name"><code>benchmarks.Rmd</code></div>
@ -217,14 +217,14 @@
<a class="sourceLine" id="cb2-8" title="8"> <span class="dt">times =</span> <span class="dv">10</span>)</a> <a class="sourceLine" id="cb2-8" title="8"> <span class="dt">times =</span> <span class="dv">10</span>)</a>
<a class="sourceLine" id="cb2-9" title="9"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(S.aureus, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">2</span>)</a> <a class="sourceLine" id="cb2-9" title="9"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(S.aureus, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">2</span>)</a>
<a class="sourceLine" id="cb2-10" title="10"><span class="co">#&gt; Unit: milliseconds</span></a> <a class="sourceLine" id="cb2-10" title="10"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb2-11" title="11"><span class="co">#&gt; expr min lq mean median uq max neval</span></a> <a class="sourceLine" id="cb2-11" title="11"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb2-12" title="12"><span class="co">#&gt; as.mo("sau") 17.0 17.0 22.0 17.0 19.0 59.0 10</span></a> <a class="sourceLine" id="cb2-12" title="12"><span class="co">#&gt; as.mo("sau") 18.0 18.0 22 18.0 18.0 61 10</span></a>
<a class="sourceLine" id="cb2-13" title="13"><span class="co">#&gt; as.mo("stau") 41.0 41.0 46.0 41.0 44.0 83.0 10</span></a> <a class="sourceLine" id="cb2-13" title="13"><span class="co">#&gt; as.mo("stau") 49.0 50.0 62 50.0 50.0 130 10</span></a>
<a class="sourceLine" id="cb2-14" title="14"><span class="co">#&gt; as.mo("staaur") 17.0 17.0 26.0 17.0 18.0 74.0 10</span></a> <a class="sourceLine" id="cb2-14" title="14"><span class="co">#&gt; as.mo("staaur") 18.0 18.0 27 18.0 18.0 66 10</span></a>
<a class="sourceLine" id="cb2-15" title="15"><span class="co">#&gt; as.mo("STAAUR") 17.0 17.0 29.0 17.0 52.0 62.0 10</span></a> <a class="sourceLine" id="cb2-15" title="15"><span class="co">#&gt; as.mo("STAAUR") 18.0 18.0 23 18.0 19.0 66 10</span></a>
<a class="sourceLine" id="cb2-16" title="16"><span class="co">#&gt; as.mo("S. aureus") 31.0 31.0 32.0 31.0 32.0 32.0 10</span></a> <a class="sourceLine" id="cb2-16" title="16"><span class="co">#&gt; as.mo("S. aureus") 29.0 29.0 39 29.0 42.0 73 10</span></a>
<a class="sourceLine" id="cb2-17" title="17"><span class="co">#&gt; as.mo("S. aureus") 31.0 31.0 48.0 32.0 73.0 110.0 10</span></a> <a class="sourceLine" id="cb2-17" title="17"><span class="co">#&gt; as.mo("S. aureus") 29.0 29.0 38 29.0 31.0 72 10</span></a>
<a class="sourceLine" id="cb2-18" title="18"><span class="co">#&gt; as.mo("Staphylococcus aureus") 7.4 7.4 7.7 7.4 8.2 8.6 10</span></a></code></pre></div> <a class="sourceLine" id="cb2-18" title="18"><span class="co">#&gt; as.mo("Staphylococcus aureus") 8.3 8.3 12 8.3 8.8 44 10</span></a></code></pre></div>
<p>In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first one is a WHONET code) or common laboratory codes, or common full organism names like the last one. Full organism names are always preferred.</p> <p>In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first one is a WHONET code) or common laboratory codes, or common full organism names like the last one. Full organism names are always preferred.</p>
<p>To achieve this speed, the <code>as.mo</code> function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of <em>Thermus islandicus</em> (<code>B_THERMS_ISL</code>), a bug probably never found before in humans:</p> <p>To achieve this speed, the <code>as.mo</code> function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of <em>Thermus islandicus</em> (<code>B_THERMS_ISL</code>), a bug probably never found before in humans:</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb3-1" title="1">T.islandicus &lt;-<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/microbenchmark/topics/microbenchmark">microbenchmark</a></span>(<span class="kw"><a href="../reference/as.mo.html">as.mo</a></span>(<span class="st">"theisl"</span>),</a> <div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb3-1" title="1">T.islandicus &lt;-<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/microbenchmark/topics/microbenchmark">microbenchmark</a></span>(<span class="kw"><a href="../reference/as.mo.html">as.mo</a></span>(<span class="st">"theisl"</span>),</a>
@ -236,12 +236,12 @@
<a class="sourceLine" id="cb3-7" title="7"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(T.islandicus, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">2</span>)</a> <a class="sourceLine" id="cb3-7" title="7"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(T.islandicus, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">2</span>)</a>
<a class="sourceLine" id="cb3-8" title="8"><span class="co">#&gt; Unit: milliseconds</span></a> <a class="sourceLine" id="cb3-8" title="8"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb3-9" title="9"><span class="co">#&gt; expr min lq mean median uq max neval</span></a> <a class="sourceLine" id="cb3-9" title="9"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb3-10" title="10"><span class="co">#&gt; as.mo("theisl") 420 430 450 470 470 470 10</span></a> <a class="sourceLine" id="cb3-10" title="10"><span class="co">#&gt; as.mo("theisl") 470 470 490 470 510 520 10</span></a>
<a class="sourceLine" id="cb3-11" title="11"><span class="co">#&gt; as.mo("THEISL") 420 440 480 470 480 680 10</span></a> <a class="sourceLine" id="cb3-11" title="11"><span class="co">#&gt; as.mo("THEISL") 470 470 500 500 520 530 10</span></a>
<a class="sourceLine" id="cb3-12" title="12"><span class="co">#&gt; as.mo("T. islandicus") 290 290 310 300 330 350 10</span></a> <a class="sourceLine" id="cb3-12" title="12"><span class="co">#&gt; as.mo("T. islandicus") 74 74 84 75 77 130 10</span></a>
<a class="sourceLine" id="cb3-13" title="13"><span class="co">#&gt; as.mo("T. islandicus") 300 300 330 330 350 350 10</span></a> <a class="sourceLine" id="cb3-13" title="13"><span class="co">#&gt; as.mo("T. islandicus") 74 74 84 74 75 120 10</span></a>
<a class="sourceLine" id="cb3-14" title="14"><span class="co">#&gt; as.mo("Thermus islandicus") 67 67 86 68 110 120 10</span></a></code></pre></div> <a class="sourceLine" id="cb3-14" title="14"><span class="co">#&gt; as.mo("Thermus islandicus") 74 78 100 120 120 130 10</span></a></code></pre></div>
<p>That takes 11 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like <em>Thermus islandicus</em>) are almost as fast - these are the most probable input from most data sets.</p> <p>That takes 7.9 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like <em>Thermus islandicus</em>) are almost as fast - these are the most probable input from most data sets.</p>
<p>In the figure below, we compare <em>Escherichia coli</em> (which is very common) with <em>Prevotella brevis</em> (which is moderately common) and with <em>Thermus islandicus</em> (which is very uncommon):</p> <p>In the figure below, we compare <em>Escherichia coli</em> (which is very common) with <em>Prevotella brevis</em> (which is moderately common) and with <em>Thermus islandicus</em> (which is very uncommon):</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb4-1" title="1"><span class="kw"><a href="https://www.rdocumentation.org/packages/graphics/topics/par">par</a></span>(<span class="dt">mar =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/c">c</a></span>(<span class="dv">5</span>, <span class="dv">16</span>, <span class="dv">4</span>, <span class="dv">2</span>)) <span class="co"># set more space for left margin text (16)</span></a> <div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb4-1" title="1"><span class="kw"><a href="https://www.rdocumentation.org/packages/graphics/topics/par">par</a></span>(<span class="dt">mar =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/c">c</a></span>(<span class="dv">5</span>, <span class="dv">16</span>, <span class="dv">4</span>, <span class="dv">2</span>)) <span class="co"># set more space for left margin text (16)</span></a>
<a class="sourceLine" id="cb4-2" title="2"></a> <a class="sourceLine" id="cb4-2" title="2"></a>
@ -290,8 +290,8 @@
<a class="sourceLine" id="cb5-24" title="24"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a> <a class="sourceLine" id="cb5-24" title="24"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a>
<a class="sourceLine" id="cb5-25" title="25"><span class="co">#&gt; Unit: milliseconds</span></a> <a class="sourceLine" id="cb5-25" title="25"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb5-26" title="26"><span class="co">#&gt; expr min lq mean median uq max neval</span></a> <a class="sourceLine" id="cb5-26" title="26"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb5-27" title="27"><span class="co">#&gt; mo_fullname(x) 738 813 847 819 921 975 10</span></a></code></pre></div> <a class="sourceLine" id="cb5-27" title="27"><span class="co">#&gt; mo_fullname(x) 770 811 822 817 824 952 10</span></a></code></pre></div>
<p>So transforming 500,000 values (!!) of 50 unique values only takes 0.82 seconds (818 ms). You only lose time on your unique input values.</p> <p>So transforming 500,000 values (!!) of 50 unique values only takes 0.82 seconds (816 ms). You only lose time on your unique input values.</p>
</div> </div>
<div id="precalculated-results" class="section level3"> <div id="precalculated-results" class="section level3">
<h3 class="hasAnchor"> <h3 class="hasAnchor">
@ -304,10 +304,10 @@
<a class="sourceLine" id="cb6-5" title="5"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a> <a class="sourceLine" id="cb6-5" title="5"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a>
<a class="sourceLine" id="cb6-6" title="6"><span class="co">#&gt; Unit: milliseconds</span></a> <a class="sourceLine" id="cb6-6" title="6"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb6-7" title="7"><span class="co">#&gt; expr min lq mean median uq max neval</span></a> <a class="sourceLine" id="cb6-7" title="7"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb6-8" title="8"><span class="co">#&gt; A 11.000 11.100 15.700 11.300 11.400 52.900 10</span></a> <a class="sourceLine" id="cb6-8" title="8"><span class="co">#&gt; A 12.000 12.600 12.900 13.200 13.200 13.300 10</span></a>
<a class="sourceLine" id="cb6-9" title="9"><span class="co">#&gt; B 28.700 28.900 29.400 29.200 29.500 30.500 10</span></a> <a class="sourceLine" id="cb6-9" title="9"><span class="co">#&gt; B 26.100 26.200 27.200 26.600 28.100 30.400 10</span></a>
<a class="sourceLine" id="cb6-10" title="10"><span class="co">#&gt; C 0.322 0.556 0.523 0.568 0.581 0.586 10</span></a></code></pre></div> <a class="sourceLine" id="cb6-10" title="10"><span class="co">#&gt; C 0.394 0.738 0.745 0.774 0.869 0.982 10</span></a></code></pre></div>
<p>So going from <code><a href="../reference/mo_property.html">mo_fullname("Staphylococcus aureus")</a></code> to <code>"Staphylococcus aureus"</code> takes 0.0006 seconds - it doesn't even start calculating <em>if the result would be the same as the expected resulting value</em>. That goes for all helper functions:</p> <p>So going from <code><a href="../reference/mo_property.html">mo_fullname("Staphylococcus aureus")</a></code> to <code>"Staphylococcus aureus"</code> takes 0.0008 seconds - it doesn't even start calculating <em>if the result would be the same as the expected resulting value</em>. That goes for all helper functions:</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb7-1" title="1">run_it &lt;-<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/microbenchmark/topics/microbenchmark">microbenchmark</a></span>(<span class="dt">A =</span> <span class="kw"><a href="../reference/mo_property.html">mo_species</a></span>(<span class="st">"aureus"</span>),</a> <div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb7-1" title="1">run_it &lt;-<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/microbenchmark/topics/microbenchmark">microbenchmark</a></span>(<span class="dt">A =</span> <span class="kw"><a href="../reference/mo_property.html">mo_species</a></span>(<span class="st">"aureus"</span>),</a>
<a class="sourceLine" id="cb7-2" title="2"> <span class="dt">B =</span> <span class="kw"><a href="../reference/mo_property.html">mo_genus</a></span>(<span class="st">"Staphylococcus"</span>),</a> <a class="sourceLine" id="cb7-2" title="2"> <span class="dt">B =</span> <span class="kw"><a href="../reference/mo_property.html">mo_genus</a></span>(<span class="st">"Staphylococcus"</span>),</a>
<a class="sourceLine" id="cb7-3" title="3"> <span class="dt">C =</span> <span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"Staphylococcus aureus"</span>),</a> <a class="sourceLine" id="cb7-3" title="3"> <span class="dt">C =</span> <span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"Staphylococcus aureus"</span>),</a>
@ -320,14 +320,14 @@
<a class="sourceLine" id="cb7-10" title="10"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a> <a class="sourceLine" id="cb7-10" title="10"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a>
<a class="sourceLine" id="cb7-11" title="11"><span class="co">#&gt; Unit: milliseconds</span></a> <a class="sourceLine" id="cb7-11" title="11"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb7-12" title="12"><span class="co">#&gt; expr min lq mean median uq max neval</span></a> <a class="sourceLine" id="cb7-12" title="12"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb7-13" title="13"><span class="co">#&gt; A 0.314 0.339 0.399 0.380 0.460 0.507 10</span></a> <a class="sourceLine" id="cb7-13" title="13"><span class="co">#&gt; A 0.316 0.382 0.407 0.430 0.434 0.457 10</span></a>
<a class="sourceLine" id="cb7-14" title="14"><span class="co">#&gt; B 0.347 0.387 0.455 0.402 0.493 0.684 10</span></a> <a class="sourceLine" id="cb7-14" title="14"><span class="co">#&gt; B 0.371 0.470 0.497 0.498 0.548 0.579 10</span></a>
<a class="sourceLine" id="cb7-15" title="15"><span class="co">#&gt; C 0.429 0.505 0.566 0.588 0.656 0.660 10</span></a> <a class="sourceLine" id="cb7-15" title="15"><span class="co">#&gt; C 0.410 0.465 0.662 0.606 0.851 0.944 10</span></a>
<a class="sourceLine" id="cb7-16" title="16"><span class="co">#&gt; D 0.321 0.340 0.383 0.367 0.412 0.490 10</span></a> <a class="sourceLine" id="cb7-16" title="16"><span class="co">#&gt; D 0.366 0.376 0.406 0.393 0.429 0.493 10</span></a>
<a class="sourceLine" id="cb7-17" title="17"><span class="co">#&gt; E 0.303 0.328 0.369 0.379 0.403 0.449 10</span></a> <a class="sourceLine" id="cb7-17" title="17"><span class="co">#&gt; E 0.301 0.318 0.374 0.350 0.426 0.476 10</span></a>
<a class="sourceLine" id="cb7-18" title="18"><span class="co">#&gt; F 0.251 0.323 0.346 0.348 0.391 0.400 10</span></a> <a class="sourceLine" id="cb7-18" title="18"><span class="co">#&gt; F 0.304 0.331 0.387 0.392 0.438 0.482 10</span></a>
<a class="sourceLine" id="cb7-19" title="19"><span class="co">#&gt; G 0.286 0.305 0.345 0.338 0.389 0.398 10</span></a> <a class="sourceLine" id="cb7-19" title="19"><span class="co">#&gt; G 0.303 0.331 0.381 0.374 0.432 0.473 10</span></a>
<a class="sourceLine" id="cb7-20" title="20"><span class="co">#&gt; H 0.272 0.297 0.355 0.338 0.427 0.450 10</span></a></code></pre></div> <a class="sourceLine" id="cb7-20" title="20"><span class="co">#&gt; H 0.316 0.374 0.430 0.399 0.443 0.709 10</span></a></code></pre></div>
<p>Of course, when running <code><a href="../reference/mo_property.html">mo_phylum("Firmicutes")</a></code> the function has zero knowledge about the actual microorganism, namely <em>S. aureus</em>. But since the result would be <code>"Firmicutes"</code> too, there is no point in calculating the result. And because this package knows all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.</p> <p>Of course, when running <code><a href="../reference/mo_property.html">mo_phylum("Firmicutes")</a></code> the function has zero knowledge about the actual microorganism, namely <em>S. aureus</em>. But since the result would be <code>"Firmicutes"</code> too, there is no point in calculating the result. And because this package knows all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.</p>
</div> </div>
<div id="results-in-other-languages" class="section level3"> <div id="results-in-other-languages" class="section level3">
@ -335,7 +335,7 @@
<a href="#results-in-other-languages" class="anchor"></a>Results in other languages</h3> <a href="#results-in-other-languages" class="anchor"></a>Results in other languages</h3>
<p>When the system language is non-English and supported by this <code>AMR</code> package, some functions will have a translated result. This takes almost no extra time:</p> <p>When the system language is non-English and supported by this <code>AMR</code> package, some functions will have a translated result. This takes almost no extra time:</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb8-1" title="1"><span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"CoNS"</span>, <span class="dt">language =</span> <span class="st">"en"</span>) <span class="co"># or just mo_fullname("CoNS") on an English system</span></a> <div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb8-1" title="1"><span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"CoNS"</span>, <span class="dt">language =</span> <span class="st">"en"</span>) <span class="co"># or just mo_fullname("CoNS") on an English system</span></a>
<a class="sourceLine" id="cb8-2" title="2"><span class="co">#&gt; [1] "Coagulase Negative Staphylococcus (CoNS)"</span></a> <a class="sourceLine" id="cb8-2" title="2"><span class="co">#&gt; [1] "Coagulase-negative Staphylococcus (CoNS)"</span></a>
<a class="sourceLine" id="cb8-3" title="3"></a> <a class="sourceLine" id="cb8-3" title="3"></a>
<a class="sourceLine" id="cb8-4" title="4"><span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"CoNS"</span>, <span class="dt">language =</span> <span class="st">"es"</span>) <span class="co"># or just mo_fullname("CoNS") on a Spanish system</span></a> <a class="sourceLine" id="cb8-4" title="4"><span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"CoNS"</span>, <span class="dt">language =</span> <span class="st">"es"</span>) <span class="co"># or just mo_fullname("CoNS") on a Spanish system</span></a>
<a class="sourceLine" id="cb8-5" title="5"><span class="co">#&gt; [1] "Staphylococcus coagulasa negativo (CoNS)"</span></a> <a class="sourceLine" id="cb8-5" title="5"><span class="co">#&gt; [1] "Staphylococcus coagulasa negativo (CoNS)"</span></a>
@ -354,13 +354,13 @@
<a class="sourceLine" id="cb8-18" title="18"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">4</span>)</a> <a class="sourceLine" id="cb8-18" title="18"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">4</span>)</a>
<a class="sourceLine" id="cb8-19" title="19"><span class="co">#&gt; Unit: milliseconds</span></a> <a class="sourceLine" id="cb8-19" title="19"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb8-20" title="20"><span class="co">#&gt; expr min lq mean median uq max neval</span></a> <a class="sourceLine" id="cb8-20" title="20"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb8-21" title="21"><span class="co">#&gt; en 18.05 18.11 19.33 18.25 18.65 25.12 10</span></a> <a class="sourceLine" id="cb8-21" title="21"><span class="co">#&gt; en 19.22 19.33 20.42 19.58 19.84 28.13 10</span></a>
<a class="sourceLine" id="cb8-22" title="22"><span class="co">#&gt; de 30.15 30.84 43.57 31.08 72.47 73.96 10</span></a> <a class="sourceLine" id="cb8-22" title="22"><span class="co">#&gt; de 31.28 31.62 41.16 32.79 34.86 75.79 10</span></a>
<a class="sourceLine" id="cb8-23" title="23"><span class="co">#&gt; nl 30.30 30.63 34.96 30.71 30.73 73.40 10</span></a> <a class="sourceLine" id="cb8-23" title="23"><span class="co">#&gt; nl 31.56 31.71 36.86 31.97 33.34 78.40 10</span></a>
<a class="sourceLine" id="cb8-24" title="24"><span class="co">#&gt; es 30.24 30.49 31.39 30.97 32.20 33.68 10</span></a> <a class="sourceLine" id="cb8-24" title="24"><span class="co">#&gt; es 31.32 31.94 42.76 32.98 41.72 81.33 10</span></a>
<a class="sourceLine" id="cb8-25" title="25"><span class="co">#&gt; it 30.53 30.71 31.18 30.83 31.71 32.38 10</span></a> <a class="sourceLine" id="cb8-25" title="25"><span class="co">#&gt; it 31.31 31.67 31.96 31.90 32.15 33.26 10</span></a>
<a class="sourceLine" id="cb8-26" title="26"><span class="co">#&gt; fr 29.64 30.49 35.32 30.84 32.25 73.00 10</span></a> <a class="sourceLine" id="cb8-26" title="26"><span class="co">#&gt; fr 31.09 31.43 37.49 32.53 33.73 75.53 10</span></a>
<a class="sourceLine" id="cb8-27" title="27"><span class="co">#&gt; pt 30.73 30.81 39.47 31.09 32.29 73.25 10</span></a></code></pre></div> <a class="sourceLine" id="cb8-27" title="27"><span class="co">#&gt; pt 31.24 31.82 36.35 31.95 32.12 76.57 10</span></a></code></pre></div>
<p>Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.</p> <p>Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.</p>
</div> </div>
</div> </div>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

View File

@ -78,7 +78,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>

View File

@ -78,7 +78,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a> <a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>

View File

@ -42,7 +42,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a> <a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>
@ -197,12 +197,12 @@
<p><em>(<help title="Too Long, Didn't Read">TLDR</help> - to find out how to conduct AMR analysis, please <a href="./articles/AMR.html">continue reading here to get started</a>.</em></p> <p><em>(<help title="Too Long, Didn't Read">TLDR</help> - to find out how to conduct AMR analysis, please <a href="./articles/AMR.html">continue reading here to get started</a>.</em></p>
<hr> <hr>
<p><code>AMR</code> is a free and open-source <a href="https://www.r-project.org">R package</a> to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial properties by using evidence-based methods. It supports any data format, including WHONET/EARS-Net data.</p> <p><code>AMR</code> is a free and open-source <a href="https://www.r-project.org">R package</a> to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial properties by using evidence-based methods. It supports any data format, including WHONET/EARS-Net data.</p>
<p>After installing this package, R knows almost all ~60,000 microorganisms and ~500 antibiotics by name and code, and knows all about valid RSI and MIC values.</p> <p>After installing this package, R knows ~65,000 microorganisms and ~500 antibiotics by name and code, and knows all about valid RSI and MIC values.</p>
<p><strong>Used to SPSS?</strong> Read our <a href="./articles/SPSS.html">tutorial on how to import data from SPSS, SAS or Stata</a> and learn in which ways R outclasses any of these statistical packages.</p> <p><strong>Used to SPSS?</strong> Read our <a href="./articles/SPSS.html">tutorial on how to import data from SPSS, SAS or Stata</a> and learn in which ways R outclasses any of these statistical packages.</p>
<p>We created this package for both academic research and routine analysis at the Faculty of Medical Sciences of the University of Groningen, the Netherlands, and the Medical Microbiology &amp; Infection Prevention (MMBI) department of the University Medical Center Groningen (UMCG). This R package is actively maintained and is free software; you can freely use and distribute it for both personal and commercial (but <strong>not</strong> patent) purposes under the terms of the GNU General Public License version 2.0 (GPL-2), as published by the Free Software Foundation. Read the full license <a href="./LICENSE-text.html">here</a>.</p> <p>We created this package for both academic research and routine analysis at the Faculty of Medical Sciences of the University of Groningen, the Netherlands, and the Medical Microbiology &amp; Infection Prevention (MMBI) department of the University Medical Center Groningen (UMCG). This R package is actively maintained and is free software; you can freely use and distribute it for both personal and commercial (but <strong>not</strong> patent) purposes under the terms of the GNU General Public License version 2.0 (GPL-2), as published by the Free Software Foundation. Read the full license <a href="./LICENSE-text.html">here</a>.</p>
<p>This package can be used for:</p> <p>This package can be used for:</p>
<ul> <ul>
<li>Reference for microorganisms, since it contains almost all 60,000 microbial (sub)species from the <a href="http://www.catalogueoflife.org">Catalogue of Life</a> <li>Reference for microorganisms, since it contains all microbial (sub)species from the <a href="http://www.catalogueoflife.org">Catalogue of Life</a>
</li> </li>
<li>Calculating antimicrobial resistance</li> <li>Calculating antimicrobial resistance</li>
<li>Calculating empirical susceptibility of both mono therapy and combination therapy</li> <li>Calculating empirical susceptibility of both mono therapy and combination therapy</li>
@ -313,7 +313,7 @@
<li> <li>
<p>It <strong>cleanses existing data</strong> by providing new <em>classes</em> for microoganisms, antibiotics and antimicrobial results (both S/I/R and MIC). By installing this package, you teach R everything about microbiology that is needed for analysis. These functions all use intelligent rules to guess results that you would expect:</p> <p>It <strong>cleanses existing data</strong> by providing new <em>classes</em> for microoganisms, antibiotics and antimicrobial results (both S/I/R and MIC). By installing this package, you teach R everything about microbiology that is needed for analysis. These functions all use intelligent rules to guess results that you would expect:</p>
<ul> <ul>
<li>Use <code><a href="reference/as.mo.html">as.mo()</a></code> to get an ID of a microorganism. The IDs are human readable for the trained eye - the ID of <em>Klebsiella pneumoniae</em> is “B_KLBSL_PNE” (B stands for Bacteria) and the ID of <em>S. aureus</em> is “B_STPHY_AUR”. The function takes almost any text as input that looks like the name or code of a microorganism like “E. coli”, “esco” or “esccol” and tries to find expected results using intelligent rules combined with the included Catalogue of Life data set, consisting of almost 60,000 microorganisms. It only takes milliseconds to find results, please see our <a href="./articles/benchmarks.html">benchmarks</a>. Moreover, it can group <em>Staphylococci</em> into coagulase negative and positive (CoNS and CoPS, see <a href="./reference/as.mo.html#source">source</a>) and can categorise <em>Streptococci</em> into Lancefield groups (like beta-haemolytic <em>Streptococcus</em> Group B, <a href="./reference/as.mo.html#source">source</a>).</li> <li>Use <code><a href="reference/as.mo.html">as.mo()</a></code> to get an ID of a microorganism. The IDs are human readable for the trained eye - the ID of <em>Klebsiella pneumoniae</em> is “B_KLBSL_PNE” (B stands for Bacteria) and the ID of <em>S. aureus</em> is “B_STPHY_AUR”. The function takes almost any text as input that looks like the name or code of a microorganism like “E. coli”, “esco” or “esccol” and tries to find expected results using intelligent rules combined with the included Catalogue of Life data set. It only takes milliseconds to find results, please see our <a href="./articles/benchmarks.html">benchmarks</a>. Moreover, it can group <em>Staphylococci</em> into coagulase negative and positive (CoNS and CoPS, see <a href="./reference/as.mo.html#source">source</a>) and can categorise <em>Streptococci</em> into Lancefield groups (like beta-haemolytic <em>Streptococcus</em> Group B, <a href="./reference/as.mo.html#source">source</a>).</li>
<li>Use <code><a href="reference/as.rsi.html">as.rsi()</a></code> to transform values to valid antimicrobial results. It produces just S, I or R based on your input and warns about invalid values. Even values like “&lt;=0.002; S” (combined MIC/RSI) will result in “S”.</li> <li>Use <code><a href="reference/as.rsi.html">as.rsi()</a></code> to transform values to valid antimicrobial results. It produces just S, I or R based on your input and warns about invalid values. Even values like “&lt;=0.002; S” (combined MIC/RSI) will result in “S”.</li>
<li>Use <code><a href="reference/as.mic.html">as.mic()</a></code> to cleanse your MIC values. It produces a so-called factor (called <em>ordinal</em> in SPSS) with valid MIC values as levels. A value like “&lt;=0.002; S” (combined MIC/RSI) will result in “&lt;=0.002”.</li> <li>Use <code><a href="reference/as.mic.html">as.mic()</a></code> to cleanse your MIC values. It produces a so-called factor (called <em>ordinal</em> in SPSS) with valid MIC values as levels. A value like “&lt;=0.002; S” (combined MIC/RSI) will result in “&lt;=0.002”.</li>
<li>Use <code><a href="reference/as.atc.html">as.atc()</a></code> to get the ATC code of an antibiotic as defined by the WHO. This package contains a database with most LIS codes, official names, DDDs and even trade names of antibiotics. For example, the values “Furabid”, “Furadantin”, “nitro” all return the ATC code of Nitrofurantoine.</li> <li>Use <code><a href="reference/as.atc.html">as.atc()</a></code> to get the ATC code of an antibiotic as defined by the WHO. This package contains a database with most LIS codes, official names, DDDs and even trade names of antibiotics. For example, the values “Furabid”, “Furadantin”, “nitro” all return the ATC code of Nitrofurantoine.</li>
@ -329,7 +329,7 @@
</ul> </ul>
</li> </li>
<li>Use <code><a href="reference/mdro.html">mdro()</a></code> (abbreviation of Multi Drug Resistant Organisms) to check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently, national guidelines for Germany and the Netherlands are supported.</li> <li>Use <code><a href="reference/mdro.html">mdro()</a></code> (abbreviation of Multi Drug Resistant Organisms) to check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently, national guidelines for Germany and the Netherlands are supported.</li>
<li>The <a href="./reference/microorganisms.html">data set <code>microorganisms</code></a> contains the complete taxonomic tree of almost 60,000 microorganisms. Furthermore, the colloquial name and Gram stain are available, which enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like <code><a href="reference/mo_property.html">mo_genus()</a></code>, <code><a href="reference/mo_property.html">mo_family()</a></code>, <code><a href="reference/mo_property.html">mo_gramstain()</a></code> or even <code><a href="reference/mo_property.html">mo_phylum()</a></code>. As they use <code><a href="reference/as.mo.html">as.mo()</a></code> internally, they also use the same intelligent rules for determination. For example, <code><a href="reference/mo_property.html">mo_genus("MRSA")</a></code> and <code><a href="reference/mo_property.html">mo_genus("S. aureus")</a></code> will both return <code>"Staphylococcus"</code>. They also come with support for German, Dutch, Spanish, Italian, French and Portuguese. These functions can be used to add new variables to your data.</li> <li>The <a href="./reference/microorganisms.html">data set <code>microorganisms</code></a> contains the complete taxonomic tree of ~65,000 microorganisms. Furthermore, some colloquial names and all Gram stains are available, which enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like <code><a href="reference/mo_property.html">mo_genus()</a></code>, <code><a href="reference/mo_property.html">mo_family()</a></code>, <code><a href="reference/mo_property.html">mo_gramstain()</a></code> or even <code><a href="reference/mo_property.html">mo_phylum()</a></code>. As they use <code><a href="reference/as.mo.html">as.mo()</a></code> internally, they also use the same intelligent rules for determination. 
For example, <code><a href="reference/mo_property.html">mo_genus("MRSA")</a></code> and <code><a href="reference/mo_property.html">mo_genus("S. aureus")</a></code> will both return <code>"Staphylococcus"</code>. They also come with support for German, Dutch, Spanish, Italian, French and Portuguese. These functions can be used to add new variables to your data.</li>
<li>The <a href="./reference/antibiotics.html">data set <code>antibiotics</code></a> contains almost 500 antimicrobial drugs with their ATC code, EARS-Net code, common LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains hundreds of trade names. Use functions like <code><a href="reference/atc_property.html">atc_name()</a></code> and <code><a href="reference/atc_property.html">atc_tradenames()</a></code> to look up values. The <code>atc_*</code> functions use <code><a href="reference/as.atc.html">as.atc()</a></code> internally so they support the same intelligent rules to guess the most probable result. For example, <code><a href="reference/atc_property.html">atc_name("Fluclox")</a></code>, <code><a href="reference/atc_property.html">atc_name("Floxapen")</a></code> and <code><a href="reference/atc_property.html">atc_name("J01CF05")</a></code> will all return <code>"Flucloxacillin"</code>. These functions can again be used to add new variables to your data.</li> <li>The <a href="./reference/antibiotics.html">data set <code>antibiotics</code></a> contains almost 500 antimicrobial drugs with their ATC code, EARS-Net code, common LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains hundreds of trade names. Use functions like <code><a href="reference/atc_property.html">atc_name()</a></code> and <code><a href="reference/atc_property.html">atc_tradenames()</a></code> to look up values. The <code>atc_*</code> functions use <code><a href="reference/as.atc.html">as.atc()</a></code> internally so they support the same intelligent rules to guess the most probable result. For example, <code><a href="reference/atc_property.html">atc_name("Fluclox")</a></code>, <code><a href="reference/atc_property.html">atc_name("Floxapen")</a></code> and <code><a href="reference/atc_property.html">atc_name("J01CF05")</a></code> will all return <code>"Flucloxacillin"</code>. 
These functions can again be used to add new variables to your data.</li>
</ul> </ul>
</li> </li>

View File

@ -78,7 +78,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>

View File

@ -47,7 +47,7 @@
<script src="../extra.js"></script> <script src="../extra.js"></script>
<meta property="og:title" content="Transform to microorganism ID — as.mo" /> <meta property="og:title" content="Transform to microorganism ID — as.mo" />
<meta property="og:description" content="Use this function to determine a valid microorganism ID (mo). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea, Viruses, and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like &quot;Staphylococcus aureus&quot;), an abbreviated name (like &quot;S. aureus&quot;), an abbreviation known in the field (like &quot;MRSA&quot;), or just a genus. Please see Examples." /> <meta property="og:description" content="Use this function to determine a valid microorganism ID (mo). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like &quot;Staphylococcus aureus&quot;), an abbreviated name (like &quot;S. aureus&quot;), an abbreviation known in the field (like &quot;MRSA&quot;), or just a genus. Please see Examples." />
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png" /> <meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png" />
<meta name="twitter:card" content="summary" /> <meta name="twitter:card" content="summary" />
@ -80,7 +80,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>
@ -237,7 +237,7 @@
<div class="ref-description"> <div class="ref-description">
<p>Use this function to determine a valid microorganism ID (<code>mo</code>). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea, Viruses, and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like <code>"Staphylococcus aureus"</code>), an abbreviated name (like <code>"S. aureus"</code>), an abbreviation known in the field (like <code>"MRSA"</code>), or just a genus. Please see Examples.</p> <p>Use this function to determine a valid microorganism ID (<code>mo</code>). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like <code>"Staphylococcus aureus"</code>), an abbreviated name (like <code>"S. aureus"</code>), an abbreviation known in the field (like <code>"MRSA"</code>), or just a genus. Please see Examples.</p>
</div> </div>
@ -263,7 +263,7 @@
</tr> </tr>
<tr> <tr>
<th>Becker</th> <th>Becker</th>
<td><p>a logical to indicate whether <em>Staphylococci</em> should be categorised into Coagulase Negative <em>Staphylococci</em> ("CoNS") and Coagulase Positive <em>Staphylococci</em> ("CoPS") instead of their own species, according to Karsten Becker <em>et al.</em> [1].</p> <td><p>a logical to indicate whether <em>Staphylococci</em> should be categorised into Coagulase Negative <em>Staphylococci</em> ("CoNS") and Coagulase Positive <em>Staphylococci</em> ("CoPS") instead of their own species, according to Karsten Becker <em>et al.</em> [1]. Note that this does not include species that were newly named after this publication.</p>
<p>This excludes <em>Staphylococcus aureus</em> at default, use <code>Becker = "all"</code> to also categorise <em>S. aureus</em> as "CoPS".</p></td> <p>This excludes <em>Staphylococcus aureus</em> at default, use <code>Becker = "all"</code> to also categorise <em>S. aureus</em> as "CoPS".</p></td>
</tr> </tr>
<tr> <tr>
@ -304,17 +304,18 @@ A microbial ID from this package (class: <code>mo</code>) typically looks like t
| | ----&gt; species, a 3-4 letter acronym | | ----&gt; species, a 3-4 letter acronym
| ----&gt; genus, a 5-7 letter acronym, mostly without vowels | ----&gt; genus, a 5-7 letter acronym, mostly without vowels
----&gt; taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista), ----&gt; taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista),
F (Fungi), P (Protozoa), PL (Plantae) or V (Viruses) F (Fungi), P (Protozoa) or PL (Plantae)
</pre> </pre>
<p>Values that cannot be coerced will be considered 'unknown' and have an MO code <code>UNKNOWN</code>.</p> <p>Values that cannot be coerced will be considered 'unknown' and have an MO code <code>UNKNOWN</code>.</p>
<p>Use the <code><a href='mo_property.html'>mo_property</a>_*</code> functions to get properties based on the returned code, see Examples.</p> <p>Use the <code><a href='mo_property.html'>mo_property</a>_*</code> functions to get properties based on the returned code, see Examples.</p>
<p>The algorithm uses data from the Catalogue of Life (see below) and from one other source (see <code><a href='microorganisms.html'>?microorganisms</a></code>).</p>
<p><strong>Self-learning algorithm</strong> <br /> <p><strong>Self-learning algorithm</strong> <br />
The <code>as.mo()</code> function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use <code>clean_mo_history()</code> to reset the algorithms. Only experience from your current <code>AMR</code> package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95% faster than the first try. The algorithm saves its previous findings to <code>~/.Rhistory_mo</code>.</p> The <code>as.mo()</code> function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use <code>clean_mo_history()</code> to reset the algorithms. Only experience from your current <code>AMR</code> package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95% faster than the first try. The algorithm saves its previous findings to <code>~/.Rhistory_mo</code>.</p>
<p><strong>Intelligent rules</strong> <br /> <p><strong>Intelligent rules</strong> <br />
This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order:</p><ul> This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order:</p><ul>
<li><p>Valid MO codes and full names: it first searches in already valid MO code and known genus/species combinations</p></li> <li><p>Valid MO codes and full names: it first searches in already valid MO code and known genus/species combinations</p></li>
<li><p>Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones (see <em>Microbial prevalence of pathogens in humans</em> below)</p></li> <li><p>Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones (see <em>Microbial prevalence of pathogens in humans</em> below)</p></li>
<li><p>Taxonomic kingdom: it first searches in Bacteria/Chromista, then Fungi, then Protozoa, then Viruses</p></li> <li><p>Taxonomic kingdom: it first searches in Bacteria/Chromista, then Fungi, then Protozoa</p></li>
<li><p>Breakdown of input values: from here it starts to break down input values to find possible matches</p></li> <li><p>Breakdown of input values: from here it starts to break down input values to find possible matches</p></li>
</ul> </ul>
<p>A couple of effects because of these rules:</p><ul> <p>A couple of effects because of these rules:</p><ul>
@ -326,7 +327,6 @@ This function uses intelligent rules to help getting fast and logical results. I
The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is <code>allow_uncertain = TRUE</code>, which is equal to uncertainty level 2. Using <code>allow_uncertain = FALSE</code> will skip all of these additional rules:</p><ul> The algorithm can additionally use three different levels of uncertainty to guess valid results. The default is <code>allow_uncertain = TRUE</code>, which is equal to uncertainty level 2. Using <code>allow_uncertain = FALSE</code> will skip all of these additional rules:</p><ul>
<li><p>(uncertainty level 1): It tries to look for only matching genera</p></li> <li><p>(uncertainty level 1): It tries to look for only matching genera</p></li>
<li><p>(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names</p></li> <li><p>(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names</p></li>
<li><p>(uncertainty level 1): It tries to look for some manual changes which are not (yet) published to the Catalogue of Life (like <em>Propionibacterium</em> being <em>Cutibacterium</em>)</p></li>
<li><p>(uncertainty level 2): It strips off values between brackets and the brackets themselves, and re-evaluates the input with all previous rules</p></li> <li><p>(uncertainty level 2): It strips off values between brackets and the brackets themselves, and re-evaluates the input with all previous rules</p></li>
<li><p>(uncertainty level 2): It strips off words from the end one by one and re-evaluates the input with all previous rules</p></li> <li><p>(uncertainty level 2): It strips off words from the end one by one and re-evaluates the input with all previous rules</p></li>
<li><p>(uncertainty level 3): It strips off words from the start one by one and re-evaluates the input with all previous rules</p></li> <li><p>(uncertainty level 3): It strips off words from the start one by one and re-evaluates the input with all previous rules</p></li>
@ -391,6 +391,12 @@ The <code><a href='mo_property.html'>mo_property</a></code> functions (like <cod
<span class='fu'>as.mo</span>(<span class='st'>"VISA"</span>) <span class='co'># Vancomycin Intermediate S. aureus</span> <span class='fu'>as.mo</span>(<span class='st'>"VISA"</span>) <span class='co'># Vancomycin Intermediate S. aureus</span>
<span class='fu'>as.mo</span>(<span class='st'>"VRSA"</span>) <span class='co'># Vancomycin Resistant S. aureus</span> <span class='fu'>as.mo</span>(<span class='st'>"VRSA"</span>) <span class='co'># Vancomycin Resistant S. aureus</span>
<span class='co'># Dyslexia is no problem - these all work:</span>
<span class='fu'>as.mo</span>(<span class='st'>"Ureaplasma urealyticum"</span>)
<span class='fu'>as.mo</span>(<span class='st'>"Ureaplasma urealyticus"</span>)
<span class='fu'>as.mo</span>(<span class='st'>"Ureaplasmium urealytica"</span>)
<span class='fu'>as.mo</span>(<span class='st'>"Ureaplazma urealitycium"</span>)
<span class='fu'>as.mo</span>(<span class='st'>"Streptococcus group A"</span>) <span class='fu'>as.mo</span>(<span class='st'>"Streptococcus group A"</span>)
<span class='fu'>as.mo</span>(<span class='st'>"GAS"</span>) <span class='co'># Group A Streptococci</span> <span class='fu'>as.mo</span>(<span class='st'>"GAS"</span>) <span class='co'># Group A Streptococci</span>
<span class='fu'>as.mo</span>(<span class='st'>"GBS"</span>) <span class='co'># Group B Streptococci</span> <span class='fu'>as.mo</span>(<span class='st'>"GBS"</span>) <span class='co'># Group B Streptococci</span>
@ -401,13 +407,9 @@ The <code><a href='mo_property.html'>mo_property</a></code> functions (like <cod
<span class='fu'>as.mo</span>(<span class='st'>"S. pyogenes"</span>) <span class='co'># will remain species: B_STRPT_PYO</span> <span class='fu'>as.mo</span>(<span class='st'>"S. pyogenes"</span>) <span class='co'># will remain species: B_STRPT_PYO</span>
<span class='fu'>as.mo</span>(<span class='st'>"S. pyogenes"</span>, <span class='kw'>Lancefield</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>) <span class='co'># will not remain species: B_STRPT_GRA</span> <span class='fu'>as.mo</span>(<span class='st'>"S. pyogenes"</span>, <span class='kw'>Lancefield</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>) <span class='co'># will not remain species: B_STRPT_GRA</span>
<span class='co'># Use mo_* functions to get a specific property based on `mo`</span> <span class='co'># All mo_* functions use as.mo() internally too (see ?mo_property):</span>
<span class='no'>Ecoli</span> <span class='kw'>&lt;-</span> <span class='fu'>as.mo</span>(<span class='st'>"E. coli"</span>) <span class='co'># returns `B_ESCHR_COL`</span>
<span class='fu'><a href='mo_property.html'>mo_genus</a></span>(<span class='no'>Ecoli</span>) <span class='co'># returns "Escherichia"</span>
<span class='fu'><a href='mo_property.html'>mo_gramstain</a></span>(<span class='no'>Ecoli</span>) <span class='co'># returns "Gram negative"</span>
<span class='co'># but it uses as.mo internally too, so you could also just use:</span>
<span class='fu'><a href='mo_property.html'>mo_genus</a></span>(<span class='st'>"E. coli"</span>) <span class='co'># returns "Escherichia"</span> <span class='fu'><a href='mo_property.html'>mo_genus</a></span>(<span class='st'>"E. coli"</span>) <span class='co'># returns "Escherichia"</span>
<span class='fu'><a href='mo_property.html'>mo_gramstain</a></span>(<span class='st'>"E. coli"</span>) <span class='co'># returns "Gram negative"#'</span>
<span class='co'># }</span><span class='co'># NOT RUN {</span> <span class='co'># }</span><span class='co'># NOT RUN {</span>
<span class='no'>df</span>$<span class='no'>mo</span> <span class='kw'>&lt;-</span> <span class='fu'>as.mo</span>(<span class='no'>df</span>$<span class='no'>microorganism_name</span>) <span class='no'>df</span>$<span class='no'>mo</span> <span class='kw'>&lt;-</span> <span class='fu'>as.mo</span>(<span class='no'>df</span>$<span class='no'>microorganism_name</span>)

View File

@ -80,7 +80,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>
@ -253,7 +253,7 @@ This package contains the complete taxonomic tree of almost all microorganisms (
<p>Included are:</p><ul> <p>Included are:</p><ul>
<li><p>All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria, Protozoa and Viruses</p></li> <li><p>All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria and Protozoa</p></li>
<li><p>All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of <em>Aspergillus</em>, <em>Candida</em>, <em>Cryptococcus</em>, <em>Histplasma</em>, <em>Pneumocystis</em>, <em>Saccharomyces</em> and <em>Trichophyton</em>).</p></li> <li><p>All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of <em>Aspergillus</em>, <em>Candida</em>, <em>Cryptococcus</em>, <em>Histplasma</em>, <em>Pneumocystis</em>, <em>Saccharomyces</em> and <em>Trichophyton</em>).</p></li>
<li><p>All ~2,000 (sub)species from ~100 other relevant genera, from the kingdoms of Animalia and Plantae (like <em>Strongyloides</em> and <em>Taenia</em>)</p></li> <li><p>All ~2,000 (sub)species from ~100 other relevant genera, from the kingdoms of Animalia and Plantae (like <em>Strongyloides</em> and <em>Taenia</em>)</p></li>
<li><p>All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed</p></li> <li><p>All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed</p></li>
@ -268,6 +268,11 @@ This package contains the complete taxonomic tree of almost all microorganisms (
<p>On our website <a href='https://msberends.gitlab.io/AMR'>https://msberends.gitlab.io/AMR</a> you can find <a href='https://msberends.gitlab.io/AMR/articles/AMR.html'>a comprehensive tutorial</a> about how to conduct AMR analysis, the <a href='https://msberends.gitlab.io/AMR/reference'>complete documentation of all functions</a> (which reads a lot easier than here in R) and <a href='https://msberends.gitlab.io/AMR/articles/WHONET.html'>an example analysis using WHONET data</a>.</p> <p>On our website <a href='https://msberends.gitlab.io/AMR'>https://msberends.gitlab.io/AMR</a> you can find <a href='https://msberends.gitlab.io/AMR/articles/AMR.html'>a comprehensive tutorial</a> about how to conduct AMR analysis, the <a href='https://msberends.gitlab.io/AMR/reference'>complete documentation of all functions</a> (which reads a lot easier than here in R) and <a href='https://msberends.gitlab.io/AMR/articles/WHONET.html'>an example analysis using WHONET data</a>.</p>
<h2 class="hasAnchor" id="see-also"><a class="anchor" href="#see-also"></a>See also</h2>
<div class='dont-index'><p>Data set <code><a href='microorganisms.html'>microorganisms</a></code> for the actual data. <br />
Function <code><a href='as.mo.html'>as.mo</a>()</code> to use the data for intelligent determination of microorganisms.</p></div>
<h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2> <h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
<pre class="examples"><span class='co'># NOT RUN {</span> <pre class="examples"><span class='co'># NOT RUN {</span>
@ -311,6 +316,8 @@ This package contains the complete taxonomic tree of almost all microorganisms (
<li><a href="#read-more-on-our-website-">Read more on our website!</a></li> <li><a href="#read-more-on-our-website-">Read more on our website!</a></li>
<li><a href="#see-also">See also</a></li>
<li><a href="#examples">Examples</a></li> <li><a href="#examples">Examples</a></li>
</ul> </ul>

View File

@ -47,7 +47,7 @@
<script src="../extra.js"></script> <script src="../extra.js"></script>
<meta property="og:title" content="Version info of included Catalogue of Life — catalogue_of_life_version" /> <meta property="og:title" content="Version info of included Catalogue of Life — catalogue_of_life_version" />
<meta property="og:description" content="This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year." /> <meta property="og:description" content="This function returns information about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year." />
<meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png" /> <meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.png" />
<meta name="twitter:card" content="summary" /> <meta name="twitter:card" content="summary" />
@ -80,7 +80,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>
@ -237,15 +237,20 @@
<div class="ref-description"> <div class="ref-description">
<p>This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year.</p> <p>This function returns information about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year.</p>
</div> </div>
<pre class="usage"><span class='fu'>catalogue_of_life_version</span>()</pre> <pre class="usage"><span class='fu'>catalogue_of_life_version</span>()</pre>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
<p>a <code>list</code>, invisibly</p>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2> <h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>The list item <code>is_latest_annual_release</code> is based on the system date.</p> <p>The list item <code>is_latest_annual_release</code> is based on the system date.</p>
<p>For DSMZ, see <code><a href='microorganisms.html'>?microorganisms</a></code>.</p>
<h2 class="hasAnchor" id="catalogue-of-life"><a class="anchor" href="#catalogue-of-life"></a>Catalogue of Life</h2> <h2 class="hasAnchor" id="catalogue-of-life"><a class="anchor" href="#catalogue-of-life"></a>Catalogue of Life</h2>
@ -275,6 +280,8 @@ This package contains the complete taxonomic tree of almost all microorganisms (
<h2>Contents</h2> <h2>Contents</h2>
<ul class="nav nav-pills nav-stacked"> <ul class="nav nav-pills nav-stacked">
<li><a href="#value">Value</a></li>
<li><a href="#details">Details</a></li> <li><a href="#details">Details</a></li>
<li><a href="#catalogue-of-life">Catalogue of Life</a></li> <li><a href="#catalogue-of-life">Catalogue of Life</a></li>

View File

@ -78,7 +78,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>
@ -493,7 +493,7 @@
<td> <td>
<p><code><a href="microorganisms.html">microorganisms</a></code> </p> <p><code><a href="microorganisms.html">microorganisms</a></code> </p>
</td> </td>
<td><p>Data set with ~60,000 microorganisms</p></td> <td><p>Data set with ~65,000 microorganisms</p></td>
</tr><tr> </tr><tr>
<td> <td>

View File

@ -6,7 +6,7 @@
<meta http-equiv="X-UA-Compatible" content="IE=edge"> <meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Data set with ~60,000 microorganisms — microorganisms • AMR (for R)</title> <title>Data set with ~65,000 microorganisms — microorganisms • AMR (for R)</title>
<!-- favicons --> <!-- favicons -->
<link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png"> <link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
@ -45,7 +45,7 @@
<link href="../extra.css" rel="stylesheet"> <link href="../extra.css" rel="stylesheet">
<script src="../extra.js"></script> <script src="../extra.js"></script>
<meta property="og:title" content="Data set with ~60,000 microorganisms — microorganisms" /> <meta property="og:title" content="Data set with ~65,000 microorganisms — microorganisms" />
<meta property="og:description" content="A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using as.mo." /> <meta property="og:description" content="A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using as.mo." />
@ -80,7 +80,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>
@ -230,7 +230,7 @@
<div class="row"> <div class="row">
<div class="col-md-9 contents"> <div class="col-md-9 contents">
<div class="page-header"> <div class="page-header">
<h1>Data set with ~60,000 microorganisms</h1> <h1>Data set with ~65,000 microorganisms</h1>
<div class="hidden name"><code>microorganisms.Rd</code></div> <div class="hidden name"><code>microorganisms.Rd</code></div>
</div> </div>
@ -245,7 +245,7 @@
<h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2> <h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2>
<p>A <code><a href='https://www.rdocumentation.org/packages/base/topics/data.frame'>data.frame</a></code> with 59,985 observations and 15 variables:</p><dl class='dl-horizontal'> <p>A <code><a href='https://www.rdocumentation.org/packages/base/topics/data.frame'>data.frame</a></code> with 65,629 observations and 16 variables:</p><dl class='dl-horizontal'>
<dt><code>mo</code></dt><dd><p>ID of microorganism as used by this package</p></dd> <dt><code>mo</code></dt><dd><p>ID of microorganism as used by this package</p></dd>
<dt><code>col_id</code></dt><dd><p>Catalogue of Life ID</p></dd> <dt><code>col_id</code></dt><dd><p>Catalogue of Life ID</p></dd>
<dt><code>fullname</code></dt><dd><p>Full name, like <code>"Escherichia coli"</code></p></dd> <dt><code>fullname</code></dt><dd><p>Full name, like <code>"Escherichia coli"</code></p></dd>
@ -260,21 +260,30 @@
<dt><code>rank</code></dt><dd><p>Taxonomic rank of the microorganism, like <code>"species"</code> or <code>"genus"</code></p></dd> <dt><code>rank</code></dt><dd><p>Taxonomic rank of the microorganism, like <code>"species"</code> or <code>"genus"</code></p></dd>
<dt><code>ref</code></dt><dd><p>Author(s) and year of concerning scientific publication</p></dd> <dt><code>ref</code></dt><dd><p>Author(s) and year of concerning scientific publication</p></dd>
<dt><code>species_id</code></dt><dd><p>ID of the species as used by the Catalogue of Life</p></dd> <dt><code>species_id</code></dt><dd><p>ID of the species as used by the Catalogue of Life</p></dd>
<dt><code>source</code></dt><dd><p>Either <code>"CoL"</code>, <code>"DSMZ"</code> (see source) or "manually added"</p></dd>
<dt><code>prevalence</code></dt><dd><p>Prevalence of the microorganism, see <code><a href='as.mo.html'>?as.mo</a></code></p></dd> <dt><code>prevalence</code></dt><dd><p>Prevalence of the microorganism, see <code><a href='as.mo.html'>?as.mo</a></code></p></dd>
</dl> </dl>
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2> <h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>
<p>Catalogue of Life: Annual Checklist (public online database), <a href='http://www.catalogueoflife.org'>www.catalogueoflife.org</a>.</p> <p>Catalogue of Life: Annual Checklist (public online taxonomic database), <a href='http://www.catalogueoflife.org'>www.catalogueoflife.org</a> (check included annual version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version</a>()</code>).</p>
<p>Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, <a href='http://www.dsmz.de/bacterial-diversity/prokaryotic-nomenclature-up-to-date'>http://www.dsmz.de/bacterial-diversity/prokaryotic-nomenclature-up-to-date</a> (check included version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version</a>()</code>).</p>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2> <h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>Manually added were:</p><ul> <p>Manually added were:</p><ul>
<li><p>9 species of <em>Streptococcus</em> (beta haemolytic groups A, B, C, D, F, G, H, K and unspecified)</p></li> <li><p>9 species of <em>Streptococcus</em> (beta haemolytic groups A, B, C, D, F, G, H, K and unspecified)</p></li>
<li><p>2 species of <em>Staphylococcus</em> (coagulase-negative [CoNS] and coagulase-positive [CoPS])</p></li> <li><p>2 species of <em>Staphylococcus</em> (coagulase-negative [CoNS] and coagulase-positive [CoPS])</p></li>
<li><p>2 other undefined (unknown Gram negatives and unknown Gram positives)</p></li> <li><p>3 other undefined (unknown, unknown Gram negatives and unknown Gram positives)</p></li>
<li><p>8,830 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) that are not in the Catalogue of Life</p></li>
</ul> </ul>
<h2 class="hasAnchor" id="about-the-records-from-dsmz-see-source-"><a class="anchor" href="#about-the-records-from-dsmz-see-source-"></a>About the records from DSMZ (see source)</h2>
<p>Names of prokaryotes are defined as being validly published by the International Code of Nomenclature of Bacteria. Validly published are all names which are included in the Approved Lists of Bacterial Names and the names subsequently published in the International Journal of Systematic Bacteriology (IJSB) and, from January 2000, in the International Journal of Systematic and Evolutionary Microbiology (IJSEM) as original articles or in the validation lists.</p>
<p>From: <a href='https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads/readme.html'>https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads/readme.html</a></p>
<h2 class="hasAnchor" id="catalogue-of-life"><a class="anchor" href="#catalogue-of-life"></a>Catalogue of Life</h2> <h2 class="hasAnchor" id="catalogue-of-life"><a class="anchor" href="#catalogue-of-life"></a>Catalogue of Life</h2>
@ -303,6 +312,8 @@ This package contains the complete taxonomic tree of almost all microorganisms (
<li><a href="#details">Details</a></li> <li><a href="#details">Details</a></li>
<li><a href="#about-the-records-from-dsmz-see-source-">About the records from DSMZ (see source)</a></li>
<li><a href="#catalogue-of-life">Catalogue of Life</a></li> <li><a href="#catalogue-of-life">Catalogue of Life</a></li>
<li><a href="#read-more-on-our-website-">Read more on our website!</a></li> <li><a href="#read-more-on-our-website-">Read more on our website!</a></li>

View File

@ -80,7 +80,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>
@ -245,7 +245,7 @@
<h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2> <h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2>
<p>A <code><a href='https://www.rdocumentation.org/packages/base/topics/data.frame'>data.frame</a></code> with 17,069 observations and 4 variables:</p><dl class='dl-horizontal'> <p>A <code><a href='https://www.rdocumentation.org/packages/base/topics/data.frame'>data.frame</a></code> with 16,911 observations and 4 variables:</p><dl class='dl-horizontal'>
<dt><code>col_id</code></dt><dd><p>Catalogue of Life ID</p></dd> <dt><code>col_id</code></dt><dd><p>Catalogue of Life ID</p></dd>
<dt><code>tsn_new</code></dt><dd><p>New Catalogue of Life ID</p></dd> <dt><code>tsn_new</code></dt><dd><p>New Catalogue of Life ID</p></dd>
<dt><code>fullname</code></dt><dd><p>Old taxonomic name of the microorganism</p></dd> <dt><code>fullname</code></dt><dd><p>Old taxonomic name of the microorganism</p></dd>

View File

@ -80,7 +80,7 @@
</button> </button>
<span class="navbar-brand"> <span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a> <a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9023</span> <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.5.0.9024</span>
</span> </span>
</div> </div>
@ -296,7 +296,7 @@
</tr> </tr>
<tr> <tr>
<th>open</th> <th>open</th>
<td><p>browse the URL using <code><a href='https://www.rdocumentation.org/packages/utils/topics/browseURL'>browseURL</a></code></p></td> <td><p>browse the URL using <code><a href='https://www.rdocumentation.org/packages/utils/topics/browseURL'>browseURL</a>()</code></p></td>
</tr> </tr>
<tr> <tr>
<th>property</th> <th>property</th>
@ -322,8 +322,8 @@
<li><p><code>mo_ref("Chlamydia psittaci")</code> will return <code>"Page, 1968"</code> (with a warning about the renaming)</p></li> <li><p><code>mo_ref("Chlamydia psittaci")</code> will return <code>"Page, 1968"</code> (with a warning about the renaming)</p></li>
<li><p><code>mo_ref("Chlamydophila psittaci")</code> will return <code>"Everett et al., 1999"</code> (without a warning)</p></li> <li><p><code>mo_ref("Chlamydophila psittaci")</code> will return <code>"Everett et al., 1999"</code> (without a warning)</p></li>
</ul> </ul>
<p>The Gram stain - <code>mo_gramstain()</code> - will be determined on the taxonomic kingdom and phylum. According to Cavalier-Smith (2002) who defined subkingdoms Negibacteria and Posibacteria, only these phyla are Posibacteria: Actinobacteria, Chloroflexi, Firmicutes and Tenericutes (ref: <a href='https://itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&amp;search_value=956097'>https://itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&amp;search_value=956097</a>). These bacteria are considered Gram positive - all other bacteria are considered Gram negative. Species outside the kingdom of Bacteria will return a value <code>NA</code>.</p> <p>The Gram stain - <code>mo_gramstain()</code> - will be determined on the taxonomic kingdom and phylum. According to Cavalier-Smith (2002) who defined subkingdoms Negibacteria and Posibacteria, only these phyla are Posibacteria: Actinobacteria, Chloroflexi, Firmicutes and Tenericutes. These bacteria are considered Gram positive - all other bacteria are considered Gram negative. Species outside the kingdom of Bacteria will return a value <code>NA</code>.</p>
<p>The function <code>mo_url()</code> will return the direct URL to the species in the Catalogue of Life.</p> <p>The function <code>mo_url()</code> will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species.</p>
<h2 class="hasAnchor" id="supported-languages"><a class="anchor" href="#supported-languages"></a>Supported languages</h2> <h2 class="hasAnchor" id="supported-languages"><a class="anchor" href="#supported-languages"></a>Supported languages</h2>
@ -403,7 +403,7 @@ This package contains the complete taxonomic tree of almost all microorganisms (
<span class='co'># Becker classification, see ?as.mo</span> <span class='co'># Becker classification, see ?as.mo</span>
<span class='fu'>mo_fullname</span>(<span class='st'>"S. epi"</span>) <span class='co'># "Staphylococcus epidermidis"</span> <span class='fu'>mo_fullname</span>(<span class='st'>"S. epi"</span>) <span class='co'># "Staphylococcus epidermidis"</span>
<span class='fu'>mo_fullname</span>(<span class='st'>"S. epi"</span>, <span class='kw'>Becker</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>) <span class='co'># "Coagulase Negative Staphylococcus (CoNS)"</span> <span class='fu'>mo_fullname</span>(<span class='st'>"S. epi"</span>, <span class='kw'>Becker</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>) <span class='co'># "Coagulase-negative Staphylococcus (CoNS)"</span>
<span class='fu'>mo_shortname</span>(<span class='st'>"S. epi"</span>) <span class='co'># "S. epidermidis"</span> <span class='fu'>mo_shortname</span>(<span class='st'>"S. epi"</span>) <span class='co'># "S. epidermidis"</span>
<span class='fu'>mo_shortname</span>(<span class='st'>"S. epi"</span>, <span class='kw'>Becker</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>) <span class='co'># "CoNS"</span> <span class='fu'>mo_shortname</span>(<span class='st'>"S. epi"</span>, <span class='kw'>Becker</span> <span class='kw'>=</span> <span class='fl'>TRUE</span>) <span class='co'># "CoNS"</span>

View File

@ -6,7 +6,7 @@
`AMR` is a free and open-source [R package](https://www.r-project.org) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial properties by using evidence-based methods. It supports any data format, including WHONET/EARS-Net data. `AMR` is a free and open-source [R package](https://www.r-project.org) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial properties by using evidence-based methods. It supports any data format, including WHONET/EARS-Net data.
After installing this package, R knows almost all ~60,000 microorganisms and ~500 antibiotics by name and code, and knows all about valid RSI and MIC values. After installing this package, R knows ~65,000 microorganisms and ~500 antibiotics by name and code, and knows all about valid RSI and MIC values.
**Used to SPSS?** Read our [tutorial on how to import data from SPSS, SAS or Stata](./articles/SPSS.html) and learn in which ways R outclasses any of these statistical packages. **Used to SPSS?** Read our [tutorial on how to import data from SPSS, SAS or Stata](./articles/SPSS.html) and learn in which ways R outclasses any of these statistical packages.
@ -15,7 +15,7 @@ This R package is actively maintained and is free software; you can freely use a
This package can be used for: This package can be used for:
* Reference for microorganisms, since it contains almost all 60,000 microbial (sub)species from the [Catalogue of Life](http://www.catalogueoflife.org) * Reference for microorganisms, since it contains all microbial (sub)species from the [Catalogue of Life](http://www.catalogueoflife.org)
* Calculating antimicrobial resistance * Calculating antimicrobial resistance
* Calculating empirical susceptibility of both mono therapy and combination therapy * Calculating empirical susceptibility of both mono therapy and combination therapy
* Predicting future antimicrobial resistance using regression models * Predicting future antimicrobial resistance using regression models
@ -132,7 +132,7 @@ The `AMR` package basically does four important things:
1. It **cleanses existing data** by providing new *classes* for microorganisms, antibiotics and antimicrobial results (both S/I/R and MIC). By installing this package, you teach R everything about microbiology that is needed for analysis. These functions all use intelligent rules to guess results that you would expect: 1. It **cleanses existing data** by providing new *classes* for microorganisms, antibiotics and antimicrobial results (both S/I/R and MIC). By installing this package, you teach R everything about microbiology that is needed for analysis. These functions all use intelligent rules to guess results that you would expect:
* Use `as.mo()` to get an ID of a microorganism. The IDs are human readable for the trained eye - the ID of *Klebsiella pneumoniae* is "B_KLBSL_PNE" (B stands for Bacteria) and the ID of *S. aureus* is "B_STPHY_AUR". The function takes almost any text as input that looks like the name or code of a microorganism like "E. coli", "esco" or "esccol" and tries to find expected results using intelligent rules combined with the included Catalogue of Life data set, consisting of almost 60,000 microorganisms. It only takes milliseconds to find results, please see our [benchmarks](./articles/benchmarks.html). Moreover, it can group *Staphylococci* into coagulase negative and positive (CoNS and CoPS, see [source](./reference/as.mo.html#source)) and can categorise *Streptococci* into Lancefield groups (like beta-haemolytic *Streptococcus* Group B, [source](./reference/as.mo.html#source)). * Use `as.mo()` to get an ID of a microorganism. The IDs are human readable for the trained eye - the ID of *Klebsiella pneumoniae* is "B_KLBSL_PNE" (B stands for Bacteria) and the ID of *S. aureus* is "B_STPHY_AUR". The function takes almost any text as input that looks like the name or code of a microorganism like "E. coli", "esco" or "esccol" and tries to find expected results using intelligent rules combined with the included Catalogue of Life data set. It only takes milliseconds to find results, please see our [benchmarks](./articles/benchmarks.html). Moreover, it can group *Staphylococci* into coagulase negative and positive (CoNS and CoPS, see [source](./reference/as.mo.html#source)) and can categorise *Streptococci* into Lancefield groups (like beta-haemolytic *Streptococcus* Group B, [source](./reference/as.mo.html#source)).
* Use `as.rsi()` to transform values to valid antimicrobial results. It produces just S, I or R based on your input and warns about invalid values. Even values like "<=0.002; S" (combined MIC/RSI) will result in "S". * Use `as.rsi()` to transform values to valid antimicrobial results. It produces just S, I or R based on your input and warns about invalid values. Even values like "<=0.002; S" (combined MIC/RSI) will result in "S".
* Use `as.mic()` to cleanse your MIC values. It produces a so-called factor (called *ordinal* in SPSS) with valid MIC values as levels. A value like "<=0.002; S" (combined MIC/RSI) will result in "<=0.002". * Use `as.mic()` to cleanse your MIC values. It produces a so-called factor (called *ordinal* in SPSS) with valid MIC values as levels. A value like "<=0.002; S" (combined MIC/RSI) will result in "<=0.002".
* Use `as.atc()` to get the ATC code of an antibiotic as defined by the WHO. This package contains a database with most LIS codes, official names, DDDs and even trade names of antibiotics. For example, the values "Furabid", "Furadantin", "nitro" all return the ATC code of Nitrofurantoin. * Use `as.atc()` to get the ATC code of an antibiotic as defined by the WHO. This package contains a database with most LIS codes, official names, DDDs and even trade names of antibiotics. For example, the values "Furabid", "Furadantin", "nitro" all return the ATC code of Nitrofurantoin.
@ -143,7 +143,7 @@ The `AMR` package basically does four important things:
* Use `first_isolate()` to identify the first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute). * Use `first_isolate()` to identify the first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute).
* You can also identify first *weighted* isolates of every patient, an adjusted version of the CLSI guideline. This takes into account key antibiotics of every strain and compares them. * You can also identify first *weighted* isolates of every patient, an adjusted version of the CLSI guideline. This takes into account key antibiotics of every strain and compares them.
* Use `mdro()` (abbreviation of Multi Drug Resistant Organisms) to check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently, national guidelines for Germany and the Netherlands are supported. * Use `mdro()` (abbreviation of Multi Drug Resistant Organisms) to check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently, national guidelines for Germany and the Netherlands are supported.
* The [data set `microorganisms`](./reference/microorganisms.html) contains the complete taxonomic tree of almost 60,000 microorganisms. Furthermore, the colloquial name and Gram stain are available, which enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus()`, `mo_family()`, `mo_gramstain()` or even `mo_phylum()`. As they use `as.mo()` internally, they also use the same intelligent rules for determination. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, Spanish, Italian, French and Portuguese. These functions can be used to add new variables to your data. * The [data set `microorganisms`](./reference/microorganisms.html) contains the complete taxonomic tree of ~65,000 microorganisms. Furthermore, some colloquial names and all Gram stains are available, which enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus()`, `mo_family()`, `mo_gramstain()` or even `mo_phylum()`. As they use `as.mo()` internally, they also use the same intelligent rules for determination. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, Spanish, Italian, French and Portuguese. These functions can be used to add new variables to your data.
* The [data set `antibiotics`](./reference/antibiotics.html) contains almost 500 antimicrobial drugs with their ATC code, EARS-Net code, common LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains hundreds of trade names. Use functions like `atc_name()` and `atc_tradenames()` to look up values. The `atc_*` functions use `as.atc()` internally so they support the same intelligent rules to guess the most probable result. For example, `atc_name("Fluclox")`, `atc_name("Floxapen")` and `atc_name("J01CF05")` will all return `"Flucloxacillin"`. These functions can again be used to add new variables to your data. * The [data set `antibiotics`](./reference/antibiotics.html) contains almost 500 antimicrobial drugs with their ATC code, EARS-Net code, common LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains hundreds of trade names. Use functions like `atc_name()` and `atc_tradenames()` to look up values. The `atc_*` functions use `as.atc()` internally so they support the same intelligent rules to guess the most probable result. For example, `atc_name("Fluclox")`, `atc_name("Floxapen")` and `atc_name("J01CF05")` will all return `"Flucloxacillin"`. These functions can again be used to add new variables to your data.
3. It **analyses the data** with convenient functions that use well-known methods. 3. It **analyses the data** with convenient functions that use well-known methods.

View File

@ -26,7 +26,7 @@ clean_mo_history()
\arguments{ \arguments{
\item{x}{a character vector or a \code{data.frame} with one or two columns} \item{x}{a character vector or a \code{data.frame} with one or two columns}
\item{Becker}{a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. \item{Becker}{a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. Note that this does not include species that were newly named after this publication.
This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".} This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".}
@ -44,7 +44,7 @@ clean_mo_history()
Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}. Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}.
} }
\description{ \description{
Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea, Viruses, and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. Please see Examples. Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. Please see Examples.
} }
\details{ \details{
\strong{General info} \cr \strong{General info} \cr
@ -61,13 +61,15 @@ A microbial ID from this package (class: \code{mo}) typically looks like these e
| | ----> species, a 3-4 letter acronym | | ----> species, a 3-4 letter acronym
| ----> genus, a 5-7 letter acronym, mostly without vowels | ----> genus, a 5-7 letter acronym, mostly without vowels
----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista), ----> taxonomic kingdom: A (Archaea), AN (Animalia), B (Bacteria), C (Chromista),
F (Fungi), P (Protozoa), PL (Plantae) or V (Viruses) F (Fungi), P (Protozoa) or PL (Plantae)
} }
Values that cannot be coerced will be considered 'unknown' and have an MO code \code{UNKNOWN}. Values that cannot be coerced will be considered 'unknown' and have an MO code \code{UNKNOWN}.
Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples. Use the \code{\link{mo_property}_*} functions to get properties based on the returned code, see Examples.
The algorithm uses data from the Catalogue of Life (see below) and from one other source (see \code{?microorganisms}).
\strong{Self-learning algorithm} \cr \strong{Self-learning algorithm} \cr
The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}. The \code{as.mo()} function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use \code{clean_mo_history()} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. Usually, any guess after the first try runs 90-95\% faster than the first try. The algorithm saves its previous findings to \code{~/.Rhistory_mo}.
@ -76,7 +78,7 @@ This function uses intelligent rules to help getting fast and logical results. I
\itemize{ \itemize{
\item{Valid MO codes and full names: it first searches in already valid MO code and known genus/species combinations} \item{Valid MO codes and full names: it first searches in already valid MO code and known genus/species combinations}
\item{Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones (see \emph{Microbial prevalence of pathogens in humans} below)} \item{Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones (see \emph{Microbial prevalence of pathogens in humans} below)}
\item{Taxonomic kingdom: it first searches in Bacteria/Chromista, then Fungi, then Protozoa, then Viruses} \item{Taxonomic kingdom: it first searches in Bacteria/Chromista, then Fungi, then Protozoa}
\item{Breakdown of input values: from here it starts to breakdown input values to find possible matches} \item{Breakdown of input values: from here it starts to breakdown input values to find possible matches}
} }
@ -93,7 +95,6 @@ The algorithm can additionally use three different levels of uncertainty to gues
\itemize{ \itemize{
\item{(uncertainty level 1): It tries to look for only matching genera} \item{(uncertainty level 1): It tries to look for only matching genera}
\item{(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names} \item{(uncertainty level 1): It tries to look for previously accepted (but now invalid) taxonomic names}
\item{(uncertainty level 1): It tries to look for some manual changes which are not (yet) published to the Catalogue of Life (like \emph{Propionibacterium} being \emph{Cutibacterium})}
\item{(uncertainty level 2): It strips off values between brackets and the brackets themselves, and re-evaluates the input with all previous rules} \item{(uncertainty level 2): It strips off values between brackets and the brackets themselves, and re-evaluates the input with all previous rules}
\item{(uncertainty level 2): It strips off words from the end one by one and re-evaluates the input with all previous rules} \item{(uncertainty level 2): It strips off words from the end one by one and re-evaluates the input with all previous rules}
\item{(uncertainty level 3): It strips off words from the start one by one and re-evaluates the input with all previous rules} \item{(uncertainty level 3): It strips off words from the start one by one and re-evaluates the input with all previous rules}
@ -164,6 +165,12 @@ as.mo("MRSA") # Methicillin Resistant S. aureus
as.mo("VISA") # Vancomycin Intermediate S. aureus as.mo("VISA") # Vancomycin Intermediate S. aureus
as.mo("VRSA") # Vancomycin Resistant S. aureus as.mo("VRSA") # Vancomycin Resistant S. aureus
# Dyslexia is no problem - these all work:
as.mo("Ureaplasma urealyticum")
as.mo("Ureaplasma urealyticus")
as.mo("Ureaplasmium urealytica")
as.mo("Ureaplazma urealitycium")
as.mo("Streptococcus group A") as.mo("Streptococcus group A")
as.mo("GAS") # Group A Streptococci as.mo("GAS") # Group A Streptococci
as.mo("GBS") # Group B Streptococci as.mo("GBS") # Group B Streptococci
@ -174,13 +181,9 @@ as.mo("S. epidermidis", Becker = TRUE) # will not remain species: B_STPHY_CNS
as.mo("S. pyogenes") # will remain species: B_STRPT_PYO as.mo("S. pyogenes") # will remain species: B_STRPT_PYO
as.mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPT_GRA as.mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPT_GRA
# Use mo_* functions to get a specific property based on `mo` # All mo_* functions use as.mo() internally too (see ?mo_property):
Ecoli <- as.mo("E. coli") # returns `B_ESCHR_COL`
mo_genus(Ecoli) # returns "Escherichia"
mo_gramstain(Ecoli) # returns "Gram negative"
# but it uses as.mo internally too, so you could also just use:
mo_genus("E. coli") # returns "Escherichia" mo_genus("E. coli") # returns "Escherichia"
mo_gramstain("E. coli") # returns "Gram negative"
\dontrun{ \dontrun{
df$mo <- as.mo(df$microorganism_name) df$mo <- as.mo(df$microorganism_name)

View File

@ -18,7 +18,7 @@ This package contains the complete taxonomic tree of almost all microorganisms (
Included are: Included are:
\itemize{ \itemize{
\item{All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria, Protozoa and Viruses} \item{All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria and Protozoa}
\item{All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of \emph{Aspergillus}, \emph{Candida}, \emph{Cryptococcus}, \emph{Histoplasma}, \emph{Pneumocystis}, \emph{Saccharomyces} and \emph{Trichophyton}).} \item{All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of \emph{Aspergillus}, \emph{Candida}, \emph{Cryptococcus}, \emph{Histoplasma}, \emph{Pneumocystis}, \emph{Saccharomyces} and \emph{Trichophyton}).}
\item{All ~2,000 (sub)species from ~100 other relevant genera, from the kingdoms of Animalia and Plantae (like \emph{Strongyloides} and \emph{Taenia})} \item{All ~2,000 (sub)species from ~100 other relevant genera, from the kingdoms of Animalia and Plantae (like \emph{Strongyloides} and \emph{Taenia})}
\item{All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed} \item{All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed}
@ -66,3 +66,7 @@ mo_phylum("C. elegans")
mo_fullname("C. elegans") mo_fullname("C. elegans")
# [1] "Chroococcus limneticus elegans" # Because a microorganism was found # [1] "Chroococcus limneticus elegans" # Because a microorganism was found
} }
\seealso{
Data set \code{\link{microorganisms}} for the actual data. \cr
Function \code{\link{as.mo}()} to use the data for intelligent determination of microorganisms.
}

View File

@ -6,11 +6,16 @@
\usage{ \usage{
catalogue_of_life_version() catalogue_of_life_version()
} }
\value{
a \code{list}, invisibly
}
\description{ \description{
This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year. This function returns information about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year.
} }
\details{ \details{
The list item \code{is_latest_annual_release} is based on the system date. The list item \code{is_latest_annual_release} is based on the system date.
For DSMZ, see \code{?microorganisms}.
} }
\section{Catalogue of Life}{ \section{Catalogue of Life}{

View File

@ -3,8 +3,8 @@
\docType{data} \docType{data}
\name{microorganisms} \name{microorganisms}
\alias{microorganisms} \alias{microorganisms}
\title{Data set with ~60,000 microorganisms} \title{Data set with ~65,000 microorganisms}
\format{A \code{\link{data.frame}} with 59,985 observations and 15 variables: \format{A \code{\link{data.frame}} with 65,629 observations and 16 variables:
\describe{ \describe{
\item{\code{mo}}{ID of microorganism as used by this package} \item{\code{mo}}{ID of microorganism as used by this package}
\item{\code{col_id}}{Catalogue of Life ID} \item{\code{col_id}}{Catalogue of Life ID}
@ -20,10 +20,13 @@
\item{\code{rank}}{Taxonomic rank of the microorganism, like \code{"species"} or \code{"genus"}} \item{\code{rank}}{Taxonomic rank of the microorganism, like \code{"species"} or \code{"genus"}}
\item{\code{ref}}{Author(s) and year of concerning scientific publication} \item{\code{ref}}{Author(s) and year of concerning scientific publication}
\item{\code{species_id}}{ID of the species as used by the Catalogue of Life} \item{\code{species_id}}{ID of the species as used by the Catalogue of Life}
\item{\code{source}}{Either \code{"CoL"}, \code{"DSMZ"} (see source) or "manually added"}
\item{\code{prevalence}}{Prevalence of the microorganism, see \code{?as.mo}} \item{\code{prevalence}}{Prevalence of the microorganism, see \code{?as.mo}}
}} }}
\source{ \source{
Catalogue of Life: Annual Checklist (public online database), \url{www.catalogueoflife.org}. Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}).
Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, \url{http://www.dsmz.de/bacterial-diversity/prokaryotic-nomenclature-up-to-date} (check included version with \code{\link{catalogue_of_life_version}()}).
} }
\usage{ \usage{
microorganisms microorganisms
@ -36,9 +39,17 @@ Manually added were:
\itemize{ \itemize{
\item{9 species of \emph{Streptococcus} (beta haemolytic groups A, B, C, D, F, G, H, K and unspecified)} \item{9 species of \emph{Streptococcus} (beta haemolytic groups A, B, C, D, F, G, H, K and unspecified)}
\item{2 species of \emph{Staphylococcus} (coagulase-negative [CoNS] and coagulase-positive [CoPS])} \item{2 species of \emph{Staphylococcus} (coagulase-negative [CoNS] and coagulase-positive [CoPS])}
\item{2 other undefined (unknown Gram negatives and unknown Gram positives)} \item{3 other undefined (unknown, unknown Gram negatives and unknown Gram positives)}
\item{8,830 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) that are not in the Catalogue of Life}
} }
} }
\section{About the records from DSMZ (see source)}{
Names of prokaryotes are defined as being validly published by the International Code of Nomenclature of Bacteria. Validly published are all names which are included in the Approved Lists of Bacterial Names and the names subsequently published in the International Journal of Systematic Bacteriology (IJSB) and, from January 2000, in the International Journal of Systematic and Evolutionary Microbiology (IJSEM) as original articles or in the validation lists.
From: \url{https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads/readme.html}
}
\section{Catalogue of Life}{ \section{Catalogue of Life}{
\if{html}{\figure{logo_col.png}{options: height=40px style=margin-bottom:5px} \cr} \if{html}{\figure{logo_col.png}{options: height=40px style=margin-bottom:5px} \cr}

View File

@ -4,7 +4,7 @@
\name{microorganisms.old} \name{microorganisms.old}
\alias{microorganisms.old} \alias{microorganisms.old}
\title{Data set with previously accepted taxonomic names} \title{Data set with previously accepted taxonomic names}
\format{A \code{\link{data.frame}} with 17,069 observations and 4 variables: \format{A \code{\link{data.frame}} with 16,911 observations and 4 variables:
\describe{ \describe{
\item{\code{col_id}}{Catalogue of Life ID} \item{\code{col_id}}{Catalogue of Life ID}
\item{\code{tsn_new}}{New Catalogue of Life ID} \item{\code{tsn_new}}{New Catalogue of Life ID}

View File

@ -67,7 +67,7 @@ mo_property(x, property = "fullname", language = get_locale(), ...)
\item{...}{other parameters passed on to \code{\link{as.mo}}} \item{...}{other parameters passed on to \code{\link{as.mo}}}
\item{open}{browse the URL using \code{\link[utils]{browseURL}}} \item{open}{browse the URL using \code{\link[utils]{browseURL}()}}
\item{property}{one of the column names of the \code{\link{microorganisms}} data set or \code{"shortname"}} \item{property}{one of the column names of the \code{\link{microorganisms}} data set or \code{"shortname"}}
} }
@ -90,9 +90,9 @@ All functions will return the most recently known taxonomic property according t
\item{\code{mo_ref("Chlamydophila psittaci")} will return \code{"Everett et al., 1999"} (without a warning)} \item{\code{mo_ref("Chlamydophila psittaci")} will return \code{"Everett et al., 1999"} (without a warning)}
} }
The Gram stain - \code{mo_gramstain()} - will be determined on the taxonomic kingdom and phylum. According to Cavalier-Smith (2002) who defined subkingdoms Negibacteria and Posibacteria, only these phyla are Posibacteria: Actinobacteria, Chloroflexi, Firmicutes and Tenericutes (ref: \url{https://itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=956097}). These bacteria are considered Gram positive - all other bacteria are considered Gram negative. Species outside the kingdom of Bacteria will return a value \code{NA}. The Gram stain - \code{mo_gramstain()} - will be determined on the taxonomic kingdom and phylum. According to Cavalier-Smith (2002) who defined subkingdoms Negibacteria and Posibacteria, only these phyla are Posibacteria: Actinobacteria, Chloroflexi, Firmicutes and Tenericutes. These bacteria are considered Gram positive - all other bacteria are considered Gram negative. Species outside the kingdom of Bacteria will return a value \code{NA}.
The function \code{mo_url()} will return the direct URL to the species in the Catalogue of Life. The function \code{mo_url()} will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species.
} }
\section{Supported languages}{ \section{Supported languages}{
@ -169,7 +169,7 @@ mo_shortname("K. pneu rh") # "K. pneumoniae"
# Becker classification, see ?as.mo # Becker classification, see ?as.mo
mo_fullname("S. epi") # "Staphylococcus epidermidis" mo_fullname("S. epi") # "Staphylococcus epidermidis"
mo_fullname("S. epi", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)" mo_fullname("S. epi", Becker = TRUE) # "Coagulase-negative Staphylococcus (CoNS)"
mo_shortname("S. epi") # "S. epidermidis" mo_shortname("S. epi") # "S. epidermidis"
mo_shortname("S. epi", Becker = TRUE) # "CoNS" mo_shortname("S. epi", Becker = TRUE) # "CoNS"

View File

@ -1,13 +1,23 @@
# Catalogue of Life # Reproduction of the `microorganisms` data set
# Data retrieved from Encyclopaedia of Life:
# https://opendata.eol.org/dataset/catalogue-of-life/ # Data retrieved from the Catalogue of Life (CoL) through the Encyclopaedia of Life:
# https://opendata.eol.org/dataset/catalogue-of-life/
# (download the resource file with a name like "Catalogue of Life yyyy-mm-dd")
# and from the Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures
# https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads.html
# (download the latest "Complete List" as xlsx file)
# unzip and extract taxon.tab (around 1.5 GB), then:
taxon <- data.table::fread("Downloads/taxon.tab")
# result is over 3.7M rows:
library(dplyr) library(dplyr)
library(AMR) library(AMR)
taxon %>% freq(kingdom)
# unzip and extract taxon.tab (around 1.5 GB) from the CoL archive, then:
data_col <- data.table::fread("Downloads/taxon.tab")
# read the xlsx file from DSMZ (only around 2.5 MB):
data_dsmz <- readxl::read_xlsx("Downloads/DSMZ_bactnames.xlsx")
# the CoL data is over 3.7M rows:
data_col %>% freq(kingdom)
# Item Count Percent Cum. Count Cum. Percent # Item Count Percent Cum. Count Cum. Percent
# --- ---------- ---------- -------- ----------- ------------- # --- ---------- ---------- -------- ----------- -------------
# 1 Animalia 2,225,627 59.1% 2,225,627 59.1% # 1 Animalia 2,225,627 59.1% 2,225,627 59.1%
@ -19,21 +29,81 @@ taxon %>% freq(kingdom)
# 7 Viruses 3,827 0.1% 3,764,675 100.0% # 7 Viruses 3,827 0.1% 3,764,675 100.0%
# 8 Archaea 610 0.0% 3,765,285 100.0% # 8 Archaea 610 0.0% 3,765,285 100.0%
MOs <- taxon %>% # clean data_col
# tibble for future transformations data_col <- data_col %>%
as_tibble() %>% as_tibble() %>%
select(col_id = taxonID,
col_id_new = acceptedNameUsageID,
fullname = scientificName,
kingdom,
phylum,
class,
order,
family,
genus,
species = specificEpithet,
subspecies = infraspecificEpithet,
rank = taxonRank,
ref = scientificNameAuthorship,
species_id = furtherInformationURL)
data_col$source <- "CoL"
# clean data_dsmz
data_dsmz <- data_dsmz %>%
as_tibble() %>%
transmute(col_id = NA_integer_,
col_id_new = NA_integer_,
fullname = "",
# kingdom = "",
# phylum = "",
# class = "",
# order = "",
# family = "",
genus = ifelse(is.na(GENUS), "", GENUS),
species = ifelse(is.na(SPECIES), "", SPECIES),
subspecies = ifelse(is.na(SUBSPECIES), "", SUBSPECIES),
rank = ifelse(species == "", "genus", "species"),
ref = AUTHORS,
species_id = as.character(RECORD_NO),
source = "DSMZ")
# DSMZ only contains genus/(sub)species, try to find taxonomic properties based on genus and data_col
ref_taxonomy <- data_col %>%
filter(genus %in% data_dsmz$genus,
family != "") %>%
distinct(genus, .keep_all = TRUE) %>%
select(kingdom, phylum, class, order, family, genus)
data_dsmz <- data_dsmz %>%
left_join(ref_taxonomy, by = "genus") %>%
mutate(kingdom = "Bacteria",
phylum = ifelse(is.na(phylum), "(unknown phylum)", phylum),
class = ifelse(is.na(class), "(unknown class)", class),
order = ifelse(is.na(order), "(unknown order)", order),
family = ifelse(is.na(family), "(unknown family)", family),
)
# combine everything
data_total <- data_col %>%
bind_rows(data_dsmz)
rm(data_col)
rm(data_dsmz)
rm(ref_taxonomy)
MOs <- data_total %>%
filter( filter(
( (
# we only want all microorganisms and viruses # we only want all microorganisms and viruses
!kingdom %in% c("Animalia", "Plantae") !kingdom %in% c("Animalia", "Plantae")
# and no entries above genus - they all already have a taxonomic tree # and no entries above genus level - all species already have a taxonomic tree
& !taxonRank %in% c("kingdom", "phylum", "superfamily", "class", "order", "family") & !rank %in% c("kingdom", "phylum", "superfamily", "class", "order", "family")
# not all fungi: Aspergillus, Candida, Trichophyton and Pneumocystis are the most important, # and not all fungi: Aspergillus, Candida, Trichophyton and Pneumocystis are the most important,
# so only keep these orders from the fungi: # so only keep these orders from the fungi:
& !(kingdom == "Fungi" & !(kingdom == "Fungi"
& !order %in% c("Eurotiales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales", "Onygenales", "Pneumocystales")) & !order %in% c("Eurotiales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales", "Onygenales", "Pneumocystales"))
) )
# or the genus has to be one of the genera we found in our hospitals last decades # or the genus has to be one of the genera we found in our hospitals last decades (Northern Netherlands, 2002-2018)
| genus %in% c("Absidia", "Acremonium", "Actinotignum", "Alternaria", "Anaerosalibacter", "Ancylostoma", "Anisakis", "Apophysomyces", | genus %in% c("Absidia", "Acremonium", "Actinotignum", "Alternaria", "Anaerosalibacter", "Ancylostoma", "Anisakis", "Apophysomyces",
"Arachnia", "Ascaris", "Aureobacterium", "Aureobasidium", "Balantidum", "Bilophilia", "Branhamella", "Brochontrix", "Arachnia", "Ascaris", "Aureobacterium", "Aureobasidium", "Balantidum", "Bilophilia", "Branhamella", "Brochontrix",
"Brugia", "Calymmatobacterium", "Catabacter", "Cdc", "Chilomastix", "Chryseomonas", "Cladophialophora", "Cladosporium", "Brugia", "Calymmatobacterium", "Catabacter", "Cdc", "Chilomastix", "Chryseomonas", "Cladophialophora", "Cladosporium",
@ -47,9 +117,11 @@ MOs <- taxon %>%
"Trichosporon", "Trichuris", "Trypanosoma", "Wuchereria") "Trichosporon", "Trichuris", "Trypanosoma", "Wuchereria")
) %>% ) %>%
# remove text if it contains 'Not assigned' like phylum in viruses # remove text if it contains 'Not assigned' like phylum in viruses
mutate_all(funs(gsub("Not assigned", "", .))) %>% mutate_all(~gsub("Not assigned", "", .))
# Transform 'Smith, Jones, 2011' to 'Smith et al., 2011':
mutate(authors2 = iconv(scientificNameAuthorship, from = "UTF-8", to = "ASCII//TRANSLIT"), MOs <- MOs %>%
# Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011':
mutate(authors2 = iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT"),
# remove leading and trailing brackets # remove leading and trailing brackets
authors2 = gsub("^[(](.*)[)]$", "\\1", authors2), authors2 = gsub("^[(](.*)[)]$", "\\1", authors2),
# only take part after brackets if there's a name # only take part after brackets if there's a name
@ -69,7 +141,7 @@ MOs <- taxon %>%
# remove trailing and leading spaces # remove trailing and leading spaces
authors = trimws(authors), authors = trimws(authors),
# only keep first author and replace all others by 'et al' # only keep first author and replace all others by 'et al'
authors = gsub("(,| and| &| ex| emend\\.?) .*", " et al.", authors), authors = gsub("(,| and| et| &| ex| emend\\.?) .*", " et al.", authors),
# et al. always with ending dot # et al. always with ending dot
authors = gsub(" et al\\.?", " et al.", authors), authors = gsub(" et al\\.?", " et al.", authors),
authors = gsub(" ?,$", "", authors), authors = gsub(" ?,$", "", authors),
@ -86,60 +158,66 @@ MOs <- taxon %>%
ref = gsub("^, ", "", ref) ref = gsub("^, ", "", ref)
) )
# remove the HUGE file # Remove non-ASCII characters (these are not allowed by CRAN)
rm(taxon)
# remove non-ASCII characters (not allowed by CRAN)
MOs <- MOs %>% MOs <- MOs %>%
lapply(iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>% lapply(iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>%
as_tibble(stringsAsFactors = FALSE) as_tibble(stringsAsFactors = FALSE)
# split old taxonomic names - they refer to a new `taxonID` with `acceptedNameUsageID` # Split old taxonomic names - they refer in the original data to a new `taxonID` with `acceptedNameUsageID`
MOs.old <- MOs %>% MOs.old <- MOs %>%
filter(!is.na(acceptedNameUsageID), filter(!is.na(col_id_new),
ref != "") %>% ref != "",
transmute(col_id = taxonID, source != "DSMZ") %>%
col_id_new = acceptedNameUsageID, transmute(col_id,
col_id_new,
fullname = fullname =
trimws( trimws(
gsub("(.*)[(].*", "\\1", gsub("(.*)[(].*", "\\1",
stringr::str_replace( stringr::str_replace(
string = scientificName, string = fullname,
pattern = stringr::fixed(ref), pattern = stringr::fixed(authors2),
replacement = ""))), replacement = ""))),
ref = ref) %>% ref) %>%
filter(!is.na(fullname)) %>% filter(!is.na(fullname)) %>%
distinct(fullname, .keep_all = TRUE) %>% distinct(fullname, .keep_all = TRUE) %>%
arrange(col_id) arrange(col_id)
MOs <- MOs %>% MOs <- MOs %>%
filter(is.na(acceptedNameUsageID)) %>% filter(is.na(col_id_new) | source == "DSMZ") %>%
transmute(col_id = taxonID, transmute(col_id,
fullname = trimws(ifelse(kingdom == "Viruses", fullname = trimws(ifelse(kingdom == "Viruses",
paste(specificEpithet, infraspecificEpithet), gsub(":", "", ifelse(trimws(paste(species, subspecies)) == "", genus, paste(species, subspecies))),
paste(genus, specificEpithet, infraspecificEpithet))), paste(genus, species, subspecies))),
kingdom, kingdom,
phylum, phylum,
class, class,
order, order,
family, family,
genus = gsub(":", "", genus), genus = gsub(":", "", genus),
species = specificEpithet, species,
subspecies = infraspecificEpithet, subspecies,
rank = taxonRank, rank,
ref = ref, ref,
species_id = gsub(".*/([a-f0-9]+)", "\\1", furtherInformationURL)) %>% species_id = gsub(".*/([a-f0-9]+)", "\\1", species_id),
distinct(fullname, .keep_all = TRUE) %>% source) %>%
#distinct(fullname, .keep_all = TRUE) %>%
filter(!grepl("unassigned", fullname, ignore.case = TRUE)) filter(!grepl("unassigned", fullname, ignore.case = TRUE))
# only old names of species that are in MOs: # Keep only old names of species that are in MOs:
MOs.old <- MOs.old %>% filter(col_id_new %in% MOs$col_id) MOs.old <- MOs.old %>% filter(col_id_new %in% MOs$col_id)
# add abbreviations so we can easily know which ones are which ones # Filter out the DSMZ records that were renamed and are now in MOs.old
MOs <- MOs %>%
filter(!(source == "DSMZ" & fullname %in% MOs.old$fullname),
!(source == "DSMZ" & fullname %in% (MOs %>% filter(source == "CoL") %>% pull(fullname)))) %>%
distinct(fullname, .keep_all = TRUE)
# Add abbreviations so we can easily know which ones are which ones.
# These will become valid and unique microbial IDs for the AMR package.
MOs <- MOs %>% MOs <- MOs %>%
group_by(kingdom) %>% group_by(kingdom) %>%
# abbreviations may be same for genera between kingdoms, # abbreviations may be same for genera between kingdoms,
# because each abbreviation starts with the first character of the kingdom # because each abbreviation starts with the first character(s) of the kingdom
mutate(abbr_genus = abbreviate(genus, mutate(abbr_genus = abbreviate(genus,
minlength = 5, minlength = 5,
use.classes = TRUE, use.classes = TRUE,
@ -162,21 +240,21 @@ MOs <- MOs %>%
ungroup() %>% ungroup() %>%
# remove trailing underscores # remove trailing underscores
mutate(mo = gsub("_+$", "", mutate(mo = gsub("_+$", "",
toupper(paste(ifelse(kingdom == "Animalia", toupper(paste(ifelse(kingdom %in% c("Animalia", "Plantae"),
"AN", substr(kingdom, 1, 2),
ifelse(kingdom == "Plantae", substr(kingdom, 1, 1)),
"PL",
substr(kingdom, 1, 1))),
abbr_genus, abbr_genus,
abbr_species, abbr_species,
abbr_subspecies, abbr_subspecies,
sep = "_")))) %>% sep = "_")))) %>%
mutate(mo = ifelse(duplicated(.$mo), mutate(mo = ifelse(duplicated(.$mo),
# these one or two must be unique too
paste0(mo, "1"), paste0(mo, "1"),
mo), mo),
fullname = ifelse(fullname == "", fullname = ifelse(fullname == "",
trimws(paste(genus, species, subspecies)), trimws(paste(genus, species, subspecies)),
fullname)) %>% fullname)) %>%
# put `mo` in front, followed by the rest
select(mo, everything(), -abbr_genus, -abbr_species, -abbr_subspecies) select(mo, everything(), -abbr_genus, -abbr_species, -abbr_subspecies)
@ -184,23 +262,22 @@ MOs <- MOs %>%
MOs <- MOs %>% MOs <- MOs %>%
bind_rows( bind_rows(
# Unknowns # Unknowns
MOs %>% data.frame(mo = "UNKNOWN",
.[1,] %>% col_id = NA_integer_,
mutate(mo = "UNKNOWN", fullname = "(unknown name)",
col_id = NA_integer_, kingdom = "(unknown kingdom)",
fullname = "(unknown name)", phylum = "(unknown phylum)",
kingdom = "(unknown kingdom)", class = "(unknown class)",
phylum = "(unknown phylum)", order = "(unknown order)",
class = "(unknown class)", family = "(unknown family)",
order = "(unknown order)", genus = "(unknown genus)",
family = "(unknown family)", species = "(unknown species)",
genus = "(unknown genus)", subspecies = "(unknown subspecies)",
species = "(unknown species)", rank = "(unknown rank)",
subspecies = "(unknown subspecies)", ref = NA_character_,
rank = "(unknown rank)", species_id = "",
ref = NA_character_, source = "manually added",
# kingdom Bacteria: stringsAsFactors = FALSE),
species_id = "36bea735613185bbd9ce135fb0d9382c"),
data.frame(mo = "B_GRAMN", data.frame(mo = "B_GRAMN",
col_id = NA_integer_, col_id = NA_integer_,
fullname = "(unknown Gram negatives)", fullname = "(unknown Gram negatives)",
@ -214,6 +291,8 @@ MOs <- MOs %>%
subspecies = "(unknown subspecies)", subspecies = "(unknown subspecies)",
rank = "species", rank = "species",
ref = NA_character_, ref = NA_character_,
species_id = "",
source = "manually added",
stringsAsFactors = FALSE), stringsAsFactors = FALSE),
data.frame(mo = "B_GRAMP", data.frame(mo = "B_GRAMP",
col_id = NA_integer_, col_id = NA_integer_,
@ -228,78 +307,96 @@ MOs <- MOs %>%
subspecies = "(unknown subspecies)", subspecies = "(unknown subspecies)",
rank = "species", rank = "species",
ref = NA_character_, ref = NA_character_,
species_id = "",
source = "manually added",
stringsAsFactors = FALSE), stringsAsFactors = FALSE),
# CoNS # CoNS
MOs %>% MOs %>%
filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>% filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>%
mutate(mo = gsub("EPI", "CNS", mo), mutate(mo = gsub("EPI", "CNS", mo),
col_id = NA_integer_, col_id = NA_integer_,
species = "coagulase negative", species = "coagulase-negative",
fullname = "Coagulase Negative Staphylococcus (CoNS)", fullname = "Coagulase-negative Staphylococcus (CoNS)",
ref = NA_character_), ref = NA_character_,
species_id = "",
source = "manually added"),
# CoPS # CoPS
MOs %>% MOs %>%
filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>% filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>%
mutate(mo = gsub("EPI", "CPS", mo), mutate(mo = gsub("EPI", "CPS", mo),
col_id = NA_integer_, col_id = NA_integer_,
species = "coagulase positive", species = "coagulase-positive",
fullname = "Coagulase Positive Staphylococcus (CoPS)", fullname = "Coagulase-positive Staphylococcus (CoPS)",
ref = NA_character_), ref = NA_character_,
species_id = "",
source = "manually added"),
# Streptococci groups A, B, C, D, F, G, H, K # Streptococci groups A, B, C, D, F, G, H, K
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "pyogenes") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRA", mo), # we can keep all other details, since S. pyogenes is the only member of group A
col_id = NA_integer_, mutate(mo = gsub("PYO", "GRA", mo),
species = "group A" , species = "group A" ,
fullname = "Streptococcus group A"), fullname = "Streptococcus group A"),
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "dysgalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("DYS", "GRB", mo), # we can keep all other details, since S. agalactiae is the only member of group B
col_id = NA_integer_, mutate(mo = gsub("AGA", "GRB", mo),
species = "group B" , species = "group B" ,
fullname = "Streptococcus group B"), fullname = "Streptococcus group B"),
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "dysgalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRC", mo), mutate(mo = gsub("DYS", "GRC", mo),
col_id = NA_integer_, col_id = NA_integer_,
species = "group C" , species = "group C" ,
fullname = "Streptococcus group C", fullname = "Streptococcus group C",
ref = NA_character_), ref = NA_character_,
species_id = "",
source = "manually added"),
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRD", mo), mutate(mo = gsub("AGA", "GRD", mo),
col_id = NA_integer_, col_id = NA_integer_,
species = "group D" , species = "group D" ,
fullname = "Streptococcus group D", fullname = "Streptococcus group D",
ref = NA_character_), ref = NA_character_,
species_id = "",
source = "manually added"),
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRF", mo), mutate(mo = gsub("AGA", "GRF", mo),
col_id = NA_integer_, col_id = NA_integer_,
species = "group F" , species = "group F" ,
fullname = "Streptococcus group F", fullname = "Streptococcus group F",
ref = NA_character_), ref = NA_character_,
species_id = "",
source = "manually added"),
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRG", mo), mutate(mo = gsub("AGA", "GRG", mo),
col_id = NA_integer_, col_id = NA_integer_,
species = "group F" , species = "group G" ,
fullname = "Streptococcus group G", fullname = "Streptococcus group G",
ref = NA_character_), ref = NA_character_,
species_id = "",
source = "manually added"),
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRH", mo), mutate(mo = gsub("AGA", "GRH", mo),
col_id = NA_integer_, col_id = NA_integer_,
species = "group H" , species = "group H" ,
fullname = "Streptococcus group H", fullname = "Streptococcus group H",
ref = NA_character_), ref = NA_character_,
species_id = "",
source = "manually added"),
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
mutate(mo = gsub("AGA", "GRK", mo), mutate(mo = gsub("AGA", "GRK", mo),
col_id = NA_integer_, col_id = NA_integer_,
species = "group K" , species = "group K" ,
fullname = "Streptococcus group K", fullname = "Streptococcus group K",
ref = NA_character_), ref = NA_character_,
species_id = "",
source = "manually added"),
# Beta haemolytic Streptococci # Beta haemolytic Streptococci
MOs %>% MOs %>%
filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
@ -307,7 +404,9 @@ MOs <- MOs %>%
col_id = NA_integer_, col_id = NA_integer_,
species = "beta-haemolytic" , species = "beta-haemolytic" ,
fullname = "Beta-haemolytic Streptococcus", fullname = "Beta-haemolytic Streptococcus",
ref = NA_character_) ref = NA_character_,
species_id = "",
source = "manually added")
) )
@ -328,3 +427,4 @@ usethis::use_data(microorganisms, overwrite = TRUE)
usethis::use_data(microorganisms.old, overwrite = TRUE) usethis::use_data(microorganisms.old, overwrite = TRUE)
rm(microorganisms) rm(microorganisms)
rm(microorganisms.old) rm(microorganisms.old)
# and update the year in R/data.R

View File

@ -25,7 +25,7 @@ test_that("get_locale works", {
expect_identical(mo_genus("B_GRAMP", language = "pt"), expect_identical(mo_genus("B_GRAMP", language = "pt"),
"(Gram positivos desconhecidos)") "(Gram positivos desconhecidos)")
expect_identical(mo_fullname("CoNS", "en"), "Coagulase Negative Staphylococcus (CoNS)") expect_identical(mo_fullname("CoNS", "en"), "Coagulase-negative Staphylococcus (CoNS)")
expect_identical(mo_fullname("CoNS", "de"), "Koagulase-negative Staphylococcus (KNS)") expect_identical(mo_fullname("CoNS", "de"), "Koagulase-negative Staphylococcus (KNS)")
expect_identical(mo_fullname("CoNS", "nl"), "Coagulase-negatieve Staphylococcus (CNS)") expect_identical(mo_fullname("CoNS", "nl"), "Coagulase-negatieve Staphylococcus (CNS)")
expect_identical(mo_fullname("CoNS", "es"), "Staphylococcus coagulasa negativo (CoNS)") expect_identical(mo_fullname("CoNS", "es"), "Staphylococcus coagulasa negativo (CoNS)")

View File

@ -244,5 +244,4 @@ test_that("as.mo works", {
# summary # summary
expect_equal(length(summary(septic_patients$mo)), 6) expect_equal(length(summary(septic_patients$mo)), 6)
expect_warning(as.mo("Cutibacterium"))
}) })