authors from ITIS, diff for freq

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-10-01 11:39:43 +02:00
parent 92c9cc2608
commit 3119a221e5
17 changed files with 280 additions and 141 deletions

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 0.3.0.9010
Date: 2018-09-27
Version: 0.3.0.9011
Date: 2018-10-01
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(

View File

@ -90,6 +90,7 @@ export(labels_rsi_count)
export(left_join_microorganisms)
export(like)
export(mo_TSN)
export(mo_authors)
export(mo_class)
export(mo_family)
export(mo_fullname)
@ -104,6 +105,7 @@ export(mo_subkingdom)
export(mo_subspecies)
export(mo_taxonomy)
export(mo_type)
export(mo_year)
export(n_rsi)
export(p.symbol)
export(portion_I)

23
NEWS.md
View File

@ -2,28 +2,29 @@
#### New
* The data set `microorganisms` now contains **all microbial taxonomic data from ITIS** (kingdoms Bacteria, Fungi and Protozoa), the Integrated Taxonomy Information System, available via https://itis.gov. The data set now contains more than 18,000 microorganisms with all known bacteria, fungi and protozoa according ITIS with genus, species, subspecies, family, order, class, phylum and subkingdom. The new data set `microorganisms.old` contains all previously known taxonomic names from those kingdoms.
* Aliases for existing function `mo_property`
* New functions based on the existing function `mo_property`:
* Taxonomic names: `mo_phylum`, `mo_class`, `mo_order`, `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`
* Semantic names: `mo_fullname`, `mo_shortname`
* Microbial properties: `mo_type`, `mo_gramstain`.
* Microbial properties: `mo_type`, `mo_gramstain`
* Author information: `mo_author`, `mo_year`
They also come with support for German, Dutch, French, Italian, Spanish and Portuguese:
```r
mo_gramstain("E. coli")
# [1] "Gram negative"
mo_gramstain("E. coli", language = "de") # "de" = German
mo_gramstain("E. coli", language = "de") # German
# [1] "Gramnegativ"
mo_gramstain("E. coli", language = "es") # "es" = Spanish
mo_gramstain("E. coli", language = "es") # Spanish
# [1] "Gram negativo"
mo_fullname("S. group A", language = "pt") # Portuguese
# [1] "Streptococcus grupo A"
```
Furthermore, old taxonomic names kan easily be looked up and give a note about the taxonomic change:
Furthermore, old taxonomic names will give a note about the current taxonomic name:
```r
mo_fullname("Pseudomonas facilis")
# Note: 'Pseudomonas facilis' was renamed to 'Acidovorax facilis' by Willems et al. in 1990
# [1] "Acidovorax facilis"
mo_gramstain("Escherichia blattae")
# Note: 'Escherichia blattae' (Burgess et al., 1973) was renamed 'Shimwellia blattae' (Priest and Barker, 2010)
# [1] "Gram negative
```
* Functions `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` to selectively count resistant or susceptible isolates
* Extra function `count_df` (which works like `portion_df`) to get all counts of S, I and R of a data set with antibiotic columns, with support for grouped variables
@ -37,14 +38,15 @@
as.mo("S group A")
# [1] B_STRPTC_GRA
```
And with great speed too - on a quite regular Linux server from 2007 it takes us 0.009 seconds to transform 25,000 items:
And with great speed too - on a quite regular Linux server from 2007 it takes us less than 0.02 seconds to transform 25,000 items:
```r
thousands_of_E_colis <- rep("E. coli", 25000)
microbenchmark::microbenchmark(as.mo(thousands_of_E_colis), unit = "s")
# Unit: seconds
# min median max neval
# 0.00861352 0.008774335 0.01952958 100
# 0.01817717 0.01843957 0.03878077 100
```
* Added parameter `reference_df` for `as.mo`, so users can supply their own microbial IDs, name or codes as a reference table
* Renamed all previous references to `bactid` to `mo`, like:
* Column names inputs of `EUCAST_rules`, `first_isolate` and `key_antibiotics`
* Column names of datasets `microorganisms` and `septic_patients`
@ -90,6 +92,7 @@
* Added possibility to set any parameter to `geom_rsi` (and `ggplot_rsi`) so you can set your own preferences
* Fix for joins, where predefined suffices would not be honoured
* Added parameter `quote` to the `freq` function
* Added generic function `diff` for frequency tables
* Added longest en shortest character length in the frequency table (`freq`) header of class `character`
* Support for types (classes) list and matrix for `freq`
```r

View File

@ -124,7 +124,7 @@
#'
#' A data set containing the complete microbial taxonomy of the kingdoms Bacteria, Fungi and Protozoa. MO codes can be looked up using \code{\link{as.mo}}.
#' @inheritSection as.mo ITIS
#' @format A \code{\link{data.frame}} with 18,831 observations and 15 variables:
#' @format A \code{\link{data.frame}} with 18,833 observations and 16 variables:
#' \describe{
#' \item{\code{mo}}{ID of microorganism}
#' \item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS}
@ -140,7 +140,8 @@
#' \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}}
#' \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}}
#' \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.}
#' \item{\code{mo.old}}{The old ID for package versions 0.3.0 and lower.}
#' \item{\code{authors}}{Author(s) that published this taxonomic name as found in ITIS, see Source}
#' \item{\code{year}}{Year in which the author(s) published this taxonomic name as found in ITIS, see Source}
#' }
#' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
#' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms.umcg}}

View File

@ -106,7 +106,7 @@
#' # print a histogram of numeric values
#' septic_patients %>%
#' freq(age) %>%
#' hist() # prettier: ggplot(septic_patients, aes(age)) + geom_histogram()
#' hist()
#'
#' # or print all points to a regular plot
#' septic_patients %>%
@ -134,6 +134,10 @@
#' septic_patients$age) %>%
#' freq(sep = " **sep** ")
#'
#' # check differences between frequency tables
#' diff(freq(septic_patients$trim),
#' freq(septic_patients$trsu))
#'
#' \dontrun{
#' # send frequency table to clipboard (e.g. for pasting in Excel)
#' septic_patients %>%
@ -502,7 +506,7 @@ top_freq <- function(f, n) {
vect
}
#' @rdname freq
#' @noRd
#' @exportMethod diff.frequency_tbl
#' @importFrom dplyr %>% full_join mutate
#' @export
@ -531,8 +535,15 @@ diff.frequency_tbl <- function(x, y, ...) {
mutate(
diff.percent = percent(
diff / count.x,
force_zero = TRUE))
force_zero = TRUE)) %>%
mutate(diff = ifelse(diff %like% '^-',
diff,
paste0("+", diff)),
diff.percent = ifelse(diff.percent %like% '^-',
diff.percent,
paste0("+", diff.percent)))
cat("Differences between frequency tables")
print(
knitr::kable(x,
format = x.attr$tbl_format,

View File

@ -22,8 +22,11 @@ globalVariables(c(".",
"Antibiotic",
"antibiotics",
"authors",
"Becker",
"cnt",
"count",
"count.x",
"count.y",
"cum_count",
"cum_percent",
"date_lab",
@ -39,6 +42,7 @@ globalVariables(c(".",
"key_ab",
"key_ab_lag",
"key_ab_other",
"Lancefield",
"lbl",
"median",
"mic",

107
R/mo.R
View File

@ -27,11 +27,12 @@
#'
#' This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.
#' @param allow_uncertain a logical to indicate whether empty results should be checked for only a part of the input string. When results are found, a warning will be given about the uncertainty and the result.
#' @param reference_df a \code{data.frame} to use for extra reference when translating \code{x} to a valid \code{mo}. The first column can be any microbial name, code or ID (used in your analysis or organisation), the second column must be a valid \code{mo} as found in the \code{\link{microorganisms}} data set.
#' @rdname as.mo
#' @aliases mo
#' @keywords mo Becker becker Lancefield lancefield guess
#' @details
#' A microbial ID (class: \code{mo}) typically looks like these examples:\cr
#' A microbial ID from this package (class: \code{mo}) typically looks like these examples:\cr
#' \preformatted{
#' Code Full name
#' --------------- --------------------------------------
@ -55,13 +56,17 @@
#' \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}}
#' \item{Something like \code{"stau"} or \code{"S aur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}}
#' }
#' This means that looking up human non-pathogenic microorganisms takes a longer time compares to human pathogenic microorganisms.
#' This means that looking up human pathogenic microorganisms takes less time than looking up human \strong{non}-pathogenic microorganisms.
#'
#' \code{guess_mo} is an alias of \code{as.mo}.
#' @section ITIS:
#' \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
#' This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS.
# (source as section, so it can be inherited by mo_property:)
#' This package contains the \strong{complete microbial taxonomic data} (with all seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).
#'
#' The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available too. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
#'
#' ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
# (source as a section, so it can be inherited by other man pages:)
#' @section Source:
#' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870926. \url{https://dx.doi.org/10.1128/CMR.00109-13}
#'
@ -73,7 +78,7 @@
#' @seealso \code{\link{microorganisms}} for the \code{data.frame} with ITIS content that is being used to determine ID's. \cr
#' The \code{\link{mo_property}} functions (like \code{\link{mo_genus}}, \code{\link{mo_gramstain}}) to get properties based on the returned code.
#' @examples
#' # These examples all return "STAAUR", the ID of S. aureus:
#' # These examples all return "B_STPHY_AUR", the ID of S. aureus:
#' as.mo("stau")
#' as.mo("STAU")
#' as.mo("staaur")
@ -123,9 +128,10 @@
#' df <- df %>%
#' mutate(mo = guess_mo(paste(genus, species)))
#' }
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE) {
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL) {
exec_as.mo(x = x, Becker = Becker, Lancefield = Lancefield,
allow_uncertain = allow_uncertain, property = "mo")
allow_uncertain = allow_uncertain, reference_df = reference_df,
property = "mo")
}
#' @rdname as.mo
@ -142,7 +148,7 @@ guess_mo <- as.mo
#' @importFrom dplyr %>% pull left_join
#' @importFrom data.table as.data.table setkey
exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, property = "mo") {
exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL, property = "mo") {
if (NCOL(x) == 2) {
# support tidyverse selection like: df %>% select(colA, colB)
# paste these columns together
@ -173,8 +179,31 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
MOs_allothers <- NULL # will be set later, if needed
MOs_old <- NULL # will be set later, if needed
# defined df to check for
if (!is.null(reference_df)) {
if (!is.data.frame(reference_df) | NCOL(reference_df) < 2) {
stop('`reference_df` must be a data.frame with at least two columns.')
}
# remove factors, just keep characters
suppressWarnings(
reference_df[] <- lapply(reference_df, as.character)
)
}
if (all(x %in% AMR::microorganisms[, property])) {
# already existing mo
} else if (!is.null(reference_df)
& all(x %in% reference_df[, 1])
& all(reference_df[, 2] %in% AMR::microorganisms$mo)) {
# manually defined reference
colnames(reference_df)[1] <- "x"
colnames(reference_df)[2] <- "mo"
suppressWarnings(
x <- data.frame(x = x, stringsAsFactors = FALSE) %>%
left_join(reference_df, by = "x") %>%
left_join(AMR::microorganisms, by = "mo") %>%
pull(property)
)
} else if (all(x %in% AMR::microorganisms.certe[, "certe"])) {
# old Certe codes
suppressWarnings(
@ -283,7 +312,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
}
# FIRST TRY FULLNAMES AND CODES
# if only genus is available, don't select species
# if only genus is available, return only genus
if (all(!c(x[i], x_trimmed[i]) %like% " ")) {
found <- MOs[tolower(fullname) %in% tolower(c(x_species[i], x_trimmed_species[i])), ..property][[1]]
if (length(found) > 0) {
@ -300,6 +329,27 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
}
}
# TRY OTHER SOURCES ----
if (x_backup[i] %in% AMR::microorganisms.certe[, 1]) {
x[i] <- MOs[mo == AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == x_backup[i], 2], ..property][[1]][1L]
next
}
if (x_backup[i] %in% AMR::microorganisms.umcg[, 1]) {
ref_certe <- AMR::microorganisms.umcg[AMR::microorganisms.umcg[, 1] == x_backup[i], 2]
ref_mo <- AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == ref_certe, 2]
x[i] <- MOs[mo == ref_mo, ..property][[1]][1L]
next
}
if (x_backup[i] %in% reference_df[, 1]) {
ref_mo <- reference_df[reference_df[, 1] == x_backup[i], 2]
if (ref_mo %in% MOs[, mo]) {
x[i] <- MOs[mo == ref_mo, ..property][[1]][1L]
next
} else {
warning("Value '", x_backup[i], "' was found in reference_df, but '", ref_mo, "' is not a valid MO code.", call. = FALSE)
}
}
# TRY FIRST THOUSAND MOST PREVALENT IN HUMAN INFECTIONS ----
found <- MOs_mostprevalent[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]]
@ -478,8 +528,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
x[i] <- MOs[tsn == found[1, tsn_new], ..property][[1]]
renamed_note(name_old = found[1, name],
name_new = MOs[tsn == found[1, tsn_new], fullname],
authors = found[1, authors],
year = found[1, year])
authors_old = found[1, authors],
authors_new = MOs[tsn == found[1, tsn_new], authors],
year_old = found[1, year],
year_new = MOs[tsn == found[1, tsn_new], year])
next
}
@ -496,9 +548,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
call. = FALSE, immediate. = TRUE)
renamed_note(name_old = found[1, name],
name_new = MOs[tsn == found[1, tsn_new], fullname],
authors = found[1, authors],
year = found[1, year])
authors_old = found[1, authors],
authors_new = MOs[tsn == found[1, tsn_new], authors],
year_old = found[1, year],
year_new = MOs[tsn == found[1, tsn_new], year])
next
}
@ -605,22 +658,28 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
class(x) <- "mo"
attr(x, 'package') <- 'AMR'
attr(x, 'ITIS') <- TRUE
} else if (property == "tsn") {
} else if (property %in% c("tsn", "year")) {
x <- as.integer(x)
}
x
}
renamed_note <- function(name_old, name_new, authors, year) {
msg <- paste0("Note: '", name_old, "' was renamed to '", name_new, "'")
if (!authors %in% c("", NA)) {
msg <- paste0(msg, " by ", authors)
}
if (!year %in% c("", NA)) {
msg <- paste0(msg, " in ", year)
}
base::message(msg)
#' @importFrom dplyr case_when
renamed_note <- function(name_old, name_new,
authors_old = "", authors_new = "",
year_old = "", year_new = "") {
authorship_old <- case_when(
!authors_old %in% c("", NA) & !year_old %in% c("", NA) ~ paste0(" (", authors_old, ", ", year_old, ")"),
!authors_old %in% c("", NA) ~ paste0(" (", authors_old, ")"),
!year_old %in% c("", NA) ~ paste0(" (", year_old, ")"),
TRUE ~ "")
authorship_new <- case_when(
!authors_new %in% c("", NA) & !year_new %in% c("", NA) ~ paste0(" (", authors_new, ", ", year_new, ")"),
!authors_new %in% c("", NA) ~ paste0(" (", authors_new, ")"),
!year_new %in% c("", NA) ~ paste0(" (", year_new, ")"),
TRUE ~ "")
base::message(paste0("Note: '", name_old, "'", authorship_old, " was renamed '", name_new, "'", authorship_new))
}
#' @exportMethod print.mo

View File

@ -21,8 +21,8 @@
#' Use these functions to return a specific property of a microorganism from the \code{\link{microorganisms}} data set. All input values will be evaluated internally with \code{\link{as.mo}}.
#' @param x any (vector of) text that can be coerced to a valid microorganism code with \code{\link{as.mo}}
#' @param property one of the column names of one of the \code{\link{microorganisms}} data set or \code{"shortname"}
#' @inheritParams as.mo
#' @param language language of the returned text, defaults to English (\code{"en"}) and can also be set with \code{\link{getOption}("AMR_locale")}. Either one of \code{"en"} (English), \code{"de"} (German), \code{"nl"} (Dutch), \code{"es"} (Spanish) or \code{"pt"} (Portuguese).
#' @param ... other parameters passed on to \code{/link{as.mo}}
#' @inheritSection as.mo ITIS
#' @inheritSection as.mo Source
#' @rdname mo_property
@ -31,7 +31,7 @@
#' @export
#' @seealso \code{\link{microorganisms}}
#' @examples
#' # All properties
#' # All properties of Escherichia coli
#' mo_subkingdom("E. coli") # "Negibacteria"
#' mo_phylum("E. coli") # "Proteobacteria"
#' mo_class("E. coli") # "Gammaproteobacteria"
@ -45,6 +45,8 @@
#' mo_gramstain("E. coli") # "Gram negative"
#' mo_TSN("E. coli") # 285
#' mo_type("E. coli") # "Bacteria"
#' mo_authors("E. coli") # "Castellani and Chalmers"
#' mo_year("E. coli") # 1919
#'
#'
#' # Abbreviations known in the field
@ -97,18 +99,27 @@
#'
#' # Complete taxonomy up to Subkingdom, returns a list
#' mo_taxonomy("E. coli")
mo_fullname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) {
x <- mo_validate(x = x, property = "fullname", Becker = Becker, Lancefield = Lancefield)
mo_fullname <- function(x, language = NULL, ...) {
x <- mo_validate(x = x, property = "fullname", ...)
mo_translate(x, language = language)
}
#' @rdname mo_property
#' @importFrom dplyr %>% left_join mutate pull
#' @export
mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) {
mo_shortname <- function(x, language = NULL, ...) {
dots <- list(...)
Becker <- dots$Becker
if (is.null(Becker)) {
Becker <- FALSE
}
Lancefield <- dots$Lancefield
if (is.null(Lancefield)) {
Lancefield <- FALSE
}
if (Becker %in% c(TRUE, "all") | Lancefield == TRUE) {
res1 <- AMR::as.mo(x)
res2 <- suppressWarnings(AMR::as.mo(res1, Becker = Becker, Lancefield = Lancefield))
res1 <- AMR::as.mo(x, Becker = FALSE, Lancefield = FALSE, reference_df = dots$reference_df)
res2 <- suppressWarnings(AMR::as.mo(res1, ...))
res2_fullname <- mo_fullname(res2)
res2_fullname[res2_fullname %like% "\\(CoNS\\)"] <- "CoNS"
res2_fullname[res2_fullname %like% "\\(CoPS\\)"] <- "CoPS"
@ -127,7 +138,7 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
res1[res1 != res2] <- res2_fullname
result <- as.character(res1)
} else {
x <- AMR::as.mo(x)
x <- AMR::as.mo(x, ...)
suppressWarnings(
result <- data.frame(mo = x) %>%
left_join(AMR::microorganisms, by = "mo") %>%
@ -140,82 +151,86 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
#' @rdname mo_property
#' @export
mo_subspecies <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) {
mo_translate(exec_as.mo(x,
Becker = Becker,
Lancefield = Lancefield,
property = "subspecies"),
language = language)
mo_subspecies <- function(x, language = NULL, ...) {
mo_translate(mo_validate(x = x, property = "subspecies", ...), language = language)
}
#' @rdname mo_property
#' @export
mo_species <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) {
x <- mo_validate(x = x, property = "species", Becker = Becker, Lancefield = Lancefield)
mo_translate(x, language = language)
mo_species <- function(x, language = NULL, ...) {
mo_translate(mo_validate(x = x, property = "species", ...), language = language)
}
#' @rdname mo_property
#' @export
mo_genus <- function(x, language = NULL) {
x <- mo_validate(x = x, property = "genus")
mo_translate(x, language = language)
mo_genus <- function(x, language = NULL, ...) {
mo_translate(mo_validate(x = x, property = "genus", ...), language = language)
}
#' @rdname mo_property
#' @export
mo_family <- function(x) {
mo_validate(x = x, property = "family")
mo_family <- function(x, ...) {
mo_validate(x = x, property = "family", ...)
}
#' @rdname mo_property
#' @export
mo_order <- function(x) {
mo_validate(x = x, property = "order")
mo_order <- function(x, ...) {
mo_validate(x = x, property = "order", ...)
}
#' @rdname mo_property
#' @export
mo_class <- function(x) {
mo_validate(x = x, property = "class")
mo_class <- function(x, ...) {
mo_validate(x = x, property = "class", ...)
}
#' @rdname mo_property
#' @export
mo_phylum <- function(x) {
mo_validate(x = x, property = "phylum")
mo_phylum <- function(x, ...) {
mo_validate(x = x, property = "phylum", ...)
}
#' @rdname mo_property
#' @export
mo_subkingdom <- function(x) {
mo_validate(x = x, property = "subkingdom")
mo_subkingdom <- function(x, ...) {
mo_validate(x = x, property = "subkingdom", ...)
}
#' @rdname mo_property
#' @export
mo_type <- function(x, language = NULL) {
x <- mo_validate(x = x, property = "type")
mo_translate(x, language = language)
mo_authors <- function(x, ...) {
mo_validate(x = x, property = "authors", ...)
}
#' @rdname mo_property
#' @export
mo_TSN <- function(x) {
mo_validate(x = x, property = "tsn")
mo_year <- function(x, ...) {
mo_validate(x = x, property = "year", ...)
}
#' @rdname mo_property
#' @export
mo_gramstain <- function(x, language = NULL) {
x <- mo_validate(x = x, property = "gramstain")
mo_translate(x, language = language)
mo_type <- function(x, language = NULL, ...) {
mo_translate(mo_validate(x = x, property = "type", ...), language = language)
}
#' @rdname mo_property
#' @export
mo_TSN <- function(x, ...) {
mo_validate(x = x, property = "tsn", ...)
}
#' @rdname mo_property
#' @export
mo_gramstain <- function(x, language = NULL, ...) {
mo_translate(mo_validate(x = x, property = "gramstain", ...), language = language)
}
#' @rdname mo_property
#' @importFrom data.table data.table as.data.table setkey
#' @export
mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = FALSE, language = NULL) {
mo_property <- function(x, property = 'fullname', language = NULL, ...) {
if (length(property) != 1L) {
stop("'property' must be of length 1.")
}
@ -237,8 +252,8 @@ mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = F
#' @rdname mo_property
#' @export
mo_taxonomy <- function(x) {
x <- AMR::as.mo(x)
mo_taxonomy <- function(x, ...) {
x <- AMR::as.mo(x, ...)
base::list(subkingdom = mo_subkingdom(x),
phylum = mo_phylum(x),
class = mo_class(x),
@ -372,12 +387,20 @@ mo_translate <- function(x, language) {
}
mo_validate <- function(x, property, Becker = FALSE, Lancefield = FALSE) {
mo_validate <- function(x, property, ...) {
dots <- list(...)
Becker <- dots$Becker
if (is.null(Becker)) {
Becker <- FALSE
}
Lancefield <- dots$Lancefield
if (is.null(Lancefield)) {
Lancefield <- FALSE
}
if (!all(x %in% AMR::microorganisms[, property]) | Becker %in% c(TRUE, "all") | Lancefield == TRUE) {
exec_as.mo(x,
Becker = Becker,
Lancefield = Lancefield,
property = property)
exec_as.mo(x, property = property, ...)
} else {
x
}

View File

@ -26,11 +26,11 @@ Erwin E.A. Hassing<sup>2</sup>,
## Contents
* [Why this package?](#why-this-package)
* [ITIS](#itis)
* [How to get it?](#how-to-get-it)
* [Install from CRAN](#install-from-cran)
* [Install from GitHub](#install-from-github)
* [How to use it?](#how-to-use-it)
* [ITIS](#itis)
* [New classes](#new-classes)
* [Overwrite/force resistance based on EUCAST rules](#overwriteforce-resistance-based-on-eucast-rules)
* [Other (microbial) epidemiological functions](#other-microbial-epidemiological-functions)
@ -73,6 +73,15 @@ The `AMR` package basically does four important things:
* 2,000 blood culture isolates from anonymised septic patients between 2001 and 2017 in the Northern Netherlands
* Results of 40 antibiotics (each antibiotic in its own column) with a total of 38,414 antimicrobial results
* Real and genuine data
### ITIS
<img src="man/figures/itis_logo.jpg" height="100px">
This package contains the **complete microbial taxonomic data** (with all seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov).
The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists.
## How to get it?
All stable versions of this package [are published on CRAN](http://cran.r-project.org/package=AMR), the official R network with a peer-reviewed submission process.
@ -95,9 +104,9 @@ This is the latest **development version**. Although it may contain bugfixes and
Development Test | Result | Reference
--- | :---: | ---
Works on Linux and macOS | [![Travis_Build](https://travis-ci.org/msberends/AMR.svg?branch=master)](https://travis-ci.org/msberends/AMR) | Checked by Travis CI, GmbH [[ref 1]](https://travis-ci.org/msberends/AMR)
Works on Windows | [![AppVeyor_Build](https://ci.appveyor.com/api/projects/status/github/msberends/AMR?branch=master&svg=true)](https://ci.appveyor.com/project/msberends/AMR) | Checked by Appveyor Systems Inc. [[ref 2]](https://ci.appveyor.com/project/msberends/AMR)
Syntax lines checked | [![Code_Coverage](https://codecov.io/gh/msberends/AMR/branch/master/graph/badge.svg)](https://codecov.io/gh/msberends/AMR) | Checked by Codecov LLC [[ref 3]](https://codecov.io/gh/msberends/AMR)
All functions checked on Linux and macOS | [![Travis_Build](https://travis-ci.org/msberends/AMR.svg?branch=master)](https://travis-ci.org/msberends/AMR) | Travis CI, GmbH [[ref 1]](https://travis-ci.org/msberends/AMR)
All functions checked on Windows | [![AppVeyor_Build](https://ci.appveyor.com/api/projects/status/github/msberends/AMR?branch=master&svg=true)](https://ci.appveyor.com/project/msberends/AMR) | Appveyor Systems Inc. [[ref 2]](https://ci.appveyor.com/project/msberends/AMR)
Percentage of syntax lines checked | [![Code_Coverage](https://codecov.io/gh/msberends/AMR/branch/master/graph/badge.svg)](https://codecov.io/gh/msberends/AMR) | Codecov LLC [[ref 3]](https://codecov.io/gh/msberends/AMR)
If so, try it with:
```r
@ -119,15 +128,6 @@ library(AMR)
help(package = "AMR")
```
## ITIS
<img src="man/figures/itis_logo.jpg" height="100px">
This `AMR` package contains the **complete microbial taxonomic data** (with all seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov).
The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists.
### New classes
This package contains two new S3 classes: `mic` for MIC values (e.g. from Vitek or Phoenix) and `rsi` for antimicrobial drug interpretations (i.e. S, I and R). Both are actually ordered factors under the hood (an MIC of `2` being higher than `<=1` but lower than `>=32`, and for class `rsi` factors are ordered as `S < I < R`).
Both classes have extensions for existing generic functions like `print`, `summary` and `plot`.
@ -154,9 +154,8 @@ plot(septic_patients$cipr)
![example_1_rsi](man/figures/rsi_example1.png)
<<img src="https://github.com/tidyverse/dplyr/blob/master/man/figures/logo.png" height="50px"> <img src="https://github.com/tidyverse/ggplot2/blob/master/man/figures/logo.png" height="50px">
Or use the `ggplot2` and `dplyr` packages to create more appealing plots:
```r
library(dplyr)
library(ggplot2)

Binary file not shown.

View File

@ -7,12 +7,13 @@
\alias{guess_mo}
\title{Transform to microorganism ID}
\usage{
as.mo(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE)
as.mo(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE,
reference_df = NULL)
is.mo(x)
guess_mo(x, Becker = FALSE, Lancefield = FALSE,
allow_uncertain = FALSE)
allow_uncertain = FALSE, reference_df = NULL)
}
\arguments{
\item{x}{a character vector or a \code{data.frame} with one or two columns}
@ -26,6 +27,8 @@ guess_mo(x, Becker = FALSE, Lancefield = FALSE,
This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.}
\item{allow_uncertain}{a logical to indicate whether empty results should be checked for only a part of the input string. When results are found, a warning will be given about the uncertainty and the result.}
\item{reference_df}{a \code{data.frame} to use for extra reference when translating \code{x} to a valid \code{mo}. The first column can be any microbial name, code or ID (used in your analysis or organisation), the second column must be a valid \code{mo} as found in the \code{\link{microorganisms}} data set.}
}
\value{
Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}.
@ -34,7 +37,7 @@ Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}.
Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using Artificial Intelligence (AI) and the complete taxonomic kingdoms \emph{Bacteria}, \emph{Fungi} and \emph{Protozoa} (see Source), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
}
\details{
A microbial ID (class: \code{mo}) typically looks like these examples:\cr
A microbial ID from this package (class: \code{mo}) typically looks like these examples:\cr
\preformatted{
Code Full name
--------------- --------------------------------------
@ -58,14 +61,18 @@ This function uses Artificial Intelligence (AI) to help getting more logical res
\item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}}
\item{Something like \code{"stau"} or \code{"S aur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}}
}
This means that looking up human non-pathogenic microorganisms takes a longer time compares to human pathogenic microorganisms.
This means that looking up human pathogenic microorganisms takes less time than looking up human \strong{non}-pathogenic microorganisms.
\code{guess_mo} is an alias of \code{as.mo}.
}
\section{ITIS}{
\if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS.
This package contains the \strong{complete microbial taxonomic data} (with all seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).
The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available too. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
}
\section{Source}{
@ -78,7 +85,7 @@ This \code{AMR} package contains the \strong{complete microbial taxonomic data}
}
\examples{
# These examples all return "STAAUR", the ID of S. aureus:
# These examples all return "B_STPHY_AUR", the ID of S. aureus:
as.mo("stau")
as.mo("STAU")
as.mo("staaur")

View File

@ -4,7 +4,6 @@
\alias{freq}
\alias{frequency_tbl}
\alias{top_freq}
\alias{diff.frequency_tbl}
\alias{print.frequency_tbl}
\title{Frequency table}
\usage{
@ -18,8 +17,6 @@ freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"),
top_freq(f, n)
\method{diff}{frequency_tbl}(x, y, ...)
\method{print}{frequency_tbl}(x, nmax = getOption("max.print.freq",
default = 15), ...)
}

View File

@ -4,7 +4,7 @@
\name{microorganisms}
\alias{microorganisms}
\title{Data set with taxonomic data from ITIS}
\format{A \code{\link{data.frame}} with 18,831 observations and 15 variables:
\format{A \code{\link{data.frame}} with 18,833 observations and 16 variables:
\describe{
\item{\code{mo}}{ID of microorganism}
\item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS}
@ -20,7 +20,8 @@
\item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}}
\item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}}
\item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.}
\item{\code{mo.old}}{The old ID for package versions 0.3.0 and lower.}
\item{\code{authors}}{Author(s) that published this taxonomic name as found in ITIS, see Source}
\item{\code{year}}{Year in which the author(s) published this taxonomic name as found in ITIS, see Source}
}}
\source{
[3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
@ -34,7 +35,11 @@ A data set containing the complete microbial taxonomy of the kingdoms Bacteria,
\section{ITIS}{
\if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS.
This package contains the \strong{complete microbial taxonomic data} (with all seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).
The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available too. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
}
\seealso{

View File

@ -24,7 +24,11 @@ A data set containing old (previously valid or accepted) taxonomic names accordi
\section{ITIS}{
\if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS.
This package contains the \strong{complete microbial taxonomic data} (with all seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).
The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available too. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
}
\seealso{

View File

@ -12,56 +12,55 @@
\alias{mo_class}
\alias{mo_phylum}
\alias{mo_subkingdom}
\alias{mo_authors}
\alias{mo_year}
\alias{mo_type}
\alias{mo_TSN}
\alias{mo_gramstain}
\alias{mo_taxonomy}
\title{Property of a microorganism}
\usage{
mo_fullname(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
mo_fullname(x, language = NULL, ...)
mo_shortname(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
mo_shortname(x, language = NULL, ...)
mo_subspecies(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
mo_subspecies(x, language = NULL, ...)
mo_species(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
mo_species(x, language = NULL, ...)
mo_genus(x, language = NULL)
mo_genus(x, language = NULL, ...)
mo_family(x)
mo_family(x, ...)
mo_order(x)
mo_order(x, ...)
mo_class(x)
mo_class(x, ...)
mo_phylum(x)
mo_phylum(x, ...)
mo_subkingdom(x)
mo_subkingdom(x, ...)
mo_type(x, language = NULL)
mo_authors(x, ...)
mo_TSN(x)
mo_year(x, ...)
mo_gramstain(x, language = NULL)
mo_type(x, language = NULL, ...)
mo_property(x, property = "fullname", Becker = FALSE,
Lancefield = FALSE, language = NULL)
mo_TSN(x, ...)
mo_taxonomy(x)
mo_gramstain(x, language = NULL, ...)
mo_property(x, property = "fullname", language = NULL, ...)
mo_taxonomy(x, ...)
}
\arguments{
\item{x}{any (vector of) text that can be coerced to a valid microorganism code with \code{\link{as.mo}}}
\item{Becker}{a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1].
This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".}
\item{Lancefield}{a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L.
This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.}
\item{language}{language of the returned text, defaults to English (\code{"en"}) and can also be set with \code{\link{getOption}("AMR_locale")}. Either one of \code{"en"} (English), \code{"de"} (German), \code{"nl"} (Dutch), \code{"es"} (Spanish) or \code{"pt"} (Portuguese).}
\item{...}{other parameters passed on to \code{/link{as.mo}}}
\item{property}{one of the column names of one of the \code{\link{microorganisms}} data set or \code{"shortname"}}
}
\value{
@ -73,7 +72,11 @@ Use these functions to return a specific property of a microorganism from the \c
\section{ITIS}{
\if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS.
This package contains the \strong{complete microbial taxonomic data} (with all seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).
The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available too. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
}
\section{Source}{
@ -86,7 +89,7 @@ This \code{AMR} package contains the \strong{complete microbial taxonomic data}
}
\examples{
# All properties
# All properties of Escherichia coli
mo_subkingdom("E. coli") # "Negibacteria"
mo_phylum("E. coli") # "Proteobacteria"
mo_class("E. coli") # "Gammaproteobacteria"
@ -100,6 +103,8 @@ mo_shortname("E. coli") # "E. coli"
mo_gramstain("E. coli") # "Gram negative"
mo_TSN("E. coli") # 285
mo_type("E. coli") # "Bacteria"
mo_authors("E. coli") # "Castellani and Chalmers"
mo_year("E. coli") # 1919
# Abbreviations known in the field

View File

@ -141,4 +141,21 @@ test_that("as.mo works", {
expect_equal(as.character(suppressWarnings(as.mo(""))),
NA_character_)
# check less prevalent MOs
expect_equal(as.character(as.mo("Gomphosphaeria aponina delicatula")), "B_GMPHS_APO_DEL")
expect_equal(as.character(as.mo("G apo deli")), "B_GMPHS_APO_DEL")
expect_equal(as.character(as.mo("Gomphosphaeria aponina")), "B_GMPHS_APO")
expect_equal(as.character(as.mo("Gomphosphaeria species")), "B_GMPHS")
expect_equal(as.character(as.mo("Gomphosphaeria")), "B_GMPHS")
expect_equal(as.character(as.mo(" B_GMPHS_APO ")), "B_GMPHS_APO")
expect_equal(as.character(as.mo("g aponina")), "B_GMPHS_APO")
# check old names
expect_equal(suppressMessages(as.character(as.mo("Escherichia blattae"))), "B_SHMWL_BLA")
# check uncertain names
expect_equal(suppressWarnings(as.character(as.mo("esco extra_text", allow_uncertain = FALSE))), NA_character_)
expect_equal(suppressWarnings(as.character(as.mo("esco extra_text", allow_uncertain = TRUE))), "B_ESCHR_COL")
expect_warning(as.mo("esco extra_text", allow_uncertain = TRUE))
})

View File

@ -15,6 +15,8 @@ test_that("mo_property works", {
expect_equal(class(mo_taxonomy("E. coli")), "list")
expect_equal(names(mo_taxonomy("E. coli")), c("subkingdom", "phylum", "class", "order",
"family", "genus", "species", "subspecies"))
expect_equal(mo_authors("E. coli"), "Castellani and Chalmers")
expect_equal(mo_year("E. coli"), 1919)
expect_equal(mo_shortname("MRSA"), "S. aureus")
expect_equal(mo_shortname("MRSA", Becker = TRUE), "S. aureus")