1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 21:22:01 +02:00

authors from ITIS, diff for freq

This commit is contained in:
2018-10-01 11:39:43 +02:00
parent 92c9cc2608
commit 3119a221e5
17 changed files with 280 additions and 141 deletions

View File

@ -124,7 +124,7 @@
#'
#' A data set containing the complete microbial taxonomy of the kingdoms Bacteria, Fungi and Protozoa. MO codes can be looked up using \code{\link{as.mo}}.
#' @inheritSection as.mo ITIS
#' @format A \code{\link{data.frame}} with 18,831 observations and 15 variables:
#' @format A \code{\link{data.frame}} with 18,833 observations and 16 variables:
#' \describe{
#' \item{\code{mo}}{ID of microorganism}
#' \item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS}
@ -140,7 +140,8 @@
#' \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}}
#' \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}}
#' \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.}
#' \item{\code{mo.old}}{The old ID for package versions 0.3.0 and lower.}
#' \item{\code{authors}}{Author(s) that published this taxonomic name as found in ITIS, see Source}
#' \item{\code{year}}{Year in which the author(s) published this taxonomic name as found in ITIS, see Source}
#' }
#' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
#' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms.umcg}}

View File

@ -106,7 +106,7 @@
#' # print a histogram of numeric values
#' septic_patients %>%
#' freq(age) %>%
#' hist() # prettier: ggplot(septic_patients, aes(age)) + geom_histogram()
#' hist()
#'
#' # or print all points to a regular plot
#' septic_patients %>%
@ -134,6 +134,10 @@
#' septic_patients$age) %>%
#' freq(sep = " **sep** ")
#'
#' # check differences between frequency tables
#' diff(freq(septic_patients$trim),
#' freq(septic_patients$trsu))
#'
#' \dontrun{
#' # send frequency table to clipboard (e.g. for pasting in Excel)
#' septic_patients %>%
@ -502,7 +506,7 @@ top_freq <- function(f, n) {
vect
}
#' @rdname freq
#' @noRd
#' @exportMethod diff.frequency_tbl
#' @importFrom dplyr %>% full_join mutate
#' @export
@ -531,8 +535,15 @@ diff.frequency_tbl <- function(x, y, ...) {
mutate(
diff.percent = percent(
diff / count.x,
force_zero = TRUE))
force_zero = TRUE)) %>%
mutate(diff = ifelse(diff %like% '^-',
diff,
paste0("+", diff)),
diff.percent = ifelse(diff.percent %like% '^-',
diff.percent,
paste0("+", diff.percent)))
cat("Differences between frequency tables")
print(
knitr::kable(x,
format = x.attr$tbl_format,

View File

@ -22,8 +22,11 @@ globalVariables(c(".",
"Antibiotic",
"antibiotics",
"authors",
"Becker",
"cnt",
"count",
"count.x",
"count.y",
"cum_count",
"cum_percent",
"date_lab",
@ -39,6 +42,7 @@ globalVariables(c(".",
"key_ab",
"key_ab_lag",
"key_ab_other",
"Lancefield",
"lbl",
"median",
"mic",

107
R/mo.R
View File

@ -27,11 +27,12 @@
#'
#' This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.
#' @param allow_uncertain a logical to indicate whether empty results should be checked for only a part of the input string. When results are found, a warning will be given about the uncertainty and the result.
#' @param reference_df a \code{data.frame} to use for extra reference when translating \code{x} to a valid \code{mo}. The first column can be any microbial name, code or ID (used in your analysis or organisation), the second column must be a valid \code{mo} as found in the \code{\link{microorganisms}} data set.
#' @rdname as.mo
#' @aliases mo
#' @keywords mo Becker becker Lancefield lancefield guess
#' @details
#' A microbial ID (class: \code{mo}) typically looks like these examples:\cr
#' A microbial ID from this package (class: \code{mo}) typically looks like these examples:\cr
#' \preformatted{
#' Code Full name
#' --------------- --------------------------------------
@ -55,13 +56,17 @@
#' \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}}
#' \item{Something like \code{"stau"} or \code{"S aur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}}
#' }
#' This means that looking up human non-pathogenic microorganisms takes a longer time compares to human pathogenic microorganisms.
#' This means that looking up human pathogenic microorganisms takes less time than looking up human \strong{non}-pathogenic microorganisms.
#'
#' \code{guess_mo} is an alias of \code{as.mo}.
#' @section ITIS:
#' \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
#' This \code{AMR} package contains the \strong{complete microbial taxonomic data} (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS.
# (source as section, so it can be inherited by mo_property:)
#' This package contains the \strong{complete microbial taxonomic data} (with all seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, \url{https://www.itis.gov}).
#'
#' The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. Furthermore, the responsible authors and year of publication are available too. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
#'
#' ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3].
# (source as a section, so it can be inherited by other man pages:)
#' @section Source:
#' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870926. \url{https://dx.doi.org/10.1128/CMR.00109-13}
#'
@ -73,7 +78,7 @@
#' @seealso \code{\link{microorganisms}} for the \code{data.frame} with ITIS content that is being used to determine ID's. \cr
#' The \code{\link{mo_property}} functions (like \code{\link{mo_genus}}, \code{\link{mo_gramstain}}) to get properties based on the returned code.
#' @examples
#' # These examples all return "STAAUR", the ID of S. aureus:
#' # These examples all return "B_STPHY_AUR", the ID of S. aureus:
#' as.mo("stau")
#' as.mo("STAU")
#' as.mo("staaur")
@ -123,9 +128,10 @@
#' df <- df %>%
#' mutate(mo = guess_mo(paste(genus, species)))
#' }
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE) {
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL) {
exec_as.mo(x = x, Becker = Becker, Lancefield = Lancefield,
allow_uncertain = allow_uncertain, property = "mo")
allow_uncertain = allow_uncertain, reference_df = reference_df,
property = "mo")
}
#' @rdname as.mo
@ -142,7 +148,7 @@ guess_mo <- as.mo
#' @importFrom dplyr %>% pull left_join
#' @importFrom data.table as.data.table setkey
exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, property = "mo") {
exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL, property = "mo") {
if (NCOL(x) == 2) {
# support tidyverse selection like: df %>% select(colA, colB)
# paste these columns together
@ -173,8 +179,31 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
MOs_allothers <- NULL # will be set later, if needed
MOs_old <- NULL # will be set later, if needed
# defined df to check for
if (!is.null(reference_df)) {
if (!is.data.frame(reference_df) | NCOL(reference_df) < 2) {
stop('`reference_df` must be a data.frame with at least two columns.')
}
# remove factors, just keep characters
suppressWarnings(
reference_df[] <- lapply(reference_df, as.character)
)
}
if (all(x %in% AMR::microorganisms[, property])) {
# already existing mo
} else if (!is.null(reference_df)
& all(x %in% reference_df[, 1])
& all(reference_df[, 2] %in% AMR::microorganisms$mo)) {
# manually defined reference
colnames(reference_df)[1] <- "x"
colnames(reference_df)[2] <- "mo"
suppressWarnings(
x <- data.frame(x = x, stringsAsFactors = FALSE) %>%
left_join(reference_df, by = "x") %>%
left_join(AMR::microorganisms, by = "mo") %>%
pull(property)
)
} else if (all(x %in% AMR::microorganisms.certe[, "certe"])) {
# old Certe codes
suppressWarnings(
@ -283,7 +312,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
}
# FIRST TRY FULLNAMES AND CODES
# if only genus is available, don't select species
# if only genus is available, return only genus
if (all(!c(x[i], x_trimmed[i]) %like% " ")) {
found <- MOs[tolower(fullname) %in% tolower(c(x_species[i], x_trimmed_species[i])), ..property][[1]]
if (length(found) > 0) {
@ -300,6 +329,27 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
}
}
# TRY OTHER SOURCES ----
if (x_backup[i] %in% AMR::microorganisms.certe[, 1]) {
x[i] <- MOs[mo == AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == x_backup[i], 2], ..property][[1]][1L]
next
}
if (x_backup[i] %in% AMR::microorganisms.umcg[, 1]) {
ref_certe <- AMR::microorganisms.umcg[AMR::microorganisms.umcg[, 1] == x_backup[i], 2]
ref_mo <- AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == ref_certe, 2]
x[i] <- MOs[mo == ref_mo, ..property][[1]][1L]
next
}
if (x_backup[i] %in% reference_df[, 1]) {
ref_mo <- reference_df[reference_df[, 1] == x_backup[i], 2]
if (ref_mo %in% MOs[, mo]) {
x[i] <- MOs[mo == ref_mo, ..property][[1]][1L]
next
} else {
warning("Value '", x_backup[i], "' was found in reference_df, but '", ref_mo, "' is not a valid MO code.", call. = FALSE)
}
}
# TRY FIRST THOUSAND MOST PREVALENT IN HUMAN INFECTIONS ----
found <- MOs_mostprevalent[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]]
@ -478,8 +528,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
x[i] <- MOs[tsn == found[1, tsn_new], ..property][[1]]
renamed_note(name_old = found[1, name],
name_new = MOs[tsn == found[1, tsn_new], fullname],
authors = found[1, authors],
year = found[1, year])
authors_old = found[1, authors],
authors_new = MOs[tsn == found[1, tsn_new], authors],
year_old = found[1, year],
year_new = MOs[tsn == found[1, tsn_new], year])
next
}
@ -496,9 +548,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
call. = FALSE, immediate. = TRUE)
renamed_note(name_old = found[1, name],
name_new = MOs[tsn == found[1, tsn_new], fullname],
authors = found[1, authors],
year = found[1, year])
authors_old = found[1, authors],
authors_new = MOs[tsn == found[1, tsn_new], authors],
year_old = found[1, year],
year_new = MOs[tsn == found[1, tsn_new], year])
next
}
@ -605,22 +658,28 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
class(x) <- "mo"
attr(x, 'package') <- 'AMR'
attr(x, 'ITIS') <- TRUE
} else if (property == "tsn") {
} else if (property %in% c("tsn", "year")) {
x <- as.integer(x)
}
x
}
renamed_note <- function(name_old, name_new, authors, year) {
msg <- paste0("Note: '", name_old, "' was renamed to '", name_new, "'")
if (!authors %in% c("", NA)) {
msg <- paste0(msg, " by ", authors)
}
if (!year %in% c("", NA)) {
msg <- paste0(msg, " in ", year)
}
base::message(msg)
#' @importFrom dplyr case_when
renamed_note <- function(name_old, name_new,
authors_old = "", authors_new = "",
year_old = "", year_new = "") {
authorship_old <- case_when(
!authors_old %in% c("", NA) & !year_old %in% c("", NA) ~ paste0(" (", authors_old, ", ", year_old, ")"),
!authors_old %in% c("", NA) ~ paste0(" (", authors_old, ")"),
!year_old %in% c("", NA) ~ paste0(" (", year_old, ")"),
TRUE ~ "")
authorship_new <- case_when(
!authors_new %in% c("", NA) & !year_new %in% c("", NA) ~ paste0(" (", authors_new, ", ", year_new, ")"),
!authors_new %in% c("", NA) ~ paste0(" (", authors_new, ")"),
!year_new %in% c("", NA) ~ paste0(" (", year_new, ")"),
TRUE ~ "")
base::message(paste0("Note: '", name_old, "'", authorship_old, " was renamed '", name_new, "'", authorship_new))
}
#' @exportMethod print.mo

View File

@ -21,8 +21,8 @@
#' Use these functions to return a specific property of a microorganism from the \code{\link{microorganisms}} data set. All input values will be evaluated internally with \code{\link{as.mo}}.
#' @param x any (vector of) text that can be coerced to a valid microorganism code with \code{\link{as.mo}}
#' @param property one of the column names of one of the \code{\link{microorganisms}} data set or \code{"shortname"}
#' @inheritParams as.mo
#' @param language language of the returned text, defaults to English (\code{"en"}) and can also be set with \code{\link{getOption}("AMR_locale")}. Either one of \code{"en"} (English), \code{"de"} (German), \code{"nl"} (Dutch), \code{"es"} (Spanish) or \code{"pt"} (Portuguese).
#' @param ... other parameters passed on to \code{/link{as.mo}}
#' @inheritSection as.mo ITIS
#' @inheritSection as.mo Source
#' @rdname mo_property
@ -31,7 +31,7 @@
#' @export
#' @seealso \code{\link{microorganisms}}
#' @examples
#' # All properties
#' # All properties of Escherichia coli
#' mo_subkingdom("E. coli") # "Negibacteria"
#' mo_phylum("E. coli") # "Proteobacteria"
#' mo_class("E. coli") # "Gammaproteobacteria"
@ -45,6 +45,8 @@
#' mo_gramstain("E. coli") # "Gram negative"
#' mo_TSN("E. coli") # 285
#' mo_type("E. coli") # "Bacteria"
#' mo_authors("E. coli") # "Castellani and Chalmers"
#' mo_year("E. coli") # 1919
#'
#'
#' # Abbreviations known in the field
@ -97,18 +99,27 @@
#'
#' # Complete taxonomy up to Subkingdom, returns a list
#' mo_taxonomy("E. coli")
mo_fullname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) {
x <- mo_validate(x = x, property = "fullname", Becker = Becker, Lancefield = Lancefield)
mo_fullname <- function(x, language = NULL, ...) {
x <- mo_validate(x = x, property = "fullname", ...)
mo_translate(x, language = language)
}
#' @rdname mo_property
#' @importFrom dplyr %>% left_join mutate pull
#' @export
mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) {
mo_shortname <- function(x, language = NULL, ...) {
dots <- list(...)
Becker <- dots$Becker
if (is.null(Becker)) {
Becker <- FALSE
}
Lancefield <- dots$Lancefield
if (is.null(Lancefield)) {
Lancefield <- FALSE
}
if (Becker %in% c(TRUE, "all") | Lancefield == TRUE) {
res1 <- AMR::as.mo(x)
res2 <- suppressWarnings(AMR::as.mo(res1, Becker = Becker, Lancefield = Lancefield))
res1 <- AMR::as.mo(x, Becker = FALSE, Lancefield = FALSE, reference_df = dots$reference_df)
res2 <- suppressWarnings(AMR::as.mo(res1, ...))
res2_fullname <- mo_fullname(res2)
res2_fullname[res2_fullname %like% "\\(CoNS\\)"] <- "CoNS"
res2_fullname[res2_fullname %like% "\\(CoPS\\)"] <- "CoPS"
@ -127,7 +138,7 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
res1[res1 != res2] <- res2_fullname
result <- as.character(res1)
} else {
x <- AMR::as.mo(x)
x <- AMR::as.mo(x, ...)
suppressWarnings(
result <- data.frame(mo = x) %>%
left_join(AMR::microorganisms, by = "mo") %>%
@ -140,82 +151,86 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
#' @rdname mo_property
#' @export
mo_subspecies <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) {
mo_translate(exec_as.mo(x,
Becker = Becker,
Lancefield = Lancefield,
property = "subspecies"),
language = language)
mo_subspecies <- function(x, language = NULL, ...) {
mo_translate(mo_validate(x = x, property = "subspecies", ...), language = language)
}
#' @rdname mo_property
#' @export
mo_species <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) {
x <- mo_validate(x = x, property = "species", Becker = Becker, Lancefield = Lancefield)
mo_translate(x, language = language)
mo_species <- function(x, language = NULL, ...) {
mo_translate(mo_validate(x = x, property = "species", ...), language = language)
}
#' @rdname mo_property
#' @export
mo_genus <- function(x, language = NULL) {
x <- mo_validate(x = x, property = "genus")
mo_translate(x, language = language)
mo_genus <- function(x, language = NULL, ...) {
mo_translate(mo_validate(x = x, property = "genus", ...), language = language)
}
#' @rdname mo_property
#' @export
mo_family <- function(x) {
mo_validate(x = x, property = "family")
mo_family <- function(x, ...) {
mo_validate(x = x, property = "family", ...)
}
#' @rdname mo_property
#' @export
mo_order <- function(x) {
mo_validate(x = x, property = "order")
mo_order <- function(x, ...) {
mo_validate(x = x, property = "order", ...)
}
#' @rdname mo_property
#' @export
mo_class <- function(x) {
mo_validate(x = x, property = "class")
mo_class <- function(x, ...) {
mo_validate(x = x, property = "class", ...)
}
#' @rdname mo_property
#' @export
mo_phylum <- function(x) {
mo_validate(x = x, property = "phylum")
mo_phylum <- function(x, ...) {
mo_validate(x = x, property = "phylum", ...)
}
#' @rdname mo_property
#' @export
mo_subkingdom <- function(x) {
mo_validate(x = x, property = "subkingdom")
mo_subkingdom <- function(x, ...) {
mo_validate(x = x, property = "subkingdom", ...)
}
#' @rdname mo_property
#' @export
mo_type <- function(x, language = NULL) {
x <- mo_validate(x = x, property = "type")
mo_translate(x, language = language)
mo_authors <- function(x, ...) {
mo_validate(x = x, property = "authors", ...)
}
#' @rdname mo_property
#' @export
mo_TSN <- function(x) {
mo_validate(x = x, property = "tsn")
mo_year <- function(x, ...) {
mo_validate(x = x, property = "year", ...)
}
#' @rdname mo_property
#' @export
mo_gramstain <- function(x, language = NULL) {
x <- mo_validate(x = x, property = "gramstain")
mo_translate(x, language = language)
mo_type <- function(x, language = NULL, ...) {
mo_translate(mo_validate(x = x, property = "type", ...), language = language)
}
#' @rdname mo_property
#' @export
mo_TSN <- function(x, ...) {
mo_validate(x = x, property = "tsn", ...)
}
#' @rdname mo_property
#' @export
mo_gramstain <- function(x, language = NULL, ...) {
mo_translate(mo_validate(x = x, property = "gramstain", ...), language = language)
}
#' @rdname mo_property
#' @importFrom data.table data.table as.data.table setkey
#' @export
mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = FALSE, language = NULL) {
mo_property <- function(x, property = 'fullname', language = NULL, ...) {
if (length(property) != 1L) {
stop("'property' must be of length 1.")
}
@ -237,8 +252,8 @@ mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = F
#' @rdname mo_property
#' @export
mo_taxonomy <- function(x) {
x <- AMR::as.mo(x)
mo_taxonomy <- function(x, ...) {
x <- AMR::as.mo(x, ...)
base::list(subkingdom = mo_subkingdom(x),
phylum = mo_phylum(x),
class = mo_class(x),
@ -372,12 +387,20 @@ mo_translate <- function(x, language) {
}
mo_validate <- function(x, property, Becker = FALSE, Lancefield = FALSE) {
mo_validate <- function(x, property, ...) {
dots <- list(...)
Becker <- dots$Becker
if (is.null(Becker)) {
Becker <- FALSE
}
Lancefield <- dots$Lancefield
if (is.null(Lancefield)) {
Lancefield <- FALSE
}
if (!all(x %in% AMR::microorganisms[, property]) | Becker %in% c(TRUE, "all") | Lancefield == TRUE) {
exec_as.mo(x,
Becker = Becker,
Lancefield = Lancefield,
property = property)
exec_as.mo(x, property = property, ...)
} else {
x
}