AMR/R/italicise_taxonomy.R

146 lines
5.8 KiB
R
Raw Normal View History

2021-05-03 10:47:32 +02:00
# ==================================================================== #
# TITLE: #
2022-10-05 09:12:22 +02:00
# AMR: An R Package for Working with Antimicrobial Resistance Data #
2021-05-03 10:47:32 +02:00
# #
# SOURCE CODE: #
2021-05-03 10:47:32 +02:00
# https://github.com/msberends/AMR #
# #
# PLEASE CITE THIS SOFTWARE AS: #
2022-10-05 09:12:22 +02:00
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
# Data. Journal of Statistical Software, 104(3), 1-31. #
2023-05-27 10:39:22 +02:00
# https://doi.org/10.18637/jss.v104.i03 #
2022-10-05 09:12:22 +02:00
# #
2022-12-27 15:16:15 +01:00
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
2021-05-03 10:47:32 +02:00
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
#' Italicise Taxonomic Families, Genera, Species, Subspecies
2022-08-28 10:31:50 +02:00
#'
2022-08-21 16:37:20 +02:00
#' According to the binomial nomenclature, the lowest four taxonomic levels (family, genus, species, subspecies) should be printed in italics. This function finds taxonomic names within strings and makes them italic.
2021-05-12 18:15:03 +02:00
#' @param string a [character] (vector)
#' @param type type of conversion of the taxonomic names, either "markdown", "html" or "ansi", see *Details*
2022-08-28 10:31:50 +02:00
#' @details
2021-05-03 10:47:32 +02:00
#' This function finds the taxonomic names and makes them italic based on the [microorganisms] data set.
2022-08-28 10:31:50 +02:00
#'
#' The taxonomic names can be italicised using markdown (the default) by adding `*` before and after the taxonomic names, or `<i>` and `</i>` when using html. When using 'ansi', ANSI colours will be added using `\033[3m` before and `\033[23m` after the taxonomic names. If multiple ANSI colours are not available, no conversion will occur.
2022-08-28 10:31:50 +02:00
#'
2021-05-03 10:47:32 +02:00
#' This function also supports abbreviation of the genus if it is followed by a species, such as "E. coli" and "K. pneumoniae ozaenae".
#' @export
#' @examples
#' italicise_taxonomy("An overview of Staphylococcus aureus isolates")
#' italicise_taxonomy("An overview of S. aureus isolates")
2022-08-28 10:31:50 +02:00
#'
2021-05-03 10:47:32 +02:00
#' cat(italicise_taxonomy("An overview of S. aureus isolates", type = "ansi"))
italicise_taxonomy <- function(string, type = c("markdown", "ansi", "html")) {
2021-05-03 10:47:32 +02:00
if (missing(type)) {
type <- "markdown"
}
meet_criteria(string, allow_class = "character")
meet_criteria(type, allow_class = "character", has_length = 1, is_in = c("markdown", "ansi", "html"))
2022-08-28 10:31:50 +02:00
2023-01-21 23:47:20 +01:00
add_MO_lookup_to_AMR_env()
2021-05-03 10:47:32 +02:00
if (type == "markdown") {
before <- "*"
after <- "*"
} else if (type == "html") {
before <- "<i>"
after <- "</i>"
2021-05-03 10:47:32 +02:00
} else if (type == "ansi") {
2023-03-20 21:59:50 +01:00
if (!has_colour() && !identical(Sys.getenv("IN_PKGDOWN"), "true")) {
2021-05-03 10:47:32 +02:00
return(string)
}
before <- "\033[3m"
after <- "\033[23m"
}
2022-08-28 10:31:50 +02:00
vapply(
FUN.VALUE = character(1),
string,
function(s) {
2022-10-05 09:12:22 +02:00
s_split <- unlist(strsplit(s, " ", fixed = TRUE))
2022-08-28 10:31:50 +02:00
search_strings <- gsub("[^a-zA-Z-]", "", s_split)
ind_species <- search_strings != "" &
2023-01-23 15:01:21 +01:00
search_strings %in% AMR_env$MO_lookup[
which(AMR_env$MO_lookup$rank %in% c(
2022-08-28 10:31:50 +02:00
"family",
"genus",
"species",
"subspecies",
"infraspecies",
"subsp."
)),
2023-01-23 15:01:21 +01:00
"species",
2022-08-28 10:31:50 +02:00
drop = TRUE
2023-01-23 15:01:21 +01:00
]
ind_fullname <- search_strings != "" &
search_strings %in% c(
AMR_env$MO_lookup[
which(AMR_env$MO_lookup$rank %in% c(
"family",
"genus",
"species",
"subspecies",
"infraspecies",
"subsp."
)),
"fullname",
drop = TRUE
2022-08-28 10:31:50 +02:00
],
2023-01-23 15:01:21 +01:00
AMR_env$MO_lookup[
which(AMR_env$MO_lookup$rank %in% c(
"family",
"genus",
"species",
"subspecies",
"infraspecies",
"subsp."
)),
2022-08-28 10:31:50 +02:00
"subspecies",
2023-01-23 15:01:21 +01:00
drop = TRUE
2022-08-28 10:31:50 +02:00
]
)
# also support E. coli, add "E." to indices
has_previous_genera_abbr <- s_split[which(ind_species) - 1] %like_case% "^[A-Z][.]?$"
ind_species <- c(which(ind_species), which(ind_species)[has_previous_genera_abbr] - 1)
ind <- c(ind_species, which(ind_fullname))
s_split[ind] <- paste0(before, s_split[ind], after)
s_paste <- paste(s_split, collapse = " ")
# clean up a bit
s_paste <- gsub(paste0(after, " ", before), " ", s_paste, fixed = TRUE)
s_paste
},
USE.NAMES = FALSE
)
2021-05-03 10:47:32 +02:00
}
#' @rdname italicise_taxonomy
#' @export
italicize_taxonomy <- function(string, type = c("markdown", "ansi", "html")) {
if (missing(type)) {
type <- "markdown"
}
italicise_taxonomy(string = string, type = type)
2021-05-03 10:47:32 +02:00
}