added mo_shortname

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-09-05 10:51:46 +02:00
parent 790bd1622d
commit e39a9a8b05
8 changed files with 88 additions and 12 deletions

View File

@ -95,6 +95,7 @@ export(mo_fullname)
export(mo_genus)
export(mo_gramstain)
export(mo_property)
export(mo_shortname)
export(mo_species)
export(mo_subspecies)
export(mo_type)

View File

@ -10,7 +10,7 @@
* Column names of datasets `microorganisms` and `septic_patients`
* All old syntaxes will still work with this version, but will throw warnings
* Functions `as.atc` and `is.atc` to transform/look up antibiotic ATC codes as defined by the WHO. The existing function `guess_atc` is now an alias of `as.atc`.
* Aliases for existing function `mo_property`: `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`, `mo_fullname`, `mo_aerobic`, `mo_type`, `mo_gramstain`. The last two functions have a `language` parameter, with support for Spanish, German and Dutch:
* Aliases for existing function `mo_property`: `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`, `mo_fullname`, `mo_shortname`, `mo_aerobic`, `mo_type` and `mo_gramstain`. The last two functions have a `language` parameter, with support for Spanish, German and Dutch:
```r
mo_gramstain("E. coli")
# [1] "Negative rods"

18
R/mo.R
View File

@ -18,7 +18,7 @@
#' Transform to microorganism ID
#'
#' Use this function to determine a valid ID based on a genus (and species). This input can be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
#' Use this function to determine a valid ID based on a genus (and species). Determination is done using Artificial Intelligence (AI), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, see Examples.
#' @param x a character vector or a \code{data.frame} with one or two columns
#' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1].
#'
@ -37,6 +37,7 @@
#' \itemize{
#' \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
#' \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae}}
#' \item{Something like \code{"s pyo"} will return the ID of \emph{Streptococcus pyogenes} and not \emph{Actinomyces pyogenes}}
#' \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}}
#' \item{Something like \code{"stau"} or \code{"staaur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}}
#' }
@ -62,6 +63,10 @@
#' as.mo("VISA") # Vancomycin Intermediate S. aureus
#' as.mo("VRSA") # Vancomycin Resistant S. aureus
#'
#' as.mo("Streptococcus group A")
#' as.mo("GAS") # Group A Streptococci
#' as.mo("GBS") # Group B Streptococci
#'
#' # guess_mo is an alias of as.mo and works the same
#' guess_mo("S. epidermidis") # will remain species: STAEPI
#' guess_mo("S. epidermidis", Becker = TRUE) # will not remain species: STACNS
@ -172,6 +177,11 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
x[i] <- 'STAAUR'
next
}
if (tolower(x[i]) == '^s.*pyo$') {
# avoid detection of Actinomyces pyogenes in case of Streptococcus pyogenes
x[i] <- 'STCPYO'
next
}
if (tolower(x[i]) == '^p.*aer$') {
# avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
x[i] <- 'PSEAER'
@ -192,7 +202,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
next
}
# translate known trivial names to genus+species
# translate known trivial abbreviations to genus+species
if (!is.na(x_trimmed[i])) {
if (toupper(x_trimmed[i]) == 'MRSA'
| toupper(x_trimmed[i]) == 'VISA'
@ -218,6 +228,10 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
x[i] <- 'STCPNE'
next
}
if (toupper(x_trimmed[i]) %like% '^G[ABCDFHK]S$') {
  # Lancefield group abbreviation, e.g. "GAS" -> "STCGRA".
  # The condition tests the upper-cased value, so the rewrite must use it as
  # well; otherwise lower-case input such as "gas" passes the test but is
  # left untouched by the case-sensitive gsub() and stored as-is.
  x[i] <- gsub("G([ABCDFHK])S", "STCGR\\1", toupper(x_trimmed[i]))
  next
}
}
# try any match keeping spaces

View File

@ -38,6 +38,7 @@
#' mo_species("E. coli") # "coli"
#' mo_subspecies("E. coli") # <NA>
#' mo_fullname("E. coli") # "Escherichia coli"
#' mo_shortname("E. coli") # "E. coli"
#' mo_type("E. coli") # "Bacteria"
#' mo_gramstain("E. coli") # "Negative rods"
#' mo_aerobic("E. coli") # TRUE
@ -54,6 +55,7 @@
#' # Abbreviations known in the field
#' mo_genus("MRSA") # "Staphylococcus"
#' mo_species("MRSA") # "aureus"
#' mo_shortname("MRSA") # "S. aureus"
#' mo_gramstain("MRSA") # "Positive cocci"
#'
#' mo_genus("VISA") # "Staphylococcus"
@ -65,12 +67,14 @@
#' mo_species("EHEC") # "coli"
#' mo_subspecies("EHEC") # "EHEC"
#' mo_fullname("EHEC") # "Escherichia coli (EHEC)"
#' mo_shortname("EHEC") # "E. coli"
#'
#' mo_genus("doylei") # "Campylobacter"
#' mo_species("doylei") # "jejuni"
#' mo_fullname("doylei") # "Campylobacter jejuni (doylei)"
#'
#' mo_fullname("K. pneu rh") # "Klebsiella pneumoniae (rhinoscleromatis)"
#' mo_shortname("K. pneu rh") # "K. pneumoniae"
#'
#'
#' # Anaerobic bacteria
@ -80,12 +84,16 @@
#'
#'
#' # Becker classification, see ?as.mo
#' mo_fullname("S. epidermidis") # "Staphylococcus epidermidis"
#' mo_fullname("S. epidermidis", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)"
#' mo_fullname("S. epidermidis") # "Staphylococcus epidermidis"
#' mo_fullname("S. epidermidis", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)"
#' mo_shortname("S. epidermidis") # "S. epidermidis"
#' mo_shortname("S. epidermidis", Becker = TRUE) # "CoNS"
#'
#' # Lancefield classification, see ?as.mo
#' mo_fullname("S. pyogenes") # "Streptococcus pyogenes"
#' mo_fullname("S. pyogenes", Lancefield = TRUE) # "Streptococcus group A"
#' mo_fullname("S. pyogenes") # "Streptococcus pyogenes"
#' mo_fullname("S. pyogenes", Lancefield = TRUE) # "Streptococcus group A"
#' mo_shortname("S. pyogenes") # "S. pyogenes"
#' mo_shortname("S. pyogenes", Lancefield = TRUE) # "GAS"
mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = FALSE) {
property <- tolower(property[1])
if (!property %in% colnames(microorganisms)) {
@ -129,6 +137,35 @@ mo_fullname <- function(x, Becker = FALSE, Lancefield = FALSE) {
mo_property(x, "fullname", Becker = Becker, Lancefield = Lancefield)
}
#' @rdname mo_property
#' @export
mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE) {
  # Abbreviated organism name in the form "G. species" (first letter of the
  # genus, a dot, then the species). When Becker or Lancefield grouping is
  # requested and it actually changes the ID of an element, that element gets
  # the group abbreviation instead: "CoNS", "CoPS" or "G<group>S" (e.g. "GAS").
  # Returns a character vector with one element per element of x; input that
  # as.mo() cannot resolve yields NA.

  # resolve once, without any grouping, and build the default short names
  ids <- as.mo(x)
  short <- paste0(substr(mo_genus(ids), 1, 1), ". ", mo_species(ids))
  # paste0() would otherwise turn unknown input into the literal "NA. NA"
  short[is.na(ids)] <- NA_character_

  # scalar condition: any() + || instead of an elementwise | inside if ()
  if (any(Becker %in% c(TRUE, "all")) || any(Lancefield == TRUE, na.rm = TRUE)) {
    grouped <- as.mo(x, Becker = Becker, Lancefield = Lancefield)

    grouped_name <- mo_fullname(grouped)
    grouped_name[grouped_name %like% "\\(CoNS\\)"] <- "CoNS"
    grouped_name[grouped_name %like% "\\(CoPS\\)"] <- "CoPS"
    # turn "Streptococcus group A" into "GAS"
    grouped_name <- gsub("Streptococcus group (.*)", "G\\1S", grouped_name)

    # Only elements whose ID was changed by the grouping take the group
    # abbreviation. Both sides are indexed with the same mask so values stay
    # aligned for vector input, and NAs are excluded from the mask so the
    # subassignment cannot fail on them.
    regrouped <- !is.na(ids) & !is.na(grouped) &
      as.character(ids) != as.character(grouped)
    short[regrouped] <- grouped_name[regrouped]
  }

  short
}
#' @rdname mo_property
#' @export
mo_type <- function(x, language = "en") {

View File

@ -33,7 +33,7 @@ guess_mo(x, Becker = FALSE, Lancefield = FALSE)
Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}.
}
\description{
Use this function to determine a valid ID based on a genus (and species). This input can be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
Use this function to determine a valid ID based on a genus (and species). Determination is done using Artificial Intelligence (AI), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, see Examples.
}
\details{
\code{guess_mo} is an alias of \code{as.mo}.
@ -44,6 +44,7 @@ Some exceptions have been built in to get more logical results, based on prevale
\itemize{
\item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
\item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae}}
\item{Something like \code{"s pyo"} will return the ID of \emph{Streptococcus pyogenes} and not \emph{Actinomyces pyogenes}}
\item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}}
\item{Something like \code{"stau"} or \code{"staaur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}}
}
@ -62,6 +63,10 @@ as.mo("MRSA") # Methicillin Resistant S. aureus
as.mo("VISA") # Vancomycin Intermediate S. aureus
as.mo("VRSA") # Vancomycin Resistant S. aureus
as.mo("Streptococcus group A")
as.mo("GAS") # Group A Streptococci
as.mo("GBS") # Group B Streptococci
# guess_mo is an alias of as.mo and works the same
guess_mo("S. epidermidis") # will remain species: STAEPI
guess_mo("S. epidermidis", Becker = TRUE) # will not remain species: STACNS

View File

@ -7,6 +7,7 @@
\alias{mo_species}
\alias{mo_subspecies}
\alias{mo_fullname}
\alias{mo_shortname}
\alias{mo_type}
\alias{mo_gramstain}
\alias{mo_aerobic}
@ -30,6 +31,8 @@ mo_subspecies(x, Becker = FALSE, Lancefield = FALSE)
mo_fullname(x, Becker = FALSE, Lancefield = FALSE)
mo_shortname(x, Becker = FALSE, Lancefield = FALSE)
mo_type(x, language = "en")
mo_gramstain(x, language = "en")
@ -61,6 +64,7 @@ mo_genus("E. coli") # "Escherichia"
mo_species("E. coli") # "coli"
mo_subspecies("E. coli") # <NA>
mo_fullname("E. coli") # "Escherichia coli"
mo_shortname("E. coli") # "E. coli"
mo_type("E. coli") # "Bacteria"
mo_gramstain("E. coli") # "Negative rods"
mo_aerobic("E. coli") # TRUE
@ -77,6 +81,7 @@ mo_gramstain("E. coli", "nl") # "Negatieve staven"
# Abbreviations known in the field
mo_genus("MRSA") # "Staphylococcus"
mo_species("MRSA") # "aureus"
mo_shortname("MRSA") # "S. aureus"
mo_gramstain("MRSA") # "Positive cocci"
mo_genus("VISA") # "Staphylococcus"
@ -88,12 +93,14 @@ mo_genus("EHEC") # "Escherichia"
mo_species("EHEC") # "coli"
mo_subspecies("EHEC") # "EHEC"
mo_fullname("EHEC") # "Escherichia coli (EHEC)"
mo_shortname("EHEC") # "E. coli"
mo_genus("doylei") # "Campylobacter"
mo_species("doylei") # "jejuni"
mo_fullname("doylei") # "Campylobacter jejuni (doylei)"
mo_fullname("K. pneu rh") # "Klebsiella pneumoniae (rhinoscleromatis)"
mo_shortname("K. pneu rh") # "K. pneumoniae"
# Anaerobic bacteria
@ -103,12 +110,16 @@ mo_aerobic("B. fragilis") # FALSE
# Becker classification, see ?as.mo
mo_fullname("S. epidermidis") # "Staphylococcus epidermidis"
mo_fullname("S. epidermidis", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)"
mo_fullname("S. epidermidis") # "Staphylococcus epidermidis"
mo_fullname("S. epidermidis", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)"
mo_shortname("S. epidermidis") # "S. epidermidis"
mo_shortname("S. epidermidis", Becker = TRUE) # "CoNS"
# Lancefield classification, see ?as.mo
mo_fullname("S. pyogenes") # "Streptococcus pyogenes"
mo_fullname("S. pyogenes", Lancefield = TRUE) # "Streptococcus group A"
mo_fullname("S. pyogenes") # "Streptococcus pyogenes"
mo_fullname("S. pyogenes", Lancefield = TRUE) # "Streptococcus group A"
mo_shortname("S. pyogenes") # "S. pyogenes"
mo_shortname("S. pyogenes", Lancefield = TRUE) # "GAS"
}
\seealso{
\code{\link{microorganisms}}

View File

@ -14,6 +14,8 @@ test_that("as.mo works", {
expect_equal(as.character(as.mo("K. pneu rhino")), "KLEPNERH") # K. pneumoniae subspp. rhinoscleromatis
expect_equal(as.character(as.mo("Bartonella")), "BAR")
expect_equal(as.character(as.mo("S. pyo")), "STCPYO") # not Actinomyces pyogenes
expect_equal(as.character(as.mo("P. aer")), "PSEAER") # not Pasteurella aerogenes
expect_equal(as.character(as.mo("Negative rods")), "GNR")

View File

@ -10,6 +10,12 @@ test_that("mo_property works", {
expect_equal(mo_gramstain("E. coli"), "Negative rods")
expect_equal(mo_aerobic("E. coli"), TRUE)
expect_equal(mo_shortname("MRSA"), "S. aureus")
expect_equal(mo_shortname("MRSA", Becker = TRUE), "S. aureus")
expect_equal(mo_shortname("MRSA", Becker = "all"), "CoPS")
expect_equal(mo_shortname("S. aga"), "S. agalactiae")
expect_equal(mo_shortname("S. aga", Lancefield = TRUE), "GBS")
expect_equal(mo_type("E. coli", language = "de"), "Bakterien")
expect_equal(mo_gramstain("E. coli", language = "de"), "Negative Staebchen")