diff --git a/NAMESPACE b/NAMESPACE index 80bfba22..8a5b116f 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -95,6 +95,7 @@ export(mo_fullname) export(mo_genus) export(mo_gramstain) export(mo_property) +export(mo_shortname) export(mo_species) export(mo_subspecies) export(mo_type) diff --git a/NEWS.md b/NEWS.md index aca760a5..eecbc601 100755 --- a/NEWS.md +++ b/NEWS.md @@ -10,7 +10,7 @@ * Column names of datasets `microorganisms` and `septic_patients` * All old syntaxes will still work with this version, but will throw warnings * Functions `as.atc` and `is.atc` to transform/look up antibiotic ATC codes as defined by the WHO. The existing function `guess_atc` is now an alias of `as.atc`. -* Aliases for existing function `mo_property`: `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`, `mo_fullname`, `mo_aerobic`, `mo_type`, `mo_gramstain`. The last two functions have a `language` parameter, with support for Spanish, German and Dutch: +* Aliases for existing function `mo_property`: `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`, `mo_fullname`, `mo_shortname`, `mo_aerobic`, `mo_type` and `mo_gramstain`. The last two functions have a `language` parameter, with support for Spanish, German and Dutch: ```r mo_gramstain("E. coli") # [1] "Negative rods" diff --git a/R/mo.R b/R/mo.R index 80e0a345..6cfd0561 100644 --- a/R/mo.R +++ b/R/mo.R @@ -18,7 +18,7 @@ #' Transform to microorganism ID #' -#' Use this function to determine a valid ID based on a genus (and species). This input can be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples. +#' Use this function to determine a valid ID based on a genus (and species). Determination is done using Artificial Intelligence (AI), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. 
aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, see Examples. #' @param x a character vector or a \code{data.frame} with one or two columns #' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. #' @@ -37,6 +37,7 @@ #' \itemize{ #' \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first} #' \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae}} +#' \item{Something like \code{"s pyo"} will return the ID of \emph{Streptococcus pyogenes} and not \emph{Actinomyces pyogenes}} #' \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}} #' \item{Something like \code{"stau"} or \code{"staaur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}} #' } @@ -62,6 +63,10 @@ #' as.mo("VISA") # Vancomycin Intermediate S. aureus #' as.mo("VRSA") # Vancomycin Resistant S. aureus #' +#' as.mo("Streptococcus group A") +#' as.mo("GAS") # Group A Streptococci +#' as.mo("GBS") # Group B Streptococci +#' #' # guess_mo is an alias of as.mo and works the same #' guess_mo("S. epidermidis") # will remain species: STAEPI #' guess_mo("S. 
epidermidis", Becker = TRUE) # will not remain species: STACNS @@ -172,6 +177,11 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { x[i] <- 'STAAUR' next } + if (tolower(x[i]) == '^s.*pyo$') { + # avoid detection of Actinomyces pyogenes in case of Streptococcus pyogenes + x[i] <- 'STCPYO' + next + } if (tolower(x[i]) == '^p.*aer$') { # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa x[i] <- 'PSEAER' @@ -192,7 +202,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { next } - # translate known trivial names to genus+species + # translate known trivial abbreviations to genus+species if (!is.na(x_trimmed[i])) { if (toupper(x_trimmed[i]) == 'MRSA' | toupper(x_trimmed[i]) == 'VISA' @@ -218,6 +228,10 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { x[i] <- 'STCPNE' next } + if (toupper(x_trimmed[i]) %like% '^G[ABCDFHK]S$') { + x[i] <- gsub("G([ABCDFHK])S", "STCGR\\1", x_trimmed[i]) + next + } } # try any match keeping spaces diff --git a/R/mo_property.R b/R/mo_property.R index 52bfeb14..93dfc33d 100644 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -38,6 +38,7 @@ #' mo_species("E. coli") # "coli" #' mo_subspecies("E. coli") # #' mo_fullname("E. coli") # "Escherichia coli" +#' mo_shortname("E. coli") # "E. coli" #' mo_type("E. coli") # "Bacteria" #' mo_gramstain("E. coli") # "Negative rods" #' mo_aerobic("E. coli") # TRUE @@ -54,6 +55,7 @@ #' # Abbreviations known in the field #' mo_genus("MRSA") # "Staphylococcus" #' mo_species("MRSA") # "aureus" +#' mo_shortname("MRSA") # "S. aureus" #' mo_gramstain("MRSA") # "Positive cocci" #' #' mo_genus("VISA") # "Staphylococcus" @@ -65,12 +67,14 @@ #' mo_species("EHEC") # "coli" #' mo_subspecies("EHEC") # "EHEC" #' mo_fullname("EHEC") # "Escherichia coli (EHEC)" +#' mo_shortname("EHEC") # "E. coli" #' #' mo_genus("doylei") # "Campylobacter" #' mo_species("doylei") # "jejuni" #' mo_fullname("doylei") # "Campylobacter jejuni (doylei)" #' #' mo_fullname("K. 
pneu rh") # "Klebsiella pneumoniae (rhinoscleromatis)" +#' mo_shortname("K. pneu rh") # "K. pneumoniae" #' #' #' # Anaerobic bacteria @@ -80,12 +84,16 @@ #' #' #' # Becker classification, see ?as.mo -#' mo_fullname("S. epidermidis") # "Staphylococcus epidermidis" -#' mo_fullname("S. epidermidis", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)" +#' mo_fullname("S. epidermidis") # "Staphylococcus epidermidis" +#' mo_fullname("S. epidermidis", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)" +#' mo_shortname("S. epidermidis") # "S. epidermidis" +#' mo_shortname("S. epidermidis", Becker = TRUE) # "CoNS" #' #' # Lancefield classification, see ?as.mo -#' mo_fullname("S. pyogenes") # "Streptococcus pyogenes" -#' mo_fullname("S. pyogenes", Lancefield = TRUE) # "Streptococcus group A" +#' mo_fullname("S. pyogenes") # "Streptococcus pyogenes" +#' mo_fullname("S. pyogenes", Lancefield = TRUE) # "Streptococcus group A" +#' mo_shortname("S. pyogenes") # "S. pyogenes" +#' mo_shortname("S. pyogenes", Lancefield = TRUE) # "GAS" mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = FALSE) { property <- tolower(property[1]) if (!property %in% colnames(microorganisms)) { @@ -129,6 +137,35 @@ mo_fullname <- function(x, Becker = FALSE, Lancefield = FALSE) { mo_property(x, "fullname", Becker = Becker, Lancefield = Lancefield) } +#' @rdname mo_property +#' @export +mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE) { + if (Becker %in% c(TRUE, "all") | Lancefield == TRUE) { + res1 <- as.mo(x) + res2 <- as.mo(x, Becker = Becker, Lancefield = Lancefield) + res2_fullname <- mo_fullname(res2) + res2_fullname[res2_fullname %like% "\\(CoNS\\)"] <- "CoNS" + res2_fullname[res2_fullname %like% "\\(CoPS\\)"] <- "CoPS" + res2_fullname <- gsub("Streptococcus group (.*)", + "G\\1S", + res2_fullname) # turn "Streptococcus group A" to "GAS" + res2_fullname[res2_fullname == mo_fullname(x)] <- paste0(substr(mo_genus(res2_fullname), 1, 1), + ". 
", + mo_species(res2_fullname)) + if (sum(res1 == res2, na.rm = TRUE) > 0) { + res1[res1 == res2] <- paste0(substr(mo_genus(res1[res1 == res2]), 1, 1), + ". ", + mo_species(res1[res1 == res2])) + } + res1[res1 != res2] <- res2_fullname + as.character(res1) + } else { + # return G. species + paste0(substr(mo_genus(x), 1, 1), ". ", mo_species(x)) + } +} + + #' @rdname mo_property #' @export mo_type <- function(x, language = "en") { diff --git a/man/as.mo.Rd b/man/as.mo.Rd index 8a10f8e3..7545f250 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -33,7 +33,7 @@ guess_mo(x, Becker = FALSE, Lancefield = FALSE) Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}. } \description{ -Use this function to determine a valid ID based on a genus (and species). This input can be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples. +Use this function to determine a valid ID based on a genus (and species). Determination is done using Artificial Intelligence (AI), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples. } \details{ \code{guess_mo} is an alias of \code{as.mo}. @@ -44,6 +44,7 @@ Some exceptions have been built in to get more logical results, based on prevale \itemize{ \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first} \item{\code{"H. 
influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae}} + \item{Something like \code{"s pyo"} will return the ID of \emph{Streptococcus pyogenes} and not \emph{Actinomyces pyogenes}} \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}} \item{Something like \code{"stau"} or \code{"staaur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}} } @@ -62,6 +63,10 @@ as.mo("MRSA") # Methicillin Resistant S. aureus as.mo("VISA") # Vancomycin Intermediate S. aureus as.mo("VRSA") # Vancomycin Resistant S. aureus +as.mo("Streptococcus group A") +as.mo("GAS") # Group A Streptococci +as.mo("GBS") # Group B Streptococci + # guess_mo is an alias of as.mo and works the same guess_mo("S. epidermidis") # will remain species: STAEPI guess_mo("S. epidermidis", Becker = TRUE) # will not remain species: STACNS diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 76574879..90c04468 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -7,6 +7,7 @@ \alias{mo_species} \alias{mo_subspecies} \alias{mo_fullname} +\alias{mo_shortname} \alias{mo_type} \alias{mo_gramstain} \alias{mo_aerobic} @@ -30,6 +31,8 @@ mo_subspecies(x, Becker = FALSE, Lancefield = FALSE) mo_fullname(x, Becker = FALSE, Lancefield = FALSE) +mo_shortname(x, Becker = FALSE, Lancefield = FALSE) + mo_type(x, language = "en") mo_gramstain(x, language = "en") @@ -61,6 +64,7 @@ mo_genus("E. coli") # "Escherichia" mo_species("E. coli") # "coli" mo_subspecies("E. coli") # mo_fullname("E. coli") # "Escherichia coli" +mo_shortname("E. coli") # "E. coli" mo_type("E. coli") # "Bacteria" mo_gramstain("E. coli") # "Negative rods" mo_aerobic("E. coli") # TRUE @@ -77,6 +81,7 @@ mo_gramstain("E. coli", "nl") # "Negatieve staven" # Abbreviations known in the field mo_genus("MRSA") # "Staphylococcus" mo_species("MRSA") # "aureus" +mo_shortname("MRSA") # "S. 
aureus" mo_gramstain("MRSA") # "Positive cocci" mo_genus("VISA") # "Staphylococcus" @@ -88,12 +93,14 @@ mo_genus("EHEC") # "Escherichia" mo_species("EHEC") # "coli" mo_subspecies("EHEC") # "EHEC" mo_fullname("EHEC") # "Escherichia coli (EHEC)" +mo_shortname("EHEC") # "E. coli" mo_genus("doylei") # "Campylobacter" mo_species("doylei") # "jejuni" mo_fullname("doylei") # "Campylobacter jejuni (doylei)" mo_fullname("K. pneu rh") # "Klebsiella pneumoniae (rhinoscleromatis)" +mo_shortname("K. pneu rh") # "K. pneumoniae" # Anaerobic bacteria @@ -103,12 +110,16 @@ mo_aerobic("B. fragilis") # FALSE # Becker classification, see ?as.mo -mo_fullname("S. epidermidis") # "Staphylococcus epidermidis" -mo_fullname("S. epidermidis", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)" +mo_fullname("S. epidermidis") # "Staphylococcus epidermidis" +mo_fullname("S. epidermidis", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)" +mo_shortname("S. epidermidis") # "S. epidermidis" +mo_shortname("S. epidermidis", Becker = TRUE) # "CoNS" # Lancefield classification, see ?as.mo -mo_fullname("S. pyogenes") # "Streptococcus pyogenes" -mo_fullname("S. pyogenes", Lancefield = TRUE) # "Streptococcus group A" +mo_fullname("S. pyogenes") # "Streptococcus pyogenes" +mo_fullname("S. pyogenes", Lancefield = TRUE) # "Streptococcus group A" +mo_shortname("S. pyogenes") # "S. pyogenes" +mo_shortname("S. pyogenes", Lancefield = TRUE) # "GAS" } \seealso{ \code{\link{microorganisms}} diff --git a/tests/testthat/test-mo.R b/tests/testthat/test-mo.R index afbe1237..50cc4157 100644 --- a/tests/testthat/test-mo.R +++ b/tests/testthat/test-mo.R @@ -14,6 +14,8 @@ test_that("as.mo works", { expect_equal(as.character(as.mo("K. pneu rhino")), "KLEPNERH") # K. pneumoniae subspp. rhinoscleromatis expect_equal(as.character(as.mo("Bartonella")), "BAR") + expect_equal(as.character(as.mo("S. pyo")), "STCPYO") # not Actinomyces pyogenes + expect_equal(as.character(as.mo("P. 
aer")), "PSEAER") # not Pasteurella aerogenes expect_equal(as.character(as.mo("Negative rods")), "GNR") diff --git a/tests/testthat/test-mo_property.R b/tests/testthat/test-mo_property.R index 782f6aef..d17a9ac6 100644 --- a/tests/testthat/test-mo_property.R +++ b/tests/testthat/test-mo_property.R @@ -10,6 +10,12 @@ test_that("mo_property works", { expect_equal(mo_gramstain("E. coli"), "Negative rods") expect_equal(mo_aerobic("E. coli"), TRUE) + expect_equal(mo_shortname("MRSA"), "S. aureus") + expect_equal(mo_shortname("MRSA", Becker = TRUE), "S. aureus") + expect_equal(mo_shortname("MRSA", Becker = "all"), "CoPS") + expect_equal(mo_shortname("S. aga"), "S. agalactiae") + expect_equal(mo_shortname("S. aga", Lancefield = TRUE), "GBS") + expect_equal(mo_type("E. coli", language = "de"), "Bakterien") expect_equal(mo_gramstain("E. coli", language = "de"), "Negative Staebchen")