added mo_shortname

2026-02-27 22:59:58 +01:00 · 2018-09-05 10:51:46 +02:00
parent 790bd1622d
commit e39a9a8b05
8 changed files with 88 additions and 12 deletions
--- a/R/mo.R
+++ b/R/mo.R
@@ -18,7 +18,7 @@

 #' Transform to microorganism ID
 #'
-#' Use this function to determine a valid ID based on a genus (and species). This input can be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
+#' Use this function to determine a valid ID based on a genus (and species). Determination is done using Artificial Intelligence (AI), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, see Examples.
 #' @param x a character vector or a \code{data.frame} with one or two columns
 #' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1].
 #'
@@ -37,6 +37,7 @@
 #' \itemize{
 #'   \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
 #'   \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae}}
+#'   \item{Something like \code{"s pyo"} will return the ID of \emph{Streptococcus pyogenes} and not \emph{Actinomyces pyogenes}}
 #'   \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}}
 #'   \item{Something like \code{"stau"} or \code{"staaur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}}
 #' }
@@ -62,6 +63,10 @@
 #' as.mo("VISA") # Vancomycin Intermediate S. aureus
 #' as.mo("VRSA") # Vancomycin Resistant S. aureus
 #'
+#' as.mo("Streptococcus group A")
+#' as.mo("GAS") # Group A Streptococci
+#' as.mo("GBS") # Group B Streptococci
+#'
 #' # guess_mo is an alias of as.mo and works the same
 #' guess_mo("S. epidermidis")                 # will remain species: STAEPI
 #' guess_mo("S. epidermidis", Becker = TRUE)  # will not remain species: STACNS
@@ -172,6 +177,11 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
      x[i] <- 'STAAUR'
      next
    }
+    if (tolower(x[i]) == '^s.*pyo$') {
+      # avoid detection of Actinomyces pyogenes in case of Streptococcus pyogenes
+      x[i] <- 'STCPYO'
+      next
+    }
    if (tolower(x[i]) == '^p.*aer$') {
      # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
      x[i] <- 'PSEAER'
@@ -192,7 +202,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
      next
    }

-    # translate known trivial names to genus+species
+    # translate known trivial abbreviations to genus+species
    if (!is.na(x_trimmed[i])) {
      if (toupper(x_trimmed[i]) == 'MRSA'
          | toupper(x_trimmed[i]) == 'VISA'
@@ -218,6 +228,10 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
        x[i] <- 'STCPNE'
        next
      }
+      if (toupper(x_trimmed[i]) %like% '^G[ABCDFHK]S$') {
+        x[i] <- gsub("G([ABCDFHK])S", "STCGR\\1", x_trimmed[i])
+        next
+      }
    }

    # try any match keeping spaces
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -38,6 +38,7 @@
 #' mo_species("E. coli")         # "coli"
 #' mo_subspecies("E. coli")      # <NA>
 #' mo_fullname("E. coli")        # "Escherichia coli"
+#' mo_shortname("E. coli")       # "E. coli"
 #' mo_type("E. coli")            # "Bacteria"
 #' mo_gramstain("E. coli")       # "Negative rods"
 #' mo_aerobic("E. coli")         # TRUE
@@ -54,6 +55,7 @@
 #' # Abbreviations known in the field
 #' mo_genus("MRSA")              # "Staphylococcus"
 #' mo_species("MRSA")            # "aureus"
+#' mo_shortname("MRSA")          # "S. aureus"
 #' mo_gramstain("MRSA")          # "Positive cocci"
 #'
 #' mo_genus("VISA")              # "Staphylococcus"
@@ -65,12 +67,14 @@
 #' mo_species("EHEC")            # "coli"
 #' mo_subspecies("EHEC")         # "EHEC"
 #' mo_fullname("EHEC")           # "Escherichia coli (EHEC)"
+#' mo_shortname("EHEC")          # "E. coli"
 #'
 #' mo_genus("doylei")            # "Campylobacter"
 #' mo_species("doylei")          # "jejuni"
 #' mo_fullname("doylei")         # "Campylobacter jejuni (doylei)"
 #'
 #' mo_fullname("K. pneu rh")     # "Klebsiella pneumoniae (rhinoscleromatis)"
+#' mo_shortname("K. pneu rh")    # "K. pneumoniae"
 #'
 #'
 #' # Anaerobic bacteria
@@ -80,12 +84,16 @@
 #'
 #'
 #' # Becker classification, see ?as.mo
-#' mo_fullname("S. epidermidis")                 # "Staphylococcus epidermidis"
-#' mo_fullname("S. epidermidis", Becker = TRUE)  # "Coagulase Negative Staphylococcus (CoNS)"
+#' mo_fullname("S. epidermidis")                  # "Staphylococcus epidermidis"
+#' mo_fullname("S. epidermidis", Becker = TRUE)   # "Coagulase Negative Staphylococcus (CoNS)"
+#' mo_shortname("S. epidermidis")                 # "S. epidermidis"
+#' mo_shortname("S. epidermidis", Becker = TRUE)  # "CoNS"
 #'
 #' # Lancefield classification, see ?as.mo
-#' mo_fullname("S. pyogenes")                    # "Streptococcus pyogenes"
-#' mo_fullname("S. pyogenes", Lancefield = TRUE) # "Streptococcus group A"
+#' mo_fullname("S. pyogenes")                     # "Streptococcus pyogenes"
+#' mo_fullname("S. pyogenes", Lancefield = TRUE)  # "Streptococcus group A"
+#' mo_shortname("S. pyogenes")                    # "S. pyogenes"
+#' mo_shortname("S. pyogenes", Lancefield = TRUE) # "GAS"
 mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = FALSE) {
  property <- tolower(property[1])
  if (!property %in% colnames(microorganisms)) {
@@ -129,6 +137,35 @@ mo_fullname <- function(x, Becker = FALSE, Lancefield = FALSE) {
  mo_property(x, "fullname", Becker = Becker, Lancefield = Lancefield)
 }

+#' @rdname mo_property
+#' @export
+mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE) {
+  # Short display name of a microorganism: "G. species" (first letter of the
+  # genus, a dot, and the species). When Becker and/or Lancefield grouping is
+  # requested, every element that as.mo() moves into such a group gets the
+  # group abbreviation instead: "CoNS", "CoPS", or e.g. "GAS" for
+  # "Streptococcus group A".
+  #
+  # x:          input passed on to as.mo() / mo_genus() / mo_species()
+  # Becker:     passed on to as.mo(); TRUE or "all" enables grouping here
+  # Lancefield: passed on to as.mo(); TRUE enables grouping here
+  abbreviate_name <- function(mo) {
+    paste0(substr(mo_genus(mo), 1, 1), ". ", mo_species(mo))
+  }
+
+  # scalar condition, so use the short-circuit operator
+  if (!(any(Becker %in% c(TRUE, "all")) || isTRUE(Lancefield))) {
+    return(abbreviate_name(x))
+  }
+
+  # resolve the IDs once without and once with the requested grouping
+  mo_plain <- as.mo(x)
+  mo_grouped <- as.mo(x, Becker = Becker, Lancefield = Lancefield)
+
+  # start from "G. species" everywhere, then overwrite regrouped elements only
+  shortname <- abbreviate_name(mo_plain)
+
+  # NA-safe mask: an NA in a logical subscript would break the assignment below
+  regrouped <- !is.na(mo_plain) & !is.na(mo_grouped) & mo_plain != mo_grouped
+  if (any(regrouped)) {
+    group_name <- mo_fullname(mo_grouped[regrouped])
+    group_name[group_name %like% "\\(CoNS\\)"] <- "CoNS"
+    group_name[group_name %like% "\\(CoPS\\)"] <- "CoPS"
+    # turn "Streptococcus group A" into "GAS"
+    group_name <- gsub("Streptococcus group (.*)", "G\\1S", group_name)
+    # both sides were subset with the same mask, so every name stays aligned
+    # with its input element (no recycling of a full-length replacement)
+    shortname[regrouped] <- group_name
+  }
+  shortname
+}
+
+
 #' @rdname mo_property
 #' @export
 mo_type <- function(x, language = "en") {