added mo_shortname

2025-07-09 15:01:51 +02:00 · 2018-09-05 10:51:46 +02:00
parent 790bd1622d
commit e39a9a8b05
8 changed files with 88 additions and 12 deletions
--- a/R/mo.R
+++ b/R/mo.R
@ -18,7 +18,7 @@

 #' Transform to microorganism ID
 #'
-#' Use this function to determine a valid ID based on a genus (and species). This input can be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
+#' Use this function to determine a valid ID based on a genus (and species). Determination is done using Artificial Intelligence (AI), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, see Examples.
 #' @param x a character vector or a \code{data.frame} with one or two columns
 #' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1].
 #'
@ -37,6 +37,7 @@
 #' \itemize{
 #'   \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
 #'   \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae}}
+#'   \item{Something like \code{"s pyo"} will return the ID of \emph{Streptococcus pyogenes} and not \emph{Actinomyces pyogenes}}
 #'   \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}}
 #'   \item{Something like \code{"stau"} or \code{"staaur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}}
 #' }
@ -62,6 +63,10 @@
 #' as.mo("VISA") # Vancomycin Intermediate S. aureus
 #' as.mo("VRSA") # Vancomycin Resistant S. aureus
 #'
+#' as.mo("Streptococcus group A")
+#' as.mo("GAS") # Group A Streptococci
+#' as.mo("GBS") # Group B Streptococci
+#'
 #' # guess_mo is an alias of as.mo and works the same
 #' guess_mo("S. epidermidis")                 # will remain species: STAEPI
 #' guess_mo("S. epidermidis", Becker = TRUE)  # will not remain species: STACNS
@ -172,6 +177,11 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
      x[i] <- 'STAAUR'
      next
    }
+    if (tolower(x[i]) == '^s.*pyo$') {
+      # avoid detection of Actinomyces pyogenes in case of Streptococcus pyogenes
+      x[i] <- 'STCPYO'
+      next
+    }
    if (tolower(x[i]) == '^p.*aer$') {
      # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
      x[i] <- 'PSEAER'
@ -192,7 +202,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
      next
    }

-    # translate known trivial names to genus+species
+    # translate known trivial abbreviations to genus+species
    if (!is.na(x_trimmed[i])) {
      if (toupper(x_trimmed[i]) == 'MRSA'
          | toupper(x_trimmed[i]) == 'VISA'
@ -218,6 +228,10 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
        x[i] <- 'STCPNE'
        next
      }
+      if (toupper(x_trimmed[i]) %like% '^G[ABCDFHK]S$') {
+        x[i] <- gsub("G([ABCDFHK])S", "STCGR\\1", x_trimmed[i])
+        next
+      }
    }

    # try any match keeping spaces