as.mo improvement

2025-07-09 22:22:03 +02:00 · 2019-02-26 12:33:26 +01:00
parent f25a4b3679
commit f9ded23023
67 changed files with 391 additions and 342 deletions
--- a/R/catalogue_of_life.R
+++ b/R/catalogue_of_life.R
@ -71,3 +71,24 @@
 #' mo_fullname("C. elegans")
 #' # [1] "Chroococcus limneticus elegans"  # Because a microorganism was found
 NULL
+
+#' Version info of included Catalogue of Life
+#'
+#' This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year.
+#' @seealso \code{\link{microorganisms}}
+#' @inheritSection catalogue_of_life Catalogue of Life
+#' @inheritSection AMR Read more on our website!
+#' @export
+#' @examples
+#' library(dplyr)
+#' microorganisms %>% freq(kingdom)
+#' microorganisms %>% group_by(kingdom) %>% freq(phylum, nmax = NULL)
+catalogue_of_life_version <- function() {
+  # see the `catalogue_of_life` list in R/data.R
+  list(version = catalogue_of_life$version,
+       url = catalogue_of_life$url,
+       # annual release always somewhere in March
+       is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")),
+       n_species = nrow(AMR::microorganisms),
+       n_synonyms = nrow(AMR::microorganisms.old))
+}
--- a/R/data.R
+++ b/R/data.R
@ -168,20 +168,6 @@ catalogue_of_life <- list(
  url = "http://www.catalogueoflife.org/annual-checklist/2018"
 )

-#' Version info of included Catalogue of Life
-#' @seealso \code{\link{microorganisms}}
-#' @inheritSection catalogue_of_life Catalogue of Life
-#' @inheritSection AMR Read more on our website!
-#' @export
-catalogue_of_life_version <- function() {
-  list(version = catalogue_of_life$version,
-       url = catalogue_of_life$url,
-       # annual release always somewhere in March
-       is_latest_annual_release = Sys.Date() < as.Date(paste0(catalogue_of_life$year + 1, "-04-01")),
-       no_of_species = nrow(AMR::microorganisms),
-       no_of_synonyms = nrow(AMR::microorganisms.old))
-}
-
 #' Data set with previously accepted taxonomic names
 #'
 #' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by \code{\link{as.mo}}.
--- a/R/mo.R
+++ b/R/mo.R
@ -166,7 +166,12 @@
 #'   mutate(mo = as.mo(paste(genus, species)))
 #' }
 as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, reference_df = get_mo_source()) {
-  if (all(x %in% AMR::microorganisms$fullname)
+  if (all(x %in% AMR::microorganisms$mo)
+      & isFALSE(Becker)
+      & isFALSE(Lancefield)
+      & is.null(reference_df)) {
+    y <- x
+  } else if (all(x %in% AMR::microorganisms$fullname)
      & isFALSE(Becker)
      & isFALSE(Lancefield)
      & is.null(reference_df)) {
@ -179,12 +184,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
    if (any(is.na(y))) {
      y[is.na(y)] <- microorganismsDT[prevalence == 3][data.table(fullname = x[is.na(y)]), on = "fullname", "mo"][[1]]
    }
-    return(y)
+  } else {
+    # will be checked for mo class in validation and uses exec_as.mo internally if necessary
+    y <- mo_validate(x = x, property = "mo",
+                     Becker = Becker, Lancefield = Lancefield,
+                     allow_uncertain = allow_uncertain, reference_df = reference_df)
  }
-  # will be checked for mo class in validation
-  mo_validate(x = x, property = "mo",
-              Becker = Becker, Lancefield = Lancefield,
-              allow_uncertain = allow_uncertain, reference_df = reference_df)
+  structure(.Data = y, class = "mo")
 }

 #' @rdname as.mo
@ -891,6 +897,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
                         stringsAsFactors = FALSE)
  df_input <- data.frame(input = as.character(x_input),
                         stringsAsFactors = FALSE)
+
  x <- df_input %>%
    left_join(df_found,
              by = "input") %>%
--- a/R/mo_property.R
+++ b/R/mo_property.R
@ -132,7 +132,7 @@
 #' mo_taxonomy("E. coli")
 mo_fullname <- function(x, language = get_locale(), ...) {
  x <- mo_validate(x = x, property = "fullname", ...)
-  mo_translate(x, language = language)
+  translate(x, language = language)
 }

 #' @rdname mo_property
@ -148,46 +148,64 @@ mo_shortname <- function(x, language = get_locale(), ...) {
  if (is.null(Lancefield)) {
    Lancefield <- FALSE
  }
-  if (Becker %in% c(TRUE, "all") | Lancefield == TRUE) {
-    res1 <- AMR::as.mo(x, Becker = FALSE, Lancefield = FALSE, reference_df = dots$reference_df)
-    res2 <- suppressWarnings(AMR::as.mo(res1, ...))
-    res2_fullname <- mo_fullname(res2, language = language)
-    res2_fullname[res2_fullname %like% " \\(CoNS\\)"] <- "CoNS"
-    res2_fullname[res2_fullname %like% " \\(CoPS\\)"] <- "CoPS"
-    res2_fullname[res2_fullname %like% " \\(KNS\\)"] <- "KNS"
-    res2_fullname[res2_fullname %like% " \\(KPS\\)"] <- "KPS"
-    res2_fullname[res2_fullname %like% " \\(CNS\\)"] <- "CNS"
-    res2_fullname[res2_fullname %like% " \\(CPS\\)"] <- "CPS"
-    res2_fullname <- gsub("Streptococcus (group|Gruppe|gruppe|groep|grupo|gruppo|groupe) (.)",
-                          "G\\2S",
-                          res2_fullname) # turn "Streptococcus group A" and "Streptococcus grupo A" to "GAS"
-    res2_fullname_vector <- res2_fullname[res2_fullname == mo_fullname(res1)]
-    res2_fullname[res2_fullname == mo_fullname(res1)] <- paste0(substr(mo_genus(res2_fullname_vector), 1, 1),
-                                                                ". ",
-                                                                suppressWarnings(mo_species(res2_fullname_vector)))
-    if (sum(res1 == res2, na.rm = TRUE) > 0) {
-      res1[res1 == res2] <- paste0(substr(mo_genus(res1[res1 == res2]), 1, 1),
-                                   ". ",
-                                   suppressWarnings(mo_species(res1[res1 == res2])))
-    }
-    res1[res1 != res2] <- res2_fullname
-    result <- as.character(res1)
-  } else {
-    x <- AMR::as.mo(x, ...)
-    suppressWarnings(
-      result <- data.frame(mo = x) %>%
-        left_join(AMR::microorganisms, by = "mo") %>%
-        mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>%
-        pull(shortname)
-    )
+
+  shorten <- function(x) {
+    # easiest: no transformations needed
+    x <- mo_fullname(x, language = "en")
+    # shorten for the ones that have a space: shorten first word and write out second word
+    shorten_these <- x %like% " " & !x %like% "Streptococcus group "
+    x[shorten_these] <- paste0(substr(x[shorten_these], 1, 1),
+                              ". ",
+                              x[shorten_these] %>%
+                                strsplit(" ", fixed = TRUE) %>%
+                                unlist() %>%
+                                .[2])
+    x
  }
-  mo_translate(result, language = language)
+
+  if (isFALSE(Becker) & isFALSE(Lancefield)) {
+    result <- shorten(x)
+
+  } else {
+    # get result without transformations
+    res1 <- AMR::as.mo(x, Becker = FALSE, Lancefield = FALSE, reference_df = dots$reference_df)
+    # and result with transformations
+    res2 <- suppressWarnings(AMR::as.mo(res1, ...))
+    if (res1 == res2
+        & !res1 %like% "^B_STRPT_GR") {
+      result <- shorten(x)
+    } else {
+      res2_fullname <- mo_fullname(res2, language = language)
+      res2_fullname[res2_fullname %like% " \\(CoNS\\)"] <- "CoNS"
+      res2_fullname[res2_fullname %like% " \\(CoPS\\)"] <- "CoPS"
+      res2_fullname[res2_fullname %like% " \\(KNS\\)"] <- "KNS"
+      res2_fullname[res2_fullname %like% " \\(KPS\\)"] <- "KPS"
+      res2_fullname[res2_fullname %like% " \\(CNS\\)"] <- "CNS"
+      res2_fullname[res2_fullname %like% " \\(CPS\\)"] <- "CPS"
+      res2_fullname <- gsub("Streptococcus (group|Gruppe|gruppe|groep|grupo|gruppo|groupe) (.)",
+                            "G\\2S",
+                            res2_fullname) # turn "Streptococcus group A" and "Streptococcus grupo A" to "GAS"
+      res2_fullname_vector <- res2_fullname[res2_fullname == mo_fullname(res1)]
+      res2_fullname[res2_fullname == mo_fullname(res1)] <- paste0(substr(mo_genus(res2_fullname_vector), 1, 1),
+                                                                  ". ",
+                                                                  suppressWarnings(mo_species(res2_fullname_vector)))
+      if (sum(res1 == res2, na.rm = TRUE) > 0) {
+        res1[res1 == res2] <- paste0(substr(mo_genus(res1[res1 == res2]), 1, 1),
+                                     ". ",
+                                     suppressWarnings(mo_species(res1[res1 == res2])))
+      }
+      res1[res1 != res2] <- res2_fullname
+      result <- as.character(res1)
+    }
+  }
+
+  translate(result, language = language)
 }

 #' @rdname mo_property
 #' @export
 mo_subspecies <- function(x, language = get_locale(), ...) {
-  mo_translate(mo_validate(x = x, property = "subspecies", ...), language = language)
+  translate(validate(x = x, property = "subspecies", ...), language = language)
 }

 #' @rdname mo_property