mo_source improvement

2025-07-08 21:22:01 +02:00 · 2019-03-01 09:34:04 +01:00
parent 2565b60024
commit c5efb272fd
21 changed files with 324 additions and 294 deletions
--- a/R/mo.R
+++ b/R/mo.R
@ -117,6 +117,7 @@
 #' @seealso \code{\link{microorganisms}} for the \code{data.frame} that is being used to determine IDs. \cr
 #' The \code{\link{mo_property}} functions (like \code{\link{mo_genus}}, \code{\link{mo_gramstain}}) to get properties based on the returned code.
 #' @inheritSection AMR Read more on our website!
+#' @importFrom dplyr %>% pull left_join
 #' @examples
 #' # These examples all return "B_STPHY_AUR", the ID of S. aureus:
 #' as.mo("stau")
@ -171,16 +172,28 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
    # check onLoad() in R/zzz.R: data tables are created there.
  }

-  if (all(x %in% AMR::microorganisms$mo)
+  if (deparse(substitute(reference_df)) == "get_mo_source()"
      & isFALSE(Becker)
      & isFALSE(Lancefield)
-      & is.null(reference_df)) {
+      & !is.null(reference_df)
+      & all(x %in% reference_df[,1])) {
+    # has valid own reference_df
+    # (data.table not faster here)
+    colnames(reference_df)[1] <- "x"
+    suppressWarnings(
+      y <- data.frame(x = x, stringsAsFactors = FALSE) %>%
+        left_join(reference_df, by = "x") %>%
+        pull("mo")
+    )
+
+  } else if (all(x %in% AMR::microorganisms$mo)
+      & isFALSE(Becker)
+      & isFALSE(Lancefield)) {
    y <- x

  } else if (all(tolower(x) %in% microorganismsDT$fullname_lower)
             & isFALSE(Becker)
-             & isFALSE(Lancefield)
-             & is.null(reference_df)) {
+             & isFALSE(Lancefield)) {
    # we need special treatment for very prevalent full names, they are likely! (case insensitive)
    # e.g. as.mo("Staphylococcus aureus")
    y <- microorganismsDT[prevalence == 1][data.table(fullname_lower = tolower(x)),
--- a/R/mo_source.R
+++ b/R/mo_source.R
@ -52,15 +52,19 @@
 #' # Created mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx'.
 #' }
 #'
-#' It has now created a file "~/.mo_source.rds" with the contents of our Excel file. It it an R specific format with great compression.
+#' It has now created a file "~/.mo_source.rds" with the contents of our Excel file. Only the first column (with the foreign values) and the 'mo' column are kept.
 #'
 #' And now we can use it in our functions:
 #' \preformatted{
 #' as.mo("lab_mo_ecoli")
-#' # B_ESCHR_COL
+#' [1] B_ESCHR_COL
 #'
 #' mo_genus("lab_mo_kpneumoniae")
-#' # "Klebsiella"
+#' [1] "Klebsiella"
+#'
+#' # other input values still work too
+#' as.mo(c("Escherichia coli", "E. coli", "lab_mo_ecoli"))
+#' [1] B_ESCHR_COL  B_ESCHR_COL  B_ESCHR_COL
 #' }
 #'
 #' If we edit the Excel file to, let's say, this:
@ -78,10 +82,10 @@
 #' \preformatted{
 #' as.mo("lab_mo_ecoli")
 #' # Updated mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx'.
-#' # B_ESCHR_COL
+#' [1] B_ESCHR_COL
 #'
 #' mo_genus("lab_Staph_aureus")
-#' # "Staphylococcus"
+#' [1] "Staphylococcus"
 #' }
 #'
 #' To remove the reference completely, just use any of these:
@ -119,7 +123,9 @@ set_mo_source <- function(path) {
      valid <- FALSE
    } else if (!"mo" %in% colnames(df)) {
      valid <- FALSE
-    } else if (!all(df$mo %in% AMR::microorganisms$mo)) {
+    } else if (all(as.data.frame(df)[, 1] == "")) {
+      valid <- FALSE
+    } else if (!all(df$mo %in% c("", AMR::microorganisms$mo))) {
      valid <- FALSE
    } else if (NCOL(df) < 2) {
      valid <- FALSE
@ -163,9 +169,11 @@ set_mo_source <- function(path) {
    stop("File must contain a column with self-defined values and a reference column `mo` with valid values from the `microorganisms` data set.")
  }

+  # keep only first two columns, second must be mo
  if (colnames(df)[1] == "mo") {
-    # put mo to the end
-    df <- df %>% select(-"mo", everything(), "mo")
+    df <- df[, c(2, 1)]
+  } else {
+    df <- df[, c(1, 2)]
  }

  df <- as.data.frame(df, stringAsFactors = FALSE)