(v0.9.0.9016) Support SNOMED codes

2025-07-08 07:51:57 +02:00 · 2020-01-27 19:14:23 +01:00
parent 42b079cdb7
commit a13c62e6e8
31 changed files with 456 additions and 342 deletions
--- a/R/data.R
+++ b/R/data.R
@ -69,7 +69,7 @@
 #'
 #' A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using [as.mo()].
 #' @inheritSection catalogue_of_life Catalogue of Life
-#' @format A [`data.frame`] with 69,447 observations and 16 variables:
+#' @format A [`data.frame`] with 69,447 observations and 17 variables:
 #' - `mo`\cr ID of microorganism as used by this package
 #' - `col_id`\cr Catalogue of Life ID
 #' - `fullname`\cr Full name, like `"Escherichia coli"`
@ -79,6 +79,7 @@
 #' - `species_id`\cr ID of the species as used by the Catalogue of Life
 #' - `source`\cr Either "CoL", "DSMZ" (see Source) or "manually added"
 #' - `prevalence`\cr Prevalence of the microorganism, see [as.mo()]
+#' - `snomed`\cr SNOMED code of the microorganism. Use [mo_snomed()] to retrieve it quickly, see [mo_property()].
 #' @details Manually added were:
 #' - 11 entries of *Streptococcus* (beta-haemolytic: groups A, B, C, D, F, G, H, K and unspecified; other: viridans, milleri)
 #' - 2 entries of *Staphylococcus* (coagulase-negative (CoNS) and coagulase-positive (CoPS))
@ -145,7 +146,7 @@ catalogue_of_life <- list(
 #' - `gender`\cr gender of the patient
 #' - `patient_id`\cr ID of the patient
 #' - `mo`\cr ID of microorganism created with [as.mo()], see also [microorganisms]
-#' - `PEN:RIF`\cr 40 different antibiotics with class [`rsi`] (see [as.rsi()]); these column names occur in [antibiotics] data set and can be translated with [ab_name()]
+#' - `PEN:RIF`\cr 40 different antibiotics with class [`rsi`] (see [as.rsi()]); these column names occur in the [antibiotics] data set and can be translated with [ab_name()]
 #' @inheritSection AMR Read more on our website!
 "example_isolates"

@ -182,9 +183,9 @@ catalogue_of_life <- list(
 #' @inheritSection AMR Read more on our website!
 "WHONET"

-#' Data set for RSI interpretation
+#' Data set for R/SI interpretation
 #'
-#' Data set to interpret MIC and disk diffusion to RSI values. Included guidelines are CLSI (2011-2019) and EUCAST (2011-2019). Use [as.rsi()] to transform MICs or disks measurements to RSI values.
+#' Data set to interpret MIC and disk diffusion to R/SI values. Included guidelines are CLSI (2011-2019) and EUCAST (2011-2019). Use [as.rsi()] to transform MICs or disks measurements to R/SI values.
 #' @format A [`data.frame`] with 13,975 observations and 9 variables:
 #' - `guideline`\cr Name of the guideline
 #' - `method`\cr Either "MIC" or "DISK"
@ -195,32 +196,7 @@ catalogue_of_life <- list(
 #' - `disk_dose`\cr Dose of the used disk diffusion method
 #' - `breakpoint_S`\cr Lowest MIC value or highest number of millimeters that leads to "S"
 #' - `breakpoint_R`\cr Highest MIC value or lowest number of millimeters that leads to "R"
+#' @details The repository of this `AMR` package contains a file comprising this exact data set: [https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt]. This file **allows for machine reading EUCAST and CLSI guidelines**, which is almost impossible with the Excel and PDF files distributed by EUCAST and CLSI. This file is updated automatically.
+
 #' @inheritSection AMR Read more on our website!
 "rsi_translation"
-
-# transforms data set to data.frame with only ASCII values, to comply with CRAN policies
-dataset_UTF8_to_ASCII <- function(df) {
-  trans <- function(vect) {
-    iconv(vect, from = "UTF-8", to = "ASCII//TRANSLIT")
-  }
-  df <- as.data.frame(df, stringsAsFactors = FALSE)
-  for (i in seq_len(NCOL(df))) {
-    col <- df[, i]
-    if (is.list(col)) {
-      for (j in seq_len(length(col))) {
-        col[[j]] <- trans(col[[j]])
-      }
-      df[, i] <- list(col)
-    } else {
-      if (is.factor(col)) {
-        levels(col) <- trans(levels(col))
-      } else if (is.character(col)) {
-        col <- trans(col)
-      } else {
-        col
-      }
-      df[, i] <- col
-    }
-  }
-  df
-}
--- a/R/misc.R
+++ b/R/misc.R
@ -144,3 +144,31 @@ class_integrity_check <- function(value, type, check_vector) {
  }
  value
 }
+
+# transforms data set to data.frame with only ASCII values, to comply with CRAN policies
+dataset_UTF8_to_ASCII <- function(df) {
+  trans <- function(vect) {
+    iconv(vect, from = "UTF-8", to = "ASCII//TRANSLIT")
+  }
+  df <- as.data.frame(df, stringsAsFactors = FALSE)
+  for (i in seq_len(NCOL(df))) {
+    col <- df[, i]
+    if (is.list(col)) {
+      col <- lapply(col, function(j) trans(j))
+      # for (j in seq_len(length(col))) {
+      #   col[[j]] <- trans(col[[j]])
+      # }
+      df[, i] <- list(col)
+    } else {
+      if (is.factor(col)) {
+        levels(col) <- trans(levels(col))
+      } else if (is.character(col)) {
+        col <- trans(col)
+      } else {
+        col
+      }
+      df[, i] <- col
+    }
+  }
+  df
+}
--- a/R/mo.R
+++ b/R/mo.R
@ -132,11 +132,12 @@
 #' as.mo("Staphylococcus aureus")
 #' as.mo("Staphylococcus aureus (MRSA)")
 #' as.mo("Zthafilokkoockus oureuz") # handles incorrect spelling
-#' as.mo("MRSA")   # Methicillin Resistant S. aureus
-#' as.mo("VISA")   # Vancomycin Intermediate S. aureus
-#' as.mo("VRSA")   # Vancomycin Resistant S. aureus
-#' as.mo(22242419) # Catalogue of Life ID
-#'
+#' as.mo("MRSA")    # Methicillin Resistant S. aureus
+#' as.mo("VISA")    # Vancomycin Intermediate S. aureus
+#' as.mo("VRSA")    # Vancomycin Resistant S. aureus
+#' as.mo(22242419)  # Catalogue of Life ID
+#' as.mo(115329001) # SNOMED CT code
+#' 
 #' # Dyslexia is no problem - these all work:
 #' as.mo("Ureaplasma urealyticum")
 #' as.mo("Ureaplasma urealyticus")
@ -567,11 +568,29 @@ exec_as.mo <- function(x,
      cat(paste0(blue("x_trimmed_without_group"), ' "', x_trimmed_without_group, '"\n'))
    }
    
-    progress <- progress_estimated(n = length(x), min_time = 3)
-    
+    if (initial_search == TRUE) {
+      progress <- progress_estimated(n = length(x), min_time = 3)
+      
+      # before we start, omit the ones that are obvious - MO codes and full names
+      skip_vect <- rep(FALSE, length(x))
+      skip_vect[toupper(x_backup) %in% reference_data_to_use$mo] <- TRUE
+      skip_vect[tolower(x_backup) %in% reference_data_to_use$fullname_lower] <- TRUE
+      x[toupper(x_backup) %in% reference_data_to_use$mo] <- reference_data_to_use[data.table(mo = toupper(x_backup[toupper(x_backup) %in% reference_data_to_use$mo])),
+                                                                                  on = "mo",
+                                                                                  ..property][[1]]
+      x[tolower(x_backup) %in% reference_data_to_use$fullname_lower] <- reference_data_to_use[data.table(fullname_lower = tolower(x_backup[tolower(x_backup) %in% reference_data_to_use$fullname_lower])),
+                                                                                              on = "fullname_lower",
+                                                                                              ..property][[1]]
+    }
+
    for (i in seq_len(length(x))) {
      
-      progress$tick()$print()
+      if (initial_search == TRUE) {
+        progress$tick()$print()
+        if (isTRUE(skip_vect[i])) {
+          next
+        }
+      }
      
      mo_hist <- get_mo_history(x_backup[i], uncertainty_level, force = force_mo_history, disable = disable_mo_history)
      if (initial_search == TRUE & !any(is.na(mo_hist))) {
@ -633,10 +652,27 @@ exec_as.mo <- function(x,
        next
      }
      
+      # exact SNOMED code
+      if (x_backup[i] %like% "^[0-9]+$") {
+        snomed_found <- unlist(lapply(reference_data_to_use$snomed,
+                                      function(s) if (x_backup[i] %in% s) {
+                                        TRUE
+                                      } else {
+                                        FALSE
+                                      }))
+        found <- reference_data_to_use[snomed_found == TRUE,
+                                       ..property][[1]]
+        if (length(found) > 0) {
+          x[i] <- found[1L]
+          # don't save to history, as all items are already in microorganisms
+          next
+        }
+      }
+      
+      # very probable: is G. species
      found <- reference_data_to_use[g_species %in% gsub("[^a-z0-9/ \\-]+", "", 
                                                         tolower(c(x_backup[i], x_backup_without_spp[i]))), 
                                     ..property][[1]]
-      # very probable: is G. species
      if (length(found) > 0) {
        x[i] <- found[1L]
        if (initial_search == TRUE) {
@ -1634,6 +1670,7 @@ exec_as.mo <- function(x,
      msg <- paste0(msg, ": ", paste('"', unique(failures), '"', sep = "", collapse = ", "))
    }
    msg <- paste0(msg,  ". Use mo_failures() to review ", plural[2], ". Edit the `allow_uncertain` parameter if needed (see ?as.mo).")
+    cat("\n")
    warning(red(msg),
            call. = FALSE,
            immediate. = TRUE) # thus will always be shown, even if >= warnings
@ -1646,8 +1683,9 @@ exec_as.mo <- function(x,
    if (NROW(uncertainties) > 1) {
      plural <- c("s", "them", "were")
    }
-    msg <- paste0("\nResult", plural[1], " of ", nr2char(NROW(uncertainties)), " value", plural[1],
+    msg <- paste0("Result", plural[1], " of ", nr2char(NROW(uncertainties)), " value", plural[1],
                  " ", plural[3], " guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".")
+    cat("\n")
    warning(red(msg),
            call. = FALSE,
            immediate. = TRUE) # thus will always be shown, even if >= warnings
@ -2036,6 +2074,14 @@ unregex <- function(x) {
 get_mo_code <- function(x, property) {
  if (property == "mo") {
    unique(x)
+  } else if (property == "snomed") {
+    found <- unlist(lapply(microorganismsDT$snomed,
+                           function(s) if (any(x %in% s, na.rm = TRUE)) {
+                             TRUE
+                           } else {
+                             FALSE
+                           }))
+    microorganismsDT$mo[found == TRUE]
  } else {
    microorganismsDT[get(property) == x, "mo"][[1]]
  }
--- a/R/mo_property.R
+++ b/R/mo_property.R
@ -45,6 +45,7 @@
 #' - An [`integer`] in case of [mo_year()]
 #' - A [`list`] in case of [mo_taxonomy()] and [mo_info()]
 #' - A named [`character`] in case of [mo_url()]
+#' - A [`double`] in case of [mo_snomed()]
 #' - A [`character`] in all other cases
 #' @export
 #' @seealso [microorganisms]
@ -62,11 +63,12 @@
 #'
 #' # colloquial properties ----------------------------------------------------
 #' mo_name("E. coli")            # "Escherichia coli"
-#' mo_fullname("E. coli")        # "Escherichia coli", same as mo_name()
+#' mo_fullname("E. coli")        # "Escherichia coli" - same as mo_name()
 #' mo_shortname("E. coli")       # "E. coli"
 #'
 #' # other properties ---------------------------------------------------------
 #' mo_gramstain("E. coli")       # "Gram-negative"
+#' mo_snomed("E. coli")          # 112283007, 116395006, ... (SNOMED codes)
 #' mo_type("E. coli")            # "Bacteria" (equal to kingdom, but may be translated)
 #' mo_rank("E. coli")            # "species"
 #' mo_url("E. coli")             # get the direct url to the online database entry
@ -249,6 +251,12 @@ mo_gramstain <- function(x, language = get_locale(), ...) {
  translate_AMR(x, language = language, only_unknown = FALSE)
 }

+#' @rdname mo_property
+#' @export
+mo_snomed <- function(x, ...) {
+  mo_validate(x = x, property = "snomed", ...)
+}
+
 #' @rdname mo_property
 #' @export
 mo_ref <- function(x, ...) {
@ -428,6 +436,8 @@ mo_validate <- function(x, property, ...) {
    return(to_class_mo(x))
  } else if (property == "col_id") {
    return(as.integer(x))
+  } else if (property == "snomed") {
+    return(as.double(eval(parse(text = x))))
  } else {
    return(x)
  }
--- a/R/zzz.R
+++ b/R/zzz.R
@ -21,8 +21,6 @@

 #' @importFrom data.table as.data.table setkey
 .onLoad <- function(libname, pkgname) {
-  # packageStartupMessage("Loading taxonomic reference data")
-  
  # get new functions not available in older versions of R
  backports::import(pkgname)