WHONET/EARS-Net support

2025-07-13 04:42:09 +02:00 · 2019-01-29 00:06:50 +01:00
parent f6336fdd89
commit 6aae206320
83 changed files with 1428 additions and 633 deletions
--- a/R/atc.R
+++ b/R/atc.R
@ -60,10 +60,15 @@ as.atc <- function(x) {
  x[!x %like% "[A-Z][0-9]{2}[A-Z]{2}[0-9]{2}"] <- gsub("[^a-zA-Z]+", "", x[!x %like% "[A-Z][0-9]{2}[A-Z]{2}[0-9]{2}"])

  x.bak <- x
-  x <- unique(x[!is.na(x)])
+  x <- unique(x)
  failures <- character(0)

  for (i in 1:length(x)) {
+    if (is.na(x[i]) | is.null(x[i]) | identical(x[i], "")) {
+      x.new[i] <- x[i]
+      next
+    }
+
    fail <- TRUE

    # first try atc
@ -80,6 +85,13 @@ as.atc <- function(x) {
      x.new[is.na(x.new) & x.bak == x[i]] <- x[i]
    }

+    # try abbreviation of EARS-Net/WHONET
+    found <- AMR::antibiotics[which(tolower(AMR::antibiotics$ears_net) == tolower(x[i])),]$atc
+    if (length(found) > 0) {
+      fail <- FALSE
+      x.new[is.na(x.new) & x.bak == x[i]] <- found[1L]
+    }
+
    # try abbreviation of certe and glims
    found <- AMR::antibiotics[which(tolower(AMR::antibiotics$certe) == tolower(x[i])),]$atc
    if (length(found) > 0) {
--- a/R/data.R
+++ b/R/data.R
@ -22,9 +22,10 @@
 #' Data set with ~500 antibiotics
 #'
 #' A data set containing all antibiotics with a J0 code and some other antimicrobial agents, with their DDDs. Except for trade names and abbreviations, all properties were downloaded from the WHO, see Source.
-#' @format A \code{\link{data.frame}} with 488 observations and 16 variables:
+#' @format A \code{\link{data.frame}} with 488 observations and 17 variables:
 #' \describe{
-#'   \item{\code{atc}}{ATC code, like \code{J01CR02}}
+#'   \item{\code{atc}}{ATC code (Anatomical Therapeutic Chemical), like \code{J01CR02}}
+#'   \item{\code{ears_net}}{EARS-Net code (European Antimicrobial Resistance Surveillance Network), like \code{AMC}}
 #'   \item{\code{certe}}{Certe code, like \code{amcl}}
 #'   \item{\code{umcg}}{UMCG code, like \code{AMCL}}
 #'   \item{\code{abbr}}{Abbreviation as used by many countries, used internally by \code{\link{as.atc}}}
@ -43,6 +44,8 @@
 #' }
 #' @source - World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology: \url{https://www.whocc.no/atc_ddd_index/}
 #'
+#' Table antibiotic coding EARSS (from WHONET 5.3): \url{http://www.madsonline.dk/Tutorials/landskoder_antibiotika_WM.pdf}
+#'
 #' EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes Tables. Version 3.1, 2016: \url{http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf}
 #'
 #' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{http://ec.europa.eu/health/documents/community-register/html/atc.htm}
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@ -220,6 +220,14 @@ first_isolate <- function(tbl,
    col_keyantibiotics <- NULL
  }

+  # -- specimen
+  if (is.null(col_specimen)) {
+    col_specimen <- search_type_in_df(tbl = tbl, type = "specimen")
+  }
+  if (isFALSE(col_specimen)) {
+    col_specimen <- NULL
+  }
+
  # check if columns exist
  check_columns_existance <- function(column, tblname = tbl) {
    if (NROW(tblname) <= 1 | NCOL(tblname) <= 1) {
--- a/R/ggplot_rsi.R
+++ b/R/ggplot_rsi.R
@ -313,8 +313,8 @@ scale_y_percent <- function(breaks = seq(0, 1, 0.1), limits = NULL) {
 #' @rdname ggplot_rsi
 #' @export
 scale_rsi_colours <- function() {
-  ggplot2::scale_fill_brewer(palette = "RdYlGn")
-  #ggplot2::scale_fill_gradient2(low = "#d5613e", mid = "#ae5ac0", high = "#7daf44")
+  #ggplot2::scale_fill_brewer(palette = "RdYlGn")
+  ggplot2::scale_fill_manual(values = c("#b22222", "#ae9c20", "#7cfc00"))
 }

 #' @rdname ggplot_rsi
--- a/R/guess_ab_col.R
+++ b/R/guess_ab_col.R
@ -21,7 +21,7 @@

 #' Guess antibiotic column
 #'
-#' This tries to find a column name in a data set based on information from the \code{\link{antibiotics}} data set. You can look for an antibiotic (trade) name or abbreviation and it will search the \code{data.frame} for any column containing a name or ATC code of that antibiotic.
+#' This tries to find a column name in a data set based on information from the \code{\link{antibiotics}} data set. Also supports WHONET abbreviations. You can look for an antibiotic (trade) name or abbreviation and it will search the \code{data.frame} for any column containing a name or ATC code of that antibiotic.
 #' @param tbl a \code{data.frame}
 #' @param col a character to look for
 #' @param verbose a logical to indicate whether additional info should be printed
@ -40,6 +40,16 @@
 #' guess_ab_col(df, "J01AA07", verbose = TRUE)
 #' # using column `tetr` for col "J01AA07"
 #' # [1] "tetr"
+#'
+#' # WHONET codes
+#' df <- data.frame(AMP_ND10 = "R",
+#'                  AMC_ED20 = "S")
+#' guess_ab_col(df, "ampicillin")
+#' # [1] "AMP_ND10"
+#' guess_ab_col(df, "J01CR02")
+#' # [1] "AMC_ED20"
+#' guess_ab_col(df, as.atc("augmentin"))
+#' # [1] "AMC_ED20"
 guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) {
  if (is.null(tbl) & is.null(col)) {
    return(as.name("guess_ab_col"))
@ -54,6 +64,11 @@ guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) {
  }

  tbl_names <- colnames(tbl)
+  tbl_names_stripped <- colnames(tbl) %>%
+    strsplit("_") %>%
+    lapply(function(x) {x[1]}) %>%
+    unlist()
+
  if (col %in% tbl_names) {
    return(col)
  }
@ -70,6 +85,15 @@ guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) {
      filter_all(any_vars(. %in% tbl_names))
  }

+  # WHONET
+  if (nrow(ab_result) == 0) {
+    # use like when col >= 5 characters
+    ab_result <- antibiotics %>%
+      select(atc:trade_name) %>%
+      filter_all(any_vars(tolower(.) == tolower(col))) %>%
+      filter_all(any_vars(. %in% tbl_names_stripped))
+  }
+
  if (nrow(ab_result) > 1) {
    # looking more and more for reliable hit
    ab_result_1 <- ab_result %>% filter(tolower(atc) == tolower(col))
@ -95,6 +119,9 @@ guess_ab_col <- function(tbl = NULL, col = NULL, verbose = FALSE) {
    return(NULL)
  } else {
    result <- tbl_names[tbl_names %in% ab_result]
+    if (length(result) == 0) {
+      result <- tbl_names[tbl_names_stripped %in% ab_result]
+    }
    if (length(result) == 0) {
      if (verbose == TRUE) {
        message('no result found for col "', col, '"')
--- a/R/misc.R
+++ b/R/misc.R
@ -130,11 +130,18 @@ search_type_in_df <- function(tbl, type) {
  # try to find columns based on type
  found <- NULL

+  colnames(tbl) <- trimws(colnames(tbl))
+
  # -- mo
  if (type == "mo") {
    if ("mo" %in% lapply(tbl, class)) {
      found <- colnames(tbl)[lapply(tbl, class) == "mo"][1]
+    } else if (any(colnames(tbl) %like% "^(mo|microorganism|organism|bacteria)")) {
+      found <- colnames(tbl)[colnames(tbl) %like% "^(mo|microorganism|organism|bacteria)"][1]
+    } else if (any(colnames(tbl) %like% "species")) {
+      found <- colnames(tbl)[colnames(tbl) %like% "species"][1]
    }
+
  }
  # -- key antibiotics
  if (type == "keyantibiotics") {
@ -154,15 +161,23 @@ search_type_in_df <- function(tbl, type) {
  }
  # -- patient id
  if (type == "patient_id") {
-    if (any(colnames(tbl) %like% "^(patient|patid)")) {
-      found <- colnames(tbl)[colnames(tbl) %like% "^(patient|patid)"][1]
+    if (any(colnames(tbl) %like% "^(identification |patient|patid)")) {
+      found <- colnames(tbl)[colnames(tbl) %like% "^(identification |patient|patid)"][1]
+    }
+  }
+  # -- specimen
+  if (type == "specimen") {
+    if (any(colnames(tbl) %like% "(specimen type)")) {
+      found <- colnames(tbl)[colnames(tbl) %like% "(specimen type)"][1]
+    } else if (any(colnames(tbl) %like% "^(specimen)")) {
+      found <- colnames(tbl)[colnames(tbl) %like% "^(specimen)"][1]
    }
  }

  if (!is.null(found)) {
    msg <- paste0("NOTE: Using column `", bold(found), "` as input for `col_", type, "`.")
-    if (type == "keyantibiotics") {
-      msg <- paste(msg, "Use", bold("col_keyantibiotics = FALSE"), "to prevent this.")
+    if (type %in% c("keyantibiotics", "specimen")) {
+      msg <- paste(msg, "Use", bold(paste0("col_", type), " = FALSE"), "to prevent this.")
    }
    message(blue(msg))
  }
--- a/R/mo_source.R
+++ b/R/mo_source.R
@ -107,11 +107,7 @@ set_mo_source <- function(path) {
    if (!"readxl" %in% utils::installed.packages()) {
      stop("Install the 'readxl' package first.")
    }
-    if (path %like% '[.]xlsx$') {
-      df <- readxl::read_xlsx(path)
-    } else {
-      df <- readxl::read_xls(path)
-    }
+    df <- readxl::read_excel(path)

  } else {
    # try comma first