AMR/R/read.4d.R

# ==================================================================== #
# TITLE                                                                #
# Antimicrobial Resistance (AMR) Analysis                              #
#                                                                      #
# AUTHORS                                                              #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
#                                                                      #
# LICENCE                                                              #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0,    #
# as published by the Free Software Foundation.                        #
#                                                                      #
# This program is distributed in the hope that it will be useful,      #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
# GNU General Public License for more details.                         #
# ==================================================================== #

#' Read data from 4D database
#'
#' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the column names to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
#' @inheritParams utils::read.table
#' @param info a logical to indicate whether info about the import should be printed, defaults to \code{TRUE} in interactive sessions
#' @details Column names will be transformed, but the original column names are set as a "label" attribute and can be seen in e.g. RStudio Viewer.
#' @export
read.4D <- function(file,
                    info = interactive(),
                    header = TRUE,
                    row.names = NULL,
                    sep = "\t",
                    quote = "\"'",
                    dec = ",",
                    na.strings = c("NA", "", "."),
                    skip = 2,
                    check.names = TRUE,
                    strip.white = TRUE,
                    fill = TRUE,
                    blank.lines.skip = TRUE,
                    stringsAsFactors = FALSE,
                    fileEncoding = "UTF-8",
                    encoding = "UTF-8") {

  if (info == TRUE) {
    message("Importing ", file, "... ", appendLF = FALSE)
  }
  data_4D <- utils::read.table(file = file,
                               row.names = row.names,
                               header = header,
                               sep = sep,
                               quote = quote,
                               dec = dec,
                               na.strings = na.strings,
                               skip = skip,
                               check.names = check.names,
                               strip.white = strip.white,
                               fill = fill,
                               blank.lines.skip = blank.lines.skip,
                               stringsAsFactors = stringsAsFactors,
                               fileEncoding = fileEncoding,
                               encoding = encoding)

  # helper functions
  to_date_4D <- function(x) {
    date_regular <- as.Date(x, format = "%d-%m-%y")
    posixlt <- as.POSIXlt(date_regular)
    # born after today will be born 100 years ago
    # based on https://stackoverflow.com/a/3312971/4575331
    posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year <- posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year - 100
    as.Date(posixlt)
  }
  to_age_4D <- function(from, to) {
    from_lt = as.POSIXlt(from)
    to_lt = as.POSIXlt(to)

    age = to_lt$year - from_lt$year

    ifelse(to_lt$mon < from_lt$mon |
             (to_lt$mon == from_lt$mon & to_lt$mday < from_lt$mday),
           age - 1, age)
  }

  if (info == TRUE) {
    message("OK\nTransforming column names... ", appendLF = FALSE)
  }
  if ("row.names" %in% colnames(data_4D) & all(is.na(data_4D[, ncol(data_4D)]))) {
    # remove first column name "row.names" and remove last empty column
    colnames(data_4D) <- c(colnames(data_4D)[2:ncol(data_4D)], "_skip_last")
    data_4D <- data_4D[, -ncol(data_4D)]
  }

  colnames(data_4D) <- tolower(colnames(data_4D))
  if (all(c("afnamedat", "gebdatum") %in% colnames(data_4D))) {
    # add age
    data_4D$age <- NA_integer_
  }
  cols_wanted <- c("patientnr", "gebdatum", "age", "mv", "monsternr", "afnamedat", "bepaling",
                   "afd.", "spec", "mat", "matbijz.",  "mocode",
                   "amfo", "amox", "anid", "azit", "casp", "cecl", "cefe", "cfcl",
                   "cfot", "cfox", "cfta", "cftr", "cfur", "chlo", "cipr", "clin",
                   "cocl", "ctta", "dapt", "doxy", "eryt", "fluo", "fluz", "fosf",
                   "fusi", "gehi", "gent", "imip", "kana", "levo", "line", "mero",
                   "metr", "mico", "mino", "moxi", "mupi", "nali", "nitr", "norf",
                   "oxac", "peni", "pipe", "pita", "poly", "posa", "quda", "rifa",
                   "spat", "teic", "tige", "tobr", "trim", "trsu", "vana", "vanb",
                   "vanc", "vori")
  # this ones actually exist
  cols_wanted <- cols_wanted[cols_wanted %in% colnames(data_4D)]
  # order of columns
  data_4D <- data_4D[, cols_wanted]

  # backup original column names
  colnames.bak <- toupper(colnames(data_4D))
  colnames.bak[colnames.bak == "AGE"] <- NA_character_

  # rename of columns
  colnames(data_4D) <- gsub("patientnr", "patient_id", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("gebdatum", "date_birth", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("mv", "gender", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("monsternr", "sample_id", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("afnamedat", "date_received", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("bepaling", "sample_test", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("afd.", "department", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("spec", "specialty", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("matbijz.", "specimen_type", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("mat", "specimen_group", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("mocode", "mo", colnames(data_4D), fixed = TRUE)

  if (info == TRUE) {
    message("OK\nTransforming dates and age... ", appendLF = FALSE)
  }
  if ("date_birth" %in% colnames(data_4D)) {
    data_4D$date_birth <- to_date_4D(data_4D$date_birth)

  }
  if ("date_received" %in% colnames(data_4D)) {
    data_4D$date_received <- to_date_4D(data_4D$date_received)
  }
  if ("age" %in% colnames(data_4D)) {
    data_4D$age <- to_age_4D(data_4D$date_birth, data_4D$date_received)
  }
  if ("gender" %in% colnames(data_4D)) {
    data_4D$gender[data_4D$gender == "V"] <- "F"
  }

  if (info == TRUE) {
    message("OK\nTransforming MO codes... ", appendLF = FALSE)
  }
  if ("mo" %in% colnames(data_4D)) {
    data_4D$mo <- as.mo(data_4D$mo)
    # column right of mo is:
    drug1 <- colnames(data_4D)[grep("^mo$", colnames(data_4D)) + 1]
    if (!is.na(drug1)) {
      # and last is:
      drug_last <- colnames(data_4D)[length(data_4D)]
      # transform those to rsi:
      data_4D <- suppressWarnings(mutate_at(data_4D, vars(drug1:drug_last), as.rsi))
    }
  }

  # set original column names as label (can be seen in RStudio Viewer)
  if (info == TRUE) {
    message("OK\nSetting original column names as label... ", appendLF = FALSE)
  }
  for (i in 1:ncol(data_4D)) {
    if (!is.na(colnames.bak[i])) {
      attr(data_4D[, i], "label") <- colnames.bak[i]
    }
  }

  if (info == TRUE) {
    message("OK\nSetting query as label to data.frame... ", appendLF = FALSE)
  }
  qry <- readLines(con <- file(file, open="r"))[1]
  close(con)
  attr(data_4D, "label") <- qry

  if (info == TRUE) {
    message("OK")
  }

  data_4D
}
read_4D 2018-11-05 15:30:49 +01:00			`# ==================================================================== #`
			`# TITLE #`
			`# Antimicrobial Resistance (AMR) Analysis #`
			`# #`
			`# AUTHORS #`
			`# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #`
			`# #`
			`# LICENCE #`
			`# This program is free software; you can redistribute it and/or modify #`
			`# it under the terms of the GNU General Public License version 2.0, #`
			`# as published by the Free Software Foundation. #`
			`# #`
			`# This program is distributed in the hope that it will be useful, #`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of #`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #`
			`# GNU General Public License for more details. #`
			`# ==================================================================== #`

			`#' Read data from 4D database`
			`#'`
read.4D improvements 2018-11-15 12:42:35 +01:00			`#' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the column names to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.`
read_4D 2018-11-05 15:30:49 +01:00			`#' @inheritParams utils::read.table`
read.4D improvements 2018-11-15 12:42:35 +01:00			`#' @param info a logical to indicate whether info about the import should be printed, defaults to \code{TRUE} in interactive sessions`
			`#' @details Column names will be transformed, but the original column names are set as a "label" attribute and can be seen in e.g. RStudio Viewer.`
read_4D 2018-11-05 15:30:49 +01:00			`#' @export`
read.4D improvements 2018-11-15 12:42:35 +01:00			`read.4D <- function(file,`
			`info = interactive(),`
read_4D 2018-11-05 15:30:49 +01:00			`header = TRUE,`
read.4D improvements 2018-11-15 12:42:35 +01:00			`row.names = NULL,`
read_4D 2018-11-05 15:30:49 +01:00			`sep = "\t",`
			`quote = "\"'",`
			`dec = ",",`
			`na.strings = c("NA", "", "."),`
			`skip = 2,`
			`check.names = TRUE,`
			`strip.white = TRUE,`
			`fill = TRUE,`
			`blank.lines.skip = TRUE,`
			`stringsAsFactors = FALSE,`
			`fileEncoding = "UTF-8",`
			`encoding = "UTF-8") {`

read.4D improvements 2018-11-15 12:42:35 +01:00			`if (info == TRUE) {`
unit test read.4d, unselecting freq cols 2018-11-19 13:00:22 +01:00			`message("Importing ", file, "... ", appendLF = FALSE)`
read.4D improvements 2018-11-15 12:42:35 +01:00			`}`
read_4D 2018-11-05 15:30:49 +01:00			`data_4D <- utils::read.table(file = file,`
read.4D improvements 2018-11-15 12:42:35 +01:00			`row.names = row.names,`
read_4D 2018-11-05 15:30:49 +01:00			`header = header,`
			`sep = sep,`
			`quote = quote,`
			`dec = dec,`
			`na.strings = na.strings,`
			`skip = skip,`
			`check.names = check.names,`
			`strip.white = strip.white,`
			`fill = fill,`
			`blank.lines.skip = blank.lines.skip,`
			`stringsAsFactors = stringsAsFactors,`
			`fileEncoding = fileEncoding,`
			`encoding = encoding)`

			`# helper functions`
			`to_date_4D <- function(x) {`
			`date_regular <- as.Date(x, format = "%d-%m-%y")`
			`posixlt <- as.POSIXlt(date_regular)`
			`# born after today will be born 100 years ago`
			`# based on https://stackoverflow.com/a/3312971/4575331`
read.4D improvements 2018-11-15 12:42:35 +01:00			`posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year <- posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year - 100`
read_4D 2018-11-05 15:30:49 +01:00			`as.Date(posixlt)`
			`}`
			`to_age_4D <- function(from, to) {`
			`from_lt = as.POSIXlt(from)`
			`to_lt = as.POSIXlt(to)`

			`age = to_lt$year - from_lt$year`

			`ifelse(to_lt$mon < from_lt$mon \|`
			`(to_lt$mon == from_lt$mon & to_lt$mday < from_lt$mday),`
			`age - 1, age)`
			`}`

read.4D improvements 2018-11-15 12:42:35 +01:00			`if (info == TRUE) {`
			`message("OK\nTransforming column names... ", appendLF = FALSE)`
			`}`
			`if ("row.names" %in% colnames(data_4D) & all(is.na(data_4D[, ncol(data_4D)]))) {`
			`# remove first column name "row.names" and remove last empty column`
			`colnames(data_4D) <- c(colnames(data_4D)[2:ncol(data_4D)], "_skip_last")`
			`data_4D <- data_4D[, -ncol(data_4D)]`
			`}`

read_4D 2018-11-05 15:30:49 +01:00			`colnames(data_4D) <- tolower(colnames(data_4D))`
			`if (all(c("afnamedat", "gebdatum") %in% colnames(data_4D))) {`
			`# add age`
			`data_4D$age <- NA_integer_`
			`}`
			`cols_wanted <- c("patientnr", "gebdatum", "age", "mv", "monsternr", "afnamedat", "bepaling",`
			`"afd.", "spec", "mat", "matbijz.", "mocode",`
			`"amfo", "amox", "anid", "azit", "casp", "cecl", "cefe", "cfcl",`
			`"cfot", "cfox", "cfta", "cftr", "cfur", "chlo", "cipr", "clin",`
			`"cocl", "ctta", "dapt", "doxy", "eryt", "fluo", "fluz", "fosf",`
			`"fusi", "gehi", "gent", "imip", "kana", "levo", "line", "mero",`
			`"metr", "mico", "mino", "moxi", "mupi", "nali", "nitr", "norf",`
			`"oxac", "peni", "pipe", "pita", "poly", "posa", "quda", "rifa",`
			`"spat", "teic", "tige", "tobr", "trim", "trsu", "vana", "vanb",`
			`"vanc", "vori")`
			`# this ones actually exist`
			`cols_wanted <- cols_wanted[cols_wanted %in% colnames(data_4D)]`
			`# order of columns`
			`data_4D <- data_4D[, cols_wanted]`

read.4D improvements 2018-11-15 12:42:35 +01:00			`# backup original column names`
			`colnames.bak <- toupper(colnames(data_4D))`
MDRO update 2018-11-16 20:50:50 +01:00			`colnames.bak[colnames.bak == "AGE"] <- NA_character_`
read.4D improvements 2018-11-15 12:42:35 +01:00
read_4D 2018-11-05 15:30:49 +01:00			`# rename of columns`
			`colnames(data_4D) <- gsub("patientnr", "patient_id", colnames(data_4D), fixed = TRUE)`
			`colnames(data_4D) <- gsub("gebdatum", "date_birth", colnames(data_4D), fixed = TRUE)`
			`colnames(data_4D) <- gsub("mv", "gender", colnames(data_4D), fixed = TRUE)`
			`colnames(data_4D) <- gsub("monsternr", "sample_id", colnames(data_4D), fixed = TRUE)`
			`colnames(data_4D) <- gsub("afnamedat", "date_received", colnames(data_4D), fixed = TRUE)`
			`colnames(data_4D) <- gsub("bepaling", "sample_test", colnames(data_4D), fixed = TRUE)`
			`colnames(data_4D) <- gsub("afd.", "department", colnames(data_4D), fixed = TRUE)`
			`colnames(data_4D) <- gsub("spec", "specialty", colnames(data_4D), fixed = TRUE)`
			`colnames(data_4D) <- gsub("matbijz.", "specimen_type", colnames(data_4D), fixed = TRUE)`
			`colnames(data_4D) <- gsub("mat", "specimen_group", colnames(data_4D), fixed = TRUE)`
			`colnames(data_4D) <- gsub("mocode", "mo", colnames(data_4D), fixed = TRUE)`

read.4D improvements 2018-11-15 12:42:35 +01:00			`if (info == TRUE) {`
			`message("OK\nTransforming dates and age... ", appendLF = FALSE)`
			`}`
read_4D 2018-11-05 15:30:49 +01:00			`if ("date_birth" %in% colnames(data_4D)) {`
			`data_4D$date_birth <- to_date_4D(data_4D$date_birth)`

			`}`
			`if ("date_received" %in% colnames(data_4D)) {`
			`data_4D$date_received <- to_date_4D(data_4D$date_received)`
			`}`
			`if ("age" %in% colnames(data_4D)) {`
			`data_4D$age <- to_age_4D(data_4D$date_birth, data_4D$date_received)`
			`}`
			`if ("gender" %in% colnames(data_4D)) {`
			`data_4D$gender[data_4D$gender == "V"] <- "F"`
			`}`
read.4D improvements 2018-11-15 12:42:35 +01:00
			`if (info == TRUE) {`
			`message("OK\nTransforming MO codes... ", appendLF = FALSE)`
			`}`
read_4D 2018-11-05 15:30:49 +01:00			`if ("mo" %in% colnames(data_4D)) {`
			`data_4D$mo <- as.mo(data_4D$mo)`
			`# column right of mo is:`
			`drug1 <- colnames(data_4D)[grep("^mo$", colnames(data_4D)) + 1]`
			`if (!is.na(drug1)) {`
			`# and last is:`
			`drug_last <- colnames(data_4D)[length(data_4D)]`
			`# transform those to rsi:`
			`data_4D <- suppressWarnings(mutate_at(data_4D, vars(drug1:drug_last), as.rsi))`
			`}`
			`}`

read.4D improvements 2018-11-15 12:42:35 +01:00			`# set original column names as label (can be seen in RStudio Viewer)`
			`if (info == TRUE) {`
			`message("OK\nSetting original column names as label... ", appendLF = FALSE)`
			`}`
			`for (i in 1:ncol(data_4D)) {`
MDRO update 2018-11-16 20:50:50 +01:00			`if (!is.na(colnames.bak[i])) {`
			`attr(data_4D[, i], "label") <- colnames.bak[i]`
			`}`
			`}`

			`if (info == TRUE) {`
			`message("OK\nSetting query as label to data.frame... ", appendLF = FALSE)`
read.4D improvements 2018-11-15 12:42:35 +01:00			`}`
MDRO update 2018-11-16 20:50:50 +01:00			`qry <- readLines(con <- file(file, open="r"))[1]`
			`close(con)`
			`attr(data_4D, "label") <- qry`
read.4D improvements 2018-11-15 12:42:35 +01:00
			`if (info == TRUE) {`
			`message("OK")`
			`}`

read_4D 2018-11-05 15:30:49 +01:00			`data_4D`
			`}`