AMR/R/read.4d.R

176 lines
8.1 KiB
R
Raw Normal View History

2018-11-05 15:30:49 +01:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
2019-01-02 23:24:07 +01:00
# SOURCE #
# https://gitlab.com/msberends/AMR #
2018-11-05 15:30:49 +01:00
# #
# LICENCE #
2019-01-02 23:24:07 +01:00
# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
2018-11-05 15:30:49 +01:00
# #
2019-01-02 23:24:07 +01:00
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# This R package was created for academic research and was publicly #
# released in the hope that it will be useful, but it comes WITHOUT #
# ANY WARRANTY OR LIABILITY. #
2019-04-05 18:47:39 +02:00
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
2018-11-05 15:30:49 +01:00
# ==================================================================== #
#' Read data from 4D database
#'
#' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the column names to English, transform the MO codes with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
2018-11-05 15:30:49 +01:00
#' @inheritParams utils::read.table
2018-11-15 12:42:35 +01:00
#' @param info a logical to indicate whether info about the import should be printed, defaults to \code{TRUE} in interactive sessions
#' @details Column names will be transformed, but the original column names are set as a "label" attribute and can be seen in e.g. RStudio Viewer.
2019-01-02 23:24:07 +01:00
#' @inheritSection AMR Read more on our website!
2018-11-05 15:30:49 +01:00
#' @export
2018-11-15 12:42:35 +01:00
read.4D <- function(file,
info = interactive(),
2018-11-05 15:30:49 +01:00
header = TRUE,
2018-11-15 12:42:35 +01:00
row.names = NULL,
2018-11-05 15:30:49 +01:00
sep = "\t",
quote = "\"'",
dec = ",",
na.strings = c("NA", "", "."),
skip = 2,
check.names = TRUE,
strip.white = TRUE,
fill = TRUE,
blank.lines.skip = TRUE,
stringsAsFactors = FALSE,
fileEncoding = "UTF-8",
encoding = "UTF-8") {
2018-11-15 12:42:35 +01:00
if (info == TRUE) {
message("Importing ", file, "... ", appendLF = FALSE)
2018-11-15 12:42:35 +01:00
}
2018-11-05 15:30:49 +01:00
data_4D <- utils::read.table(file = file,
2018-11-15 12:42:35 +01:00
row.names = row.names,
2018-11-05 15:30:49 +01:00
header = header,
sep = sep,
quote = quote,
dec = dec,
na.strings = na.strings,
skip = skip,
check.names = check.names,
strip.white = strip.white,
fill = fill,
blank.lines.skip = blank.lines.skip,
stringsAsFactors = stringsAsFactors,
fileEncoding = fileEncoding,
encoding = encoding)
# helper function for dates
2018-11-05 15:30:49 +01:00
to_date_4D <- function(x) {
date_regular <- as.Date(x, format = "%d-%m-%y")
posixlt <- as.POSIXlt(date_regular)
# born after today will be born 100 years ago
# based on https://stackoverflow.com/a/3312971/4575331
2018-11-15 12:42:35 +01:00
posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year <- posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year - 100
2018-11-05 15:30:49 +01:00
as.Date(posixlt)
}
2018-11-15 12:42:35 +01:00
if (info == TRUE) {
message("OK\nTransforming column names... ", appendLF = FALSE)
}
if ("row.names" %in% colnames(data_4D) & all(is.na(data_4D[, ncol(data_4D)]))) {
# remove first column name "row.names" and remove last empty column
colnames(data_4D) <- c(colnames(data_4D)[2:ncol(data_4D)], "_skip_last")
data_4D <- data_4D[, -ncol(data_4D)]
}
2018-11-05 15:30:49 +01:00
colnames(data_4D) <- tolower(colnames(data_4D))
if (all(c("afnamedat", "gebdatum") %in% colnames(data_4D))) {
# add age
data_4D$age <- NA_integer_
}
cols_wanted <- c("patientnr", "gebdatum", "age", "mv", "monsternr", "afnamedat", "bepaling",
"afd.", "spec", "mat", "matbijz.", "mocode",
"amfo", "amox", "anid", "azit", "casp", "cecl", "cefe", "cfcl",
"cfot", "cfox", "cfta", "cftr", "cfur", "chlo", "cipr", "clin",
"cocl", "ctta", "dapt", "doxy", "eryt", "fluo", "fluz", "fosf",
"fusi", "gehi", "gent", "imip", "kana", "levo", "line", "mero",
"metr", "mico", "mino", "moxi", "mupi", "nali", "nitr", "norf",
"oxac", "peni", "pipe", "pita", "poly", "posa", "quda", "rifa",
"spat", "teic", "tige", "tobr", "trim", "trsu", "vana", "vanb",
"vanc", "vori")
# this ones actually exist
cols_wanted <- cols_wanted[cols_wanted %in% colnames(data_4D)]
# order of columns
data_4D <- data_4D[, cols_wanted]
2018-11-15 12:42:35 +01:00
# backup original column names
colnames.bak <- toupper(colnames(data_4D))
2018-11-16 20:50:50 +01:00
colnames.bak[colnames.bak == "AGE"] <- NA_character_
2018-11-15 12:42:35 +01:00
2018-11-05 15:30:49 +01:00
# rename of columns
colnames(data_4D) <- gsub("patientnr", "patient_id", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("gebdatum", "date_birth", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("mv", "gender", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("monsternr", "sample_id", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("afnamedat", "date_received", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("bepaling", "sample_test", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("afd.", "department", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("spec", "specialty", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("matbijz.", "specimen_type", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("mat", "specimen_group", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("mocode", "mo", colnames(data_4D), fixed = TRUE)
2018-11-15 12:42:35 +01:00
if (info == TRUE) {
message("OK\nTransforming dates and age... ", appendLF = FALSE)
}
2018-11-05 15:30:49 +01:00
if ("date_birth" %in% colnames(data_4D)) {
data_4D$date_birth <- to_date_4D(data_4D$date_birth)
}
if ("date_received" %in% colnames(data_4D)) {
data_4D$date_received <- to_date_4D(data_4D$date_received)
}
if ("age" %in% colnames(data_4D)) {
data_4D$age <- age(data_4D$date_birth, data_4D$date_received)
2018-11-05 15:30:49 +01:00
}
if ("gender" %in% colnames(data_4D)) {
data_4D$gender[data_4D$gender == "V"] <- "F"
}
2018-11-15 12:42:35 +01:00
if (info == TRUE) {
message("OK\nTransforming MO codes... ", appendLF = FALSE)
}
2018-11-05 15:30:49 +01:00
if ("mo" %in% colnames(data_4D)) {
data_4D$mo <- as.mo(data_4D$mo)
# column right of mo is:
drug1 <- colnames(data_4D)[grep("^mo$", colnames(data_4D)) + 1]
if (!is.na(drug1)) {
# and last is:
drug_last <- colnames(data_4D)[length(data_4D)]
# transform those to rsi:
data_4D <- suppressWarnings(mutate_at(data_4D, vars(drug1:drug_last), as.rsi))
}
}
2018-11-15 12:42:35 +01:00
# set original column names as label (can be seen in RStudio Viewer)
if (info == TRUE) {
message("OK\nSetting original column names as label... ", appendLF = FALSE)
}
2019-10-11 17:21:02 +02:00
for (i in seq_len(ncol(data_4D))) {
2018-11-16 20:50:50 +01:00
if (!is.na(colnames.bak[i])) {
attr(data_4D[, i], "label") <- colnames.bak[i]
}
}
if (info == TRUE) {
message("OK\nSetting query as label to data.frame... ", appendLF = FALSE)
2018-11-15 12:42:35 +01:00
}
2019-10-11 17:21:02 +02:00
qry <- readLines(con <- file(file, open = "r"))[1]
2018-11-16 20:50:50 +01:00
close(con)
attr(data_4D, "label") <- qry
2018-11-15 12:42:35 +01:00
if (info == TRUE) {
message("OK")
}
2018-11-05 15:30:49 +01:00
data_4D
}