269 lines
8.2 KiB
R
269 lines
8.2 KiB
R
# initially only interfacing 4D data
|
|
|
|
# TODO loggen zoveel mogelijk over ontvangen data (nieuwe GLIMS data)
|
|
# TODO pushen R scripts naar git server waar Gerhard en Kornelis bij kunnen
|
|
|
|
# require(stringr)
|
|
require(tidyverse)
|
|
require(AMR)
|
|
|
|
dataFolder = './data'
|
|
|
|
get_most_recent_4D_file = function(path, prefix) {
|
|
files = list.files(dataFolder, paste0('^', prefix, '*'))
|
|
tFiles = tibble(name = files,
|
|
mtime = file.mtime(file.path(dataFolder, files))) %>%
|
|
arrange(desc(mtime))
|
|
|
|
tmpFilename = tFiles$name[1]
|
|
|
|
return(tmpFilename)
|
|
}
|
|
|
|
read_4D_file = function(path) {
|
|
tFile = read_tsv(path, col_types = paste0(rep('c', 30), collapse = ''))
|
|
|
|
# remove comment lines
|
|
if (nrow(tFile) > 0) {
|
|
tFile = tFile %>%
|
|
slice( seq(4, nrow(tFile)) ) %>%
|
|
filter(str_sub(ID, 1, 3) != '___')
|
|
}
|
|
|
|
if (colnames(tFile)[1] == 'X1') {
|
|
if (str_detect(path, 'AB-')) {
|
|
tFile = tibble(ID = '0',
|
|
Monsternummer = '',
|
|
IsolNr = '',
|
|
AB = '',
|
|
V = '',
|
|
PrefMIC ='',
|
|
MIC = '',
|
|
D = '',
|
|
DIF = '',
|
|
PrefEtest = '',
|
|
E = '',
|
|
ETEST = '',
|
|
RIS = '',
|
|
RAP = '',
|
|
...15 = '')
|
|
} else if (str_detect(path, 'MS-')) {
|
|
tFile = tibble(UMCG.Monster= '0',
|
|
PIN.Lot= '0',
|
|
TypeObject= '0',
|
|
Lot= '0',Echtgenoot= '0',
|
|
AchternNaam= '0',
|
|
Geboortedatum= '0',
|
|
Geslacht= '0',
|
|
OntvOLabDatum= '0',
|
|
Tijdstip.materiaal.afgenomen= '0',
|
|
PC= '0',
|
|
Aanvrager= '0',
|
|
ligafdeling= '0',
|
|
A= '0',
|
|
B= '0',
|
|
C= '0',
|
|
Materiaal= '0',
|
|
Materiaal.variabelen= '0',
|
|
X = F)
|
|
} else if (str_detect(path, 'ST-')) {
|
|
tFile = tibble(ID = '0',
|
|
Monsternummer = '',
|
|
MMBProc = '',
|
|
Isolnr = '',
|
|
MO = '',
|
|
Waarde = '',
|
|
Pos = '',
|
|
Rap = '')
|
|
} else if (str_detect(path, 'TM-')) {
|
|
tFile = tibble(ID = '0',
|
|
Monsternummer = '',
|
|
Test = '',
|
|
Uitslag = '')
|
|
} else if (str_detect(path, 'TS-')) {
|
|
tFile = tibble(ID = '0',
|
|
Monsternummer = '',
|
|
Isolnr = '',
|
|
Test = '',
|
|
Waarde = '',
|
|
Rapport = '')
|
|
}
|
|
}
|
|
if ('ID' %in% colnames(tFile)) {
|
|
tFile = tFile %>%
|
|
filter(ID != '0')
|
|
} else if ('UMCG.Monster' %in% colnames(tFile)) {
|
|
tFile = tFile %>%
|
|
filter(UMCG.Monster != '0')
|
|
}
|
|
|
|
return(tFile)
|
|
}
|
|
|
|
read_4D_TM_file = function(path) {
|
|
tmFilename = str_remove(path, paste0(dirname(path), '/'))
|
|
tmFileL = readLines(path)
|
|
tmFileL = tmFileL[c(5:length(tmFileL))]
|
|
for (l in 1:length(tmFileL)) {
|
|
if (str_count(tmFileL[l], '\t') == 3) {
|
|
tmFileL = paste0(tmFileL, '\t')
|
|
}
|
|
}
|
|
newTmFilename = file.path(dirname(path),
|
|
tmFilename %>% str_replace('TM', 'bTM'))
|
|
if (file.exists(newTmFilename)) file.remove(newTmFilename)
|
|
writeLines(tmFileL, newTmFilename)
|
|
|
|
tFile = read_tsv(newTmFilename,
|
|
col_names = c('ID','Monsternummer','Test','Status.test.tm','Uitslag'),
|
|
col_types = 'ccccc')
|
|
|
|
# remove comment lines
|
|
tFile = tFile %>%
|
|
slice(c(5:(tFile %>% nrow()))) %>%
|
|
mutate(
|
|
Monsternummer = as.character(Monsternummer)
|
|
)
|
|
return(tFile)
|
|
}
|
|
|
|
get_most_recent_4D_MS_file = function(path) {
|
|
# get files
|
|
msFiles = list.files(path, '^MS*')
|
|
# create table with modification datetimes, for selection of the most recent one
|
|
tmsFiles = tibble(name = msFiles,
|
|
mtime = file.mtime(file.path(path, msFiles))) %>%
|
|
arrange(desc(mtime))
|
|
# get the name of the most recently modified file
|
|
msFilename = tmsFiles$name[1]
|
|
|
|
return(msFilename)
|
|
}
|
|
|
|
read_4D_MS_file = function(path) {
|
|
# print(path)
|
|
msFilename = str_remove(path, paste0(dirname(path), '/'))
|
|
# read content
|
|
msFileL = readLines(path)
|
|
# remove lines with repeated '_', useful for the human eye only
|
|
msFileL = msFileL[str_sub(msFileL, 1, 5) != '_____']
|
|
# replace (real) header
|
|
msHeader = readLines(file.path(dataFolder, 'headerMS.txt'))
|
|
msFileL[1] = paste0(msHeader, '\t')
|
|
# replace tabs within a date time combination with a space in line 2 and further,
|
|
# for example: replace '29/09/2021 14:05' by '29/09/2021 14:05'
|
|
# ↑ = '\t' ↑ = ' '
|
|
for (i in 2:length(msFileL)) {
|
|
msFileL[i] = msFileL[i] %>% str_replace_all('(?<=[:digit:]{4})\t(?=[:digit:]{2}:[:digit:]{2})', ' ')
|
|
}
|
|
# write to new machine readable file
|
|
newMsFilename = file.path(dirname(path),
|
|
msFilename %>% str_replace('MS', 'bMS'))
|
|
if (file.exists(newMsFilename)) file.remove(newMsFilename)
|
|
writeLines(msFileL, newMsFilename)
|
|
# finally: read the machine readable file
|
|
result = read.delim(file = newMsFilename, sep = '\t') %>%
|
|
as_tibble() %>%
|
|
mutate(
|
|
UMCG.Monster = as.character(UMCG.Monster),
|
|
PIN.Lot = as.character(PIN.Lot),
|
|
Aanvrager = as.character(Aanvrager),
|
|
C = as.character(C)
|
|
)
|
|
# Hurray!
|
|
return(result)
|
|
}
|
|
|
|
# abFile = read_4D_file(dataFolder, 'AB')
|
|
# msFile = read_4D_MS_file(dataFolder)
|
|
# stFile = read_4D_file(dataFolder, 'ST')
|
|
# tmFile = read_4D_file(dataFolder, 'TM')
|
|
# tsFile = read_4D_file(dataFolder, 'TS')
|
|
|
|
ProcessAndMergeDailyFiles = function(abFile, msFile, stFile, tmFile, tsFile) {
|
|
if (!is.null(abFile))
|
|
abFile = abFile %>%
|
|
filter(!is.na(RAP)) #%>%
|
|
# mutate(
|
|
# #Monsternummer = as.character(Monsternummer)
|
|
# AB.amr = AMR::ab_property(AB, 'ab') %>% as.character()
|
|
# )
|
|
|
|
if (!is.null(stFile))
|
|
stFile = stFile %>%
|
|
filter(!is.na(Rap)) %>%
|
|
# mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>%
|
|
rename(IsolNr = Isolnr)
|
|
|
|
if (!is.null(msFile))
|
|
msFile = msFile %>%
|
|
rename(Monsternummer = UMCG.Monster) %>%
|
|
# mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>%
|
|
select(-X)
|
|
|
|
# if (!is.null(tmFile))
|
|
# tmFile = tmFile %>%
|
|
# mutate(Monsternummer = as.character(Monsternummer) %>% str_trim())
|
|
|
|
if (!is.null(tsFile))
|
|
tsFile = tsFile %>%
|
|
filter(!is.na(Rapport))
|
|
# mutate(Monsternummer = as.character(Monsternummer) %>% str_trim())
|
|
|
|
if (!is.null(abFile) & !is.null(stFile))
|
|
t_ab_st = abFile %>%
|
|
full_join(stFile,
|
|
by = c('Monsternummer', 'IsolNr'),
|
|
suffix = c('.ab', '.st'))
|
|
|
|
if (exists('t_ab_st')) {
|
|
if (!is.null(t_ab_st) & !is.null(msFile)) {
|
|
t_ab_st_ms = t_ab_st %>%
|
|
full_join(msFile, by = 'Monsternummer') %>%
|
|
rename(Waarde.st = Waarde)
|
|
|
|
return(
|
|
list(
|
|
merged = t_ab_st_ms,
|
|
abFile = abFile,
|
|
stFile = stFile,
|
|
msFile = msFile,
|
|
tsFile = tsFile,
|
|
tmFile = tmFile
|
|
)
|
|
)
|
|
|
|
# t_ab_st_ms_ts = t_ab_st_ms %>%
|
|
# full_join(tsFile, by = c('Monsternummer', 'IsolNr' = 'Isolnr'))
|
|
#
|
|
# t_ab_st_ms_ts_tm = t_ab_st_ms_ts %>%
|
|
# add_column(Uitslag = NA,
|
|
# is_sampleResult = F) %>%
|
|
# add_row(Monsternummer = tmFile$Monsternummer,
|
|
# Test = tmFile$Test,
|
|
# Uitslag = tmFile$Uitslag,
|
|
# is_sampleResult = T)
|
|
#
|
|
# t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>%
|
|
# rename(Waarde.ts = Waarde) %>%
|
|
# filter(!Test %in% c('no_filter_yet'))
|
|
#
|
|
# # if ('X' %in% colnames(t_ab_st_ms_ts_tm)) {
|
|
# # t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>%
|
|
# # select(-'X')
|
|
# # print('Dit statement is voor debugging')
|
|
# # }
|
|
}
|
|
}
|
|
return(
|
|
list(
|
|
merged = NULL,
|
|
abFile = abFile,
|
|
stFile = stFile,
|
|
msFile = msFile,
|
|
tsFile = tsFile,
|
|
tmFile = tmFile
|
|
)
|
|
)
|
|
}
|