radar2/interfaces/4D/src/4D_day.R

269 lines
8.2 KiB
R

# initially only interfacing 4D data
# TODO loggen zoveel mogelijk over ontvangen data (nieuwe GLIMS data)
# TODO pushen R scripts naar git server waar Gerhard en Kornelis bij kunnen
# require(stringr)
require(tidyverse)
require(AMR)
dataFolder = './data'
get_most_recent_4D_file = function(path, prefix) {
files = list.files(dataFolder, paste0('^', prefix, '*'))
tFiles = tibble(name = files,
mtime = file.mtime(file.path(dataFolder, files))) %>%
arrange(desc(mtime))
tmpFilename = tFiles$name[1]
return(tmpFilename)
}
read_4D_file = function(path) {
tFile = read_tsv(path, col_types = paste0(rep('c', 30), collapse = ''))
# remove comment lines
if (nrow(tFile) > 0) {
tFile = tFile %>%
slice( seq(4, nrow(tFile)) ) %>%
filter(str_sub(ID, 1, 3) != '___')
}
if (colnames(tFile)[1] == 'X1') {
if (str_detect(path, 'AB-')) {
tFile = tibble(ID = '0',
Monsternummer = '',
IsolNr = '',
AB = '',
V = '',
PrefMIC ='',
MIC = '',
D = '',
DIF = '',
PrefEtest = '',
E = '',
ETEST = '',
RIS = '',
RAP = '',
...15 = '')
} else if (str_detect(path, 'MS-')) {
tFile = tibble(UMCG.Monster= '0',
PIN.Lot= '0',
TypeObject= '0',
Lot= '0',Echtgenoot= '0',
AchternNaam= '0',
Geboortedatum= '0',
Geslacht= '0',
OntvOLabDatum= '0',
Tijdstip.materiaal.afgenomen= '0',
PC= '0',
Aanvrager= '0',
ligafdeling= '0',
A= '0',
B= '0',
C= '0',
Materiaal= '0',
Materiaal.variabelen= '0',
X = F)
} else if (str_detect(path, 'ST-')) {
tFile = tibble(ID = '0',
Monsternummer = '',
MMBProc = '',
Isolnr = '',
MO = '',
Waarde = '',
Pos = '',
Rap = '')
} else if (str_detect(path, 'TM-')) {
tFile = tibble(ID = '0',
Monsternummer = '',
Test = '',
Uitslag = '')
} else if (str_detect(path, 'TS-')) {
tFile = tibble(ID = '0',
Monsternummer = '',
Isolnr = '',
Test = '',
Waarde = '',
Rapport = '')
}
}
if ('ID' %in% colnames(tFile)) {
tFile = tFile %>%
filter(ID != '0')
} else if ('UMCG.Monster' %in% colnames(tFile)) {
tFile = tFile %>%
filter(UMCG.Monster != '0')
}
return(tFile)
}
read_4D_TM_file = function(path) {
tmFilename = str_remove(path, paste0(dirname(path), '/'))
tmFileL = readLines(path)
tmFileL = tmFileL[c(5:length(tmFileL))]
for (l in 1:length(tmFileL)) {
if (str_count(tmFileL[l], '\t') == 3) {
tmFileL = paste0(tmFileL, '\t')
}
}
newTmFilename = file.path(dirname(path),
tmFilename %>% str_replace('TM', 'bTM'))
if (file.exists(newTmFilename)) file.remove(newTmFilename)
writeLines(tmFileL, newTmFilename)
tFile = read_tsv(newTmFilename,
col_names = c('ID','Monsternummer','Test','Status.test.tm','Uitslag'),
col_types = 'ccccc')
# remove comment lines
tFile = tFile %>%
slice(c(5:(tFile %>% nrow()))) %>%
mutate(
Monsternummer = as.character(Monsternummer)
)
return(tFile)
}
get_most_recent_4D_MS_file = function(path) {
# get files
msFiles = list.files(path, '^MS*')
# create table with modification datetimes, for selection of the most recent one
tmsFiles = tibble(name = msFiles,
mtime = file.mtime(file.path(path, msFiles))) %>%
arrange(desc(mtime))
# get the name of the most recently modified file
msFilename = tmsFiles$name[1]
return(msFilename)
}
read_4D_MS_file = function(path) {
# print(path)
msFilename = str_remove(path, paste0(dirname(path), '/'))
# read content
msFileL = readLines(path)
# remove lines with repeated '_', useful for the human eye only
msFileL = msFileL[str_sub(msFileL, 1, 5) != '_____']
# replace (real) header
msHeader = readLines(file.path(dataFolder, 'headerMS.txt'))
msFileL[1] = paste0(msHeader, '\t')
# replace tabs within a date time combination with a space in line 2 and further,
# for example: replace '29/09/2021 14:05' by '29/09/2021 14:05'
# ↑ = '\t' ↑ = ' '
for (i in 2:length(msFileL)) {
msFileL[i] = msFileL[i] %>% str_replace_all('(?<=[:digit:]{4})\t(?=[:digit:]{2}:[:digit:]{2})', ' ')
}
# write to new machine readable file
newMsFilename = file.path(dirname(path),
msFilename %>% str_replace('MS', 'bMS'))
if (file.exists(newMsFilename)) file.remove(newMsFilename)
writeLines(msFileL, newMsFilename)
# finally: read the machine readable file
result = read.delim(file = newMsFilename, sep = '\t') %>%
as_tibble() %>%
mutate(
UMCG.Monster = as.character(UMCG.Monster),
PIN.Lot = as.character(PIN.Lot),
Aanvrager = as.character(Aanvrager),
C = as.character(C)
)
# Hurray!
return(result)
}
# abFile = read_4D_file(dataFolder, 'AB')
# msFile = read_4D_MS_file(dataFolder)
# stFile = read_4D_file(dataFolder, 'ST')
# tmFile = read_4D_file(dataFolder, 'TM')
# tsFile = read_4D_file(dataFolder, 'TS')
ProcessAndMergeDailyFiles = function(abFile, msFile, stFile, tmFile, tsFile) {
if (!is.null(abFile))
abFile = abFile %>%
filter(!is.na(RAP)) #%>%
# mutate(
# #Monsternummer = as.character(Monsternummer)
# AB.amr = AMR::ab_property(AB, 'ab') %>% as.character()
# )
if (!is.null(stFile))
stFile = stFile %>%
filter(!is.na(Rap)) %>%
# mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>%
rename(IsolNr = Isolnr)
if (!is.null(msFile))
msFile = msFile %>%
rename(Monsternummer = UMCG.Monster) %>%
# mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>%
select(-X)
# if (!is.null(tmFile))
# tmFile = tmFile %>%
# mutate(Monsternummer = as.character(Monsternummer) %>% str_trim())
if (!is.null(tsFile))
tsFile = tsFile %>%
filter(!is.na(Rapport))
# mutate(Monsternummer = as.character(Monsternummer) %>% str_trim())
if (!is.null(abFile) & !is.null(stFile))
t_ab_st = abFile %>%
full_join(stFile,
by = c('Monsternummer', 'IsolNr'),
suffix = c('.ab', '.st'))
if (exists('t_ab_st')) {
if (!is.null(t_ab_st) & !is.null(msFile)) {
t_ab_st_ms = t_ab_st %>%
full_join(msFile, by = 'Monsternummer') %>%
rename(Waarde.st = Waarde)
return(
list(
merged = t_ab_st_ms,
abFile = abFile,
stFile = stFile,
msFile = msFile,
tsFile = tsFile,
tmFile = tmFile
)
)
# t_ab_st_ms_ts = t_ab_st_ms %>%
# full_join(tsFile, by = c('Monsternummer', 'IsolNr' = 'Isolnr'))
#
# t_ab_st_ms_ts_tm = t_ab_st_ms_ts %>%
# add_column(Uitslag = NA,
# is_sampleResult = F) %>%
# add_row(Monsternummer = tmFile$Monsternummer,
# Test = tmFile$Test,
# Uitslag = tmFile$Uitslag,
# is_sampleResult = T)
#
# t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>%
# rename(Waarde.ts = Waarde) %>%
# filter(!Test %in% c('no_filter_yet'))
#
# # if ('X' %in% colnames(t_ab_st_ms_ts_tm)) {
# # t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>%
# # select(-'X')
# # print('Dit statement is voor debugging')
# # }
}
}
return(
list(
merged = NULL,
abFile = abFile,
stFile = stFile,
msFile = msFile,
tsFile = tsFile,
tmFile = tmFile
)
)
}