# initially only interfacing 4D data # TODO loggen zoveel mogelijk over ontvangen data (nieuwe GLIMS data) # TODO pushen R scripts naar git server waar Gerhard en Kornelis bij kunnen # require(stringr) require(tidyverse) require(AMR) dataFolder = './data' get_most_recent_4D_file = function(path, prefix) { files = list.files(dataFolder, paste0('^', prefix, '*')) tFiles = tibble(name = files, mtime = file.mtime(file.path(dataFolder, files))) %>% arrange(desc(mtime)) tmpFilename = tFiles$name[1] return(tmpFilename) } read_4D_file = function(path) { tFile = read_tsv(path, col_types = paste0(rep('c', 30), collapse = '')) # remove comment lines if (nrow(tFile) > 0) { tFile = tFile %>% slice( seq(4, nrow(tFile)) ) %>% filter(str_sub(ID, 1, 3) != '___') } if (colnames(tFile)[1] == 'X1') { if (str_detect(path, 'AB-')) { tFile = tibble(ID = '0', Monsternummer = '', IsolNr = '', AB = '', V = '', PrefMIC ='', MIC = '', D = '', DIF = '', PrefEtest = '', E = '', ETEST = '', RIS = '', RAP = '', ...15 = '') } else if (str_detect(path, 'MS-')) { tFile = tibble(UMCG.Monster= '0', PIN.Lot= '0', TypeObject= '0', Lot= '0',Echtgenoot= '0', AchternNaam= '0', Geboortedatum= '0', Geslacht= '0', OntvOLabDatum= '0', Tijdstip.materiaal.afgenomen= '0', PC= '0', Aanvrager= '0', ligafdeling= '0', A= '0', B= '0', C= '0', Materiaal= '0', Materiaal.variabelen= '0', X = F) } else if (str_detect(path, 'ST-')) { tFile = tibble(ID = '0', Monsternummer = '', MMBProc = '', Isolnr = '', MO = '', Waarde = '', Pos = '', Rap = '') } else if (str_detect(path, 'TM-')) { tFile = tibble(ID = '0', Monsternummer = '', Test = '', Uitslag = '') } else if (str_detect(path, 'TS-')) { tFile = tibble(ID = '0', Monsternummer = '', Isolnr = '', Test = '', Waarde = '', Rapport = '') } } if ('ID' %in% colnames(tFile)) { tFile = tFile %>% filter(ID != '0') } else if ('UMCG.Monster' %in% colnames(tFile)) { tFile = tFile %>% filter(UMCG.Monster != '0') } return(tFile) } read_4D_TM_file = function(path) { tmFilename = str_remove(path, paste0(dirname(path), '/')) tmFileL = readLines(path) tmFileL = tmFileL[c(5:length(tmFileL))] for (l in 1:length(tmFileL)) { if (str_count(tmFileL[l], '\t') == 3) { tmFileL = paste0(tmFileL, '\t') } } newTmFilename = file.path(dirname(path), tmFilename %>% str_replace('TM', 'bTM')) if (file.exists(newTmFilename)) file.remove(newTmFilename) writeLines(tmFileL, newTmFilename) tFile = read_tsv(newTmFilename, col_names = c('ID','Monsternummer','Test','Status.test.tm','Uitslag'), col_types = 'ccccc') # remove comment lines tFile = tFile %>% slice(c(5:(tFile %>% nrow()))) %>% mutate( Monsternummer = as.character(Monsternummer) ) return(tFile) } get_most_recent_4D_MS_file = function(path) { # get files msFiles = list.files(path, '^MS*') # create table with modification datetimes, for selection of the most recent one tmsFiles = tibble(name = msFiles, mtime = file.mtime(file.path(path, msFiles))) %>% arrange(desc(mtime)) # get the name of the most recently modified file msFilename = tmsFiles$name[1] return(msFilename) } read_4D_MS_file = function(path) { # print(path) msFilename = str_remove(path, paste0(dirname(path), '/')) # read content msFileL = readLines(path) # remove lines with repeated '_', useful for the human eye only msFileL = msFileL[str_sub(msFileL, 1, 5) != '_____'] # replace (real) header msHeader = readLines(file.path(dataFolder, 'headerMS.txt')) msFileL[1] = paste0(msHeader, '\t') # replace tabs within a date time combination with a space in line 2 and further, # for example: replace '29/09/2021 14:05' by '29/09/2021 14:05' # ↑ = '\t' ↑ = ' ' for (i in 2:length(msFileL)) { msFileL[i] = msFileL[i] %>% str_replace_all('(?<=[:digit:]{4})\t(?=[:digit:]{2}:[:digit:]{2})', ' ') } # write to new machine readable file newMsFilename = file.path(dirname(path), msFilename %>% str_replace('MS', 'bMS')) if (file.exists(newMsFilename)) file.remove(newMsFilename) writeLines(msFileL, newMsFilename) # finally: read the machine readable file result = read.delim(file = newMsFilename, sep = '\t') %>% as_tibble() %>% mutate( UMCG.Monster = as.character(UMCG.Monster), PIN.Lot = as.character(PIN.Lot), Aanvrager = as.character(Aanvrager), C = as.character(C) ) # Hurray! return(result) } # abFile = read_4D_file(dataFolder, 'AB') # msFile = read_4D_MS_file(dataFolder) # stFile = read_4D_file(dataFolder, 'ST') # tmFile = read_4D_file(dataFolder, 'TM') # tsFile = read_4D_file(dataFolder, 'TS') ProcessAndMergeDailyFiles = function(abFile, msFile, stFile, tmFile, tsFile) { if (!is.null(abFile)) abFile = abFile %>% filter(!is.na(RAP)) #%>% # mutate( # #Monsternummer = as.character(Monsternummer) # AB.amr = AMR::ab_property(AB, 'ab') %>% as.character() # ) if (!is.null(stFile)) stFile = stFile %>% filter(!is.na(Rap)) %>% # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>% rename(IsolNr = Isolnr) if (!is.null(msFile)) msFile = msFile %>% rename(Monsternummer = UMCG.Monster) %>% # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) %>% select(-X) # if (!is.null(tmFile)) # tmFile = tmFile %>% # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) if (!is.null(tsFile)) tsFile = tsFile %>% filter(!is.na(Rapport)) # mutate(Monsternummer = as.character(Monsternummer) %>% str_trim()) if (!is.null(abFile) & !is.null(stFile)) t_ab_st = abFile %>% full_join(stFile, by = c('Monsternummer', 'IsolNr'), suffix = c('.ab', '.st')) if (exists('t_ab_st')) { if (!is.null(t_ab_st) & !is.null(msFile)) { t_ab_st_ms = t_ab_st %>% full_join(msFile, by = 'Monsternummer') %>% rename(Waarde.st = Waarde) return( list( merged = t_ab_st_ms, abFile = abFile, stFile = stFile, msFile = msFile, tsFile = tsFile, tmFile = tmFile ) ) # t_ab_st_ms_ts = t_ab_st_ms %>% # full_join(tsFile, by = c('Monsternummer', 'IsolNr' = 'Isolnr')) # # t_ab_st_ms_ts_tm = t_ab_st_ms_ts %>% # add_column(Uitslag = NA, # is_sampleResult = F) %>% # add_row(Monsternummer = tmFile$Monsternummer, # Test = tmFile$Test, # Uitslag = tmFile$Uitslag, # is_sampleResult = T) # # t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>% # rename(Waarde.ts = Waarde) %>% # filter(!Test %in% c('no_filter_yet')) # # # if ('X' %in% colnames(t_ab_st_ms_ts_tm)) { # # t_ab_st_ms_ts_tm = t_ab_st_ms_ts_tm %>% # # select(-'X') # # print('Dit statement is voor debugging') # # } } } return( list( merged = NULL, abFile = abFile, stFile = stFile, msFile = msFile, tsFile = tsFile, tmFile = tmFile ) ) }