#
# 2017/09/25
# select samples from the combined.csv for the further analysis
#
# HISTORY
# 2017/10/02 modularized.
#
# Aki Kunikoshi
# 428968@gmail.com
#

import numpy as np


# Region labels, in the order in which the per-region lists are returned.
_REGIONS = ('Groningen_and_Drenthe', 'Limburg', 'Oost_Overijsel-Gelderland')


def _splitDataLines(fin):
    """Yield the comma-split fields of every non-blank line left in *fin*.

    Trailing whitespace (including the newline) is stripped before splitting.
    Blank lines are skipped instead of being treated as end-of-file, so an
    empty line in the middle of the data no longer truncates the result.
    """
    for rawLine in fin:
        stripped = rawLine.rstrip()
        if stripped:
            yield stripped.split(',')


def _groupByRegion(rows, idxRegion):
    """Split *rows* into one list per entry of ``_REGIONS``.

    Args:
        rows: iterable of lists of strings (one list per CSV line).
        idxRegion: index of the field that holds the region label.

    Returns:
        A 3-tuple of lists (Groningen, Limburg, Overijsel); rows whose region
        label is not one of ``_REGIONS`` are dropped. Row order is preserved.
    """
    grouped = dict((region, []) for region in _REGIONS)
    for row in rows:
        bucket = grouped.get(row[idxRegion])
        if bucket is not None:
            bucket.append(row)
    return tuple(grouped[region] for region in _REGIONS)


def readFile(filename):
    """Return the content of *filename* as a list of lines.

    The text is split on '\\n', so a file ending with a newline yields a
    final empty string.
    """
    with open(filename, 'r') as fin:
        lines = fin.read()
    linesEach = lines.split('\n')
    return linesEach


def selectSamplesFromCombinedData(word, fileCombined):
    """Collect the rows of *fileCombined* that belong to *word*, per region.

    The first line of the file is never treated as data. A row is kept when
    it has exactly 6 comma-separated fields and its last field equals *word*;
    the region label is read from the third field.

    Args:
        word: value the sixth field must match.
        fileCombined: path of the combined CSV file.

    Returns:
        (dataGroningen, dataLimburg, dataOverijsel), each a list of rows,
        where a row is the list of the 6 string fields.
    """
    # load combined data
    with open(fileCombined, 'r') as fin:
        fin.readline()  # skip the first line
        rows = [
            row for row in _splitDataLines(fin)
            if len(row) == 6 and row[5] == word
        ]

    # load data per region
    dataGroningen, dataLimburg, dataOverijsel = _groupByRegion(rows, 2)
    return (dataGroningen, dataLimburg, dataOverijsel)
    #print("{0}: {1} {2} {3}".format(word,len(listGroningen),len(listLimburg),len(listOverijsel))


def groupSamplesInCSV(fileCSV, idxRegion):
    """Group the rows of a CSV file by region.

    The first line is the header. A row is kept only when it has as many
    comma-separated fields as the header.

    Args:
        fileCSV: path of the CSV file.
        idxRegion: index of the column that holds the region label.

    Returns:
        (header, dataGroningen, dataLimburg, dataOverijsel) where header is
        the list of column names and the others are lists of rows.
    """
    with open(fileCSV, 'r') as fin:
        # first line is the header
        header = fin.readline().rstrip().split(',')
        nColumns = len(header)
        rows = [row for row in _splitDataLines(fin) if len(row) == nColumns]

    # load data per region
    dataGroningen, dataLimburg, dataOverijsel = _groupByRegion(rows, idxRegion)
    return (header, dataGroningen, dataLimburg, dataOverijsel)


def addUserID(featureFile, recordingsCSV):
    dirFeature = config['sentence_based']['dirFeature']