75 lines
1.8 KiB
Python
75 lines
1.8 KiB
Python
#
|
|
# 2017/09/25
|
|
# Select samples from combined.csv for further analysis.
|
|
#
|
|
# HISTORY
|
|
# 2017/10/02 modularized.
|
|
#
|
|
# Aki Kunikoshi
|
|
# 428968@gmail.com
|
|
#
|
|
import numpy as np
|
|
|
|
def readFile(filename):
    """Read a text file and return its content split on newline characters.

    The whole file is read in one go and split on '\n'. Consequently a file
    that ends with a newline yields an empty string as the last element, and
    an empty file yields [''].

    Args:
        filename: path of the file to read.

    Returns:
        List of strings, one per '\n'-separated piece of the file content.
    """
    with open(filename, 'r') as handle:
        content = handle.read()
    return content.split('\n')
|
|
|
|
|
|
def selectSamplesFromCombinedData(word, fileCombined):
    """Collect the rows of a combined CSV file that match `word`, per region.

    The first line of the file is read and discarded (presumably a header
    row).  Every following line is stripped of trailing whitespace and split
    on ','.  A row is kept only when it has exactly six fields and its sixth
    field equals `word`; it is then filed under the region named in its third
    field.  Rows whose region is none of the three known ones are ignored.

    Args:
        word: value the sixth field of a row must equal.
        fileCombined: path of the combined CSV file.

    Returns:
        Tuple (dataGroningen, dataLimburg, dataOverijsel).  Each element is a
        list of rows and each row is a list of six strings.
    """
    dataGroningen = []
    dataLimburg = []
    dataOverijsel = []
    dataPerRegion = {
        'Groningen_and_Drenthe': dataGroningen,
        'Limburg': dataLimburg,
        'Oost_Overijsel-Gelderland': dataOverijsel,
    }

    # The context manager closes the file even when reading raises.
    with open(fileCombined, 'r') as fin:
        # The first line is never evaluated, only consumed.
        fin.readline()

        # Iterate over the file itself: looping on the stripped line stopped
        # at the first blank line and silently dropped every row after it.
        for line in fin:
            lineList = line.rstrip().split(',')
            if len(lineList) == 6 and lineList[5] == word:
                dataRegion = dataPerRegion.get(lineList[2])
                if dataRegion is not None:
                    dataRegion.append(lineList)

    return (dataGroningen, dataLimburg, dataOverijsel)
|
|
#print("{0}: {1} {2} {3}".format(word,len(listGroningen),len(listLimburg),len(listOverijsel))
|
|
|
|
|
|
def groupSamplesInCSV(fileCSV, idxRegion):
    """Group the rows of a CSV file by the region found in one of its columns.

    The first line is taken as the header and split on ','.  Every following
    line is stripped of trailing whitespace and split on ','; rows whose
    number of fields differs from the header's are skipped.  The remaining
    rows are filed under the region named in column `idxRegion`; rows of any
    other region are ignored.

    Args:
        fileCSV: path of the CSV file.
        idxRegion: zero-based index of the column holding the region name.

    Returns:
        Tuple (header, dataGroningen, dataLimburg, dataOverijsel).  `header`
        is the list of header fields; the others are lists of rows, each row
        being a list of strings.

    Raises:
        IndexError: if `idxRegion` is out of range for a row that has as many
            fields as the header.
    """
    dataGroningen = []
    dataLimburg = []
    dataOverijsel = []
    dataPerRegion = {
        'Groningen_and_Drenthe': dataGroningen,
        'Limburg': dataLimburg,
        'Oost_Overijsel-Gelderland': dataOverijsel,
    }

    # The context manager closes the file even when reading raises.
    with open(fileCSV, 'r') as fin:
        # first line is the header
        headerLine = fin.readline().rstrip()
        header = headerLine.split(',')

        # A blank header line (or an empty file) has always produced no data
        # rows; keep that result rather than matching rows against [''].
        if headerLine:
            # Iterate over the file itself: looping on the stripped line
            # stopped at the first blank line and dropped every row after it.
            for line in fin:
                lineList = line.rstrip().split(',')
                if len(lineList) == len(header):
                    dataRegion = dataPerRegion.get(lineList[idxRegion])
                    if dataRegion is not None:
                        dataRegion.append(lineList)

    return (header, dataGroningen, dataLimburg, dataOverijsel)
|
|
|
|
def addUserID(featureFile, recordingsCSV):
|
|
dirFeature = config['sentence_based']['dirFeature']
|