accent_classification/accent_classification/data_io.py

75 lines
1.8 KiB
Python

#
# 2017/09/25
# select samples from combined.csv for further analysis
#
# HISTORY
# 2017/10/02 modularized.
#
# Aki Kunikoshi
# 428968@gmail.com
#
import numpy as np
def readFile(filename):
    """Return the text of *filename* cut into pieces at every newline.

    The whole file is read in one go and split on the newline character,
    so the line terminators themselves are not part of the result. Because
    a plain split is used, a file whose text ends with a newline produces
    an empty string as its last element, and an empty file produces a list
    holding a single empty string.

    Parameters:
        filename: path of the text file to read.

    Returns:
        list of str, one entry per newline-separated piece.
    """
    with open(filename, 'r') as handle:
        return handle.read().split('\n')
def selectSamplesFromCombinedData(word, fileCombined):
    """Collect the rows for one word from the combined CSV, grouped by region.

    The first line of the file is always skipped (presumably a header row;
    verify against the data). Every following line is stripped of trailing
    whitespace and split on commas. A row is kept only when it has exactly
    six fields and its sixth field equals *word*; kept rows are sorted into
    one of three lists according to their third field. Rows whose region is
    none of the three known names are ignored.

    Unlike the earlier readline loop, a blank line in the middle of the file
    no longer ends the scan: all lines up to the real end of file are read.

    Parameters:
        word: value the sixth field of a row must match exactly.
        fileCombined: path of the combined CSV file.

    Returns:
        tuple (dataGroningen, dataLimburg, dataOverijsel); each element is a
        list of rows, and each row is the list of its six string fields.
    """
    # region name (third field) -> rows collected for that region
    groups = {
        'Groningen_and_Drenthe': [],
        'Limburg': [],
        'Oost_Overijsel-Gelderland': [],
    }

    # `with` guarantees the file is closed even if a row raises.
    with open(fileCombined, 'r') as fin:
        # the first line is never treated as data
        next(fin, None)
        for line in fin:
            fields = line.rstrip().split(',')
            if len(fields) != 6 or fields[5] != word:
                continue
            bucket = groups.get(fields[2])
            if bucket is not None:
                bucket.append(fields)

    return (
        groups['Groningen_and_Drenthe'],
        groups['Limburg'],
        groups['Oost_Overijsel-Gelderland'],
    )
#print("{0}: {1} {2} {3}".format(word,len(listGroningen),len(listLimburg),len(listOverijsel))
def groupSamplesInCSV(fileCSV, idxRegion):
    """Split the data rows of a CSV file into three lists by region.

    The first line is read as the header: it is stripped of trailing
    whitespace and split on commas. Every following line is processed the
    same way and kept only when it has as many fields as the header. Kept
    rows are sorted by the field at position *idxRegion*; rows whose region
    is none of the three known names are ignored.

    Unlike the earlier readline loop, a blank line in the middle of the file
    no longer ends the scan: all lines up to the real end of file are read.

    Parameters:
        fileCSV: path of the CSV file.
        idxRegion: index of the field that holds the region name.

    Returns:
        tuple (header, dataGroningen, dataLimburg, dataOverijsel); header is
        the list of header fields and each data element is a list of rows,
        every row being the list of its string fields.
    """
    # region name -> rows collected for that region
    groups = {
        'Groningen_and_Drenthe': [],
        'Limburg': [],
        'Oost_Overijsel-Gelderland': [],
    }

    # `with` guarantees the file is closed even if a row raises.
    with open(fileCSV, 'r') as fin:
        # first line is the header
        headerLine = fin.readline().rstrip()
        header = headerLine.split(',')

        # A blank header (or an empty file) has always meant "no data rows
        # are read"; keep that so existing callers see the same result.
        if headerLine:
            nFields = len(header)
            for line in fin:
                fields = line.rstrip().split(',')
                if len(fields) != nFields:
                    continue
                bucket = groups.get(fields[idxRegion])
                if bucket is not None:
                    bucket.append(fields)

    return (
        header,
        groups['Groningen_and_Drenthe'],
        groups['Limburg'],
        groups['Oost_Overijsel-Gelderland'],
    )
def addUserID(featureFile, recordingsCSV):
dirFeature = config['sentence_based']['dirFeature']