# accent_classification/accent_classification/word_based.py

# 56 lines
# 2.1 KiB
# Python

import os
import sys
import configparser
import numpy as np
from matplotlib import pyplot
# Absolute location of the dialect_identification project directory; the
# config.ini read below lives here.
currDir = 'C:\\Users\\Aki\\source\\repos\\rug_VS\\dialect_identification\\dialect_identification'

# Extend the module search path before the project-local imports below
# (presumably dataIO and dataManipulation live in currDir -- confirm).
# NOTE: currDir is an absolute Windows path, so on Windows os.path.join()
# discards its first argument and this appends currDir itself.
sys.path.append(os.path.join(os.path.dirname(sys.path[0]), currDir))

from dataIO import readFile
from dataIO import selectSamplesFromCombinedData
import dataManipulation

# Load the input file locations from the [word_based] section of config.ini.
configFile = currDir + '\\config.ini'
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it actually parsed; fail here with the offending path rather
# than with an opaque KeyError on the section lookup below.
if not config.read(configFile):
    raise FileNotFoundError(
        "could not read config file: {}".format(configFile))
fileWordList = config['word_based']['fileWordList']
fileCombined = config['word_based']['fileCombined']

# Target words, as returned by readFile for the configured word-list file.
wordList = readFile(fileWordList)
# Upper bound on the number of samples requested per region.
sampleNumMax = 50
# Column of each sample row that is used as the pronunciation.
cPronunciation = 4

# For every target word: draw samples per region, compute pairwise
# Levenshtein distances between the pronunciations, project them with MDS
# and save one scatter plot per word.
#
# NOTE(review): the original loop was `range(1, len(wordList))` indexing
# `wordList[wordNum-1]`, which never reached the last entry. Every entry is
# visited now; empty entries are skipped in case readFile yields a trailing
# empty string -- confirm against readFile.
for word in wordList:
    if not word:
        continue
    #print("=== {} ===".format(word))

    dataGroningen, dataLimburg, dataOverijsel = selectSamplesFromCombinedData(word, fileCombined)

    dataG, indexG = dataManipulation.extractRandomSample(np.array(dataGroningen), sampleNumMax)
    dataL, indexL = dataManipulation.extractRandomSample(np.array(dataLimburg), sampleNumMax)
    dataO, indexO = dataManipulation.extractRandomSample(np.array(dataOverijsel), sampleNumMax)

    # Combine the pronunciations from the three regions into one 1-D array,
    # ordered Groningen, Limburg, Overijsel.
    data = np.hstack([dataG[:, cPronunciation],
                      dataL[:, cPronunciation],
                      dataO[:, cPronunciation]])

    # MDS on the Levenshtein distance matrix.
    dataLevenshtein = dataManipulation.makeLevenshteinMatrix(data)
    dataMDS = dataManipulation.MDS(dataLevenshtein)

    # Row ranges of each region inside dataMDS (assumes dataMDS keeps the row
    # order of `data` -- confirm against dataManipulation.MDS). Python slice
    # ends are exclusive, so the boundaries are the cumulative group sizes;
    # the previous `start:end-1` slices dropped the last sample of every
    # region. Using the actual lengths also keeps the groups aligned if a
    # region yields fewer than sampleNumMax samples.
    endG = len(dataG)
    endL = endG + len(dataL)
    endO = endL + len(dataO)

    # plot
    pyplot.scatter(dataMDS[0:endG, 0], dataMDS[0:endG, 1],
                   s=80, c='red', marker="o", facecolors='none',
                   label="Groningen and Drenthe")
    pyplot.scatter(dataMDS[endG:endL, 0], dataMDS[endG:endL, 1],
                   c='green', marker="^", facecolors='none',
                   label="Limburg")
    pyplot.scatter(dataMDS[endL:endO, 0], dataMDS[endL:endO, 1],
                   c='blue', marker="+", facecolors='none',
                   label="Oost Overijsel-Gelderland")
    pyplot.title(word)
    #ax.set_xlabel('x')
    #ax.set_ylabel('y')
    pyplot.legend(loc='upper right')
    #pyplot.show()
    pyplot.savefig('c:\\cygwin64\\home\\Aki\\rug_cygwin\\_same-utterance\\fig\\' + word + '.png')
    # Clear the current figure so the next word starts from an empty canvas.
    pyplot.gcf().clear()