56 lines
2.1 KiB
Python
56 lines
2.1 KiB
Python
import os
|
|
import sys
|
|
import configparser
|
|
|
|
import numpy as np
|
|
from matplotlib import pyplot
|
|
|
|
# Machine-specific location of the dialect_identification project. It holds
# the helper modules imported below as well as config.ini.
currDir = 'C:\\Users\\Aki\\source\\repos\\rug_VS\\dialect_identification\\dialect_identification'

# currDir is an absolute Windows path, so it is added to sys.path as-is
# (joining an absolute path onto another directory yields the absolute path).
sys.path.append(currDir)

# Project-local modules; they are looked up through the sys.path entry above,
# which is why these imports cannot sit at the top of the file.
from dataIO import readFile, selectSamplesFromCombinedData
import dataManipulation

configFile = currDir + '\\config.ini'

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; check it so a wrong path fails with a clear
# message instead of a KeyError on the 'word_based' section below.
if not config.read(configFile):
    raise FileNotFoundError('cannot read config file: ' + configFile)
fileWordList = config['word_based']['fileWordList']
fileCombined = config['word_based']['fileCombined']

# Target words, loaded through the project helper from the configured file.
wordList = readFile(fileWordList)
|
|
|
|
# Upper bound passed to extractRandomSample for each region.
sampleNumMax = 50
# Column index of the pronunciation field in the per-region sample arrays.
cPronunciation = 4

# Iterate over the words themselves so that every entry, including the last
# one, gets a figure (an index loop over range(1, len(wordList)) stops one
# word short).
for word in wordList:
    if not word:
        # An empty entry has no samples to select and would only yield a
        # file named '.png'.
        continue

    dataGroningen, dataLimburg, dataOverijsel = selectSamplesFromCombinedData(word, fileCombined)

    # Only the sampled rows are used; the returned indices are discarded.
    dataG, _ = dataManipulation.extractRandomSample(np.array(dataGroningen), sampleNumMax)
    dataL, _ = dataManipulation.extractRandomSample(np.array(dataLimburg), sampleNumMax)
    dataO, _ = dataManipulation.extractRandomSample(np.array(dataOverijsel), sampleNumMax)

    # Combine the pronunciation column of the three regions into one array,
    # ordered Groningen, Limburg, Overijsel.
    data = np.hstack([
        dataG[:, cPronunciation],
        dataL[:, cPronunciation],
        dataO[:, cPronunciation],
    ])

    # MDS (presumably on pairwise Levenshtein distances between the
    # pronunciations - see dataManipulation for the exact definition).
    dataLevenshtein = dataManipulation.makeLevenshteinMatrix(data)
    dataMDS = dataManipulation.MDS(dataLevenshtein)

    # Row boundaries of each region inside dataMDS, assuming its rows keep the
    # order of `data`. They are derived from the actual sample counts so that
    # a region with fewer than sampleNumMax samples is still sliced correctly,
    # and because slice ends are exclusive no "- 1" is needed.
    endG = len(dataG)
    endL = endG + len(dataL)

    # plot
    # NOTE(review): scatter() receives both c= and facecolors=; matplotlib
    # documents c as taking precedence, so the markers may not be drawn
    # hollow - confirm the intended look before changing these arguments.
    pyplot.scatter(
        dataMDS[:endG, 0], dataMDS[:endG, 1],
        s=80, c='red', marker="o", facecolors='none', label="Groningen and Drenthe",
    )
    pyplot.scatter(
        dataMDS[endG:endL, 0], dataMDS[endG:endL, 1],
        c='green', marker="^", facecolors='none', label="Limburg",
    )
    pyplot.scatter(
        dataMDS[endL:, 0], dataMDS[endL:, 1],
        c='blue', marker="+", facecolors='none', label="Oost Overijsel-Gelderland",
    )

    pyplot.title(word)
    pyplot.legend(loc='upper right')
    pyplot.savefig('c:\\cygwin64\\home\\Aki\\rug_cygwin\\_same-utterance\\fig\\' + word + '.png')
    # Reset the shared figure so the next word starts from an empty canvas.
    pyplot.gcf().clear()