Update FA result evaluation and xsampa-to-ipa conversion.

yemaozi88 2018-06-21 16:27:00 +02:00
parent 5fb05ddab2
commit d56ef7f075
3 changed files with 209 additions and 72 deletions

Binary file not shown.


@@ -22,11 +22,11 @@ dataset_list = ['devel', 'test', 'train']
 extract_features = 0
 make_feature_list = 0
 conv_lexicon = 0
-check_lexicon = 0
+check_lexicon = 1
 make_mlf = 0
 combine_files = 0
 flat_start = 0
-train_model = 1
+train_model = 0
 forced_alignment = 0
@@ -133,7 +133,11 @@ if check_lexicon:
     print("==== check if all the phones are successfully converted. ====\n")
 
     # the phones used in the lexicon.
-    phonelist = am_func.get_phonelist(lex_htk)
+    phonelist_asr = am_func.get_phonelist(lex_asr)
+    phonelist_oov = am_func.get_phonelist(lex_oov)
+    phonelist_htk = am_func.get_phonelist(lex_htk)
+    phonelist = phonelist_asr.union(phonelist_oov)
 
     # the lines which include a specific phone.
     lines = am_func.find_phone(lex_asr, 'g')
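Note on this hunk: phonelist is now the union of the phone inventories of lex.asr and lex.oov instead of the phones of lex.htk alone. The am_func helpers are not part of this commit; for reference, a minimal sketch of a get_phonelist-style function could look like the following, assuming each lexicon line is "word<TAB>phone phone ..." (an assumption about the lexicon format) and returning a set, which is consistent with the .union() call above.

# Hypothetical sketch; the real am_func.get_phonelist may differ.
def get_phonelist_sketch(lexicon_file):
    phones = set()
    with open(lexicon_file, encoding='utf-8') as f:
        for line in f:
            if '\t' in line:
                _, pronunciation = line.rstrip('\n').split('\t', 1)
                phones.update(pronunciation.split())
    return phones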


@@ -3,19 +3,54 @@ import sys
 import csv
 import subprocess
 import configparser
+from collections import Counter
 
 import numpy as np
 import pandas as pd
+import matplotlib.pyplot as plt
 
-## ======================= user define =======================
+## ======================= functions =======================
+def read_fileFA(fileFA):
+    """
+    read the result file of HTK forced alignment.
+    this function only works when input is one word.
+    """
+    with open(fileFA, 'r') as f:
+        lines = f.read()
+    lines = lines.split('\n')
+
+    phones = []
+    for line in lines:
+        line_split = line.split()
+        if len(line_split) > 1:
+            phones.append(line_split[2])
+
+    return ' '.join(phones)
+
+
+#####################
+## USER DEFINE ##
+#####################
 curr_dir = r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model'
 config_ini = curr_dir + '\\config.ini'
 forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment'
+forced_alignment_module_old = r'C:\OneDrive\Research\rug\code\forced_alignment\forced_alignment'
 ipa_xsampa_converter_dir = r'C:\Users\Aki\source\repos\ipa-xsama-converter'
 csvfile = r"C:\OneDrive\Research\rug\stimmen\Frisian Variants Picture Task Stimmen.csv"
+experiments_dir = r'C:\OneDrive\Research\rug\experiments'
+data_dir = experiments_dir + '\\stimmen\\data'
+cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
 
 # procedure
+convert_phones = 0
+make_dic_files = 0
+make_dic_files_short = 0
+do_forced_alignment = 0
+eval_forced_alignment = 1
 
 ## ======================= add paths =======================
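The read_fileFA function added above takes the third whitespace-separated field of every line in an HVite result file that has more than one field, and joins those fields with spaces. A quick sanity check, assuming HVite writes one phone per line roughly as "start end phone score" (illustrative; real HTK output can also contain extra lines, and note that the guard len(line_split) > 1 followed by line_split[2] would raise an IndexError on a line with exactly two fields):

import os
import tempfile

sample = ('0 300000 s -241.5\n'
          '300000 600000 o -198.3\n'
          '600000 900000 p -210.7\n')
with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as tmp:
    tmp.write(sample)
print(read_fileFA(tmp.name))  # expected: 's o p'
os.remove(tmp.name)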
@@ -28,6 +63,10 @@ sys.path.append(curr_dir)
 import convert_xsampa2ipa
 import acoustic_model_functions as am_func
 
+# for forced-alignment
+sys.path.append(forced_alignment_module_old)
+import pyHTK
+
 ## ======================= load variables =======================
 config = configparser.ConfigParser()
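pyHTK is imported from the older forced_alignment checkout appended to sys.path above; the module itself is not part of this commit. The per-file alignment steps used later in this script (write a one-word .lab file, run HVite via pyHTK.doHVite, read back the aligned phones, delete the .lab file) can be summarized as a helper. This is a sketch; the argument order of pyHTK.doHVite is taken from its call site below.

def align_one_word(fileWav, word, fileDic, fileFA,
                   configHVite, filePhoneList, AcousticModel):
    """Force-align a single-word utterance and return its phone string."""
    # HVite reads the target word from the .lab file next to the wav.
    fileLab = fileWav.replace('.wav', '.lab')
    with open(fileLab, 'w') as f:
        f.write(word.upper())
    pyHTK.doHVite(fileWav, fileLab, fileDic, fileFA,
                  configHVite, filePhoneList, AcousticModel)
    os.remove(fileLab)
    return read_fileFA(fileFA)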
@@ -40,85 +79,179 @@ lex_asr = FAME_dir + '\\lexicon\\lex.asr'
 lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'
 
-## ======================= check phones included in FAME! =======================
-# the phones used in the lexicon.
-#phonelist = am_func.get_phonelist(lex_htk)
-
-# the lines which include a specific phone.
-#lines = am_func.find_phone(lex_asr, 'x')
-
 ## ======================= convert phones ======================
-mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir)
-
-with open(csvfile, encoding="utf-8") as fin:
-    lines = csv.reader(fin, delimiter=';', lineterminator="\n", skipinitialspace=True)
-    next(lines, None)  # skip the headers
-
-    filenames = []
-    words = []
-    pronunciations = []
-    for line in lines:
-        if line[1] is not '' and len(line) > 5:
-            filenames.append(line[0])
-            words.append(line[1])
-            pron_xsampa = line[3]
-            pron_ipa = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, pron_xsampa)
-            pron_ipa = pron_ipa.replace('ː', ':')
-            pron_famehtk = convert_phone_set.ipa2famehtk(pron_ipa)
-
-            # adjust to phones used in the acoustic model.
-            pron_famehtk = pron_famehtk.replace('sp', 'sil')
-            pron_famehtk = pron_famehtk.replace('ce :', 'ce')  # because ceh is ignored.
-            pron_famehtk = pron_famehtk.replace('w :', 'wh')
-            pron_famehtk = pron_famehtk.replace('e :', 'eh')
-            pron_famehtk = pron_famehtk.replace('eh :', 'eh')
-            pron_famehtk = pron_famehtk.replace('ih :', 'ih')
-
-            #translation_key = {'sp': 'sil', 'ce :': 'ceh', 'w :': 'wh'}
-            #pron = []
-            #for phoneme in pron_famehtk.split(' '):
-            #    pron.append(translation_key.get(phoneme, phoneme))
-            #pronunciations.append(' '.join(pron_famehtk))
-            pronunciations.append(pron_famehtk)
-
-filenames = np.array(filenames)
-words = np.array(words)
-pronunciations = np.array(pronunciations)
-
-del line, lines
-del pron_xsampa, pron_ipa, pron_famehtk
-
-# check if all phones are in the phonelist of the acoustic model.
-#phonelist = ' '.join(pronunciations)
-#np.unique(phonelist.split(' '))
-#phonelist.find(':')
-
-# make dict files.
+if convert_phones:
+    mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir)
+
+    ## check phones included in FAME!
+    # the phones used in the lexicon.
+    #phonelist = am_func.get_phonelist(lex_htk)
+
+    # the lines which include a specific phone.
+    #lines = am_func.find_phone(lex_asr, 'x')
+
+    with open(csvfile, encoding="utf-8") as fin:
+        lines = csv.reader(fin, delimiter=';', lineterminator="\n", skipinitialspace=True)
+        next(lines, None)  # skip the headers
+
+        filenames = []
+        words = []
+        pronunciations = []
+        for line in lines:
+            if line[1] is not '' and len(line) > 5:
+                filenames.append(line[0])
+                words.append(line[1])
+                pron_xsampa = line[3]
+                pron_ipa = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, pron_xsampa)
+                pron_ipa = pron_ipa.replace('ː', ':')
+                pron_famehtk = convert_phone_set.ipa2famehtk(pron_ipa)
+
+                # adjust to phones used in the acoustic model.
+                pron_famehtk = pron_famehtk.replace('sp', 'sil')
+                pron_famehtk = pron_famehtk.replace('ce :', 'ce')  # because ceh is ignored.
+                pron_famehtk = pron_famehtk.replace('w :', 'wh')
+                pron_famehtk = pron_famehtk.replace('e :', 'eh')
+                pron_famehtk = pron_famehtk.replace('eh :', 'eh')
+                pron_famehtk = pron_famehtk.replace('ih :', 'ih')
+
+                #translation_key = {'sp': 'sil', 'ce :': 'ceh', 'w :': 'wh'}
+                #pron = []
+                #for phoneme in pron_famehtk.split(' '):
+                #    pron.append(translation_key.get(phoneme, phoneme))
+                #pronunciations.append(' '.join(pron_famehtk))
+                pronunciations.append(pron_famehtk)
+
+    # check if all phones are in the phonelist of the acoustic model.
+    #phonelist = ' '.join(pronunciations)
+    #np.unique(phonelist.split(' '))
+    #phonelist.find(':')
+
+    filenames = np.array(filenames)
+    words = np.array(words)
+    pronunciations = np.array(pronunciations)
+
+    del line, lines
+    del pron_xsampa, pron_ipa, pron_famehtk
+
+    np.save(data_dir + '\\filenames.npy', filenames)
+    np.save(data_dir + '\\words.npy', words)
+    np.save(data_dir + '\\pronunciations.npy', pronunciations)
+else:
+    filenames = np.load(data_dir + '\\filenames.npy')
+    words = np.load(data_dir + '\\words.npy')
+    pronunciations = np.load(data_dir + '\\pronunciations.npy')
+
 word_list = np.unique(words)
-word_id = 1
-word = word_list[word_id]
+
+
+## ======================= make dict files used for HTK. ======================
+if make_dic_files:
+    output_dir = experiments_dir + r'\stimmen\dic'
+
+    for word in word_list:
+        WORD = word.upper()
+        fileDic = output_dir + '\\' + word + '.dic'
+
+        # make dic file.
+        pronvar_ = pronunciations[words == word]
+        pronvar = np.unique(pronvar_)
+
+        with open(fileDic, 'w') as f:
+            for pvar in pronvar:
+                f.write('{0}\t{1}\n'.format(WORD, pvar))
+
+
+## ======================= make dict files for most popular words. ======================
+if make_dic_files_short:
+    output_dir = experiments_dir + r'\stimmen\dic'
+
+    #word = word_list[3]
+    for word in word_list:
+        WORD = word.upper()
+
+        fileStat = output_dir + '\\' + word + '_stat.csv'
+        pronvar = pronunciations[words == word]
+        c = Counter(pronvar)
+        total_num = sum(c.values())
+
+        with open(fileStat, 'w') as f:
+            for key, value in c.items():
+                f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, value/total_num*100, WORD, key))
 
 ## ======================= forced alignment =======================
-#if forced_alignment:
-#    try:
-#        scripts.run_command([
-#            'HVite','-T', '1', '-a', '-C', configHVite,
-#            '-H', AcousticModel, '-m', '-I',
-#            mlf_file, '-i', fa_file, '-S',
-#            script_file, htk_dict_file, filePhoneList
-#        ])
-#    except:
-#        print("\033[91mHVite command failed with these input files:\033[0m")
-#        print(_debug_show_file('HVite config', configHVite))
-#        print(_debug_show_file('Accoustic model', AcousticModel))
-#        print(_debug_show_file('Master Label file', mlf_file))
-#        print(_debug_show_file('Output', fa_file))
-#        print(_debug_show_file('Script file', script_file))
-#        print(_debug_show_file('HTK dictionary', htk_dict_file))
-#        print(_debug_show_file('Phoneme list', filePhoneList))
-#        raise
-
-##os.remove(hcopy_scp.name)
+if do_forced_alignment:
+    configHVite = cygwin_dir + r'\config\config.HVite'
+    filePhoneList = experiments_dir + r'\friesian\acoustic_model\config\phonelist_friesian.txt'
+    wav_dir = experiments_dir + r'\stimmen\wav'
+
+    #for hmm_num in [1, 2, 4, 8, 16, 32, 64, 128]:
+    for hmm_num in [64]:
+        hmm_num_str = str(hmm_num)
+        AcousticModel = experiments_dir + r'\friesian\acoustic_model\model\hmm' + hmm_num_str + r'-3\hmmdefs'
+
+        predictions = []
+        file_num_max = len(filenames)
+        for i in range(0, file_num_max):
+            print('=== {0}/{1} ==='.format(i, file_num_max))
+            filename = filenames[i]
+            fileWav = wav_dir + '\\' + filename
+
+            if os.path.exists(fileWav):
+                word = words[i]
+                WORD = word.upper()
+
+                # make label file.
+                fileLab = wav_dir + '\\' + filename.replace('.wav', '.lab')
+                with open(fileLab, 'w') as f:
+                    lines = f.write(WORD)
+
+                fileDic = experiments_dir + r'\stimmen\dic_short' + '\\' + word + '.dic'
+                fileFA = experiments_dir + r'\stimmen\FA_short' + '\\' + filename.replace('.wav', '.txt') + hmm_num_str
+
+                pyHTK.doHVite(fileWav, fileLab, fileDic, fileFA, configHVite, filePhoneList, AcousticModel)
+                prediction = read_fileFA(fileFA)
+                predictions.append(prediction)
+
+                os.remove(fileLab)
+                print('{0}: {1} -> {2}'.format(WORD, pronunciations[i], prediction))
+            else:
+                predictions.append('')
+                print('!!!!! file not found.')
+
+        predictions = np.array(predictions)
+        match = np.c_[words[predictions != ''], pronunciations[predictions != ''], predictions[predictions != '']]
+        np.save(data_dir + '\\match_hmm' + hmm_num_str + '.npy', match)
+
+
+## ======================= evaluate the result of forced alignment =======================
+if eval_forced_alignment:
+    #for hmm_num in [1, 2, 4, 8, 16, 32, 64]:
+    hmm_num = 64
+    hmm_num_str = str(hmm_num)
+    match = np.load(data_dir + '\\match_hmm' + hmm_num_str + '.npy')
+
+    # use dic_short?
+    if 1:
+        pronunciation_variants = np.array(['WORD', 'pronunciation']).reshape(1, 2)
+        for word in word_list:
+            fileDic = experiments_dir + r'\stimmen\dic_short' + '\\' + word + '.dic'
+            pronunciation_variants = np.r_[pronunciation_variants, pyHTK.loadHTKdic(fileDic)]
+
+        match_short = []
+        for line in match:
+            word = line[0]
+            WORD = word.upper()
+            pronvar = pronunciation_variants[pronunciation_variants[:, 0] == word.upper(), 1]
+
+            if line[1] in pronvar:
+                match_short.append(line)
+
+        match_short = np.array(match_short)
+        match = np.copy(match_short)
+
+    # number of match
+    total_match = sum(match[:, 1] == match[:, 2])
+    print("{}: {}/{}".format(hmm_num_str, total_match, match.shape[0]))