import os

# Switch the working directory before the project-local imports below.
# NOTE(review): presumably required so that `defaultfiles` resolves from the
# project folder -- confirm before reordering these statements.
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import sys

import defaultfiles as default

# The forced_alignment package is not on the default import path; its
# location is taken from the project defaults, so this append has to run
# before the import that follows.
sys.path.append(default.forced_alignment_module_dir)

from forced_alignment import pyhtk, convert_phone_set, scripts

# Directory holding the test recordings; the per-recording .lab/.dic/.txt
# files are written next to them.
reus_dir = r'c:\Users\Aki\source\repos\acoustic_model\reus-test'
wav_dir = reus_dir

# Recordings to align.
wav_files = [
    'reus1008-reus.wav',
    'reus1167-man.wav',
    'reus3768-mantsje.wav',
]

# Word used both as the dictionary headword and as the label-file content.
word = 'reus'

# Candidate pronunciations (IPA) listed under `word` in the dictionary.
pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']

# For every recording: write an HTK dictionary that lists each candidate
# pronunciation under `word`, write the label file, then run HVite on the
# recording with that dictionary.
for wav_file in wav_files:
    # Derive the sibling file names from the stem rather than with
    # str.replace('.wav', ...): replace() rewrites every occurrence of
    # '.wav' and, when the name has no lowercase '.wav' at all, returns the
    # audio path itself -- which the 'w' open below would then truncate.
    stem = os.path.splitext(wav_file)[0]
    file_lab = os.path.join(reus_dir, stem + '.lab')
    file_dic = os.path.join(reus_dir, stem + '.dic')
    file_txt = os.path.join(reus_dir, stem + '.txt')

    # output htk dict file
    with open(file_dic, 'w', encoding="utf-8") as f:
        for ipa in pronunciation_ipa:
            # The ASCII colon is swapped for the IPA length mark before the
            # IPA -> CGN -> "barbara" phone-set conversion.
            cgn = convert_phone_set.ipa2cgn([ipa.replace(':', 'ː')])
            barbara = convert_phone_set.cgn2barbara(cgn)
            f.write(word.upper() + '\t' + barbara + '\n')

    # output htk label file.
    pyhtk._create_label_file(word, file_lab)

    # Run HVite on this recording. Flag meanings below follow the HTK
    # documentation; the positional arguments are the dictionary, the phone
    # list and the audio file.
    scripts.run_command([
        'HVite', '-T', '1',               # trace level 1
        '-a',                             # align against the label file
        '-C', default.config_hvite,       # HVite configuration file
        '-H', default.acoustic_model,     # acoustic model definitions
        '-m',                             # include model-level boundaries
        '-i', file_txt,                   # write the result to this file
        #'-S', script_file,
        file_dic, default.phonelist_txt, os.path.join(wav_dir, wav_file)
    ])