2018-03-28 10:31:33 +02:00
|
|
|
import sys
|
2019-01-24 09:38:28 +01:00
|
|
|
import os
|
2019-01-27 01:34:04 +01:00
|
|
|
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
2019-01-24 09:38:28 +01:00
|
|
|
|
2018-03-28 10:31:33 +02:00
|
|
|
import tempfile
|
2019-02-03 00:34:35 +01:00
|
|
|
import shutil
|
2019-02-03 13:54:37 +01:00
|
|
|
import glob
|
2019-01-27 23:52:33 +01:00
|
|
|
import time
|
2019-01-24 09:38:28 +01:00
|
|
|
|
2019-01-28 12:34:20 +01:00
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
2018-03-26 20:50:14 +02:00
|
|
|
|
2019-01-24 09:38:28 +01:00
|
|
|
import fame_functions
|
2019-03-25 00:06:53 +01:00
|
|
|
from phoneset import fame_ipa, fame_asr, fame_phonetics
|
2019-01-24 09:38:28 +01:00
|
|
|
import defaultfiles as default
|
|
|
|
sys.path.append(default.toolbox_dir)
|
2019-01-27 01:34:04 +01:00
|
|
|
import file_handling as fh
|
|
|
|
from htk import pyhtk
|
2019-04-22 00:59:53 +02:00
|
|
|
#from scripts import run_command
|
2018-04-02 01:07:50 +02:00
|
|
|
|
2018-03-28 10:31:33 +02:00
|
|
|
|
|
|
|
## ======================= user define =======================
# procedure: every flag below switches one stage of the pipeline on (1) or off (0).
# when combine_all is set, the devel / test / train files are merged and
# the 'all' files are used for training instead of the 'train' ones.
combine_all = 1

make_lexicon = 0
make_label = 0  # it takes roughly 4800 sec on Surface pro 2.
make_mlf = 0
extract_features = 0
flat_start = 1
train_monophone_without_sp = 1
add_sp = 1
train_monophone_with_re_aligned_mlf = 1
increase_mixture = 1
train_triphone = 0
train_triphone_tied = 0

# pre-defined values.
dataset_list = ['devel', 'test', 'train']
# passed to pyhtk.HTK as its feature size.
feature_size = 30
# passed to chtk.re_estimation_until_saturated as its improvement threshold.
improvement_threshold = 0.3
|
2019-03-05 00:11:38 +01:00
|
|
|
|
2019-02-04 13:46:27 +01:00
|
|
|
# input lexicons, read from the corpus directory (default.fame_dir).
lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')

# HTK configuration files.
config_dir = os.path.join(default.htk_dir, 'config')
phonelist_full_txt = os.path.join(config_dir, 'phonelist_full.txt')
tree_hed = os.path.join(config_dir, 'tree.hed')
quests_hed = os.path.join(config_dir, 'quests.hed')

# one directory per training stage.
model_dir = os.path.join(default.htk_dir, 'model')
model_mono0_dir = os.path.join(model_dir, 'mono0')
model_mono1_dir = os.path.join(model_dir, 'mono1')
model_mono1sp_dir = os.path.join(model_dir, 'mono1sp')
model_mono1sp2_dir = os.path.join(model_dir, 'mono1sp2')
model_tri1_dir = os.path.join(model_dir, 'tri1')
model_tri1tied_dir = os.path.join(model_dir, 'tri1tied')

# directories / files to be made.
lexicon_dir = os.path.join(default.htk_dir, 'lexicon')
lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr')
lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov')
lexicon_htk = os.path.join(lexicon_dir, 'lex.htk')
lexicon_htk_with_sp = os.path.join(lexicon_dir, 'lex_with_sp.htk')
lexicon_htk_triphone = os.path.join(lexicon_dir, 'lex_triphone.htk')

feature_dir = os.path.join(default.htk_dir, 'mfc')
tmp_dir = os.path.join(default.htk_dir, 'tmp')
label_dir = os.path.join(default.htk_dir, 'label')

# the working directories are requested with existing_dir='leave'.
for _working_dir in (feature_dir, tmp_dir, label_dir):
    fh.make_new_directory(_working_dir, existing_dir='leave')
|
2019-02-03 00:34:35 +01:00
|
|
|
|
2019-03-05 00:11:38 +01:00
|
|
|
|
2019-02-04 20:32:12 +01:00
|
|
|
## training
# all training files share one prefix: 'all' when the datasets are combined,
# 'train' when only the train set is used.
_train_prefix = 'all' if combine_all else 'train'

hcompv_scp_train = os.path.join(tmp_dir, _train_prefix + '.scp')
mlf_file_train = os.path.join(label_dir, _train_prefix + '_phone.mlf')
mlf_file_train_word = os.path.join(label_dir, _train_prefix + '_word.mlf')
mlf_file_train_with_sp = os.path.join(label_dir, _train_prefix + '_phone_with_sp.mlf')
mlf_file_train_aligned = os.path.join(label_dir, _train_prefix + '_phone_aligned.mlf')
triphone_mlf = os.path.join(label_dir, _train_prefix + '_triphone.mlf')

# script file name used after re-alignment (written by chtk.update_script_file).
hcompv_scp_train_updated = hcompv_scp_train.replace('.scp', '_updated.scp')

## testing
htk_stimmen_dir = os.path.join(default.htk_dir, 'stimmen')
|
|
|
|
|
|
|
|
|
2019-01-29 21:52:11 +01:00
|
|
|
## ======================= make lexicon for HTK =======================
if make_lexicon:
    timer_start = time.time()
    print('==== making lexicon for HTK ====')

    # convert each lexicon from fame_asr phoneset to fame_htk phoneset.
    print('>>> converting each lexicon from fame_asr phoneset to fame_htk phoneset...')
    for _lexicon_in, _lexicon_out in (
            (lexicon_asr, lexicon_htk_asr),
            (lexicon_oov, lexicon_htk_oov)):
        fame_functions.lexicon_asr2htk(_lexicon_in, _lexicon_out)

    # combine lexicon
    print('>>> combining lexicon files into one lexicon...')
    # pronunciations which are not found in lex.asr are generated using G2P
    # and listed in lex.oov, therefore lex_asr and lex_oov do not overlap.
    fame_functions.combine_lexicon(lexicon_htk_asr, lexicon_htk_oov, lexicon_htk)

    ## fixing the lexicon for HTK.
    # (1) Replace all tabs with single space;
    # (2) Put a '\' before any dictionary entry beginning with single quote
    # http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
    print('>>> fixing the lexicon...')
    fame_functions.fix_lexicon(lexicon_htk)

    ## adding sp to the lexicon for HTK.
    print('>>> adding sp to the lexicon...')
    with open(lexicon_htk) as f:
        lexicon_text = f.read()
    # ' sp' is inserted in front of every newline; the file is written as
    # ASCII bytes so the newlines are stored exactly as '\n'.
    with open(lexicon_htk_with_sp, 'wb') as f:
        f.write(lexicon_text.replace('\n', ' sp\n').encode('ascii'))

    print("elapsed time: {}".format(time.time() - timer_start))
|
|
|
|
|
2018-04-02 01:07:50 +02:00
|
|
|
|
2019-03-03 02:05:37 +01:00
|
|
|
## initialize the instance for HTK.
# every later stage drives HTK through this single object.
chtk = pyhtk.HTK(
    config_dir,
    fame_asr.phoneset_htk,
    lexicon_htk_with_sp,
    feature_size)
|
2019-03-03 02:05:37 +01:00
|
|
|
|
|
|
|
|
2019-02-04 13:46:27 +01:00
|
|
|
## ======================= make label files =======================
if make_label:
    for dataset in dataset_list:
        timer_start = time.time()
        print("==== making label files on dataset {}".format(dataset))

        script_list = os.path.join(default.fame_dir, 'data', dataset, 'text')
        wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
        label_dir_ = os.path.join(label_dir, dataset)
        dictionary_file = os.path.join(label_dir_, 'temp.dic')
        fh.make_new_directory(label_dir_, existing_dir='leave')

        # list of scripts
        with open(script_list, "rt", encoding="utf-8") as fin:
            scripts = fin.read().split('\n')

        for line in scripts:
            # sample line:
            # sp0457m_test_1968_plakkenfryslanterhorne_2168 en dan begjinne je natuerlik
            # -> the first token minus its leading 'xxx_' part is the file name,
            #    the remainder of the line is the sentence.
            filename_, _sep, sentence = line.partition(' ')
            filename = filename_.partition('_')[2]
            sentence_htk = fame_functions.word2htk(sentence)

            wav_file = os.path.join(wav_dir_, filename + '.wav')
            # skip utterances without audio, or for which can_be_ascii()
            # does not return 0.
            if not os.path.exists(wav_file) or chtk.can_be_ascii(sentence_htk) != 0:
                continue

            # NOTE(review): get_number_of_missing_words() presumably writes
            # dictionary_file as a side effect -- confirm in pyhtk.
            if chtk.get_number_of_missing_words(sentence_htk, dictionary_file) != 0:
                os.remove(dictionary_file)
                continue

            # when the file name is too long, HDMan command does not work.
            # therefore first temporary dictionary_file is made, then renamed.
            shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic'))

            label_file = os.path.join(label_dir_, filename + '.lab')
            chtk.make_label_file(sentence_htk, label_file)

        print("elapsed time: {}".format(time.time() - timer_start))
|
2019-02-03 13:54:37 +01:00
|
|
|
|
2018-04-25 09:07:46 +02:00
|
|
|
|
2019-03-03 02:05:37 +01:00
|
|
|
## ======================= make master label files =======================
if make_mlf:
    timer_start = time.time()
    print("==== making master label files ====")

    # train_2002_gongfansaken_10347.lab is empty. should be removed
    # together with its dictionary file.
    empty_lab_file = os.path.join(label_dir, 'train', 'train_2002_gongfansaken_10347.lab')
    empty_dic_file = empty_lab_file.replace('.lab', '.dic')
    for _empty_file in (empty_lab_file, empty_dic_file):
        if os.path.exists(_empty_file):
            os.remove(_empty_file)

    for dataset in dataset_list:
        feature_dir_ = os.path.join(feature_dir, dataset)
        label_dir_ = os.path.join(label_dir, dataset)
        mlf_word = os.path.join(label_dir, '{}_word.mlf'.format(dataset))
        mlf_phone = os.path.join(label_dir, '{}_phone.mlf'.format(dataset))
        mlf_phone_with_sp = os.path.join(label_dir, '{}_phone_with_sp.mlf'.format(dataset))

        print(">>> generating a word level mlf file for {}...".format(dataset))
        chtk.label2mlf(label_dir_, mlf_word)

        # the phone level mlf is generated twice: without and with sp.
        print(">>> generating a phone level mlf file for {}...".format(dataset))
        chtk.mlf_word2phone(mlf_phone, mlf_word, with_sp=False)
        chtk.mlf_word2phone(mlf_phone_with_sp, mlf_word, with_sp=True)

    print("elapsed time: {}".format(time.time() - timer_start))
|
2019-02-03 13:54:37 +01:00
|
|
|
|
2018-04-25 09:07:46 +02:00
|
|
|
|
2019-02-04 13:46:27 +01:00
|
|
|
## ======================= extract features =======================
if extract_features:
    for dataset in dataset_list:
        timer_start = time.time()
        print('==== extract features on dataset {} ===='.format(dataset))

        wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
        label_dir_ = os.path.join(label_dir, dataset)
        feature_dir_ = os.path.join(feature_dir, dataset)
        fh.make_new_directory(feature_dir_, existing_dir='delete')

        # a script file for HCopy
        print(">>> making a script file for HCopy...")
        # get a list of features (hcopy.scp) from the list of label files:
        # one "<wav file>\t<mfc file>" line per .lab file.
        # (alternative, not used: fame_functions.make_hcopy_scp_from_filelist_in_fame,
        #  which builds the list from the filelist in FAME! corpus.)
        lab_list = glob.glob(os.path.join(label_dir_, '*.lab'))
        feature_list = [
            os.path.join(wav_dir_, os.path.basename(lab_file).replace('.lab', '.wav')) + '\t'
            + os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc'))
            for lab_file in lab_list]

        # the script is written to a named temporary file which is closed
        # before HCopy is run on it; try/finally guarantees the file is
        # removed even when writing or feature extraction fails.
        hcopy_scp = tempfile.NamedTemporaryFile(mode='wb', delete=False)
        try:
            with hcopy_scp:
                hcopy_scp.write(bytes('\n'.join(feature_list), 'ascii'))

            # extract features.
            print(">>> extracting features on {}...".format(dataset))
            chtk.wav2mfc(hcopy_scp.name)
        finally:
            os.remove(hcopy_scp.name)

        # make hcompv.scp: one .mfc path per .dic file in the label directory.
        print(">>> making a script file for {}...".format(dataset))
        listdir = glob.glob(os.path.join(label_dir_, '*.dic'))
        mfc_list = [
            dic_file.replace(label_dir_, feature_dir_).replace('.dic', '.mfc')
            for dic_file in listdir]
        hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
        with open(hcompv_scp, 'wb') as f:
            f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))

        print("elapsed time: {}".format(time.time() - timer_start))

    # the stimmen script file does not depend on the dataset, so its
    # features are extracted once, after all datasets are processed.
    print(">>> extracting features on stimmen...")
    chtk.wav2mfc(os.path.join(htk_stimmen_dir, 'hcopy.scp'))
|
2018-04-25 09:07:46 +02:00
|
|
|
|
|
|
|
|
2019-04-22 00:59:53 +02:00
|
|
|
## ======================= combine devel / test / train =======================
if combine_all:
    # for each kind of file, devel and test are concatenated first and the
    # train file is then appended to that result.
    # NOTE(review): the last argument of fh.concatenate is assumed to be the
    # output file (it is also passed as the first input of the second call)
    # -- confirm against file_handling.concatenate.
    for _directory, _suffix, _combined_file in (
            (tmp_dir, '.scp', hcompv_scp_train),                          # script files.
            (label_dir, '_phone.mlf', mlf_file_train),                    # phone level mlfs.
            (label_dir, '_phone_with_sp.mlf', mlf_file_train_with_sp),    # phone level mlfs with sp.
            (label_dir, '_word.mlf', mlf_file_train_word)):               # word level mlfs.
        fh.concatenate(
            os.path.join(_directory, 'devel' + _suffix),
            os.path.join(_directory, 'test' + _suffix),
            _combined_file)
        fh.concatenate(
            _combined_file,
            os.path.join(_directory, 'train' + _suffix),
            _combined_file)
|
|
|
|
|
|
|
|
|
2018-04-25 09:07:46 +02:00
|
|
|
## ======================= flat start monophones =======================
if flat_start:
    timer_start = time.time()
    print('==== flat start ====')

    fh.make_new_directory(model_mono0_dir, existing_dir='leave')
    chtk.flat_start(hcompv_scp_train, model_mono0_dir)

    # make macros, but only when a vFloors file is present in the
    # flat-start output directory.
    vFloors = os.path.join(model_mono0_dir, 'vFloors')
    if os.path.exists(vFloors):
        chtk.make_macros(vFloors)

    # allocate mean & variance to all phones in the phone list
    print('>>> allocating mean & variance to all phones in the phone list...')
    chtk.make_hmmdefs(model_mono0_dir)

    print("elapsed time: {}".format(time.time() - timer_start))
|
2018-04-25 09:07:46 +02:00
|
|
|
|
|
|
|
|
2019-02-04 20:32:12 +01:00
|
|
|
## ======================= train model without short pause =======================
if train_monophone_without_sp:
    print('==== train monophone without sp ====')

    timer_start = time.time()

    # files under the stimmen directory handed to the re-estimation routine.
    _stimmen_mfc_dir = os.path.join(htk_stimmen_dir, 'mfc')
    _stimmen_lattice = os.path.join(htk_stimmen_dir, 'word_lattice.ltc')
    _stimmen_lexicon = os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic')

    # start from the flat-start model (mono0) and write into mono1.
    niter = chtk.re_estimation_until_saturated(
        model_mono1_dir,
        model_mono0_dir,
        improvement_threshold,
        hcompv_scp_train,
        _stimmen_mfc_dir,
        'mfc',
        _stimmen_lattice,
        mlf_file=mlf_file_train,
        lexicon=_stimmen_lexicon
    )

    print("elapsed time: {}".format(time.time() - timer_start))
|
2019-02-04 20:32:12 +01:00
|
|
|
|
|
|
|
|
|
|
|
## ======================= adding sp to the model =======================
if add_sp:
    print('==== adding sp to the model ====')
    # reference:
    # http://www.f.waseda.jp/yusukekondo/htk.html#flat_start_estimation
    timer_start = time.time()

    # make model with sp: the latest iteration in mono1 is the input,
    # the result becomes iter0 of mono1sp.
    print('>>> adding sp state to the last model in the previous step...')
    fh.make_new_directory(model_mono1sp_dir, existing_dir='leave')
    niter = chtk.get_niter_max(model_mono1_dir)
    modeln_dir_pre = os.path.join(model_mono1_dir, 'iter{}'.format(niter))
    modeln_dir = os.path.join(model_mono1sp_dir, 'iter0')

    chtk.add_sp(modeln_dir_pre, modeln_dir)

    print('>>> re-estimation...')
    _stimmen_mfc_dir = os.path.join(htk_stimmen_dir, 'mfc')
    _stimmen_lattice = os.path.join(htk_stimmen_dir, 'word_lattice.ltc')
    _stimmen_lexicon = os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic')
    niter = chtk.re_estimation_until_saturated(
        model_mono1sp_dir,
        modeln_dir,
        improvement_threshold,
        hcompv_scp_train,
        _stimmen_mfc_dir,
        'mfc',
        _stimmen_lattice,
        mlf_file=mlf_file_train_with_sp,
        lexicon=_stimmen_lexicon,
        model_type='monophone_with_sp'
    )
    print("elapsed time: {}".format(time.time() - timer_start))
|
|
|
|
|
2019-02-14 00:21:28 +01:00
|
|
|
|
2019-03-07 22:16:50 +01:00
|
|
|
## ======================= train model with re-aligned mlf =======================
if train_monophone_with_re_aligned_mlf:
    print('==== traina monophone with re-aligned mlf ====')
    # one timer for the whole section (re-alignment + re-estimation), like
    # the other sections; it is not reset before the re-estimation step.
    timer_start = time.time()

    print('>>> re-aligning the training data... ')
    # the latest iteration of the monophone-with-sp model is used both for
    # the alignment and as the starting point of the re-estimation below.
    niter = chtk.get_niter_max(model_mono1sp_dir)
    modeln_dir = os.path.join(model_mono1sp_dir, 'iter'+str(niter))
    chtk.make_aligned_label(
        os.path.join(modeln_dir, 'macros'),
        os.path.join(modeln_dir, 'hmmdefs'),
        mlf_file_train_aligned,
        mlf_file_train_word,
        hcompv_scp_train)
    chtk.fix_mlf(mlf_file_train_aligned)

    print('>>> updating the script file... ')
    # NOTE(review): presumably writes hcompv_scp_train_updated with only the
    # utterances present in the aligned mlf -- confirm in pyhtk.
    chtk.update_script_file(
        mlf_file_train_aligned,
        mlf_file_train_with_sp,
        hcompv_scp_train,
        hcompv_scp_train_updated)

    print('>>> re-estimation... ')
    fh.make_new_directory(model_mono1sp2_dir, existing_dir='leave')
    niter = chtk.re_estimation_until_saturated(
        model_mono1sp2_dir,
        modeln_dir,
        improvement_threshold,
        hcompv_scp_train_updated,
        os.path.join(htk_stimmen_dir, 'mfc'),
        'mfc',
        os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
        mlf_file=mlf_file_train_aligned,
        lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
        model_type='monophone_with_sp'
    )
    print("elapsed time: {}".format(time.time() - timer_start))
|
2019-03-03 02:05:37 +01:00
|
|
|
|
|
|
|
|
2019-04-22 00:59:53 +02:00
|
|
|
## ======================= increase mixture =======================
if increase_mixture:
    print('==== increase mixture ====')
    timer_start = time.time()

    # the first stage starts from the re-aligned monophone model; every
    # following stage starts from the stage before it.
    modeln_dir_ = model_mono1sp2_dir
    for nmix in [2, 4, 8, 16]:
        modeln_dir = os.path.join(model_dir, 'mono'+str(nmix))
        modeln_dir_iter0 = os.path.join(modeln_dir, 'iter0')

        print('mixture: {}'.format(nmix))
        fh.make_new_directory(modeln_dir, existing_dir='delete')

        # increase the number of mixtures of the latest model of the previous
        # stage; the result (hmmdefs + copied macros) becomes iter0 of this stage.
        niter = chtk.get_niter_max(modeln_dir_)
        modeln_dir_last = os.path.join(modeln_dir_, 'iter'+str(niter))
        chtk.increase_mixture(
            os.path.join(modeln_dir_last, 'hmmdefs'),
            nmix,
            modeln_dir_iter0,
            model_type='monophone_with_sp')
        shutil.copy2(os.path.join(modeln_dir_last, 'macros'),
                     os.path.join(modeln_dir_iter0, 'macros'))

        # re-estimate starting from the model whose mixtures were just
        # increased (iter0 of THIS stage). Starting from iter0 of the previous
        # stage would leave the increased-mixture model unused.
        #improvement_threshold = -10
        niter = chtk.re_estimation_until_saturated(
            modeln_dir,
            modeln_dir_iter0,
            improvement_threshold,
            hcompv_scp_train_updated,
            os.path.join(htk_stimmen_dir, 'mfc'),
            'mfc',
            os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
            mlf_file=mlf_file_train_aligned,
            lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
            model_type='monophone_with_sp'
        )
        modeln_dir_ = modeln_dir

    print("elapsed time: {}".format(time.time() - timer_start))
|
2019-03-08 23:13:08 +01:00
|
|
|
|
|
|
|
|
2019-04-22 00:59:53 +02:00
|
|
|
## ======================= train triphone =======================
# NOTE(review): the triphone list is made on every run, also when both
# train_triphone and train_triphone_tied are 0 -- confirm this is intended.
print('>>> making triphone list... ')
chtk.make_triphonelist(mlf_file_train_aligned, triphone_mlf)

if train_triphone:
    print('==== train triphone model ====')
    timer_start = time.time()

    print('>>> init triphone model... ')
    niter = chtk.get_niter_max(model_mono1sp2_dir)
    fh.make_new_directory(os.path.join(model_tri1_dir, 'iter0'), existing_dir='leave')
    chtk.init_triphone(
        os.path.join(model_mono1sp2_dir, 'iter{}'.format(niter)),
        os.path.join(model_tri1_dir, 'iter0')
    )

    print('>>> re-estimation... ')
    ## I wanted to train until saturated, i.e. call
    ## chtk.re_estimation_until_saturated(model_tri1_dir, <tri1/iter0>, ...,
    ## mlf_file=triphone_mlf, model_type='triphone'),
    # but because the data size is limited, some triphone cannot be trained and received the error:
    #   ERROR [+8231]  GetHCIModel: Cannot find hmm [i:-]r[+???]
    # therefore re-estimation is performed a fixed three times (iter1..iter3).
    output_dir = model_tri1_dir
    _modeln_dir_pre = os.path.join(output_dir, 'iter0')
    for niter in range(1, 4):
        _modeln_dir = os.path.join(output_dir, 'iter{}'.format(niter))

        fh.make_new_directory(_modeln_dir, 'leave')
        chtk.re_estimation(
            os.path.join(_modeln_dir_pre, 'hmmdefs'),
            _modeln_dir,
            hcompv_scp_train_updated,
            mlf_file=triphone_mlf,
            macros=os.path.join(_modeln_dir_pre, 'macros'),
            model_type='triphone')
        # the model just written is the input of the next iteration.
        _modeln_dir_pre = _modeln_dir

    print("elapsed time: {}".format(time.time() - timer_start))
|
|
|
|
|
2019-03-23 21:52:48 +01:00
|
|
|
|
2019-04-22 00:59:53 +02:00
|
|
|
## ======================= train tied-state triphones =======================
if train_triphone_tied:
    print('==== train tied-state triphones ====')
    timer_start = time.time()

    print('>>> making lexicon for triphone... ')
    chtk.make_lexicon_triphone(phonelist_full_txt, lexicon_htk_triphone)
    chtk.combine_phonelists(phonelist_full_txt)

    # the latest triphone model is the source of both the stats file and the
    # initial tied model; its location is derived from model_tri1_dir instead
    # of an absolute path that only exists on one machine.
    niter = chtk.get_niter_max(model_tri1_dir)
    model_tri1_last_dir = os.path.join(model_tri1_dir, 'iter'+str(niter))

    print('>>> making a tree header... ')
    fame_phonetics.make_quests_hed(quests_hed)
    # NOTE(review): assumes the 'stats' file is written into the last
    # iteration directory of the triphone training (iter3) -- confirm in pyhtk.
    stats = os.path.join(model_tri1_last_dir, 'stats')
    chtk.make_tree_header(tree_hed, quests_hed, stats, config_dir)

    print('>>> init triphone model... ')
    fh.make_new_directory(os.path.join(model_tri1tied_dir, 'iter0'), existing_dir='leave')
    chtk.init_triphone(
        model_tri1_last_dir,
        os.path.join(model_tri1tied_dir, 'iter0'),
        tied=True)

    # I wanted to train until saturated, i.e. call
    # chtk.re_estimation_until_saturated(model_tri1tied_dir, <tri1tied/iter0>, ...,
    # mlf_file=triphone_mlf, model_type='triphone'),
    # but because the data size is limited, some triphone cannot be trained and received the error:
    #   ERROR [+8231]  GetHCIModel: Cannot find hmm [i:-]r[+???]
    # therefore only 3 times re-estimation is performed.
    output_dir = model_tri1tied_dir
    for niter in range(1, 4):
        hmm_n = 'iter' + str(niter)
        hmm_n_pre = 'iter' + str(niter-1)
        _modeln_dir = os.path.join(output_dir, hmm_n)
        _modeln_dir_pre = os.path.join(output_dir, hmm_n_pre)

        fh.make_new_directory(_modeln_dir, 'leave')
        chtk.re_estimation(
            os.path.join(_modeln_dir_pre, 'hmmdefs'),
            _modeln_dir,
            hcompv_scp_train_updated,
            mlf_file=triphone_mlf,
            macros=os.path.join(_modeln_dir_pre, 'macros'),
            model_type='triphone')

    print("elapsed time: {}".format(time.time() - timer_start))
|