monophone training is completed.

This commit is contained in:
yemaozi88 2019-03-07 22:16:50 +01:00
parent 41d4fa5ff9
commit fa81b70b27
2 changed files with 55 additions and 60 deletions

Binary file not shown.

View File

@ -26,9 +26,8 @@ make_mlf = 0
extract_features = 0
flat_start = 0
train_model_without_sp = 0
add_sp = 1
train_model_with_sp = 0
train_model_with_sp_align_mlf = 0
add_sp = 0
train_model_with_re_aligned_mlf = 1
train_triphone = 0
@ -36,7 +35,7 @@ train_triphone = 0
# pre-defined values.
dataset_list = ['devel', 'test', 'train']
feature_size = 39
improvement_threshold = 0.5
improvement_threshold = 0.3
hmmdefs_name = 'hmmdefs'
proto_name = 'proto'
@ -49,10 +48,11 @@ config_dir = os.path.join(default.htk_dir, 'config')
sil_hed = os.path.join(config_dir, 'sil.hed')
prototype = os.path.join(config_dir, proto_name)
model_dir = os.path.join(default.htk_dir, 'model')
model0_dir = os.path.join(model_dir, 'hmm0')
model1_dir = os.path.join(model_dir, 'hmm1')
model1sp_dir = os.path.join(model_dir, 'hmm1sp')
model_dir = os.path.join(default.htk_dir, 'model')
model0_dir = os.path.join(model_dir, 'hmm0')
model1_dir = os.path.join(model_dir, 'hmm1')
model1sp_dir = os.path.join(model_dir, 'hmm1sp')
model1sp2_dir = os.path.join(model_dir, 'hmm1sp2')
# directories / files to be made.
lexicon_dir = os.path.join(default.htk_dir, 'lexicon')
@ -68,7 +68,6 @@ label_dir = os.path.join(default.htk_dir, 'label')
fh.make_new_directory(label_dir, existing_dir='leave')
## training
hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
@ -78,10 +77,6 @@ mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
htk_stimmen_dir = os.path.join(default.htk_dir, 'stimmen')
## train without sp
niter_max = 10
## ======================= make lexicon for HTK =======================
if make_lexicon:
timer_start = time.time()
@ -273,64 +268,64 @@ if add_sp:
print('==== adding sp to the model ====')
# reference:
# http://www.f.waseda.jp/yusukekondo/htk.html#flat_start_estimation
timer_start = time.time()
# make model with sp.
niter = 7
print('>>> adding sp state to the last model in the previous step...')
fh.make_new_directory(model1sp_dir, existing_dir='leave')
niter = chtk.get_niter_max(model1_dir)
modeln_dir_pre = os.path.join(model1_dir, 'iter'+str(niter))
## update hmmdefs and macros.
print('>>> adding sp to the model...')
modeln_dir = os.path.join(model1sp_dir, 'iter0')
modeln_dir = os.path.join(model1sp_dir, 'iter0')
chtk.add_sp(modeln_dir_pre, modeln_dir)
print("elapsed time: {}".format(time.time() - timer_start))
## ======================= train model with short pause =======================
if train_model_with_sp:
print('==== train model with sp ====')
for niter in range(20, 50):
timer_start = time.time()
hmm_n = 'iter' + str(niter)
hmm_n_pre = 'iter' + str(niter-1)
modeln_dir = os.path.join(model1_dir, hmm_n)
modeln_dir_pre = os.path.join(model1_dir, hmm_n_pre)
# re-estimation
fh.make_new_directory(modeln_dir)
pyhtk.re_estimation(
config_train,
os.path.join(modeln_dir_pre, hmmdefs_name),
modeln_dir,
hcompv_scp_train, phonelist_txt,
mlf_file=mlf_file_train,
macros=os.path.join(modeln_dir_pre, 'macros'))
print("elapsed time: {}".format(time.time() - timer_start))
niter = chtk.re_estimation_until_saturated(
model1sp_dir, modeln_dir, improvement_threshold, hcompv_scp_train,
os.path.join(htk_stimmen_dir, 'mfc'),
'mfc',
os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
mlf_file=mlf_file_train,
lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
model_type='monophone_with_sp'
)
## ======================= train model with short pause (aligned mlf) =======================
if train_model_with_sp_align_mlf:
print('==== train model with sp with align.mlf ====')
for niter in range(50, 60):
timer_start = time.time()
hmm_n = 'iter' + str(niter)
hmm_n_pre = 'iter' + str(niter-1)
modeln_dir = os.path.join(model1_dir, hmm_n)
modeln_dir_pre = os.path.join(model1_dir, hmm_n_pre)
# re-estimation
fh.make_new_directory(modeln_dir)
pyhtk.re_estimation(
config_train,
os.path.join(modeln_dir_pre, hmmdefs_name),
modeln_dir,
hcompv_scp_train, phonelist_txt,
mlf_file=mlf_file_train_aligned,
macros=os.path.join(modeln_dir_pre, 'macros'))
print("elapsed time: {}".format(time.time() - timer_start))
## ======================= train model with re-aligned mlf =======================
# Stage guarded by the `train_model_with_re_aligned_mlf` flag. It has two steps:
#   1. build an aligned label file from the newest model found in `model1sp_dir`;
#   2. run re-estimation, writing the resulting models under `model1sp2_dir`.
if train_model_with_re_aligned_mlf:
# NOTE(review): 'traina' in the banner below looks like a typo for 'train a';
# the string is left untouched here because it is runtime output.
print('==== traina model with re-aligned mlf ====')

# --- step 1: re-alignment ---
print('>>> re-aligning the training data... ')
timer_start = time.time()
# Pick the model directory `iter<niter>` inside `model1sp_dir`.
# presumably get_niter_max returns the highest existing iteration index -- TODO confirm in chtk.
niter = chtk.get_niter_max(model1sp_dir)
modeln_dir = os.path.join(model1sp_dir, 'iter'+str(niter))
# Arguments, in order: macros file, hmmdefs file, `mlf_file_train_aligned`,
# the word-level MLF `train_word.mlf`, and the training scp list.
# presumably `mlf_file_train_aligned` is the output written by this call -- verify against chtk.
chtk.make_aligned_label(
os.path.join(modeln_dir, 'macros'),
os.path.join(modeln_dir, 'hmmdefs'),
mlf_file_train_aligned,
os.path.join(label_dir, 'train_word.mlf'),
hcompv_scp_train)
print("elapsed time: {}".format(time.time() - timer_start))

# --- step 2: re-estimation ---
print('>>> re-estimation... ')
timer_start = time.time()
# existing_dir='leave': presumably keeps an already existing directory as is -- TODO confirm in fh.
fh.make_new_directory(model1sp2_dir, existing_dir='leave')
# Same lookup, with the same argument, as in step 1 above.
# NOTE(review): looks redundant unless step 1 adds iterations to `model1sp_dir` -- confirm.
niter = chtk.get_niter_max(model1sp_dir)
# The returned value is stored in `niter`; it is not read again within this section.
niter = chtk.re_estimation_until_saturated(
model1sp2_dir,
# starting point: the `iter<niter>` model of `model1sp_dir`
os.path.join(model1sp_dir, 'iter'+str(niter)),
improvement_threshold,
hcompv_scp_train,
os.path.join(htk_stimmen_dir, 'mfc'),
'mfc',
os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
# NOTE(review): this passes `mlf_file_train`, not the `mlf_file_train_aligned`
# generated in step 1 -- confirm that the un-aligned MLF is really intended here.
mlf_file=mlf_file_train,
lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
model_type='monophone_with_sp'
)
print("elapsed time: {}".format(time.time() - timer_start))
# train triphone.
## ======================= train triphone =======================
if train_triphone:
triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')
macros = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'macros')