The re-aligned MLF file includes fewer files than the original MLF file. Therefore, the scp file should also be updated accordingly when re-estimation is performed. This bug is fixed.

This commit is contained in:
yemaozi88 2019-03-08 23:13:08 +01:00
parent fa81b70b27
commit fdd165ce6a
2 changed files with 33 additions and 22 deletions

Binary file not shown.

View File

@ -27,8 +27,8 @@ extract_features = 0
flat_start = 0
train_model_without_sp = 0
add_sp = 0
train_model_with_re_aligned_mlf = 1
train_triphone = 0
train_model_with_re_aligned_mlf = 0
train_triphone = 1
@ -45,9 +45,6 @@ lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')
config_dir = os.path.join(default.htk_dir, 'config')
sil_hed = os.path.join(config_dir, 'sil.hed')
prototype = os.path.join(config_dir, proto_name)
model_dir = os.path.join(default.htk_dir, 'model')
model0_dir = os.path.join(model_dir, 'hmm0')
model1_dir = os.path.join(model_dir, 'hmm1')
@ -72,6 +69,7 @@ fh.make_new_directory(label_dir, existing_dir='leave')
hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
hcompv_scp_train_updated = hcompv_scp_train.replace('.scp', '_updated.scp')
## testing
htk_stimmen_dir = os.path.join(default.htk_dir, 'stimmen')
@ -304,6 +302,13 @@ if train_model_with_re_aligned_mlf:
mlf_file_train_aligned,
os.path.join(label_dir, 'train_word.mlf'),
hcompv_scp_train)
print('>>> updating the script file... ')
chtk.update_script_file(
mlf_file_train_aligned,
mlf_file_train,
hcompv_scp_train,
hcompv_scp_train_updated)
print("elapsed time: {}".format(time.time() - timer_start))
print('>>> re-estimation... ')
@ -314,11 +319,11 @@ if train_model_with_re_aligned_mlf:
model1sp2_dir,
os.path.join(model1sp_dir, 'iter'+str(niter)),
improvement_threshold,
hcompv_scp_train,
hcompv_scp_train_updated,
os.path.join(htk_stimmen_dir, 'mfc'),
'mfc',
os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
mlf_file=mlf_file_train,
mlf_file=mlf_file_train_aligned,
lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
model_type='monophone_with_sp'
)
@ -327,19 +332,25 @@ if train_model_with_re_aligned_mlf:
## ======================= train triphone =======================
if train_triphone:
triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')
macros = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'macros')
hmmdefs = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'hmmdefs')
model_out_dir = os.path.join(model_dir, 'hmm1_tri', 'iter1')
run_command([
'HERest', '-B',
'-C', config_train,
'-I', triphone_mlf,
'-t', '250.0', '150.0', '1000.0',
'-s', 'stats'
'-S', hcompv_scp_train,
'-H', macros,
'-H', hmmdefs,
'-M', model_out_dir,
os.path.join(config_dir, 'triphonelist.txt')
])
triphonelist_txt = os.path.join(config_dir, 'triphonelist_txt')
triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')
chtk.make_triphonelist(
triphonelist_txt,
triphone_mlf,
mlf_file_train_aligned)
#run_command([
# 'HERest', '-B',
# '-C', config_train,
# '-I', triphone_mlf,
# '-t', '250.0', '150.0', '1000.0',
# '-s', 'stats'
# '-S', hcompv_scp_train,
# '-H', macros,
# '-H', hmmdefs,
# '-M', model_out_dir,
# os.path.join(config_dir, 'triphonelist.txt')
#])