The re-aligned MLF file includes fewer files than the original MLF file. Therefore, the SCP file must also be updated accordingly when re-estimation is performed. This bug is fixed.

This commit is contained in:
yemaozi88 2019-03-08 23:13:08 +01:00
parent fa81b70b27
commit fdd165ce6a
2 changed files with 33 additions and 22 deletions

Binary file not shown.

View File

@ -27,8 +27,8 @@ extract_features = 0
flat_start = 0 flat_start = 0
train_model_without_sp = 0 train_model_without_sp = 0
add_sp = 0 add_sp = 0
train_model_with_re_aligned_mlf = 1 train_model_with_re_aligned_mlf = 0
train_triphone = 0 train_triphone = 1
@ -45,9 +45,6 @@ lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')
config_dir = os.path.join(default.htk_dir, 'config') config_dir = os.path.join(default.htk_dir, 'config')
sil_hed = os.path.join(config_dir, 'sil.hed')
prototype = os.path.join(config_dir, proto_name)
model_dir = os.path.join(default.htk_dir, 'model') model_dir = os.path.join(default.htk_dir, 'model')
model0_dir = os.path.join(model_dir, 'hmm0') model0_dir = os.path.join(model_dir, 'hmm0')
model1_dir = os.path.join(model_dir, 'hmm1') model1_dir = os.path.join(model_dir, 'hmm1')
@ -72,6 +69,7 @@ fh.make_new_directory(label_dir, existing_dir='leave')
hcompv_scp_train = os.path.join(tmp_dir, 'train.scp') hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
mlf_file_train = os.path.join(label_dir, 'train_phone.mlf') mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf') mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
hcompv_scp_train_updated = hcompv_scp_train.replace('.scp', '_updated.scp')
## testing ## testing
htk_stimmen_dir = os.path.join(default.htk_dir, 'stimmen') htk_stimmen_dir = os.path.join(default.htk_dir, 'stimmen')
@ -304,6 +302,13 @@ if train_model_with_re_aligned_mlf:
mlf_file_train_aligned, mlf_file_train_aligned,
os.path.join(label_dir, 'train_word.mlf'), os.path.join(label_dir, 'train_word.mlf'),
hcompv_scp_train) hcompv_scp_train)
print('>>> updating the script file... ')
chtk.update_script_file(
mlf_file_train_aligned,
mlf_file_train,
hcompv_scp_train,
hcompv_scp_train_updated)
print("elapsed time: {}".format(time.time() - timer_start)) print("elapsed time: {}".format(time.time() - timer_start))
print('>>> re-estimation... ') print('>>> re-estimation... ')
@ -314,11 +319,11 @@ if train_model_with_re_aligned_mlf:
model1sp2_dir, model1sp2_dir,
os.path.join(model1sp_dir, 'iter'+str(niter)), os.path.join(model1sp_dir, 'iter'+str(niter)),
improvement_threshold, improvement_threshold,
hcompv_scp_train, hcompv_scp_train_updated,
os.path.join(htk_stimmen_dir, 'mfc'), os.path.join(htk_stimmen_dir, 'mfc'),
'mfc', 'mfc',
os.path.join(htk_stimmen_dir, 'word_lattice.ltc'), os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
mlf_file=mlf_file_train, mlf_file=mlf_file_train_aligned,
lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'), lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
model_type='monophone_with_sp' model_type='monophone_with_sp'
) )
@ -327,19 +332,25 @@ if train_model_with_re_aligned_mlf:
## ======================= train triphone ======================= ## ======================= train triphone =======================
if train_triphone: if train_triphone:
triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')
macros = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'macros')
hmmdefs = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'hmmdefs')
model_out_dir = os.path.join(model_dir, 'hmm1_tri', 'iter1') model_out_dir = os.path.join(model_dir, 'hmm1_tri', 'iter1')
run_command([
'HERest', '-B', triphonelist_txt = os.path.join(config_dir, 'triphonelist_txt')
'-C', config_train, triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')
'-I', triphone_mlf,
'-t', '250.0', '150.0', '1000.0', chtk.make_triphonelist(
'-s', 'stats' triphonelist_txt,
'-S', hcompv_scp_train, triphone_mlf,
'-H', macros, mlf_file_train_aligned)
'-H', hmmdefs,
'-M', model_out_dir, #run_command([
os.path.join(config_dir, 'triphonelist.txt') # 'HERest', '-B',
]) # '-C', config_train,
# '-I', triphone_mlf,
# '-t', '250.0', '150.0', '1000.0',
# '-s', 'stats'
# '-S', hcompv_scp_train,
# '-H', macros,
# '-H', hmmdefs,
# '-M', model_out_dir,
# os.path.join(config_dir, 'triphonelist.txt')
#])