label alignment using HVite is added.

This commit is contained in:
yemaozi88
2019-02-14 00:21:28 +01:00
parent 8f89f60538
commit c185072d5b
11 changed files with 527 additions and 339 deletions

View File

@ -27,7 +27,8 @@ extract_features = 0
flat_start = 0
train_model_without_sp = 0
add_sp = 0
train_model_with_sp = 1
train_model_with_sp = 0
train_model_with_sp_align_mlf = 1
@ -75,6 +76,7 @@ if not os.path.exists(label_dir):
## training
hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
## train without sp
niter_max = 10
@ -102,7 +104,8 @@ if make_lexicon:
# (1) Replace all tabs with single space;
# (2) Put a '\' before any dictionary entry beginning with single quote
#http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
fame_functions.fix_single_quote(lexicon_htk)
print('>>> fixing the lexicon...')
fame_functions.fix_lexicon(lexicon_htk)
print("elapsed time: {}".format(time.time() - timer_start))
@ -269,11 +272,11 @@ if train_model_without_sp:
fh.make_new_directory(modeln_dir)
pyhtk.re_estimation(
config_train,
os.path.join(modeln_dir_pre, 'macros'),
os.path.join(modeln_dir_pre, hmmdefs_name),
modeln_dir,
hcompv_scp_train, phonelist_txt,
mlf_file=mlf_file_train)
mlf_file=mlf_file_train,
macros=os.path.join(modeln_dir_pre, 'macros'))
print("elapsed time: {}".format(time.time() - timer_start))
@ -321,7 +324,6 @@ if add_sp:
## ======================= train model with short pause =======================
if train_model_with_sp:
print('==== train model with sp ====')
#for niter in range(niter_max+1, niter_max*2+1):
for niter in range(20, 50):
timer_start = time.time()
hmm_n = 'iter' + str(niter)
@ -333,9 +335,31 @@ if train_model_with_sp:
fh.make_new_directory(modeln_dir)
pyhtk.re_estimation(
config_train,
os.path.join(modeln_dir_pre, 'macros'),
os.path.join(modeln_dir_pre, hmmdefs_name),
modeln_dir,
hcompv_scp_train, phonelist_txt,
mlf_file=mlf_file_train)
mlf_file=mlf_file_train,
macros=os.path.join(modeln_dir_pre, 'macros'))
print("elapsed time: {}".format(time.time() - timer_start))
## ======================= train model with short pause (aligned mlf) =======================
if train_model_with_sp_align_mlf:
    print('==== train model with sp with align.mlf ====')
    # Passes 50..59. Each pass takes its starting hmmdefs/macros from the
    # directory of the pass before it, so the first one expects
    # model1_dir/iter49 to be present already.
    for niter in range(50, 60):
        timer_start = time.time()

        # Directory for this pass and for the preceding pass.
        hmm_n = 'iter{}'.format(niter)
        hmm_n_pre = 'iter{}'.format(niter - 1)
        modeln_dir = os.path.join(model1_dir, hmm_n)
        modeln_dir_pre = os.path.join(model1_dir, hmm_n_pre)

        # re-estimation, using the aligned label file instead of
        # mlf_file_train.
        fh.make_new_directory(modeln_dir)
        hmmdefs_pre = os.path.join(modeln_dir_pre, hmmdefs_name)
        macros_pre = os.path.join(modeln_dir_pre, 'macros')
        pyhtk.re_estimation(
            config_train,
            hmmdefs_pre,
            modeln_dir,
            hcompv_scp_train,
            phonelist_txt,
            mlf_file=mlf_file_train_aligned,
            macros=macros_pre)
        print("elapsed time: {}".format(time.time() - timer_start))