triphone training is added.
This commit is contained in:
parent
fdd165ce6a
commit
bf586fcde5
Binary file not shown.
@@ -345,6 +345,7 @@ def fix_lexicon(lexicon_file):
+
     for i in lex[lex['word'].str.startswith('\'')].index.values:
         lex.iat[i, 0] = lex.iat[i, 0].replace('\'', '\\\'')

     # to_csv does not work with a space separator, therefore all tabs have to be replaced manually.
     #lex.to_csv(lexicon_file, index=False, header=False, encoding="utf-8", sep=' ', quoting=csv.QUOTE_NONE, escapechar='\\')
     lex.to_csv(lexicon_file, index=False, header=False, sep='\t', encoding='utf-8')
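A note on the to_csv comment above: with sep=' ' and quoting=csv.QUOTE_NONE, pandas has to escape the spaces inside the pronunciation field, which mangles the lexicon, hence the tab-separated write. A minimal sketch of the full workaround, assuming lex is the two-column DataFrame used in fix_lexicon (the helper name is hypothetical, not part of this commit):

    import pandas as pd

    def write_lexicon_space_separated(lex: pd.DataFrame, lexicon_file: str) -> None:
        # Write tab-separated first; to_csv accepts '\t' without quoting problems.
        lex.to_csv(lexicon_file, index=False, header=False, sep='\t', encoding='utf-8')
        # Then replace every tab with a single space, as the comment describes.
        with open(lexicon_file, encoding='utf-8') as f:
            text = f.read()
        with open(lexicon_file, 'w', encoding='utf-8') as f:
            f.write(text.replace('\t', ' '))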
@@ -25,11 +25,11 @@ make_label = 0 # it takes roughly 4800 sec on Surface pro 2.
 make_mlf = 0
 extract_features = 0
 flat_start = 0
-train_model_without_sp = 0
+train_monophone_without_sp = 0
 add_sp = 0
-train_model_with_re_aligned_mlf = 0
-train_triphone = 1
+train_monophone_with_re_aligned_mlf = 0
+train_triphone = 0
+train_triphone_tied = 1


 # pre-defined values.
@@ -46,16 +46,18 @@ lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')
 config_dir = os.path.join(default.htk_dir, 'config')

 model_dir = os.path.join(default.htk_dir, 'model')
-model0_dir = os.path.join(model_dir, 'hmm0')
-model1_dir = os.path.join(model_dir, 'hmm1')
-model1sp_dir = os.path.join(model_dir, 'hmm1sp')
-model1sp2_dir = os.path.join(model_dir, 'hmm1sp2')
+model_mono0_dir = os.path.join(model_dir, 'mono0')
+model_mono1_dir = os.path.join(model_dir, 'mono1')
+model_mono1sp_dir = os.path.join(model_dir, 'mono1sp')
+model_mono1sp2_dir = os.path.join(model_dir, 'mono1sp2')
+model_tri1_dir = os.path.join(model_dir, 'tri1')

 # directories / files to be made.
 lexicon_dir = os.path.join(default.htk_dir, 'lexicon')
 lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr')
 lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov')
 lexicon_htk = os.path.join(lexicon_dir, 'lex.htk')
+#lexicon_htk_with_sp = os.path.join(lexicon_dir, 'lex_with_sp.htk')

 feature_dir = os.path.join(default.htk_dir, 'mfc')
 fh.make_new_directory(feature_dir, existing_dir='leave')
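Based on the paths defined in this hunk, the model directory is now laid out per training stage:

    model/
        mono0/     # flat start
        mono1/     # monophone without sp
        mono1sp/   # monophone with sp added
        mono1sp2/  # monophone re-trained on the re-aligned mlf
        tri1/      # triphone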
@@ -68,6 +70,7 @@ fh.make_new_directory(label_dir, existing_dir='leave')
 ## training
 hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
 mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
+mlf_file_train_with_sp = os.path.join(label_dir, 'train_phone_with_sp.mlf')
 mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
 hcompv_scp_train_updated = hcompv_scp_train.replace('.scp', '_updated.scp')

@@ -97,8 +100,17 @@ if make_lexicon:
     # http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
     print('>>> fixing the lexicon...')
     fame_functions.fix_lexicon(lexicon_htk)
-    print("elapsed time: {}".format(time.time() - timer_start))
+
+    ## add sp to the end of each line.
+    #print('>>> adding sp...')
+    #with open(lexicon_htk) as f:
+    #    lines = f.read().split('\n')
+    #lines = [line + ' sp' for line in lines]
+    #with open(lexicon_htk_with_sp, 'wb') as f:
+    #    f.write(bytes('\n'.join(lines), 'ascii'))
+
+    print("elapsed time: {}".format(time.time() - timer_start))


 ## initialize the instance for HTK.
 chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk, feature_size)
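The commented-out block above appends the short-pause phone to every lexicon entry. Written out as a standalone, runnable helper it could look like this (a sketch only; add_sp_to_lexicon is a hypothetical name, and the commit keeps the step disabled):

    def add_sp_to_lexicon(lexicon_htk, lexicon_htk_with_sp):
        # Read the HTK lexicon, one 'word pronunciation' entry per line.
        with open(lexicon_htk, encoding='utf-8') as f:
            lines = f.read().split('\n')
        # Append the short-pause phone to each non-empty entry.
        lines = [line + ' sp' for line in lines if line]
        with open(lexicon_htk_with_sp, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines))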
@@ -164,12 +176,15 @@ if make_mlf:
         label_dir_ = os.path.join(label_dir, dataset)
         mlf_word = os.path.join(label_dir, dataset + '_word.mlf')
         mlf_phone = os.path.join(label_dir, dataset + '_phone.mlf')
+        mlf_phone_with_sp = os.path.join(label_dir, dataset + '_phone_with_sp.mlf')

         print(">>> generating a word level mlf file for {}...".format(dataset))
         chtk.label2mlf(label_dir_, mlf_word)
         print(">>> generating a phone level mlf file for {}...".format(dataset))
-        chtk.mlf_word2phone(mlf_phone, mlf_word)
+        chtk.mlf_word2phone(mlf_phone, mlf_word, with_sp=False)
+        chtk.mlf_word2phone(mlf_phone_with_sp, mlf_word, with_sp=True)
+

         print("elapsed time: {}".format(time.time() - timer_start))


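For reference, the with_sp variant of the phone-level MLF differs only in that each word's pronunciation is followed by the short-pause phone; an entry then looks roughly like this (illustrative content, not taken from the corpus):

    #!MLF!#
    "*/utterance001.lab"
    s
    a
    n
    sp
    .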
@@ -224,33 +239,33 @@ if extract_features:
 if flat_start:
     timer_start = time.time()
     print('==== flat start ====')
-    fh.make_new_directory(model0_dir, existing_dir='leave')
+    fh.make_new_directory(model_mono0_dir, existing_dir='leave')

-    chtk.flat_start(hcompv_scp_train, model0_dir)
+    chtk.flat_start(hcompv_scp_train, model_mono0_dir)

     # create macros.
-    vFloors = os.path.join(model0_dir, 'vFloors')
+    vFloors = os.path.join(model_mono0_dir, 'vFloors')
     if os.path.exists(vFloors):
         chtk.create_macros(vFloors)

     # allocate mean & variance to all phones in the phone list
     print('>>> allocating mean & variance to all phones in the phone list...')
     chtk.create_hmmdefs(
-        os.path.join(model0_dir, proto_name),
-        os.path.join(model0_dir, 'hmmdefs')
+        os.path.join(model_mono0_dir, proto_name),
+        os.path.join(model_mono0_dir, 'hmmdefs')
     )

     print("elapsed time: {}".format(time.time() - timer_start))


 ## ======================= train model without short pause =======================
-if train_model_without_sp:
-    print('==== train model without sp ====')
+if train_monophone_without_sp:
+    print('==== train monophone without sp ====')

     timer_start = time.time()
     niter = chtk.re_estimation_until_saturated(
-        model1_dir,
-        model0_dir, improvement_threshold, hcompv_scp_train,
+        model_mono1_dir,
+        model_mono0_dir, improvement_threshold, hcompv_scp_train,
         os.path.join(htk_stimmen_dir, 'mfc'),
         'mfc',
         os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
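For context: a flat start estimates one global mean and variance from the training data and copies them into every phone model. chtk.flat_start presumably wraps HTK's HCompV; under the standard HTKBook recipe the underlying call looks roughly like this sketch (config and proto paths are assumptions, not taken from this repository):

    import subprocess

    # HCompV computes the global mean/variance (-m), writes the updated proto
    # plus vFloors into the output directory (-M), and sets a variance floor
    # at 1% of the global variance (-f 0.01).
    subprocess.run([
        'HCompV',
        '-C', 'config/config.train',  # assumed feature config
        '-f', '0.01', '-m',
        '-S', 'tmp/train.scp',
        '-M', 'model/mono0',
        'config/proto',               # assumed prototype HMM
    ], check=True)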
@@ -270,32 +285,34 @@ if add_sp:

     # make model with sp.
     print('>>> adding sp state to the last model in the previous step...')
-    fh.make_new_directory(model1sp_dir, existing_dir='leave')
-    niter = chtk.get_niter_max(model1_dir)
-    modeln_dir_pre = os.path.join(model1_dir, 'iter'+str(niter))
-    modeln_dir = os.path.join(model1sp_dir, 'iter0')
+    fh.make_new_directory(model_mono1sp_dir, existing_dir='leave')
+    niter = chtk.get_niter_max(model_mono1_dir)
+    modeln_dir_pre = os.path.join(model_mono1_dir, 'iter'+str(niter))
+    modeln_dir = os.path.join(model_mono1sp_dir, 'iter0')

+    #hmmdefs_pre = os.path.join(modeln_dir_pre, 'hmmdefs')
     chtk.add_sp(modeln_dir_pre, modeln_dir)
     print("elapsed time: {}".format(time.time() - timer_start))

     niter = chtk.re_estimation_until_saturated(
-        model1sp_dir, modeln_dir, improvement_threshold, hcompv_scp_train,
+        model_mono1sp_dir, modeln_dir, improvement_threshold, hcompv_scp_train,
         os.path.join(htk_stimmen_dir, 'mfc'),
         'mfc',
         os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
-        mlf_file=mlf_file_train,
+        mlf_file=mlf_file_train_with_sp,
         lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
         model_type='monophone_with_sp'
     )


 ## ======================= train model with re-aligned mlf =======================
-if train_model_with_re_aligned_mlf:
-    print('==== train a model with re-aligned mlf ====')
+if train_monophone_with_re_aligned_mlf:
+    print('==== train a monophone with re-aligned mlf ====')

     print('>>> re-aligning the training data... ')
     timer_start = time.time()
-    niter = chtk.get_niter_max(model1sp_dir)
-    modeln_dir = os.path.join(model1sp_dir, 'iter'+str(niter))
+    niter = chtk.get_niter_max(model_mono1sp_dir)
+    modeln_dir = os.path.join(model_mono1sp_dir, 'iter'+str(niter))
     chtk.make_aligned_label(
         os.path.join(modeln_dir, 'macros'),
         os.path.join(modeln_dir, 'hmmdefs'),
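chtk.add_sp introduces the one-state short-pause model. In the standard HTKBook recipe this is done by cloning the centre state of sil into sp and then running HHEd with a sil.hed edit script; a hedged sketch of that step (state indices assume the usual 5-state sil model, and all paths are placeholders):

    import subprocess

    # HTKBook-style sil.hed: extra transitions within sil, and a tie between
    # sp's single emitting state and the centre state of sil.
    sil_hed = [
        'AT 2 4 0.2 {sil.transP}',
        'AT 4 2 0.2 {sil.transP}',
        'AT 1 3 0.3 {sp.transP}',
        'TI silst {sil.state[3],sp.state[2]}',
    ]
    with open('config/sil.hed', 'w') as f:
        f.write('\n'.join(sil_hed) + '\n')

    # Apply the edit script to the model set that already contains sp.
    subprocess.run([
        'HHEd',
        '-H', 'model/mono1sp/iter0/macros',
        '-H', 'model/mono1sp/iter0/hmmdefs',
        '-M', 'model/mono1sp/iter0',
        'config/sil.hed', 'config/monophones1',  # phone list name assumed
    ], check=True)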
@@ -306,18 +323,18 @@ if train_model_with_re_aligned_mlf:
     print('>>> updating the script file... ')
     chtk.update_script_file(
         mlf_file_train_aligned,
-        mlf_file_train,
+        mlf_file_train_with_sp,
         hcompv_scp_train,
         hcompv_scp_train_updated)
     print("elapsed time: {}".format(time.time() - timer_start))

     print('>>> re-estimation... ')
     timer_start = time.time()
-    fh.make_new_directory(model1sp2_dir, existing_dir='leave')
-    niter = chtk.get_niter_max(model1sp_dir)
+    fh.make_new_directory(model_mono1sp2_dir, existing_dir='leave')
+    niter = chtk.get_niter_max(model_mono1sp_dir)
     niter = chtk.re_estimation_until_saturated(
-        model1sp2_dir,
-        os.path.join(model1sp_dir, 'iter'+str(niter)),
+        model_mono1sp2_dir,
+        os.path.join(model_mono1sp_dir, 'iter'+str(niter)),
         improvement_threshold,
         hcompv_scp_train_updated,
         os.path.join(htk_stimmen_dir, 'mfc'),
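chtk.make_aligned_label re-aligns the training data; in plain HTK this is forced alignment with HVite in alignment mode (-a -m), roughly as in the HTKBook tutorial. A sketch with placeholder paths (none of these file names come from this repository):

    import subprocess

    # HVite -a re-transcribes each utterance by choosing, per word, the
    # pronunciation that best fits the audio, and writes a time-aligned
    # phone-level MLF; -b inserts the silence model at utterance boundaries.
    subprocess.run([
        'HVite', '-a', '-m',
        '-C', 'config/config.train',
        '-H', 'model/mono1sp/iterN/macros',
        '-H', 'model/mono1sp/iterN/hmmdefs',
        '-i', 'label/train_phone_aligned.mlf',
        '-I', 'label/train_word.mlf',
        '-o', 'SWT', '-b', 'silence', '-t', '250.0', '-y', 'lab',
        '-S', 'tmp/train.scp',
        'lexicon/lex.htk', 'config/phonelist.txt',
    ], check=True)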
@@ -332,25 +349,68 @@ if train_model_with_re_aligned_mlf:

 ## ======================= train triphone =======================
 if train_triphone:
-    model_out_dir = os.path.join(model_dir, 'hmm1_tri', 'iter1')
+    print('==== train a triphone model ====')
+    #model_out_dir = os.path.join(model_dir, 'hmm1_tri', 'iter1')

-    triphonelist_txt = os.path.join(config_dir, 'triphonelist_txt')
+    triphonelist_txt = os.path.join(config_dir, 'triphonelist.txt')
     triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')

+    print('>>> making triphone list... ')
     chtk.make_triphonelist(
         triphonelist_txt,
         triphone_mlf,
         mlf_file_train_aligned)

-    #run_command([
-    #    'HERest', '-B',
-    #    '-C', config_train,
-    #    '-I', triphone_mlf,
-    #    '-t', '250.0', '150.0', '1000.0',
-    #    '-s', 'stats'
-    #    '-S', hcompv_scp_train,
-    #    '-H', macros,
-    #    '-H', hmmdefs,
-    #    '-M', model_out_dir,
-    #    os.path.join(config_dir, 'triphonelist.txt')
-    #])
+    print('>>> making triphone header... ')
+    chtk.make_tri_hed(
+        os.path.join(config_dir, 'mktri.hed')
+    )
+
+    print('>>> init triphone model... ')
+    niter = chtk.get_niter_max(model_mono1sp2_dir)
+    fh.make_new_directory(os.path.join(model_tri1_dir, 'iter0'), existing_dir='leave')
+    chtk.init_triphone(
+        os.path.join(model_mono1sp2_dir, 'iter'+str(niter)),
+        os.path.join(model_tri1_dir, 'iter0')
+    )
+
+    print('>>> re-estimation... ')
+    # I wanted to train until saturated:
+    #niter = chtk.re_estimation_until_saturated(
+    #    model_tri1_dir,
+    #    os.path.join(model_tri1_dir, 'iter0'),
+    #    improvement_threshold,
+    #    hcompv_scp_train_updated,
+    #    os.path.join(htk_stimmen_dir, 'mfc'),
+    #    'mfc',
+    #    os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
+    #    mlf_file=triphone_mlf,
+    #    lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
+    #    model_type='triphone'
+    #)
+    #
+    # but because the data size is limited, some triphones cannot be trained and raise the error:
+    # ERROR [+8231] GetHCIModel: Cannot find hmm [i:-]r[+???]
+    # therefore re-estimation is performed only three times.
+    output_dir = model_tri1_dir
+
+    for niter in range(1, 4):
+        hmm_n = 'iter' + str(niter)
+        hmm_n_pre = 'iter' + str(niter-1)
+        _modeln_dir = os.path.join(output_dir, hmm_n)
+        _modeln_dir_pre = os.path.join(output_dir, hmm_n_pre)
+
+        fh.make_new_directory(_modeln_dir, 'leave')
+        chtk.re_estimation(
+            os.path.join(_modeln_dir_pre, 'hmmdefs'),
+            _modeln_dir,
+            hcompv_scp_train_updated,
+            mlf_file=triphone_mlf,
+            macros=os.path.join(_modeln_dir_pre, 'macros'),
+            model_type='triphone')

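The GetHCIModel error quoted above means HERest needed a triphone for which no model could be trained, the usual symptom of sparse triphone coverage. A quick, hypothetical way to inspect coverage before training (triphone_counts is not part of the commit):

    from collections import Counter

    def triphone_counts(triphone_mlf):
        # Count every triphone-shaped token (l-c+r) in the triphone MLF.
        counts = Counter()
        with open(triphone_mlf, encoding='utf-8') as f:
            for line in f:
                for token in line.split():
                    if '-' in token and '+' in token:
                        counts[token] += 1
        return counts

    # Triphones seen only once or twice are the ones HERest cannot
    # re-estimate reliably; tying them (train_triphone_tied) addresses this.
    rare = [t for t, n in triphone_counts('label/train_triphone.mlf').items() if n < 3]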
+
+
+## ======================= train triphone tied =======================
+if train_triphone_tied:
+    print('==== train a tied-state triphone ====')