diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index 96899cb..b545ca0 100644 Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py index 5555919..4cffa2f 100644 --- a/acoustic_model/fame_hmm.py +++ b/acoustic_model/fame_hmm.py @@ -26,9 +26,8 @@ make_mlf = 0 extract_features = 0 flat_start = 0 train_model_without_sp = 0 -add_sp = 1 -train_model_with_sp = 0 -train_model_with_sp_align_mlf = 0 +add_sp = 0 +train_model_with_re_aligned_mlf = 1 train_triphone = 0 @@ -36,7 +35,7 @@ train_triphone = 0 # pre-defined values. dataset_list = ['devel', 'test', 'train'] feature_size = 39 -improvement_threshold = 0.5 +improvement_threshold = 0.3 hmmdefs_name = 'hmmdefs' proto_name = 'proto' @@ -49,10 +48,11 @@ config_dir = os.path.join(default.htk_dir, 'config') sil_hed = os.path.join(config_dir, 'sil.hed') prototype = os.path.join(config_dir, proto_name) -model_dir = os.path.join(default.htk_dir, 'model') -model0_dir = os.path.join(model_dir, 'hmm0') -model1_dir = os.path.join(model_dir, 'hmm1') -model1sp_dir = os.path.join(model_dir, 'hmm1sp') +model_dir = os.path.join(default.htk_dir, 'model') +model0_dir = os.path.join(model_dir, 'hmm0') +model1_dir = os.path.join(model_dir, 'hmm1') +model1sp_dir = os.path.join(model_dir, 'hmm1sp') +model1sp2_dir = os.path.join(model_dir, 'hmm1sp2') # directories / files to be made. lexicon_dir = os.path.join(default.htk_dir, 'lexicon') @@ -68,7 +68,6 @@ label_dir = os.path.join(default.htk_dir, 'label') fh.make_new_directory(label_dir, existing_dir='leave') - ## training hcompv_scp_train = os.path.join(tmp_dir, 'train.scp') mlf_file_train = os.path.join(label_dir, 'train_phone.mlf') @@ -78,10 +77,6 @@ mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf') htk_stimmen_dir = os.path.join(default.htk_dir, 'stimmen') -## train without sp -niter_max = 10 - - ## ======================= make lexicon for HTK ======================= if make_lexicon: timer_start = time.time() @@ -273,64 +268,64 @@ if add_sp: print('==== adding sp to the model ====') # reference: # http://www.f.waseda.jp/yusukekondo/htk.html#flat_start_estimation + timer_start = time.time() # make model with sp. - niter = 7 print('>>> adding sp state to the last model in the previous step...') fh.make_new_directory(model1sp_dir, existing_dir='leave') + niter = chtk.get_niter_max(model1_dir) modeln_dir_pre = os.path.join(model1_dir, 'iter'+str(niter)) - - ## update hmmdefs and macros. - print('>>> adding sp to the model...') - modeln_dir = os.path.join(model1sp_dir, 'iter0') + modeln_dir = os.path.join(model1sp_dir, 'iter0') chtk.add_sp(modeln_dir_pre, modeln_dir) + print("elapsed time: {}".format(time.time() - timer_start)) - -## ======================= train model with short pause ======================= -if train_model_with_sp: - print('==== train model with sp ====') - for niter in range(20, 50): - timer_start = time.time() - hmm_n = 'iter' + str(niter) - hmm_n_pre = 'iter' + str(niter-1) - modeln_dir = os.path.join(model1_dir, hmm_n) - modeln_dir_pre = os.path.join(model1_dir, hmm_n_pre) - - # re-estimation - fh.make_new_directory(modeln_dir) - pyhtk.re_estimation( - config_train, - os.path.join(modeln_dir_pre, hmmdefs_name), - modeln_dir, - hcompv_scp_train, phonelist_txt, - mlf_file=mlf_file_train, - macros=os.path.join(modeln_dir_pre, 'macros')) - print("elapsed time: {}".format(time.time() - timer_start)) + niter = chtk.re_estimation_until_saturated( + model1sp_dir, modeln_dir, improvement_threshold, hcompv_scp_train, + os.path.join(htk_stimmen_dir, 'mfc'), + 'mfc', + os.path.join(htk_stimmen_dir, 'word_lattice.ltc'), + mlf_file=mlf_file_train, + lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'), + model_type='monophone_with_sp' + ) -## ======================= train model with short pause ======================= -if train_model_with_sp_align_mlf: - print('==== train model with sp with align.mlf ====') - for niter in range(50, 60): - timer_start = time.time() - hmm_n = 'iter' + str(niter) - hmm_n_pre = 'iter' + str(niter-1) - modeln_dir = os.path.join(model1_dir, hmm_n) - modeln_dir_pre = os.path.join(model1_dir, hmm_n_pre) - - # re-estimation - fh.make_new_directory(modeln_dir) - pyhtk.re_estimation( - config_train, - os.path.join(modeln_dir_pre, hmmdefs_name), - modeln_dir, - hcompv_scp_train, phonelist_txt, - mlf_file=mlf_file_train_aligned, - macros=os.path.join(modeln_dir_pre, 'macros')) - print("elapsed time: {}".format(time.time() - timer_start)) +## ======================= train model with re-aligned mlf ======================= +if train_model_with_re_aligned_mlf: + print('==== traina model with re-aligned mlf ====') + + print('>>> re-aligning the training data... ') + timer_start = time.time() + niter = chtk.get_niter_max(model1sp_dir) + modeln_dir = os.path.join(model1sp_dir, 'iter'+str(niter)) + chtk.make_aligned_label( + os.path.join(modeln_dir, 'macros'), + os.path.join(modeln_dir, 'hmmdefs'), + mlf_file_train_aligned, + os.path.join(label_dir, 'train_word.mlf'), + hcompv_scp_train) + print("elapsed time: {}".format(time.time() - timer_start)) + + print('>>> re-estimation... ') + timer_start = time.time() + fh.make_new_directory(model1sp2_dir, existing_dir='leave') + niter = chtk.get_niter_max(model1sp_dir) + niter = chtk.re_estimation_until_saturated( + model1sp2_dir, + os.path.join(model1sp_dir, 'iter'+str(niter)), + improvement_threshold, + hcompv_scp_train, + os.path.join(htk_stimmen_dir, 'mfc'), + 'mfc', + os.path.join(htk_stimmen_dir, 'word_lattice.ltc'), + mlf_file=mlf_file_train, + lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'), + model_type='monophone_with_sp' + ) + print("elapsed time: {}".format(time.time() - timer_start)) -# train triphone. +## ======================= train triphone ======================= if train_triphone: triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf') macros = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'macros')