diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index 712c255..96899cb 100644 Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ diff --git a/acoustic_model/fame_functions.py b/acoustic_model/fame_functions.py index 9ca7e0d..9f4e127 100644 --- a/acoustic_model/fame_functions.py +++ b/acoustic_model/fame_functions.py @@ -378,17 +378,22 @@ def ipa2htk(ipa): return ''.join(htk_splitted) -def performance_on_stimmen(stimmen_dir, hmmdefs): - #hmmdefs = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk\model_\hmm1\iter20\hmmdefs' - #stimmen_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk\stimmen' +def performance_on_stimmen(config_dir, stimmen_dir, hmmdefs): lattice_file = os.path.join(stimmen_dir, 'word_lattice.ltc') hvite_scp = os.path.join(stimmen_dir, 'hvite.scp') #fh.make_filelist(os.path.join(stimmen_dir, 'mfc'), hvite_scp, file_type='mfc') hresult_scp = os.path.join(stimmen_dir, 'hresult.scp') #fh.make_filelist(os.path.join(stimmen_dir, 'mfc'), hresult_scp, file_type='rec') lexicon_file = os.path.join(stimmen_dir, 'lexicon_recognition.dic') - chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_file) + # get feature_size from hmmdefs. + with open(hmmdefs) as f: + line = f.readline() + line = f.readline().strip() + feature_size = int(line.split(' ')[2]) + + chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_file, feature_size) + result = chtk.recognition( lattice_file, hmmdefs, diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py index 4e2f430..5555919 100644 --- a/acoustic_model/fame_hmm.py +++ b/acoustic_model/fame_hmm.py @@ -26,7 +26,7 @@ make_mlf = 0 extract_features = 0 flat_start = 0 train_model_without_sp = 0 -add_sp = 0 +add_sp = 1 train_model_with_sp = 0 train_model_with_sp_align_mlf = 0 train_triphone = 0 @@ -35,6 +35,9 @@ train_triphone = 0 # pre-defined values. dataset_list = ['devel', 'test', 'train'] +feature_size = 39 +improvement_threshold = 0.5 + hmmdefs_name = 'hmmdefs' proto_name = 'proto' @@ -47,7 +50,9 @@ sil_hed = os.path.join(config_dir, 'sil.hed') prototype = os.path.join(config_dir, proto_name) model_dir = os.path.join(default.htk_dir, 'model') - +model0_dir = os.path.join(model_dir, 'hmm0') +model1_dir = os.path.join(model_dir, 'hmm1') +model1sp_dir = os.path.join(model_dir, 'hmm1sp') # directories / files to be made. lexicon_dir = os.path.join(default.htk_dir, 'lexicon') @@ -55,9 +60,6 @@ lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr') lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov') lexicon_htk = os.path.join(lexicon_dir, 'lex.htk') - -#model1_dir = os.path.join(model_dir, 'hmm1') - feature_dir = os.path.join(default.htk_dir, 'mfc') fh.make_new_directory(feature_dir, existing_dir='leave') tmp_dir = os.path.join(default.htk_dir, 'tmp') @@ -65,11 +67,17 @@ fh.make_new_directory(tmp_dir, existing_dir='leave') label_dir = os.path.join(default.htk_dir, 'label') fh.make_new_directory(label_dir, existing_dir='leave') + + ## training hcompv_scp_train = os.path.join(tmp_dir, 'train.scp') mlf_file_train = os.path.join(label_dir, 'train_phone.mlf') mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf') +## testing +htk_stimmen_dir = os.path.join(default.htk_dir, 'stimmen') + + ## train without sp niter_max = 10 @@ -100,7 +108,7 @@ if make_lexicon: ## intialize the instance for HTK. -chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk) +chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk, feature_size) ## ======================= make label files ======================= @@ -223,11 +231,14 @@ if extract_features: if flat_start: timer_start = time.time() print('==== flat start ====') - feature_size = 39 - model0_dir = os.path.join(model_dir, 'hmm0') fh.make_new_directory(model0_dir, existing_dir='leave') - chtk.flat_start(hcompv_scp_train, model0_dir, feature_size) + chtk.flat_start(hcompv_scp_train, model0_dir) + + # create macros. + vFloors = os.path.join(model0_dir, 'vFloors') + if os.path.exists(vFloors): + chtk.create_macros(vFloors) # allocate mean & variance to all phones in the phone list print('>>> allocating mean & variance to all phones in the phone list...') @@ -241,69 +252,38 @@ if flat_start: ## ======================= train model without short pause ======================= if train_model_without_sp: - fh.make_new_directory(model1_dir) - print('==== train model without sp ====') - if not os.path.exists(os.path.join(model1_dir, 'iter0')): - shutil.copytree(model0_dir, os.path.join(model1_dir, 'iter0')) - for niter in range(1, niter_max): - timer_start = time.time() - hmm_n = 'iter' + str(niter) - hmm_n_pre = 'iter' + str(niter-1) - modeln_dir = os.path.join(model1_dir, hmm_n) - modeln_dir_pre = os.path.join(model1_dir, hmm_n_pre) - - # re-estimation - fh.make_new_directory(modeln_dir) - pyhtk.re_estimation( - config_train, - os.path.join(modeln_dir_pre, hmmdefs_name), - modeln_dir, - hcompv_scp_train, phonelist_txt, - mlf_file=mlf_file_train, - macros=os.path.join(modeln_dir_pre, 'macros')) - print("elapsed time: {}".format(time.time() - timer_start)) + + timer_start = time.time() + niter = chtk.re_estimation_until_saturated( + model1_dir, + model0_dir, improvement_threshold, hcompv_scp_train, + os.path.join(htk_stimmen_dir, 'mfc'), + 'mfc', + os.path.join(htk_stimmen_dir, 'word_lattice.ltc'), + mlf_file=mlf_file_train, + lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic') + ) + + print("elapsed time: {}".format(time.time() - timer_start)) ## ======================= adding sp to the model ======================= if add_sp: print('==== adding sp to the model ====') + # reference: + # http://www.f.waseda.jp/yusukekondo/htk.html#flat_start_estimation # make model with sp. - print('>>> modifying the last model in the previous step...') - modeln_dir_pre = os.path.join(model1_dir, 'iter'+str(niter_max-1)) - modeln_dir = modeln_dir_pre.replace('iter' + str(niter_max-1), 'iter' + str(niter_max)) - fh.make_new_directory(modeln_dir) - shutil.copy( - os.path.join(modeln_dir_pre, 'macros'), - os.path.join(modeln_dir, 'macros')) - shutil.copy( - os.path.join(modeln_dir_pre, hmmdefs_name), - os.path.join(modeln_dir, hmmdefs_name)) + niter = 7 + print('>>> adding sp state to the last model in the previous step...') + fh.make_new_directory(model1sp_dir, existing_dir='leave') + modeln_dir_pre = os.path.join(model1_dir, 'iter'+str(niter)) - ## ======================= - ## manually make changes to modeln_dir/hmmdefs - ## ======================= - # add states 'sil'. - # http://www.f.waseda.jp/yusukekondo/htk.html#flat_start_estimation - #shutil.copy( - # os.path.join(model_dir, 'hmmdefs.txt'), - # os.path.join(modeln_dir, hmmdefs_name)) - - #hmmdefs_file_pre = os.path.join(modeln_dir_pre, hmmdefs_name) - hmmdefs_file = os.path.join(modeln_dir, hmmdefs_name) - macros_file = os.path.join(modeln_dir, 'macros') - #with open(hmmdefs_file_pre) as f: - # lines = f.read() - #lines_ = lines.split('~h ') - #sil_model = [line for line in lines_ if line.split('\n')[0].replace('"', '') == 'sil'][0] - - # update hmmdefs and macros. - print('>>> updating hmmdefs and macros...') - modeln_dir_pre = modeln_dir - modeln_dir = modeln_dir.replace('iter' + str(niter_max), 'iter' + str(niter_max+1)) - fh.make_new_directory(modeln_dir) - pyhtk.include_sil_in_hmmdefs(macros_file, hmmdefs_file, modeln_dir, sil_hed, phonelist_txt) + ## update hmmdefs and macros. + print('>>> adding sp to the model...') + modeln_dir = os.path.join(model1sp_dir, 'iter0') + chtk.add_sp(modeln_dir_pre, modeln_dir) ## ======================= train model with short pause =======================