import sys
import os
# NOTE(review): the working directory is switched before the project modules
# are imported -- presumably so that they can be resolved; confirm.
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import tempfile
import shutil
import glob
import time

import numpy as np
import pandas as pd

import fame_functions
from phoneset import fame_ipa, fame_asr
import defaultfiles as default
# the toolbox directory has to be on the path before the two imports below.
sys.path.append(default.toolbox_dir)
import file_handling as fh
from htk import pyhtk


## ======================= user define =======================

# procedure
# each switch enables (1) or disables (0) one step of the script.
make_lexicon = 0
make_label = 0  # it takes roughly 4800 sec on Surface pro 2.
make_htk_files = 0
extract_features = 0
flat_start = 0
train_model_without_sp = 0
add_sp = 0
train_model_with_sp = 0
train_model_with_sp_align_mlf = 1

# pre-defined values.
dataset_list = ['devel', 'test', 'train']
hmmdefs_name = 'hmmdefs'
proto_name = 'proto39'

# lexicon files of the FAME! corpus.
_fame_lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
lexicon_asr = os.path.join(_fame_lexicon_dir, 'lex.asr')
lexicon_oov = os.path.join(_fame_lexicon_dir, 'lex.oov')

# HTK configuration / edit scripts.
config_dir = os.path.join(default.htk_dir, 'config')
config_hcopy = os.path.join(config_dir, 'config.HCopy')
config_train = os.path.join(config_dir, 'config.train')
global_ded = os.path.join(config_dir, 'global.ded')
mkphones_led = os.path.join(config_dir, 'mkphones.led')
sil_hed = os.path.join(config_dir, 'sil.hed')
prototype = os.path.join(config_dir, proto_name)

model_dir = os.path.join(default.htk_dir, 'model')

# directories / files to be made.
# lexicon files converted to the HTK phoneset.
lexicon_dir = os.path.join(default.htk_dir, 'lexicon')
lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr')
lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov')
lexicon_htk = os.path.join(lexicon_dir, 'lex.htk')

phonelist_txt = os.path.join(config_dir, 'phonelist.txt')
model0_dir = os.path.join(model_dir, 'hmm0')
model1_dir = os.path.join(model_dir, 'hmm1')

# working directories. exist_ok avoids the check-then-create race.
feature_dir = os.path.join(default.htk_dir, 'mfc')
os.makedirs(feature_dir, exist_ok=True)
tmp_dir = os.path.join(default.htk_dir, 'tmp')
os.makedirs(tmp_dir, exist_ok=True)
label_dir = os.path.join(default.htk_dir, 'label')
os.makedirs(label_dir, exist_ok=True)

## training
hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')

## train without sp
niter_max = 10


def _re_estimate(niter, mlf_file):
    """Run one re-estimation pass from iter<niter-1> to iter<niter>.

    Both model directories live under model1_dir. The directory of
    iteration niter is (re)made with fh.make_new_directory and then
    filled by pyhtk.re_estimation, which reads hmmdefs and macros from
    the directory of iteration niter-1.

    niter    -- number of the iteration to be produced.
    mlf_file -- label file (MLF) passed on to pyhtk.re_estimation.
    """
    timer_start = time.time()
    modeln_dir = os.path.join(model1_dir, 'iter' + str(niter))
    modeln_dir_pre = os.path.join(model1_dir, 'iter' + str(niter - 1))

    # re-estimation
    fh.make_new_directory(modeln_dir)
    pyhtk.re_estimation(
        config_train,
        os.path.join(modeln_dir_pre, hmmdefs_name),
        modeln_dir,
        hcompv_scp_train, phonelist_txt,
        mlf_file=mlf_file,
        macros=os.path.join(modeln_dir_pre, 'macros'))
    print("elapsed time: {}".format(time.time() - timer_start))


## ======================= make lexicon for HTK =======================
if make_lexicon:
    timer_start = time.time()
    print('==== making lexicon for HTK ====')

    # the other output directories are made above; make this one as well.
    os.makedirs(lexicon_dir, exist_ok=True)

    # convert each lexicon from fame_asr phoneset to fame_htk phoneset.
    print('>>> converting each lexicon from fame_asr phoneset to fame_htk phoneset...')
    fame_functions.lexicon_asr2htk(lexicon_asr, lexicon_htk_asr)
    fame_functions.lexicon_asr2htk(lexicon_oov, lexicon_htk_oov)

    # combine lexicon
    print('>>> combining lexicon files into one lexicon...')
    # pronunciations which are not found in lex.asr are generated using G2P
    # and listed in lex.oov.
    # therefore there is no overlap between lex_asr and lex_oov.
    fame_functions.combine_lexicon(lexicon_htk_asr, lexicon_htk_oov, lexicon_htk)

    ## =======================
    ## manually make changes to the pronunciation dictionary and save it as lex.htk
    ## =======================
    # (1) Replace all tabs with single space;
    # (2) Put a '\' before any dictionary entry beginning with single quote
    # http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
    print('>>> fixing the lexicon...')
    fame_functions.fix_lexicon(lexicon_htk)
    print("elapsed time: {}".format(time.time() - timer_start))


## ======================= make label files =======================
if make_label:
    # train_2002_gongfansaken_10347.lab is empty. should be removed.
    for dataset in dataset_list:
        timer_start = time.time()
        print("==== making label files on dataset {}".format(dataset))

        script_list = os.path.join(default.fame_dir, 'data', dataset, 'text')
        wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
        label_dir_ = os.path.join(label_dir, dataset)
        dictionary_file = os.path.join(label_dir_, 'temp.dic')
        fh.make_new_directory(label_dir_)

        # list of scripts
        with open(script_list, "rt", encoding="utf-8") as fin:
            scripts = fin.read().split('\n')

        for line in scripts:
            # blank lines (e.g. the one after the final newline) carry no utterance.
            if not line.strip():
                continue

            # sample line:
            # sp0457m_test_1968_plakkenfryslanterhorne_2168 en dan begjinne je natuerlik
            filename_, _, sentence = line.partition(' ')
            # drop the leading speaker id: everything after the first '_'.
            filename = filename_.partition('_')[2]
            sentence_htk = fame_functions.word2htk(sentence)

            wav_file = os.path.join(wav_dir_, filename + '.wav')
            if not (os.path.exists(wav_file)
                    and pyhtk.can_be_ascii(sentence_htk) == 0):
                continue

            if pyhtk.create_dictionary_without_log(
                    sentence_htk, global_ded, dictionary_file, lexicon_htk) == 0:
                # when the file name is too long, HDMan command does not work.
                # therefore first temporary dictionary_file is made, then renamed.
                shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic'))
                label_file = os.path.join(label_dir_, filename + '.lab')
                pyhtk.create_label_file(sentence_htk, label_file)
            elif os.path.exists(dictionary_file):
                # the failed step may or may not have left a temporary file behind.
                os.remove(dictionary_file)
        print("elapsed time: {}".format(time.time() - timer_start))


## ======================= make other required files =======================
if make_htk_files:
    timer_start = time.time()
    print("==== making files required for HTK ====")

    print(">>> making a phonelist...")
    pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)

    for dataset in dataset_list:
        label_dir_ = os.path.join(label_dir, dataset)
        mlf_word = os.path.join(label_dir, dataset + '_word.mlf')
        mlf_phone = os.path.join(label_dir, dataset + '_phone.mlf')

        # the word level mlf is the '#!MLF!#' header followed, per label file,
        # by a "*/<name>" line, the content of the label file and a '.' line.
        print(">>> making a mlf file for {}...".format(dataset))
        lab_list = glob.glob(os.path.join(label_dir_, '*.lab'))
        with open(mlf_word, 'wb') as fmlf:
            fmlf.write(bytes('#!MLF!#\n', 'ascii'))
            for label_file in lab_list:
                filename = os.path.basename(label_file)
                fmlf.write(bytes('\"*/{}\"\n'.format(filename), 'ascii'))
                with open(label_file) as flab:
                    lines = flab.read()
                fmlf.write(bytes(lines + '.\n', 'ascii'))

        print(">>> generating phone level transcription for {}...".format(dataset))
        pyhtk.mlf_word2phone(lexicon_htk, mlf_phone, mlf_word, mkphones_led)
    print("elapsed time: {}".format(time.time() - timer_start))


## ======================= extract features =======================
if extract_features:
    for dataset in dataset_list:
        timer_start = time.time()
        print('==== extract features on dataset {} ===='.format(dataset))

        wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
        label_dir_ = os.path.join(label_dir, dataset)
        feature_dir_ = os.path.join(feature_dir, dataset)
        fh.make_new_directory(feature_dir_)

        # a script file for HCopy
        print(">>> making a script file for HCopy...")
        hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
        hcopy_scp.close()
        try:
            # get a list of features (hcopy.scp) from the list of label files:
            # one '<wav file>\t<mfc file>' line per label file.
            # the file name is rebuilt from its stem so that only the extension changes.
            lab_list = glob.glob(os.path.join(label_dir_, '*.lab'))
            stems = [
                os.path.splitext(os.path.basename(lab_file))[0]
                for lab_file in lab_list]
            feature_list = [
                os.path.join(wav_dir_, stem + '.wav') + '\t'
                + os.path.join(feature_dir_, stem + '.mfc')
                for stem in stems]
            with open(hcopy_scp.name, 'wb') as f:
                f.write(bytes('\n'.join(feature_list), 'ascii'))

            # extract features.
            print(">>> extracting features on {}...".format(dataset))
            pyhtk.wav2mfc(config_hcopy, hcopy_scp.name)
        finally:
            # the temporary script is removed even when the extraction fails.
            os.remove(hcopy_scp.name)

        # make hcompv.scp: one mfc file per dictionary file in the label directory.
        print(">>> making a script file for {}...".format(dataset))
        listdir = glob.glob(os.path.join(label_dir_, '*.dic'))
        mfc_list = [
            os.path.join(
                feature_dir_,
                os.path.splitext(os.path.basename(filename))[0] + '.mfc')
            for filename in listdir]
        hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
        with open(hcompv_scp, 'wb') as f:
            f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))
        print("elapsed time: {}".format(time.time() - timer_start))


## ======================= flat start monophones =======================
if flat_start:
    timer_start = time.time()
    print('==== flat start ====')
    pyhtk.flat_start(config_train, hcompv_scp_train, model0_dir, prototype)

    # allocate mean & variance to all phones in the phone list
    print('>>> allocating mean & variance to all phones in the phone list...')
    pyhtk.create_hmmdefs(
        os.path.join(model0_dir, proto_name),
        os.path.join(model0_dir, hmmdefs_name),
        phonelist_txt)

    # make macros: the '~o 39' line followed by the content of vFloors.
    print('>>> making macros...')
    with open(os.path.join(model0_dir, 'vFloors')) as f:
        lines = f.read()
    with open(os.path.join(model0_dir, 'macros'), 'wb') as f:
        f.write(bytes('~o 39\n' + lines, 'ascii'))
    print("elapsed time: {}".format(time.time() - timer_start))


## ======================= train model without short pause =======================
if train_model_without_sp:
    fh.make_new_directory(model1_dir)

    print('==== train model without sp ====')
    # the flat start model is the starting point (iter0).
    if not os.path.exists(os.path.join(model1_dir, 'iter0')):
        shutil.copytree(model0_dir, os.path.join(model1_dir, 'iter0'))
    for niter in range(1, niter_max):
        _re_estimate(niter, mlf_file_train)


## ======================= adding sp to the model =======================
if add_sp:
    print('==== adding sp to the model ====')
    # make model with sp.
    print('>>> modifying the last model in the previous step...')
    # the directories are built from model1_dir directly, so that only the
    # last path component differs between the iterations.
    modeln_dir_pre = os.path.join(model1_dir, 'iter' + str(niter_max - 1))
    modeln_dir = os.path.join(model1_dir, 'iter' + str(niter_max))
    fh.make_new_directory(modeln_dir)
    shutil.copy(
        os.path.join(modeln_dir_pre, 'macros'),
        os.path.join(modeln_dir, 'macros'))
    shutil.copy(
        os.path.join(modeln_dir_pre, hmmdefs_name),
        os.path.join(modeln_dir, hmmdefs_name))

    ## =======================
    ## manually make changes to modeln_dir/hmmdefs
    ## =======================
    # add states 'sil'.
    # http://www.f.waseda.jp/yusukekondo/htk.html#flat_start_estimation
    hmmdefs_file = os.path.join(modeln_dir, hmmdefs_name)
    macros_file = os.path.join(modeln_dir, 'macros')

    # update hmmdefs and macros.
    print('>>> updating hmmdefs and macros...')
    modeln_dir_pre = modeln_dir
    modeln_dir = os.path.join(model1_dir, 'iter' + str(niter_max + 1))
    fh.make_new_directory(modeln_dir)
    pyhtk.include_sil_in_hmmdefs(macros_file, hmmdefs_file, modeln_dir, sil_hed, phonelist_txt)


## ======================= train model with short pause =======================
if train_model_with_sp:
    print('==== train model with sp ====')
    # NOTE(review): the first pass reads iter19, whereas the step above writes
    # iter<niter_max+1> -- confirm how iter19 is supposed to be prepared.
    for niter in range(20, 50):
        _re_estimate(niter, mlf_file_train)


## ======================= train model with short pause using aligned mlf =======================
if train_model_with_sp_align_mlf:
    print('==== train model with sp with align.mlf ====')
    # NOTE(review): mlf_file_train_aligned is not made anywhere in the steps
    # above -- presumably it comes from a separate alignment step; confirm.
    for niter in range(50, 60):
        _re_estimate(niter, mlf_file_train_aligned)