diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo
index 31f158b..7775f45 100644
Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ
diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc
index a74cd44..ef57dca 100644
Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ
diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py
index b10d247..2188c97 100644
--- a/acoustic_model/defaultfiles.py
+++ b/acoustic_model/defaultfiles.py
@@ -1,14 +1,13 @@
 import os
-
-#default_hvite_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'htk', 'config.HVite')
+# add path of the parent directory
+#os.path.dirname(os.path.realpath(__file__))
 
 #cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
 
 #htk_dir = r'C:\Aki\htk_fame'
 htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk'
 
-config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
-#config_train = os.path.join(cygwin_dir, 'config', 'config.train')
+
 #config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
 
 #mkhmmdefs_pl = os.path.join(cygwin_dir, 'src', 'acoustic_model', 'mkhmmdefs.pl')
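Note: the new comment in defaultfiles.py suggests deriving paths from the module location rather than hard-coding the absolute OneDrive path. A minimal sketch of that idea, assuming a hypothetical layout in which the htk directory lives under the repository root (the actual layout is not shown in this diff):

    import os

    # derive paths from this file instead of hard-coding an absolute path;
    # 'experiments/fame/htk' below is an assumed, illustrative layout.
    module_dir = os.path.dirname(os.path.realpath(__file__))
    repo_dir = os.path.dirname(module_dir)
    htk_dir = os.path.join(repo_dir, 'experiments', 'fame', 'htk')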
diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py
index d8da964..881dec3 100644
--- a/acoustic_model/fame_hmm.py
+++ b/acoustic_model/fame_hmm.py
@@ -5,8 +5,6 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
 import tempfile
 import shutil
 import glob
-#import configparser
-#import subprocess
 import time
 
 import numpy as np
@@ -21,45 +19,42 @@ from htk import pyhtk
 
 
 ## ======================= user define =======================
-#repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
-#curr_dir = repo_dir + '\\acoustic_model'
-#config_ini = curr_dir + '\\config.ini'
-#output_dir = 'C:\\OneDrive\\Research\\rug\\experiments\\friesian\\acoustic_model'
-#forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment'
+# procedure
+make_lexicon = 0
+make_label = 0 # it takes roughly 4800 sec on Surface pro 2.
+make_htk_files = 0
+extract_features = 0
+flat_start = 0
+train_model_without_sp = 1
+
+
+# pre-defined values.
 dataset_list = ['devel', 'test', 'train']
+hmmdefs_name = 'hmmdefs'
 
-# procedure
-extract_features = 0
-make_lexicon = 0
-make_dictionary = 0 # 4800 sec
-make_htk_files = 1
-combine_files = 0
-flat_start = 0
-train_model = 0
+lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
+lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')
+
+config_dir = os.path.join(default.htk_dir, 'config')
+config_hcopy = os.path.join(config_dir, 'config.HCopy')
+config_train = os.path.join(config_dir, 'config.train')
+global_ded = os.path.join(config_dir, 'global.ded')
+mkphones_led = os.path.join(config_dir, 'mkphones.led')
+prototype = os.path.join(config_dir, 'proto39')
+
+model_dir = os.path.join(default.htk_dir, 'model')
 
-## ======================= load variables =======================
+# directories / files to be made.
 
-lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
-lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
-lexicon_oov = os.path.join(lexicon_dir, 'lex.oov')
-lexicon_htk_asr = os.path.join(default.htk_dir, 'lexicon', 'lex.htk_asr')
-lexicon_htk_oov = os.path.join(default.htk_dir, 'lexicon', 'lex.htk_oov')
-lexicon_htk = os.path.join(default.htk_dir, 'lexicon', 'lex.htk')
+lexicon_dir = os.path.join(default.htk_dir, 'lexicon')
+lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr')
+lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov')
+lexicon_htk = os.path.join(lexicon_dir, 'lex.htk')
 
-global_ded = os.path.join(default.htk_dir, 'config', 'global.ded')
-
-
-#hcompv_scp = output_dir + '\\scp\\combined.scp'
-#combined_mlf = output_dir + '\\label\\combined.mlf'
-
-#model_dir = output_dir + '\\model'
-#model0_dir = model_dir + '\\hmm0'
-#proto_init = model_dir + '\\proto38'
-#proto_name = 'proto'
-#phonelist = output_dir + '\\config\\phonelist_friesian.txt'
-#hmmdefs_name = 'hmmdefs'
+phonelist_txt = os.path.join(config_dir, 'phonelist.txt')
+model0_dir = os.path.join(model_dir, 'hmm0')
 
 feature_dir = os.path.join(default.htk_dir, 'mfc')
 if not os.path.exists(feature_dir):
     os.makedirs(feature_dir)
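Note: fh.make_new_directory, used repeatedly in the new code below, is not part of this diff. Judging from the exists-check idiom it replaces (as for feature_dir above), it presumably behaves like the following sketch; on Python 3.2+ os.makedirs(path, exist_ok=True) would do the same in one call:

    import os

    def make_new_directory(path):
        # assumed behaviour of fh.make_new_directory:
        # create the directory tree if it does not exist yet.
        if not os.path.exists(path):
            os.makedirs(path)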
@@ -72,42 +67,18 @@ if not os.path.exists(label_dir):
     os.makedirs(label_dir)
 
 
-## ======================= extract features =======================
-if extract_features:
-
-    for dataset in dataset_list:
-        print('==== extract features on dataset {} ====\n'.format(dataset))
-
-        # a script file for HCopy
-        print(">>> making a script file for HCopy... \n")
-        hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
-        hcopy_scp.close()
-
-        # get a list of features (hcopy.scp) from the filelist in FAME! corpus
-        feature_dir_ = os.path.join(feature_dir, dataset)
-        if not os.path.exists(feature_dir_):
-            os.makedirs(feature_dir_)
-
-        # extract features
-        print(">>> extracting features... \n")
-        fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
-        pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)
-
-        os.remove(hcopy_scp.name)
-
-
 ## ======================= make lexicon for HTK =======================
 if make_lexicon:
-    print('==== make lexicon for HTK ====\n')
+    timer_start = time.time()
+    print('==== making lexicon for HTK ====')
 
     # convert each lexicon from fame_asr phoneset to fame_htk phoneset.
-    print('>>> converting each lexicon from fame_asr phoneset to fame_htk phoneset... \n')
+    print('>>> converting each lexicon from fame_asr phoneset to fame_htk phoneset...')
     fame_functions.lexicon_asr2htk(lexicon_asr, lexicon_htk_asr)
     fame_functions.lexicon_asr2htk(lexicon_oov, lexicon_htk_oov)
 
     # combine lexicon
-    print('>>> combining lexicon files into one lexicon... \n')
+    print('>>> combining lexicon files into one lexicon...')
     # pronunciations which are not found in lex.asr are generated using G2P and listed in lex.oov.
     # therefore there is no overlap between lex_asr and lex_oov.
     fame_functions.combine_lexicon(lexicon_htk_asr, lexicon_htk_oov, lexicon_htk)
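Note: fame_functions.combine_lexicon is defined outside this diff. Since lex.oov holds G2P pronunciations only for words absent from lex.asr, a plain merge plus the sorted order that HDMan expects should be all it needs; a hedged sketch of that assumed behaviour:

    def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
        # assumed behaviour: concatenate two disjoint lexicons and sort
        # the entries by word, since HDMan requires a sorted dictionary.
        with open(lexicon_file1, encoding='utf-8') as f1, \
             open(lexicon_file2, encoding='utf-8') as f2:
            lines = f1.read().splitlines() + f2.read().splitlines()
        lines = sorted(line for line in lines if line.strip())
        with open(lexicon_out, 'w', encoding='utf-8') as fout:
            fout.write('\n'.join(lines) + '\n')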
@@ -119,28 +90,26 @@ if make_lexicon:
     # (2) Put a '\' before any dictionary entry beginning with single quote
     #http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
     fame_functions.fix_single_quote(lexicon_htk)
+    print("elapsed time: {}".format(time.time() - timer_start))
 
 
-## ======================= make dic files =======================
-if make_dictionary:
+## ======================= make label files =======================
+if make_label:
     for dataset in dataset_list:
         timer_start = time.time()
-        print("==== generating HTK dictionary files on dataset {}\n".format(dataset))
+        print("==== making label files on dataset {}".format(dataset))
 
-        #hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
-        #hcompv_scp2 = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
         script_list = os.path.join(default.fame_dir, 'data', dataset, 'text')
-        #mlf_word = output_dir + '\\label\\' + dataset + '_word.mlf'
-        #mlf_phone = output_dir + '\\label\\' + dataset + '_phone.mlf'
-        wav_dir = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
-        dictionary_file = os.path.join(wav_dir, 'temp.dic')
+        wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
+        label_dir_ = os.path.join(label_dir, dataset)
+        dictionary_file = os.path.join(label_dir_, 'temp.dic')
+        fh.make_new_directory(label_dir_)
 
         # list of scripts
         with open(script_list, "rt", encoding="utf-8") as fin:
             scripts = fin.read().split('\n')
 
         for line in scripts:
-        #for line in ['sp0035m_train_1975_fragmentenvraaggesprekkenruilverkaveling_15413 en dat kan men nog meer']:
             # sample line:
             # sp0457m_test_1968_plakkenfryslanterhorne_2168 en dan begjinne je natuerlik
             filename_ = line.split(' ')[0]
@@ -148,180 +117,144 @@ if make_dictionary:
             sentence = ' '.join(line.split(' ')[1:])
             sentence_htk = fame_functions.word2htk(sentence)
 
-            wav_file = os.path.join(wav_dir, filename + '.wav')
-            if os.path.exists(wav_file):
-                #dictionary_file = os.path.join(wav_dir, filename + '.dic')
+            wav_file = os.path.join(wav_dir_, filename + '.wav')
+            if os.path.exists(wav_file) and pyhtk.can_be_ascii(sentence_htk) == 0:
                 if pyhtk.create_dictionary_without_log(
-                    sentence, global_ded, dictionary_file, lexicon_htk) == 0:
+                    sentence_htk, global_ded, dictionary_file, lexicon_htk) == 0:
                     # when the file name is too long, HDMan command does not work.
                     # therefore first temporary dictionary_file is made, then renamed.
-                    shutil.move(dictionary_file, os.path.join(wav_dir, filename + '.dic'))
-                    label_file = os.path.join(wav_dir, filename + '.lab')
-                    pyhtk.create_label_file(sentence, label_file)
+                    shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic'))
+
+                    label_file = os.path.join(label_dir_, filename + '.lab')
+                    pyhtk.create_label_file(sentence_htk, label_file)
                 else:
                     os.remove(dictionary_file)
         print("elapsed time: {}".format(time.time() - timer_start))
 
-        # lexicon
-        #lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation'])
-
-        # list of features
-        #with open(hcompv_scp) as fin:
-        #    features = fin.read()
-        #    features = features.split('\n')
-
-        #i = 0
-        #missing_words = []
-        #fscp = open(hcompv_scp2, 'wt')
-        #fmlf = open(mlf_word, "wt", encoding="utf-8")
-        #fmlf.write("#!MLF!#\n")
-        #feature_nr = 1
-        #for feature in features:
-        #    sys.stdout.write("\r%d/%d" % (feature_nr, len(features)))
-        #    sys.stdout.flush()
-        #    feature_nr += 1
-        #    file_basename = os.path.basename(feature).replace('.mfc', '')
-
-        #    # get words from scripts.
-        #    try:
-        #        script = scripts[scripts.str.contains(file_basename)]
-        #    except IndexError:
-        #        script = []
-
-        #    if len(script) != 0:
-        #        script_id = script.index[0]
-        #        script_txt = script.get(script_id)
-        #        script_words = script_txt.split(' ')
-        #        del script_words[0]
-
-        #    # check if all words can be found in the lexicon.
-        #    SCRIPT_WORDS = []
-        #    script_prons = []
-        #    is_in_lexicon = 1
-        #    for word in script_words:
-        #        WORD = word.upper()
-        #        SCRIPT_WORDS.append(WORD)
-        #        extracted = lexicon_htk[lexicon_htk['word']==WORD]
-        #        if len(extracted) == 0:
-        #            missing_words.append(word)
-        #        script_prons.append(extracted)
-        #        is_in_lexicon *= len(extracted)
-
-        #    # if all pronunciations are found in the lexicon, update scp and mlf files.
-        #    if is_in_lexicon:
-        #        # add the feature filename into the .scp file.
-        #        fscp.write("{}\n".format(feature))
-        #        i += 1
-
-        #        # add the words to the mlf file.
-        #        fmlf.write('\"*/{}.lab\"\n'.format(file_basename))
-        #        #fmlf.write('{}'.format('\n'.join(SCRIPT_WORDS)))
-        #        for word_ in SCRIPT_WORDS:
-        #            if word_[0] == '\'':
-        #                word_ = '\\' + word_
-        #            fmlf.write('{}\n'.format(word_))
-        #        fmlf.write('.\n')
-        #print("\n{0} has {1} samples.\n".format(dataset, i))
-        #np.save(output_dir + '\\missing_words' + '_' + dataset + '.npy', missing_words)
-
-        #fscp.close()
-        #fmlf.close()
 
 
 ## ======================= make other required files =======================
 if make_htk_files:
-    ## phonelist
-    phonelist_txt = os.path.join(default.htk_dir, 'config', 'phonelist.txt')
+    timer_start = time.time()
+    print("==== making files required for HTK ====")
+
+    print(">>> making a phonelist...")
     pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)
 
-    ## hcomp_v.scp
-    print(">>> making a script file for HCompV... \n")
     for dataset in dataset_list:
-        #timer_start = time.time()
+        wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
+        feature_dir_ = os.path.join(feature_dir, dataset)
+        label_dir_ = os.path.join(label_dir, dataset)
+        mlf_word = os.path.join(label_dir, dataset + '_word.mlf')
+        mlf_phone = os.path.join(label_dir, dataset + '_phone.mlf')
 
-        wav_dir = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
+        #print(">>> making a script file for {}...".format(dataset))
+        #listdir = glob.glob(os.path.join(wav_dir_, '*.dic'))
+        #mfc_list = [filename.replace(wav_dir_, feature_dir_).replace('.dic', '.mfc') for filename in listdir]
+        #hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
+        #with open(hcompv_scp, 'wb') as f:
+        #    f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))
 
-        listdir = glob.glob(os.path.join(wav_dir, '*.dic'))
-        filelist = [filename.replace(wav_dir, feature_dir).replace('.dic', '.fea') for filename in listdir]
+        print(">>> making a mlf file for {}...".format(dataset))
+        lab_list = glob.glob(os.path.join(label_dir_, '*.lab'))
+        with open(mlf_word, 'wb') as fmlf:
+            fmlf.write(bytes('#!MLF!#\n', 'ascii'))
+            for label_file in lab_list:
+                filename = os.path.basename(label_file)
+                fmlf.write(bytes('\"*/{}\"\n'.format(filename), 'ascii'))
+                with open(label_file) as flab:
+                    lines = flab.read()
+                fmlf.write(bytes(lines + '.\n', 'ascii'))
 
-        hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
-        with open(hcompv_scp, 'wt', newline='\r\n') as f:
-            f.write('\n'.join(filelist))
+        print(">>> generating phone level transcription for {}...".format(dataset))
+        pyhtk.mlf_word2phone(lexicon_htk, mlf_phone, mlf_word, mkphones_led)
+    print("elapsed time: {}".format(time.time() - timer_start))
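Note: the loop above assembles the word-level master label file (MLF) by hand: a #!MLF!# header, then for each utterance a quoted "*/<name>.lab" pattern, the label lines, and a terminating '.'. Using words from the sample line quoted earlier in this diff, the output should look roughly like this (the exact casing depends on what pyhtk.create_label_file writes, which is not shown here):

    #!MLF!#
    "*/sp0457m_test_1968_plakkenfryslanterhorne_2168.lab"
    EN
    DAN
    BEGJINNE
    .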
-## hcomp_scp
-# a script file for HCompV
+
+## ======================= extract features =======================
+if extract_features:
+    for dataset in dataset_list:
+        timer_start = time.time()
+        print('==== extract features on dataset {} ===='.format(dataset))
 
-    # print("generating phone level transcription...\n")
-    # mkphones = output_dir + '\\label\\mkphones0.txt'
-    # subprocessStr = r"HLEd -l * -d " + lex_htk_ + ' -i ' + mlf_phone + ' ' + mkphones + ' ' + mlf_word
-    # subprocess.call(subprocessStr, shell=True)
-
+        wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
+        label_dir_ = os.path.join(label_dir, dataset)
+        feature_dir_ = os.path.join(feature_dir, dataset)
+        fh.make_new_directory(feature_dir_)
 
-## ======================= combined scps and mlfs =======================
-#if combine_files:
-#    print("==== combine scps and mlfs ====\n")
+        # a script file for HCopy
+        print(">>> making a script file for HCopy...")
+        hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
+        hcopy_scp.close()
 
-#    fscp = open(hcompv_scp, 'wt')
-#    fmlf = open(combined_mlf, 'wt')
-
-#    for dataset in dataset_list:
-#        fmlf.write("#!MLF!#\n")
-#    for dataset in dataset_list:
-#        each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf'
-#        each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
+        # get a list of features (hcopy.scp)
+        # from the filelist in FAME! corpus:
+        #fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
+        # from the list of label files:
+        lab_list = glob.glob(os.path.join(label_dir_, '*.lab'))
+        feature_list = [
+            os.path.join(wav_dir_, os.path.basename(lab_file).replace('.lab', '.wav')) + '\t' +
+            os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc'))
+            for lab_file in lab_list]
+        with open(hcopy_scp.name, 'wb') as f:
+            f.write(bytes('\n'.join(feature_list), 'ascii'))
 
-#        with open(each_mlf, 'r') as fin:
-#            lines = fin.read()
-#            lines = lines.split('\n')
-#            fmlf.write('\n'.join(lines[1:]))
+        # extract features.
+        print(">>> extracting features on {}...".format(dataset))
+        pyhtk.wav2mfc(config_hcopy, hcopy_scp.name)
+        os.remove(hcopy_scp.name)
 
-#        with open(each_scp, 'r') as fin:
-#            lines = fin.read()
-#            fscp.write(lines)
+        # make hcompv.scp.
+        print(">>> making a script file for {}...".format(dataset))
+        listdir = glob.glob(os.path.join(label_dir_, '*.dic'))
+        mfc_list = [filename.replace(label_dir_, feature_dir_).replace('.dic', '.mfc') for filename in listdir]
+        hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
+        with open(hcompv_scp, 'wb') as f:
+            f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))
 
-#    fscp.close()
-#    fmlf.close()
+        print("elapsed time: {}".format(time.time() - timer_start))
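Note: config.HCopy itself is not included in the diff, but the prototype name proto39 points at 39-dimensional features, i.e. 12 MFCCs plus energy with delta and acceleration coefficients (TARGETKIND MFCC_0_D_A: 13 static + 13 delta + 13 acceleration). A typical HTK front-end configuration of that kind is sketched below; the concrete values are assumptions, not the project's actual file:

    # config.HCopy -- illustrative sketch only
    SOURCEFORMAT = WAV
    TARGETKIND   = MFCC_0_D_A
    TARGETRATE   = 100000.0    # 10 ms frame shift
    WINDOWSIZE   = 250000.0    # 25 ms analysis window
    USEHAMMING   = T
    PREEMCOEF    = 0.97
    NUMCHANS     = 26
    NUMCEPS      = 12
    CEPLIFTER    = 22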
 
 
 ## ======================= flat start monophones =======================
-if flat_start:
-    subprocessStr = 'HCompV -T 1 -C ' + config_train + ' -m -v 0.01 -S ' + hcompv_scp + ' -M ' + model0_dir + ' ' + proto_init
-    subprocess.call(subprocessStr, shell=True)
+if flat_start:
+    hcompv_scp = os.path.join(tmp_dir, 'test.scp')
+
+    timer_start = time.time()
+    print('==== flat start ====')
+    pyhtk.flat_start(config_train, hcompv_scp, model0_dir, prototype)
 
     # allocate mean & variance to all phones in the phone list
-    subprocessStr = 'perl ' + mkhmmdefs_pl + ' ' + model0_dir + '\\proto38' + ' ' + phonelist + ' > ' + model0_dir + '\\' + hmmdefs_name
-    subprocess.call(subprocessStr, shell=True)
+    pyhtk.create_hmmdefs(
+        os.path.join(model0_dir, 'proto39'),
+        os.path.join(model0_dir, 'hmmdefs'),
+        phonelist_txt)
+    print("elapsed time: {}".format(time.time() - timer_start))
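Note: pyhtk.flat_start replaces the inline HCompV call deleted above. Reconstructed from that removed command line, the wrapper is presumably equivalent to the sketch below (the signature is inferred from the call site):

    import subprocess

    def flat_start(config_train, hcompv_scp, model_dir, prototype):
        # HCompV computes the global mean and variance of the features
        # listed in hcompv_scp and writes an initialised prototype
        # (plus vFloors) into model_dir; flags mirror the removed command.
        subprocess.call(['HCompV', '-T', '1', '-C', config_train,
                         '-m', '-v', '0.01', '-S', hcompv_scp,
                         '-M', model_dir, prototype])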
 
 
 ## ======================= estimate monophones =======================
-if train_model:
-    iter_num_max = 3
-    for mix_num in [128, 256, 512, 1024]:
-        for iter_num in range(1, iter_num_max+1):
-            print("===== mix{}, iter{} =====".format(mix_num, iter_num))
-            iter_num_pre = iter_num - 1
-            modelN_dir = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num)
-            if not os.path.exists(modelN_dir):
-                os.makedirs(modelN_dir)
+if train_model_without_sp:
+    hcompv_scp = os.path.join(tmp_dir, 'test.scp')
+    mlf_file = os.path.join(label_dir, 'test_phone.mlf')
+    output_dir = os.path.join(model_dir, 'hmm1')
+    fh.make_new_directory(output_dir)
 
-            if iter_num == 1 and mix_num == 1:
-                modelN_dir_pre = model0_dir
-            else:
-                modelN_dir_pre = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num_pre)
+    print('==== train model without sp ====')
+    if not os.path.exists(os.path.join(output_dir, 'iter0')):
+        shutil.copytree(model0_dir, os.path.join(output_dir, 'iter0'))
+    for niter in range(1, 5):
+        timer_start = time.time()
+        hmm_n = 'iter' + str(niter)
+        hmm_n_pre = 'iter' + str(niter-1)
+        modeln_dir = os.path.join(output_dir, hmm_n)
+        modeln_dir_pre = os.path.join(output_dir, hmm_n_pre)
 
-            ## re-estimation
-            subprocessStr = 'HERest -T 1 -C ' + config_train + ' -v 0.01 -I ' + combined_mlf + ' -H ' + modelN_dir_pre + '\\' + hmmdefs_name + ' -M ' + modelN_dir + ' ' + phonelist + ' -S ' + hcompv_scp
-            subprocess.call(subprocessStr, shell=True)
-
-            mix_num_next = mix_num * 2
-            modelN_dir_next = model_dir + '\\hmm' + str(mix_num_next) + '-0'
-            if not os.path.exists(modelN_dir_next):
-                os.makedirs(modelN_dir_next)
-
-            header_file = modelN_dir + '\\mix' + str(mix_num_next) + '.hed'
-            with open(header_file, 'w') as fout:
-                fout.write("MU %d {*.state[2-4].mix}" % (mix_num_next))
-
-            subprocessStr = 'HHEd -T 1 -H ' + modelN_dir + '\\' + hmmdefs_name + ' -M ' + modelN_dir_next + ' ' + header_file + ' ' + phonelist
-
-            subprocess.call(subprocessStr, shell=True)
+        # re-estimation
+        fh.make_new_directory(modeln_dir)
+        pyhtk.re_estimation(
+            config_train,
+            os.path.join(modeln_dir_pre, 'proto39'),
+            os.path.join(modeln_dir_pre, hmmdefs_name),
+            modeln_dir,
+            hcompv_scp, phonelist_txt,
+            mlf_file=mlf_file)
+        print("elapsed time: {}".format(time.time() - timer_start))
\ No newline at end of file
diff --git a/acoustic_model/test.txt b/acoustic_model/test.txt
deleted file mode 100644
index e69de29..0000000
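Note: pyhtk.re_estimation, used in the training loop above, likewise replaces the inline HERest call deleted in this diff. Based on that removed command and the new call site it is presumably close to the sketch below; treating the proto39 file as a separate -H macro file is an assumption:

    import subprocess

    def re_estimation(config_train, macros, hmmdefs, model_out_dir,
                      hcompv_scp, phonelist_txt, mlf_file):
        # one Baum-Welch re-estimation pass (HERest) over the features in
        # hcompv_scp, reading the previous model set (-H) and writing the
        # updated models into model_out_dir (-M).
        subprocess.call(['HERest', '-T', '1', '-C', config_train,
                         '-v', '0.01', '-I', mlf_file,
                         '-H', macros, '-H', hmmdefs,
                         '-M', model_out_dir, '-S', hcompv_scp,
                         phonelist_txt])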