import sys
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import tempfile
import shutil
import glob
#import configparser
import subprocess  # used by the flat_start and train_model sections below.
import time

import numpy as np
import pandas as pd

import fame_functions
from phoneset import fame_ipa, fame_asr
import defaultfiles as default
sys.path.append(default.toolbox_dir)
import file_handling as fh
from htk import pyhtk


## ======================= user define =======================
#repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
#curr_dir = repo_dir + '\\acoustic_model'
#config_ini = curr_dir + '\\config.ini'
#output_dir = 'C:\\OneDrive\\Research\\rug\\experiments\\friesian\\acoustic_model'
#forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment'

dataset_list = ['devel', 'test', 'train']

# procedure: set a flag to 1 to run the corresponding step.
extract_features = 0
make_lexicon = 0
make_dictionary = 0  # takes ~4800 sec
make_htk_files = 1
combine_files = 0
flat_start = 0
train_model = 0


## ======================= load variables =======================
lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
lexicon_oov = os.path.join(lexicon_dir, 'lex.oov')

lexicon_htk_asr = os.path.join(default.htk_dir, 'lexicon', 'lex.htk_asr')
lexicon_htk_oov = os.path.join(default.htk_dir, 'lexicon', 'lex.htk_oov')
lexicon_htk = os.path.join(default.htk_dir, 'lexicon', 'lex.htk')

global_ded = os.path.join(default.htk_dir, 'config', 'global.ded')

# legacy paths; the flat_start and train_model sections still refer to some of
# these names (and to config_train / mkhmmdefs_pl, which are not defined here),
# so they must be set before those flags are enabled.
#hcompv_scp = output_dir + '\\scp\\combined.scp'
#combined_mlf = output_dir + '\\label\\combined.mlf'
#model_dir = output_dir + '\\model'
#model0_dir = model_dir + '\\hmm0'
#proto_init = model_dir + '\\proto38'
#proto_name = 'proto'
#phonelist = output_dir + '\\config\\phonelist_friesian.txt'
#hmmdefs_name = 'hmmdefs'

feature_dir = os.path.join(default.htk_dir, 'mfc')
if not os.path.exists(feature_dir):
    os.makedirs(feature_dir)
tmp_dir = os.path.join(default.htk_dir, 'tmp')
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)
label_dir = os.path.join(default.htk_dir, 'label')
if not os.path.exists(label_dir):
    os.makedirs(label_dir)


## ======================= extract features =======================
if extract_features:
    for dataset in dataset_list:
        print('==== extract features on dataset {} ====\n'.format(dataset))

        # a temporary script file for HCopy.
        print(">>> making a script file for HCopy... \n")
        hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
        hcopy_scp.close()

        # make sure the output directory for this dataset exists.
        feature_dir_ = os.path.join(feature_dir, dataset)
        if not os.path.exists(feature_dir_):
            os.makedirs(feature_dir_)

        # get a list of wav/feature pairs (hcopy.scp) from the filelist in the
        # FAME! corpus, then extract features.
        print(">>> extracting features... \n")
        fame_functions.make_hcopy_scp_from_filelist_in_fame(
            default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
        pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)

        os.remove(hcopy_scp.name)


## ======================= make lexicon for HTK =======================
if make_lexicon:
    print('==== make lexicon for HTK ====\n')

    # convert each lexicon from the fame_asr phoneset to the fame_htk phoneset.
    print('>>> converting each lexicon from fame_asr phoneset to fame_htk phoneset... \n')
    fame_functions.lexicon_asr2htk(lexicon_asr, lexicon_htk_asr)
    fame_functions.lexicon_asr2htk(lexicon_oov, lexicon_htk_oov)

    # combine the lexicons.
    print('>>> combining lexicon files into one lexicon... \n')
    # pronunciations which are not found in lex.asr are generated by G2P and
    # listed in lex.oov, so there is no overlap between lex.asr and lex.oov.
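    # For reference, an HTK pronunciation dictionary holds one entry per line:
    #   WORD  phone1 phone2 ...
    # e.g. (illustrative entry, not taken from the actual lexicon):
    #   WETTER  w E t @ r
    # combine_lexicon() is assumed to merge the two files into one sorted list.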
    fame_functions.combine_lexicon(lexicon_htk_asr, lexicon_htk_oov, lexicon_htk)

    ## =======================
    ## manually make changes to the pronunciation dictionary and save it as lex.htk
    ## =======================
    # (1) replace all tabs with a single space;
    # (2) put a '\' before any dictionary entry beginning with a single quote.
    # http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
    fame_functions.fix_single_quote(lexicon_htk)


## ======================= make dic files =======================
if make_dictionary:
    for dataset in dataset_list:
        timer_start = time.time()

        print("==== generating HTK dictionary files on dataset {} ====\n".format(dataset))

        #hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
        #hcompv_scp2 = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
        script_list = os.path.join(default.fame_dir, 'data', dataset, 'text')
        #mlf_word = output_dir + '\\label\\' + dataset + '_word.mlf'
        #mlf_phone = output_dir + '\\label\\' + dataset + '_phone.mlf'

        wav_dir = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
        dictionary_file = os.path.join(wav_dir, 'temp.dic')

        # list of transcriptions, one utterance per line.
        with open(script_list, "rt", encoding="utf-8") as fin:
            scripts = fin.read().split('\n')

        for line in scripts:
        #for line in ['sp0035m_train_1975_fragmentenvraaggesprekkenruilverkaveling_15413 en dat kan men nog meer']:
            # sample line:
            # sp0457m_test_1968_plakkenfryslanterhorne_2168 en dan begjinne je natuerlik
            filename_ = line.split(' ')[0]
            filename = '_'.join(filename_.split('_')[1:])
            sentence = ' '.join(line.split(' ')[1:])
            sentence_htk = fame_functions.word2htk(sentence)  # note: not used below.

            wav_file = os.path.join(wav_dir, filename + '.wav')
            if os.path.exists(wav_file):
                #dictionary_file = os.path.join(wav_dir, filename + '.dic')
                if pyhtk.create_dictionary_without_log(
                        sentence, global_ded, dictionary_file, lexicon_htk) == 0:
                    # when the filename is too long, the HDMan command fails;
                    # therefore a temporary dictionary_file is made first, then renamed.
                    shutil.move(dictionary_file,
                                os.path.join(wav_dir, filename + '.dic'))
                    label_file = os.path.join(wav_dir, filename + '.lab')
                    pyhtk.create_label_file(sentence, label_file)
                else:
                    os.remove(dictionary_file)
        print("elapsed time: {}".format(time.time() - timer_start))
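        # For reference, each accepted utterance now has two small files next
        # to its wav (contents sketched here, inferred from the pyhtk helper names):
        #   <filename>.dic - the pronunciation dictionary for just the words
        #                    in this utterance, as produced by HDMan.
        #   <filename>.lab - the word-level label file for the utterance,
        #                    i.e. its transcription in HTK label format.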
        ## legacy code from the previous pipeline, kept for reference: it
        ## filtered the .scp file and built word-level .mlf files from the
        ## transcriptions, skipping utterances with out-of-lexicon words.

        # lexicon
        #lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation'])

        # list of features
        #with open(hcompv_scp) as fin:
        #    features = fin.read()
        #    features = features.split('\n')

        #i = 0
        #missing_words = []
        #fscp = open(hcompv_scp2, 'wt')
        #fmlf = open(mlf_word, "wt", encoding="utf-8")
        #fmlf.write("#!MLF!#\n")
        #feature_nr = 1
        #for feature in features:
        #    sys.stdout.write("\r%d/%d" % (feature_nr, len(features)))
        #    sys.stdout.flush()
        #    feature_nr += 1
        #    file_basename = os.path.basename(feature).replace('.mfc', '')

        #    # get words from scripts.
        #    try:
        #        script = scripts[scripts.str.contains(file_basename)]
        #    except IndexError:
        #        script = []

        #    if len(script) != 0:
        #        script_id = script.index[0]
        #        script_txt = script.get(script_id)
        #        script_words = script_txt.split(' ')
        #        del script_words[0]

        #        # check if all words can be found in the lexicon.
        #        SCRIPT_WORDS = []
        #        script_prons = []
        #        is_in_lexicon = 1
        #        for word in script_words:
        #            WORD = word.upper()
        #            SCRIPT_WORDS.append(WORD)
        #            extracted = lexicon_htk[lexicon_htk['word'] == WORD]
        #            if len(extracted) == 0:
        #                missing_words.append(word)
        #            script_prons.append(extracted)
        #            is_in_lexicon *= len(extracted)

        #        # if all pronunciations are found in the lexicon,
        #        # update the scp and mlf files.
        #        if is_in_lexicon:
        #            # add the feature filename to the .scp file.
        #            fscp.write("{}\n".format(feature))
        #            i += 1

        #            # add the words to the mlf file.
        #            fmlf.write('\"*/{}.lab\"\n'.format(file_basename))
        #            #fmlf.write('{}'.format('\n'.join(SCRIPT_WORDS)))
        #            for word_ in SCRIPT_WORDS:
        #                if word_[0] == '\'':
        #                    word_ = '\\' + word_
        #                fmlf.write('{}\n'.format(word_))
        #            fmlf.write('.\n')

        #print("\n{0} has {1} samples.\n".format(dataset, i))
        #np.save(output_dir + '\\missing_words' + '_' + dataset + '.npy', missing_words)

        #fscp.close()
        #fmlf.close()


## ======================= make other required files =======================
if make_htk_files:
    ## phonelist
    phonelist_txt = os.path.join(default.htk_dir, 'config', 'phonelist.txt')
    pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)

    ## hcompv.scp: a script file for HCompV, one feature file per line.
    print(">>> making a script file for HCompV... \n")
    for dataset in dataset_list:
        #timer_start = time.time()
        wav_dir = os.path.join(default.fame_dir, 'fame', 'wav', dataset)

        # every utterance that got a .dic file should have a feature file.
        # note: extract_features writes features into feature_dir/<dataset>, so
        # this replacement may need os.path.join(feature_dir, dataset) instead.
        listdir = glob.glob(os.path.join(wav_dir, '*.dic'))
        filelist = [filename.replace(wav_dir, feature_dir).replace('.dic', '.fea')
                    for filename in listdir]
        hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
        with open(hcompv_scp, 'wt', newline='\r\n') as f:
            f.write('\n'.join(filelist))

    # legacy: phone-level transcription via HLEd.
    #        print("generating phone level transcription...\n")
    #        mkphones = output_dir + '\\label\\mkphones0.txt'
    #        subprocessStr = r"HLEd -l * -d " + lex_htk_ + ' -i ' + mlf_phone + ' ' + mkphones + ' ' + mlf_word
    #        subprocess.call(subprocessStr, shell=True)


## ======================= combined scps and mlfs =======================
#if combine_files:
#    print("==== combine scps and mlfs ====\n")

#    fscp = open(hcompv_scp, 'wt')
#    fmlf = open(combined_mlf, 'wt')

#    # the MLF header must appear exactly once.
#    fmlf.write("#!MLF!#\n")
#    for dataset in dataset_list:
#        each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf'
#        each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'

#        with open(each_mlf, 'r') as fin:
#            lines = fin.read()
#            lines = lines.split('\n')
#        fmlf.write('\n'.join(lines[1:]))

#        with open(each_scp, 'r') as fin:
#            lines = fin.read()
#        fscp.write(lines)

#    fscp.close()
#    fmlf.close()


## ======================= flat start monophones =======================
# note: this section and train_model still use the legacy path variables that
# are commented out under 'load variables', plus config_train and mkhmmdefs_pl;
# define them before enabling these flags.
if flat_start:
    subprocessStr = ('HCompV -T 1 -C ' + config_train + ' -m -v 0.01 -S ' + hcompv_scp
                     + ' -M ' + model0_dir + ' ' + proto_init)
    subprocess.call(subprocessStr, shell=True)

    # allocate the global mean & variance to all phones in the phone list.
    subprocessStr = ('perl ' + mkhmmdefs_pl + ' ' + model0_dir + '\\proto38 '
                     + phonelist + ' > ' + model0_dir + '\\' + hmmdefs_name)
    subprocess.call(subprocessStr, shell=True)


## ======================= estimate monophones =======================
if train_model:
    iter_num_max = 3
    for mix_num in [128, 256, 512, 1024]:
        for iter_num in range(1, iter_num_max+1):
            print("===== mix{}, iter{} =====".format(mix_num, iter_num))
            iter_num_pre = iter_num - 1
            modelN_dir = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num)
            if not os.path.exists(modelN_dir):
                os.makedirs(modelN_dir)

            # note: with mix_num starting at 128 this branch is never taken,
            # so model_dir\hmm128-0 must already exist.
            if iter_num == 1 and mix_num == 1:
                modelN_dir_pre = model0_dir
            else:
                modelN_dir_pre = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num_pre)

            ## re-estimation
            subprocessStr = ('HERest -T 1 -C ' + config_train + ' -v 0.01 -I ' + combined_mlf
                             + ' -H ' + modelN_dir_pre + '\\' + hmmdefs_name
                             + ' -M ' + modelN_dir + ' ' + phonelist + ' -S ' + hcompv_scp)
            subprocess.call(subprocessStr, shell=True)

        mix_num_next = mix_num * 2
        modelN_dir_next = model_dir + '\\hmm' + str(mix_num_next) + '-0'
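        # HHEd's MU command below doubles the number of Gaussian mixture
        # components per emitting state; e.g. for mix_num_next == 256 the
        # generated .hed file contains the single line
        #   MU 256 {*.state[2-4].mix}
        # (states 2-4 are the emitting states of HTK's standard 5-state,
        # left-to-right phone HMM).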
        if not os.path.exists(modelN_dir_next):
            os.makedirs(modelN_dir_next)

        header_file = modelN_dir + '\\mix' + str(mix_num_next) + '.hed'
        with open(header_file, 'w') as fout:
            fout.write("MU %d {*.state[2-4].mix}" % (mix_num_next))

        subprocessStr = ('HHEd -T 1 -H ' + modelN_dir + '\\' + hmmdefs_name
                         + ' -M ' + modelN_dir_next + ' ' + header_file + ' ' + phonelist)
        subprocess.call(subprocessStr, shell=True)
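# For reference, the HERest call above expands to a command line of the form
# (paths illustrative):
#   HERest -T 1 -C config.train -v 0.01 -I combined.mlf
#       -H model\hmm128-0\hmmdefs -M model\hmm128-1 phonelist.txt -S train.scp
# i.e. each pass re-estimates the HMM set given via -H on the features listed
# in the -S script file, using the transcriptions in the MLF, and writes the
# updated model set to the -M directory.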