diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index 45e0fe2..31f158b 100644 Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py index 9ce920b..d8da964 100644 --- a/acoustic_model/fame_hmm.py +++ b/acoustic_model/fame_hmm.py @@ -4,6 +4,7 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') import tempfile import shutil +import glob #import configparser #import subprocess import time @@ -30,8 +31,9 @@ dataset_list = ['devel', 'test', 'train'] # procedure extract_features = 0 -make_lexicon = 1 -make_mlf = 0 +make_lexicon = 0 +make_dictionary = 0 # 4800 sec +make_htk_files = 1 combine_files = 0 flat_start = 0 train_model = 0 @@ -92,11 +94,6 @@ if extract_features: fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name) pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name) - # a script file for HCompV - print(">>> making a script file for HCompV... \n") - hcompv_scp = os.path.join(tmp_dir, dataset + '.scp') - fh.make_filelist(feature_dir_, hcompv_scp, '.mfc') - os.remove(hcopy_scp.name) @@ -124,23 +121,11 @@ if make_lexicon: fame_functions.fix_single_quote(lexicon_htk) -## ======================= make phonelist ======================= -#phonelist_txt = os.path.join(default.htk_dir, 'config', 'phonelist.txt') -#pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt) -#sentence = 'ien fan de minsken fan it deiferbliuw sels brúntsje visser' -#log_txt = os.path.join(default.htk_dir, 'config', 'log.txt') -#dictionary_file = os.path.join(default.htk_dir, 'config', 'test.dic') -#pyhtk.create_dictionary( -# sentence, global_ded, log_txt, dictionary_file, lexicon_htk) -#pyhtk.create_dictionary_without_log( -# sentence, global_ded, dictionary_file, lexicon_htk) - - -## ======================= make label file ======================= -if make_mlf: +## ======================= make dic files ======================= +if make_dictionary: for dataset in dataset_list: timer_start = time.time() - print("==== generating word level transcription on dataset {}\n".format(dataset)) + print("==== generating HTK dictionary files on dataset {}\n".format(dataset)) #hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp' #hcompv_scp2 = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp' @@ -161,25 +146,22 @@ if make_mlf: filename_ = line.split(' ')[0] filename = '_'.join(filename_.split('_')[1:]) sentence = ' '.join(line.split(' ')[1:]) + sentence_htk = fame_functions.word2htk(sentence) wav_file = os.path.join(wav_dir, filename + '.wav') - if len(re.findall(r'[\w]+[âêûô\'ú]+[\w]+', sentence))==0: - try: - sentence_ascii = bytes(sentence, 'ascii') - except UnicodeEncodeError: - print(sentence) - #if os.path.exists(wav_file): - # #dictionary_file = os.path.join(wav_dir, filename + '.dic') - # if pyhtk.create_dictionary_without_log( - # sentence, global_ded, dictionary_file, lexicon_htk) == 0: - # # when the file name is too long, HDMan command does not work. - # # therefore first temporary dictionary_file is made, then renamed. - # shutil.move(dictionary_file, os.path.join(wav_dir, filename + '.dic')) - # label_file = os.path.join(wav_dir, filename + '.lab') - # pyhtk.create_label_file(sentence, label_file) - # else: - # os.remove(dictionary_file) + if os.path.exists(wav_file): + #dictionary_file = os.path.join(wav_dir, filename + '.dic') + if pyhtk.create_dictionary_without_log( + sentence, global_ded, dictionary_file, lexicon_htk) == 0: + # when the file name is too long, HDMan command does not work. + # therefore first temporary dictionary_file is made, then renamed. + shutil.move(dictionary_file, os.path.join(wav_dir, filename + '.dic')) + label_file = os.path.join(wav_dir, filename + '.lab') + pyhtk.create_label_file(sentence, label_file) + else: + os.remove(dictionary_file) print("elapsed time: {}".format(time.time() - timer_start)) + # lexicon #lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation']) @@ -244,8 +226,30 @@ if make_mlf: # fscp.close() # fmlf.close() +## ======================= make other required files ======================= +if make_htk_files: + ## phonelist + phonelist_txt = os.path.join(default.htk_dir, 'config', 'phonelist.txt') + pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt) + + ## hcomp_v.scp + print(">>> making a script file for HCompV... \n") + for dataset in dataset_list: + #timer_start = time.time() + + wav_dir = os.path.join(default.fame_dir, 'fame', 'wav', dataset) + + listdir = glob.glob(os.path.join(wav_dir, '*.dic')) + filelist = [filename.replace(wav_dir, feature_dir).replace('.dic', '.fea') for filename in listdir] + + hcompv_scp = os.path.join(tmp_dir, dataset + '.scp') + with open(hcompv_scp, 'wt', newline='\r\n') as f: + f.write('\n'.join(filelist)) + + +## hcomp_scp +# a script file for HCompV - ## generate phone level transcription # print("generating phone level transcription...\n") # mkphones = output_dir + '\\label\\mkphones0.txt' # subprocessStr = r"HLEd -l * -d " + lex_htk_ + ' -i ' + mlf_phone + ' ' + mkphones + ' ' + mlf_word @@ -253,29 +257,29 @@ if make_mlf: ## ======================= combined scps and mlfs ======================= -if combine_files: - print("==== combine scps and mlfs ====\n") +#if combine_files: +# print("==== combine scps and mlfs ====\n") - fscp = open(hcompv_scp, 'wt') - fmlf = open(combined_mlf, 'wt') +# fscp = open(hcompv_scp, 'wt') +# fmlf = open(combined_mlf, 'wt') - for dataset in dataset_list: - fmlf.write("#!MLF!#\n") - for dataset in dataset_list: - each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf' - each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp' +# for dataset in dataset_list: +# fmlf.write("#!MLF!#\n") +# for dataset in dataset_list: +# each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf' +# each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp' - with open(each_mlf, 'r') as fin: - lines = fin.read() - lines = lines.split('\n') - fmlf.write('\n'.join(lines[1:])) +# with open(each_mlf, 'r') as fin: +# lines = fin.read() +# lines = lines.split('\n') +# fmlf.write('\n'.join(lines[1:])) - with open(each_scp, 'r') as fin: - lines = fin.read() - fscp.write(lines) +# with open(each_scp, 'r') as fin: +# lines = fin.read() +# fscp.write(lines) - fscp.close() - fmlf.close() +# fscp.close() +# fmlf.close() ## ======================= flat start monophones ======================= diff --git a/acoustic_model/phoneset/fame_asr.py b/acoustic_model/phoneset/fame_asr.py index a9f47a7..22e9d65 100644 --- a/acoustic_model/phoneset/fame_asr.py +++ b/acoustic_model/phoneset/fame_asr.py @@ -110,7 +110,7 @@ phoneset_htk = [translation_key_asr2htk.get(i, i) for i in phoneset_short] # 'ä', 'ë', 'ï', 'ö', 'ü' #] translation_key_word2htk = { - '\'': '\\\'', + #'\'': '\\\'', 'í':'i1', 'é':'e1', 'ú':'u1', 'ć':'c1', 'à':'a2', 'è':'e2', 'â':'a3', 'ê':'e3', 'ô':'o3', 'û':'u3',