diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo
index a64dccd..6764d7f 100644
Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ
diff --git a/_tmp/phone_to_be_searched.npy b/_tmp/phone_to_be_searched.npy
new file mode 100644
index 0000000..17bfbfa
Binary files /dev/null and b/_tmp/phone_to_be_searched.npy differ
diff --git a/_tmp/translation_key.npy b/_tmp/translation_key.npy
new file mode 100644
index 0000000..fffeade
Binary files /dev/null and b/_tmp/translation_key.npy differ
diff --git a/acoustic_model.sln b/acoustic_model.sln
index 406d9e5..886b9ee 100644
--- a/acoustic_model.sln
+++ b/acoustic_model.sln
@@ -10,19 +10,21 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
 		..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
 		..\toolbox\evaluation.py = ..\toolbox\evaluation.py
+		..\toolbox\toolbox\file_handling.py = ..\toolbox\toolbox\file_handling.py
 		..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
 		..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
 		..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
 		..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py
 		..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
 		..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
-		reus-test\reus-test.py = reus-test\reus-test.py
 		..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
 		..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
 		..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
 		..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
 	EndProjectSection
 EndProject
+Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "pyhtk", "..\pyhtk\pyhtk\pyhtk.pyproj", "{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -31,6 +33,8 @@ Global
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 		{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Release|Any CPU.ActiveCfg = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc
index ef367cd..f41e244 100644
Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ
diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj
index 17163f2..4dd80ce 100644
--- a/acoustic_model/acoustic_model.pyproj
+++ b/acoustic_model/acoustic_model.pyproj
@@ -4,7 +4,8 @@
     2.0
     4d8c8573-32f0-4a62-9e62-3ce5cc680390
     .
-    forced_aligner_comparison.py
+
+    .
@@ -21,10 +22,6 @@
     false
-
-
-    Code
-
     Code
@@ -35,9 +32,8 @@
     Code
-
-    Code
-
+
+
     Code
@@ -47,6 +43,7 @@
     Code
+
diff --git a/acoustic_model/acoustic_model_function.py b/acoustic_model/acoustic_model_function.py
deleted file mode 100644
index 4fced38..0000000
--- a/acoustic_model/acoustic_model_function.py
+++ /dev/null
@@ -1,202 +0,0 @@
-import os
-import sys
-from collections import Counter
-
-import numpy as np
-import pandas as pd
-
-import defaultfiles as default
-
-sys.path.append(default.forced_alignment_module_dir)
-from forced_alignment import convert_phone_set
-
-
-def make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp):
-    """ Make a script file for HCopy using the filelist in FAME! corpus. """
-    filelist_txt = FAME_dir + '\\fame\\filelists\\' + dataset + 'list.txt'
-    with open(filelist_txt) as fin:
-        filelist = fin.read()
-        filelist = filelist.split('\n')
-
-    with open(hcopy_scp, 'w') as fout:
-        for filename_ in filelist:
-            filename = filename_.replace('.TextGrid', '')
-
-            if len(filename) > 3: # remove '.', '..' and ''
-                wav_file = FAME_dir + '\\fame\\wav\\' + dataset + '\\' + filename + '.wav'
-                mfc_file = feature_dir + '\\' + filename + '.mfc'
-
-                fout.write(wav_file + '\t' + mfc_file + '\n')
-
-
-def make_filelist(input_dir, output_txt):
-    """ Make a list of files in the input_dir. """
-    filenames = os.listdir(input_dir)
-
-    with open(output_txt, 'w') as fout:
-        for filename in filenames:
-            fout.write(input_dir + '\\' + filename + '\n')
-
-
-def make_htk_dict(word, pronvar_, fileDic, output_type):
-    """
-    make dict files which can be used for HTK.
-    param word: target word.
-    param pronvar_: pronunciation variant. nx2 (WORD /t pronunciation) ndarray.
-    param fileDic: output dic file.
-    param output_type: 0:full, 1:statistics, 2:frequency <2% entries are removed. 3:top 3.
-    """
-    #assert(output_type < 4 and output_type >= 0, 'output_type should be an integer between 0 and 3.')
-    WORD = word.upper()
-
-    if output_type == 0: # full
-        pronvar = np.unique(pronvar_)
-
-        with open(fileDic, 'w') as f:
-            for pvar in pronvar:
-                f.write('{0}\t{1}\n'.format(WORD, pvar))
-    else:
-        c = Counter(pronvar_)
-        total_num = sum(c.values())
-        with open(fileDic, 'w') as f:
-            if output_type == 3:
-                for key, value in c.most_common(3):
-                    f.write('{0}\t{1}\n'.format(WORD, key))
-            else:
-                for key, value in c.items():
-                    percentage = value/total_num*100
-
-                    if output_type == 1: # all
-                        f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
-                    elif output_type == 2: # less than 2 percent
-                        if percentage < 2:
-                            f.write('{0}\t{1}\n'.format(WORD, key))
-
-
-def get_phonelist(lexicon_file):
-    """ Make a list of phones which appears in the lexicon. """
-
-    with open(lexicon_file, "rt", encoding="utf-8") as fin:
-        lines = fin.read()
-        lines = lines.split('\n')
-        phonelist = set([])
-        for line in lines:
-            line = line.split('\t')
-            if len(line) > 1:
-                pronunciation = set(line[1].split())
-                phonelist = phonelist | pronunciation
-    return phonelist
-
-
-def find_phone(lexicon_file, phone):
-    """ Search where the phone is used in the lexicon. """
-    with open(lexicon_file, "rt", encoding="utf-8") as fin:
-        lines = fin.read()
-        lines = lines.split('\n')
-
-    extracted = []
-    for line in lines:
-        line = line.split('\t')
-        if len(line) > 1:
-            pronunciation = line[1]
-            if phone in pronunciation:
-                extracted.append(line)
-    return extracted
-
-
-def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
-    """ Convert a lexicon file from IPA to HTK format for FAME! corpus.
""" - - lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation']) - with open(lexicon_file_out, "w", encoding="utf-8") as fout: - for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']): - pronunciation_no_space = pronunciation.replace(' ', '') - pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space) - if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk: - fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk)) - - -def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out): - """ Combine two lexicon files and sort by words. """ - - with open(lexicon_file1, "rt", encoding="utf-8") as fin: - lines1 = fin.read() - lines1 = lines1.split('\n') - with open(lexicon_file2, "rt", encoding="utf-8") as fin: - lines2 = fin.read() - lines2 = lines2.split('\n') - - lex1 = pd.read_table(lexicon_file1, names=['word', 'pronunciation']) - lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation']) - lex = pd.concat([lex1, lex2]) - lex = lex.sort_values(by='word', ascending=True) - lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t') - - -def read_fileFA(fileFA): - """ - read the result file of HTK forced alignment. - this function only works when input is one word. - """ - with open(fileFA, 'r') as f: - lines = f.read() - lines = lines.split('\n') - - phones = [] - for line in lines: - line_split = line.split() - if len(line_split) > 1: - phones.append(line_split[2]) - - return ' '.join(phones) - - -def fame_pronunciation_variant(ipa): - ipa = ipa.replace('æ', 'ɛ') - ipa = ipa.replace('ɐ', 'a') - ipa = ipa.replace('ɑ', 'a') - ipa = ipa.replace('ɾ', 'r') - ipa = ipa.replace('ɹ', 'r') # ??? - ipa = ipa.replace('ʁ', 'r') - ipa = ipa.replace('ʀ', 'r') # ??? 
-    ipa = ipa.replace('ʊ', 'u')
-    ipa = ipa.replace('χ', 'x')
-
-    pronvar_list = [ipa]
-    while 'ø:' in ' '.join(pronvar_list) or 'œ' in ' '.join(pronvar_list) or 'ɒ' in ' '.join(pronvar_list):
-        pronvar_list_ = []
-        for p in pronvar_list:
-            if 'ø:' in p:
-                pronvar_list_.append(p.replace('ø:', 'ö'))
-                pronvar_list_.append(p.replace('ø:', 'ö:'))
-            if 'œ' in p:
-                pronvar_list_.append(p.replace('œ', 'ɔ̈'))
-                pronvar_list_.append(p.replace('œ', 'ɔ̈:'))
-            if 'ɒ' in p:
-                pronvar_list_.append(p.replace('ɒ', 'ɔ̈'))
-                pronvar_list_.append(p.replace('ɒ', 'ɔ̈:'))
-        pronvar_list = np.unique(pronvar_list_)
-    return pronvar_list
-
-
-def make_fame2ipa_variants(fame):
-    fame = 'rɛös'
-    ipa = [fame]
-    ipa.append(fame.replace('ɛ', 'æ'))
-    ipa.append(fame.replace('a', 'ɐ'))
-    ipa.append(fame.replace('a', 'ɑ'))
-    ipa.append(fame.replace('r', 'ɾ'))
-    ipa.append(fame.replace('r', 'ɹ'))
-    ipa.append(fame.replace('r', 'ʁ'))
-    ipa.append(fame.replace('r', 'ʀ'))
-    ipa.append(fame.replace('u', 'ʊ'))
-    ipa.append(fame.replace('x', 'χ'))
-
-    ipa.append(fame.replace('ö', 'ø:'))
-    ipa.append(fame.replace('ö:', 'ø:'))
-    ipa.append(fame.replace('ɔ̈', 'œ'))
-    ipa.append(fame.replace('ɔ̈:', 'œ'))
-    ipa.append(fame.replace('ɔ̈', 'ɒ'))
-    ipa.append(fame.replace('ɔ̈:', 'ɒ'))
-
-    return ipa
diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py
index f53100f..1046c29 100644
--- a/acoustic_model/defaultfiles.py
+++ b/acoustic_model/defaultfiles.py
@@ -2,11 +2,13 @@ import os
 
 #default_hvite_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'htk', 'config.HVite')
-cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
+#cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
 
-#config_hcopy = os.path.join(cygwin_dir, 'config', 'config.HCopy')
+htk_dir = r'C:\Aki\htk_fame'
+
+config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
 #config_train = os.path.join(cygwin_dir, 'config', 'config.train')
-config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
+#config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
 #mkhmmdefs_pl = os.path.join(cygwin_dir, 'src', 'acoustic_model', 'mkhmmdefs.pl')
 
 #dbLexicon = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\lexicon.accdb
@@ -26,19 +28,23 @@ config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
 #filePhoneList = config['pyHTK']['filePhoneList']
 #AcousticModel = config['pyHTK']['AcousticModel']
 
-repo_dir = r'C:\Users\Aki\source\repos'
+repo_dir = r'C:\Users\A.Kunikoshi\source\repos'
 ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
 forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
 accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
+pyhtk_dir = os.path.join(repo_dir, 'pyhtk', 'pyhtk')
+toolbox_dir = os.path.join(repo_dir, 'toolbox', 'toolbox')
 
-htk_config_dir = r'c:\Users\Aki\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
+htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
 config_hvite = os.path.join(htk_config_dir, 'config.HVite')
 #acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
-acoustic_model = r'c:\cygwin64\home\Aki\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
+acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
 phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
 
 WSL_dir = r'C:\OneDrive\WSL'
-fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
+#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
+fame_dir = r'f:\_corpus\fame'
+
 fame_s5_dir = os.path.join(fame_dir, 's5')
 fame_corpus_dir = os.path.join(fame_dir, 'corpus')
diff --git a/acoustic_model/fame_functions.py b/acoustic_model/fame_functions.py
new file mode 100644
index 0000000..4a16a95
--- /dev/null
+++ b/acoustic_model/fame_functions.py
@@ -0,0 +1,252 @@
+import os
+os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
+
+import sys
+from collections import Counter
+import pickle
+
+import numpy as np
+import pandas as pd
+
+import defaultfiles as default
+
+#sys.path.append(default.forced_alignment_module_dir)
+#from forced_alignment import convert_phone_set
+
+#def find_phone(lexicon_file, phone):
+#    """ Search where the phone is used in the lexicon. """
+#    with open(lexicon_file, "rt", encoding="utf-8") as fin:
+#        lines = fin.read()
+#        lines = lines.split('\n')
+
+#    extracted = []
+#    for line in lines:
+#        line = line.split('\t')
+#        if len(line) > 1:
+#            pronunciation = line[1]
+#            if phone in pronunciation:
+#                extracted.append(line)
+#    return extracted
+
+
+#def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
+#    """ Convert a lexicon file from IPA to HTK format for FAME! corpus. """
+
+#    lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation'])
+#    with open(lexicon_file_out, "w", encoding="utf-8") as fout:
+#        for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
+#            pronunciation_no_space = pronunciation.replace(' ', '')
+#            pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)
+#            if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk:
+#                fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))
+
+
+#def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
+#    """ Combine two lexicon files and sort by words. """
+
+#    with open(lexicon_file1, "rt", encoding="utf-8") as fin:
+#        lines1 = fin.read()
+#        lines1 = lines1.split('\n')
+#    with open(lexicon_file2, "rt", encoding="utf-8") as fin:
+#        lines2 = fin.read()
+#        lines2 = lines2.split('\n')
+
+#    lex1 = pd.read_table(lexicon_file1, names=['word', 'pronunciation'])
+#    lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation'])
+#    lex = pd.concat([lex1, lex2])
+#    lex = lex.sort_values(by='word', ascending=True)
+#    lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')
+
+
+#def read_fileFA(fileFA):
+#    """
+#    read the result file of HTK forced alignment.
+#    this function only works when input is one word.
+#    """
+#    with open(fileFA, 'r') as f:
+#        lines = f.read()
+#        lines = lines.split('\n')
+
+#    phones = []
+#    for line in lines:
+#        line_split = line.split()
+#        if len(line_split) > 1:
+#            phones.append(line_split[2])
+
+#    return ' '.join(phones)
+
+
+#def fame_pronunciation_variant(ipa):
+#    ipa = ipa.replace('æ', 'ɛ')
+#    ipa = ipa.replace('ɐ', 'a')
+#    ipa = ipa.replace('ɑ', 'a')
+#    ipa = ipa.replace('ɾ', 'r')
+#    ipa = ipa.replace('ɹ', 'r') # ???
+#    ipa = ipa.replace('ʁ', 'r')
+#    ipa = ipa.replace('ʀ', 'r') # ???
+#    ipa = ipa.replace('ʊ', 'u')
+#    ipa = ipa.replace('χ', 'x')
+
+#    pronvar_list = [ipa]
+#    while 'ø:' in ' '.join(pronvar_list) or 'œ' in ' '.join(pronvar_list) or 'ɒ' in ' '.join(pronvar_list):
+#        pronvar_list_ = []
+#        for p in pronvar_list:
+#            if 'ø:' in p:
+#                pronvar_list_.append(p.replace('ø:', 'ö'))
+#                pronvar_list_.append(p.replace('ø:', 'ö:'))
+#            if 'œ' in p:
+#                pronvar_list_.append(p.replace('œ', 'ɔ̈'))
+#                pronvar_list_.append(p.replace('œ', 'ɔ̈:'))
+#            if 'ɒ' in p:
+#                pronvar_list_.append(p.replace('ɒ', 'ɔ̈'))
+#                pronvar_list_.append(p.replace('ɒ', 'ɔ̈:'))
+#        pronvar_list = np.unique(pronvar_list_)
+#    return pronvar_list
+
+
+#def make_fame2ipa_variants(fame):
+#    fame = 'rɛös'
+#    ipa = [fame]
+#    ipa.append(fame.replace('ɛ', 'æ'))
+#    ipa.append(fame.replace('a', 'ɐ'))
+#    ipa.append(fame.replace('a', 'ɑ'))
+#    ipa.append(fame.replace('r', 'ɾ'))
+#    ipa.append(fame.replace('r', 'ɹ'))
+#    ipa.append(fame.replace('r', 'ʁ'))
+#    ipa.append(fame.replace('r', 'ʀ'))
+#    ipa.append(fame.replace('u', 'ʊ'))
+#    ipa.append(fame.replace('x', 'χ'))
+
+#    ipa.append(fame.replace('ö', 'ø:'))
+#    ipa.append(fame.replace('ö:', 'ø:'))
+#    ipa.append(fame.replace('ɔ̈', 'œ'))
+#    ipa.append(fame.replace('ɔ̈:', 'œ'))
+#    ipa.append(fame.replace('ɔ̈', 'ɒ'))
+#    ipa.append(fame.replace('ɔ̈:', 'ɒ'))
+
+#    return ipa
+
+def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
+    """ Make a script file for HCopy using the filelist in FAME! corpus. """
+
+    filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
+    with open(filelist_txt) as fin:
+        filelist = fin.read()
+        filelist = filelist.split('\n')
+
+    with open(hcopy_scp, 'w') as fout:
+        for filename_ in filelist:
+            filename = filename_.replace('.TextGrid', '')
+
+            if len(filename) > 3: # remove '.', '..' and ''
+                wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
+                mfc_file = os.path.join(feature_dir, filename + '.mfc')
+
+                fout.write(wav_file + '\t' + mfc_file + '\n')
+
+
+#def make_filelist(input_dir, output_txt):
+#    """ Make a list of files in the input_dir. """
+#    filenames = os.listdir(input_dir)
+
+#    with open(output_txt, 'w') as fout:
+#        for filename in filenames:
+#            fout.write(input_dir + '\\' + filename + '\n')
+
+
+#def make_htk_dict(word, pronvar_, fileDic, output_type):
+#    """
+#    make dict files which can be used for HTK.
+#    param word: target word.
+#    param pronvar_: pronunciation variant. nx2 (WORD /t pronunciation) ndarray.
+#    param fileDic: output dic file.
+#    param output_type: 0:full, 1:statistics, 2:frequency <2% entries are removed. 3:top 3.
+# """ +# #assert(output_type < 4 and output_type >= 0, 'output_type should be an integer between 0 and 3.') +# WORD = word.upper() + +# if output_type == 0: # full +# pronvar = np.unique(pronvar_) + +# with open(fileDic, 'w') as f: +# for pvar in pronvar: +# f.write('{0}\t{1}\n'.format(WORD, pvar)) +# else: +# c = Counter(pronvar_) +# total_num = sum(c.values()) +# with open(fileDic, 'w') as f: +# if output_type == 3: +# for key, value in c.most_common(3): +# f.write('{0}\t{1}\n'.format(WORD, key)) +# else: +# for key, value in c.items(): +# percentage = value/total_num*100 + +# if output_type == 1: # all +# f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key)) +# elif output_type == 2: # less than 2 percent +# if percentage < 2: +# f.write('{0}\t{1}\n'.format(WORD, key)) + + + + + +def load_lexicon(lexicon_file): + lex = pd.read_csv(lexicon_file, delimiter='\t', header=None, encoding="utf-8") + lex.rename(columns={0: 'word', 1: 'pronunciation'}, inplace=True) + return lex + + +def get_phonelist(lexicon_asr): + """ Make a list of phones which appears in the lexicon. """ + + #with open(lexicon_file, "rt", encoding="utf-8") as fin: + # lines = fin.read() + # lines = lines.split('\n') + # phonelist = set([]) + # for line in lines: + # line = line.split('\t') + # if len(line) > 1: + # pronunciation = set(line[1].split()) + # phonelist = phonelist | pronunciation + lex = load_lexicon(lexicon_asr) + return set(' '.join(lex['pronunciation']).split(' ')) + +import time + +timer_start = time.time() + +#def get_translation_key(): +dir_tmp = r'c:\Users\A.Kunikoshi\source\repos\acoustic_model\_tmp' +lexicon_ipa = r'f:\_corpus\FAME\lexicon\lex.ipa' +lexicon_asr = r'f:\_corpus\FAME\lexicon\lex.asr' + +lex_ipa = load_lexicon(lexicon_ipa) +lex_asr = load_lexicon(lexicon_asr) +if 0: + phone_to_be_searched = get_phonelist(lexicon_asr) + translation_key = dict() + for word in lex_asr['word']: + if np.sum(lex_asr['word'] == word) == 1 and np.sum(lex_ipa['word'] == word) == 1: + asr = lex_asr[lex_asr['word'] == word].iat[0, 1] + ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1] + + asr_list = asr.split(' ') + # if there are phones which is not in phone_to_be_searched + if len([True for i in asr_list if i in phone_to_be_searched]) > 0: + if(len(ipa) == len(asr_list)): + print("{0}: {1} --> {2}".format(word, ipa, asr)) + for ipa_, asr_ in zip(ipa, asr_list): + if asr_ in phone_to_be_searched: + #if not translation_key[ipa_] == asr_: + translation_key[ipa_] = asr_ + phone_to_be_searched.remove(asr_) + + print("elapsed time: {}".format(time.time() - timer_start)) + + np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key) + np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched) +else: + translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item() + phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).item() diff --git a/acoustic_model/train_hmm_fame.py b/acoustic_model/fame_hmm.py similarity index 71% rename from acoustic_model/train_hmm_fame.py rename to acoustic_model/fame_hmm.py index 8cf7789..5f69329 100644 --- a/acoustic_model/train_hmm_fame.py +++ b/acoustic_model/fame_hmm.py @@ -1,105 +1,127 @@ -import os import sys -import tempfile -import configparser -import subprocess -from collections import Counter +import os +os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model') -import numpy as np -import pandas as pd +import tempfile +#import configparser +#import subprocess +#from 
collections import Counter + +#import numpy as np +#import pandas as pd + +import fame_functions +import defaultfiles as default +sys.path.append(default.pyhtk_dir) +import pyhtk +sys.path.append(default.toolbox_dir) +import file_handling ## ======================= user define ======================= -repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model' -curr_dir = repo_dir + '\\acoustic_model' -config_ini = curr_dir + '\\config.ini' -output_dir = 'C:\\OneDrive\\Research\\rug\\experiments\\friesian\\acoustic_model' -forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment' +#repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model' +#curr_dir = repo_dir + '\\acoustic_model' +#config_ini = curr_dir + '\\config.ini' +#output_dir = 'C:\\OneDrive\\Research\\rug\\experiments\\friesian\\acoustic_model' +#forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment' dataset_list = ['devel', 'test', 'train'] # procedure -extract_features = 0 -make_feature_list = 0 -conv_lexicon = 0 -check_lexicon = 0 -make_mlf = 0 -combine_files = 0 -flat_start = 0 -train_model = 1 +extract_features = 1 +#conv_lexicon = 0 +#check_lexicon = 0 +#make_mlf = 0 +#combine_files = 0 +#flat_start = 0 +#train_model = 1 -sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir)) -sys.path.append(forced_alignment_module) -from forced_alignment import convert_phone_set +#sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir)) +#sys.path.append(forced_alignment_module) +#from forced_alignment import convert_phone_set -import acoustic_model_functions as am_func ## ======================= load variables ======================= -config = configparser.ConfigParser() -config.sections() -config.read(config_ini) +#config = configparser.ConfigParser() +#config.sections() +#config.read(config_ini) -config_hcopy = config['Settings']['config_hcopy'] -config_train = config['Settings']['config_train'] -mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl'] -FAME_dir = config['Settings']['FAME_dir'] +#config_hcopy = config['Settings']['config_hcopy'] +#config_train = config['Settings']['config_train'] +#mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl'] +#FAME_dir = config['Settings']['FAME_dir'] -lex_asr = FAME_dir + '\\lexicon\\lex.asr' -lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk' -lex_oov = FAME_dir + '\\lexicon\\lex.oov' -lex_oov_htk = FAME_dir + '\\lexicon\\lex.oov_htk' -#lex_ipa = FAME_dir + '\\lexicon\\lex.ipa' -#lex_ipa_ = FAME_dir + '\\lexicon\\lex.ipa_' -#lex_ipa_htk = FAME_dir + '\\lexicon\\lex.ipa_htk' -lex_htk = FAME_dir + '\\lexicon\\lex_original.htk' -lex_htk_ = FAME_dir + '\\lexicon\\lex.htk' +#lex_asr = FAME_dir + '\\lexicon\\lex.asr' +#lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk' +#lex_oov = FAME_dir + '\\lexicon\\lex.oov' +#lex_oov_htk = FAME_dir + '\\lexicon\\lex.oov_htk' +##lex_ipa = FAME_dir + '\\lexicon\\lex.ipa' +##lex_ipa_ = FAME_dir + '\\lexicon\\lex.ipa_' +##lex_ipa_htk = FAME_dir + '\\lexicon\\lex.ipa_htk' +#lex_htk = FAME_dir + '\\lexicon\\lex_original.htk' +#lex_htk_ = FAME_dir + '\\lexicon\\lex.htk' -hcompv_scp = output_dir + '\\scp\\combined.scp' -combined_mlf = output_dir + '\\label\\combined.mlf' +#hcompv_scp = output_dir + '\\scp\\combined.scp' +#combined_mlf = output_dir + '\\label\\combined.mlf' -model_dir = output_dir + '\\model' -model0_dir = model_dir + '\\hmm0' -proto_init = model_dir + '\\proto38' -proto_name = 'proto' -phonelist = output_dir + '\\config\\phonelist_friesian.txt' -hmmdefs_name = 'hmmdefs' +#model_dir = output_dir + 
'\\model' +#model0_dir = model_dir + '\\hmm0' +#proto_init = model_dir + '\\proto38' +#proto_name = 'proto' +#phonelist = output_dir + '\\config\\phonelist_friesian.txt' +#hmmdefs_name = 'hmmdefs' +feature_dir = os.path.join(default.htk_dir, 'mfc') +if not os.path.exists(feature_dir): + os.makedirs(feature_dir) +tmp_dir = os.path.join(default.htk_dir, 'tmp') +if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) ## ======================= extract features ======================= if extract_features: - print("==== extract features ====\n") - for dataset in dataset_list: - print(dataset) - + #for dataset in ['test']: + print('==== {} ===='.format(dataset)) + # a script file for HCopy + print(">>> making a script file for HCopy... \n") hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False) hcopy_scp.close() + #hcopy_scp = os.path.join(default.htk_dir, 'tmp', 'HCopy.scp') # get a list of features (hcopy.scp) from the filelist in FAME! corpus - feature_dir = output_dir + '\\mfc\\' + dataset - am_func.make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp.name) + feature_dir_ = os.path.join(feature_dir, dataset) + if not os.path.exists(feature_dir_): + os.makedirs(feature_dir_) # extract features - subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name - subprocess.call(subprocessStr, shell=True) + print(">>> extracting features... \n") + fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name) + #subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name + #subprocess.call(subprocessStr, shell=True) + pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name) + + # a script file for HCompV + print(">>> making a script file for HCompV... \n") ## ======================= make a list of features ======================= -if make_feature_list: - print("==== make a list of features ====\n") +#if make_feature_list: +# print("==== make a list of features ====\n") - for dataset in dataset_list: - print(dataset) +# for dataset in dataset_list: +# print(dataset) - feature_dir = output_dir + '\\mfc\\' + dataset - hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp' + #feature_dir = output_dir + '\\mfc\\' + dataset + hcompv_scp = os.path.join(tmp_dir, dataset + '.scp') - am_func.make_filelist(feature_dir, hcompv_scp) + #am_func.make_filelist(feature_dir, hcompv_scp) + file_handling.make_filelist(feature_dir_, hcompv_scp, '.mfc') ## ======================= convert lexicon from ipa to fame_htk =======================
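
Note on the caching added in fame_functions.py above: the translation key (a plain Python dict) and the set of unresolved phones are persisted with np.save and restored with np.load(...).item(). A minimal sketch of that round-trip, outside the diff, with placeholder contents (the real key maps IPA characters to FAME! ASR phones); on NumPy 1.16.3 and later, np.load additionally needs allow_pickle=True for object arrays, while the older NumPy used in this repo defaulted to True:

import os
import numpy as np

dir_tmp = '_tmp'                          # placeholder output directory
os.makedirs(dir_tmp, exist_ok=True)

translation_key = {'a': 'a', 'x': 'x'}    # placeholder ipa -> asr mapping
phone_to_be_searched = {'oe', 'oh'}       # placeholder set of unmapped asr phones

# np.save wraps a dict/set in a 0-d object array before writing the .npy file
np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched)

# .item() unwraps the 0-d object array back into the original dict/set
translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy'),
                          allow_pickle=True).item()
phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy'),
                               allow_pickle=True).item()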