diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index 6764d7f..ccccfcf 100644 Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ diff --git a/_tmp/phone_to_be_searched.npy b/_tmp/phone_to_be_searched.npy index 17bfbfa..1979d5a 100644 Binary files a/_tmp/phone_to_be_searched.npy and b/_tmp/phone_to_be_searched.npy differ diff --git a/_tmp/translation_key.npy b/_tmp/translation_key.npy index fffeade..96c1125 100644 Binary files a/_tmp/translation_key.npy and b/_tmp/translation_key.npy differ diff --git a/acoustic_model.sln b/acoustic_model.sln index 886b9ee..de57edb 100644 --- a/acoustic_model.sln +++ b/acoustic_model.sln @@ -10,7 +10,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution ..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py ..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py ..\toolbox\evaluation.py = ..\toolbox\evaluation.py - ..\toolbox\toolbox\file_handling.py = ..\toolbox\toolbox\file_handling.py ..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py ..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py ..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py @@ -23,7 +22,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution ..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py EndProjectSection EndProject -Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "pyhtk", "..\pyhtk\pyhtk\pyhtk.pyproj", "{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}" +Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "toolbox", "..\toolbox\toolbox.pyproj", "{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -33,8 +32,8 @@ Global GlobalSection(ProjectConfigurationPlatforms) = postSolution {4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Release|Any CPU.ActiveCfg = Release|Any CPU - {75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Release|Any CPU.ActiveCfg = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc index f41e244..ac76aeb 100644 Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj index 4dd80ce..bbe6bd9 100644 --- a/acoustic_model/acoustic_model.pyproj +++ b/acoustic_model/acoustic_model.pyproj @@ -23,12 +23,18 @@ + + Code + Code Code + + Code + Code diff --git a/acoustic_model/convert_phone_set.py b/acoustic_model/convert_phone_set.py new file mode 100644 index 0000000..49d8761 --- /dev/null +++ b/acoustic_model/convert_phone_set.py @@ -0,0 +1,29 @@ +"""Module to convert phonemes.""" + +def multi_character_tokenize(line, multi_character_tokens): + """Tries to match one of the tokens in multi_character_tokens at each position of line, starting at position 0, + if so tokenizes and eats that token. Otherwise tokenizes a single character""" + while line != '': + for token in multi_character_tokens: + if line.startswith(token) and len(token) > 0: + yield token + line = line[len(token):] + break + else: + yield line[:1] + line = line[1:] + + +def split_word(word, multi_character_phones): + """ + Split a line by given phoneset. + + Args: + word (str): one word written in given phoneset. + multi_character_phones: + + Returns: + word_seperated (str): the word splitted in given phoneset. + """ + + return [phone for phone in multi_character_tokenize(word.strip(), multi_character_phones)] \ No newline at end of file diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py index 1046c29..6a280af 100644 --- a/acoustic_model/defaultfiles.py +++ b/acoustic_model/defaultfiles.py @@ -4,7 +4,8 @@ import os #cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model' -htk_dir = r'C:\Aki\htk_fame' +#htk_dir = r'C:\Aki\htk_fame' +htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk' config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy') #config_train = os.path.join(cygwin_dir, 'config', 'config.train') @@ -28,22 +29,22 @@ config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy') #filePhoneList = config['pyHTK']['filePhoneList'] #AcousticModel = config['pyHTK']['AcousticModel'] -repo_dir = r'C:\Users\A.Kunikoshi\source\repos' +repo_dir = r'C:\Users\Aki\source\repos' ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter') forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment') accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification') -pyhtk_dir = os.path.join(repo_dir, 'pyhtk', 'pyhtk') -toolbox_dir = os.path.join(repo_dir, 'toolbox', 'toolbox') +#pyhtk_dir = os.path.join(repo_dir, 'pyhtk', 'pyhtk') +toolbox_dir = os.path.join(repo_dir, 'toolbox') -htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017' -config_hvite = os.path.join(htk_config_dir, 'config.HVite') +#htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017' +#config_hvite = os.path.join(htk_config_dir, 'config.HVite') #acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo') -acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo' -phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt') +#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo' +#phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt') WSL_dir = r'C:\OneDrive\WSL' #fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') -fame_dir = r'f:\_corpus\fame' +fame_dir = r'd:\_corpus\fame' fame_s5_dir = os.path.join(fame_dir, 's5') fame_corpus_dir = os.path.join(fame_dir, 'corpus') diff --git a/acoustic_model/fame_functions.py b/acoustic_model/fame_functions.py index 4a16a95..30ce6f8 100644 --- a/acoustic_model/fame_functions.py +++ b/acoustic_model/fame_functions.py @@ -1,5 +1,5 @@ import os -os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model') +os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') import sys from collections import Counter @@ -9,6 +9,8 @@ import numpy as np import pandas as pd import defaultfiles as default +import fame_phoneset +import convert_phone_set #sys.path.append(default.forced_alignment_module_dir) #from forced_alignment import convert_phone_set @@ -213,40 +215,74 @@ def get_phonelist(lexicon_asr): lex = load_lexicon(lexicon_asr) return set(' '.join(lex['pronunciation']).split(' ')) -import time -timer_start = time.time() +def extract_unknown_phones(word_list, known_phones): + return [i for i in word_list if not i in known_phones] -#def get_translation_key(): -dir_tmp = r'c:\Users\A.Kunikoshi\source\repos\acoustic_model\_tmp' -lexicon_ipa = r'f:\_corpus\FAME\lexicon\lex.ipa' -lexicon_asr = r'f:\_corpus\FAME\lexicon\lex.asr' -lex_ipa = load_lexicon(lexicon_ipa) -lex_asr = load_lexicon(lexicon_asr) -if 0: - phone_to_be_searched = get_phonelist(lexicon_asr) - translation_key = dict() - for word in lex_asr['word']: - if np.sum(lex_asr['word'] == word) == 1 and np.sum(lex_ipa['word'] == word) == 1: - asr = lex_asr[lex_asr['word'] == word].iat[0, 1] - ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1] +if __name__ == '__main__': + import time + timer_start = time.time() + + #def get_translation_key(): + dir_tmp = r'c:\Users\Aki\source\repos\acoustic_model\_tmp' + lexicon_ipa = r'd:\_corpus\FAME\lexicon\lex.ipa' + lexicon_asr = r'd:\_corpus\FAME\lexicon\lex.asr' + + lex_ipa = load_lexicon(lexicon_ipa) + lex_asr = load_lexicon(lexicon_asr) + if 1: + phone_to_be_searched = fame_phoneset.phoneset_ipa[:] + translation_key = dict() + for word in lex_ipa['word']: + if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1: + ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1] + asr = lex_asr[lex_asr['word'] == word].iat[0, 1] - asr_list = asr.split(' ') - # if there are phones which is not in phone_to_be_searched - if len([True for i in asr_list if i in phone_to_be_searched]) > 0: - if(len(ipa) == len(asr_list)): - print("{0}: {1} --> {2}".format(word, ipa, asr)) - for ipa_, asr_ in zip(ipa, asr_list): - if asr_ in phone_to_be_searched: - #if not translation_key[ipa_] == asr_: + ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa) + asr_list = asr.split(' ') + + # if there are phones which is not in phone_to_be_searched + #if len([True for i in asr_list if i in phone_to_be_searched]) > 0: + if(len(ipa_list) == len(asr_list)): + print("{0}: {1} --> {2}".format(word, ipa_list, asr_list)) + for ipa_, asr_ in zip(ipa_list, asr_list): + if ipa_ in phone_to_be_searched: translation_key[ipa_] = asr_ - phone_to_be_searched.remove(asr_) + phone_to_be_searched.remove(ipa_) - print("elapsed time: {}".format(time.time() - timer_start)) + print("elapsed time: {}".format(time.time() - timer_start)) - np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key) - np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched) -else: - translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item() - phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).item() + np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key) + np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched) + else: + translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item() + phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).item() + + + #phone_unknown = list(phone_to_be_searched) + ##phone_unknown.remove('') + #phone_known = list(translation_key.keys()) + + #p = phone_unknown[0] + + ### extract lines which contains 'unknown' phone. + #lex_ipa_ = lex_ipa[lex_ipa['pronunciation'].str.count(p)>0] + ##phone_unknown_ = phone_unknown[:] + ##phone_unknown_.remove(p) + #phone_known_ = phone_known[:] + #phone_known_.append(p) + #for index, row in lex_ipa_.iterrows(): + # ipa = row['pronunciation'] + # phone_extract_unknown_phones(asr_list, phone_known_): + + # # check the number of phones in phone_unknown_ + # if len([True for i in asr_list if i in phone_unknown_]) == 0: + # word = row['word'] + # ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1] + # print("{0}: {1} --> {2}".format(word, ipa, asr)) + # #print("{0}:{1}".format(index, row['pronunciation'])) + + + + \ No newline at end of file diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py index 5f69329..d6327e7 100644 --- a/acoustic_model/fame_hmm.py +++ b/acoustic_model/fame_hmm.py @@ -1,6 +1,6 @@ import sys import os -os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model') +os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') import tempfile #import configparser @@ -12,10 +12,9 @@ import tempfile import fame_functions import defaultfiles as default -sys.path.append(default.pyhtk_dir) -import pyhtk sys.path.append(default.toolbox_dir) -import file_handling +import file_handling as fh +from htk import pyhtk ## ======================= user define ======================= @@ -94,7 +93,7 @@ if extract_features: hcopy_scp.close() #hcopy_scp = os.path.join(default.htk_dir, 'tmp', 'HCopy.scp') - # get a list of features (hcopy.scp) from the filelist in FAME! corpus + ## get a list of features (hcopy.scp) from the filelist in FAME! corpus feature_dir_ = os.path.join(feature_dir, dataset) if not os.path.exists(feature_dir_): os.makedirs(feature_dir_) @@ -110,6 +109,7 @@ if extract_features: # a script file for HCompV print(">>> making a script file for HCompV... \n") + ## ======================= make a list of features ======================= #if make_feature_list: # print("==== make a list of features ====\n") @@ -121,7 +121,7 @@ if extract_features: hcompv_scp = os.path.join(tmp_dir, dataset + '.scp') #am_func.make_filelist(feature_dir, hcompv_scp) - file_handling.make_filelist(feature_dir_, hcompv_scp, '.mfc') + fh.make_filelist(feature_dir_, hcompv_scp, '.mfc') ## ======================= convert lexicon from ipa to fame_htk ======================= diff --git a/acoustic_model/fame_phoneset.py b/acoustic_model/fame_phoneset.py new file mode 100644 index 0000000..b1a07de --- /dev/null +++ b/acoustic_model/fame_phoneset.py @@ -0,0 +1,55 @@ +phoneset_ipa = [ + # vowels + 'i̯', + 'y', + 'i', + 'i:', + 'ɪ', + 'ɪ:', + 'e', + 'e:', + 'ə', + 'ə:', + 'ɛ', + 'ɛ:', + 'a', + 'a:', + 'ṷ', + 'ú', + 'u', + 'u:', + 'ü', + 'ü:', + 'o', + 'o:', + 'ö', + 'ö:', + 'ɔ', + 'ɔ:', + 'ɔ̈', + 'ɔ̈:', + + # plosives + 'p', + 'b', + 't', + 'd', + 'k', + 'g', + + # nasals + 'm', + 'n', + 'ŋ', + + # fricatives + 'f', + 'v', + 's', + 's:', + 'z', + 'x', + 'h', + ] + +multi_character_phones_ipa = [i for i in phoneset_ipa if len(i) > 1]