diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo
index 0b78f7c..92f0791 100644
Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ
diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc
index ad9ae7f..8bb2ce1 100644
Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ
diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj
index bbe6bd9..f2c3827 100644
--- a/acoustic_model/acoustic_model.pyproj
+++ b/acoustic_model/acoustic_model.pyproj
@@ -32,7 +32,9 @@
     Code
-
+
+
+
     Code
diff --git a/acoustic_model/convert_phone_set.py b/acoustic_model/convert_phone_set.py
index d2c1350..0a7ad22 100644
--- a/acoustic_model/convert_phone_set.py
+++ b/acoustic_model/convert_phone_set.py
@@ -20,7 +20,7 @@ def split_word(word, multi_character_phones):
 
     Args:
         word (str): a word written in given phoneset.
-        multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_phoneset.py.
+        multi_character_phones (list): the list of multi-character phones, each of which is treated as a single phone. This can be obtained from a phoneset definition such as fame_ipa.py.
 
     Returns:
         (word_seperated) (list): the word splitted in given phoneset.
diff --git a/acoustic_model/fame_asr.py b/acoustic_model/fame_asr.py
new file mode 100644
index 0000000..8408646
--- /dev/null
+++ b/acoustic_model/fame_asr.py
@@ -0,0 +1,127 @@
+""" definition of the phones to be used. """
+
+# phones in {FAME}/lexicon/lex.asr
+phoneset = [
+    # vowels
+    'a',
+    'a:',
+    'e',
+    'e:',
+    'i',
+    'i:',
+    'i̯',
+    'o',
+    'o:',
+    'ö',
+    'ö:',
+    'u',
+    'u:',
+    'ü',
+    'ü:',
+    #'ú', # only appears in the words 'feeste'(út) and 'gaste'(út), which are 'f e: s t ə' and 'yn' in lex_asr. The Frisian pronunciations may be mistakes, so I removed this phone.
+    'ṷ',
+    'y',
+    'ɔ',
+    'ɔ:',
+    'ɔ̈',
+    'ɔ̈:',
+    'ə',
+    'ɛ',
+    'ɛ:',
+    'ɪ',
+    'ɪ:',
+
+    # plosives
+    'p',
+    'b',
+    't',
+    'd',
+    'k',
+    'g',
+    'ɡ', # = 'g'
+
+    # nasals
+    'm',
+    'n',
+    'ŋ',
+
+    # fricatives
+    'f',
+    'v',
+    's',
+    's:',
+    'z',
+    'x',
+    'h',
+
+    # taps and flaps
+    'r',
+    'r:',
+
+    # approximants
+    'j',
+    'l'
+    ]
+
+
+## reduce the number of phones.
+# phones which seldom occur are replaced with more common ones.
+# the replacements are based on advice from Martijn Wieling.
+reduction_key = {
+    'y':'i:', 'e':'e:', 'ə:':'ɛ:', 'r:':'r', 'ɡ':'g'
+    }
+# already removed beforehand in phoneset. Just to be sure.
+phones_to_be_removed = ['ú', 's:', 'ɔ̈:']
+
+phoneset_short = [reduction_key.get(i, i) for i in phoneset
+                  if not i in phones_to_be_removed]
+phoneset_short = list(set(phoneset_short))
+phoneset_short.sort()
+
+
+## translation_key to htk format (ascii).
+# phones which give UnicodeEncodeError on phone.encode("ascii")
+# are replaced with other characters.
+translation_key_asr2htk = {
+    'i̯': 'i_',
+    'ṷ': 'u_',
+
+    # on the analogy of the German umlaut, 'e' is appended.
+    'ö': 'oe', 'ö:': 'oe:',
+    'ü': 'ue', 'ü:': 'ue:',
+
+    # on the analogy of Chinese romanization.
+    'ŋ': 'ng',
+
+    # refer to X-SAMPA.
+    'ɔ': 'O', 'ɔ:': 'O:', 'ɔ̈': 'Oe',
+    'ɛ': 'E', 'ɛ:': 'E:',
+    'ɪ': 'I', 'ɪ:': 'I:',
+
+    # it is '@' in X-SAMPA, but that is not handy in HTK.
+    'ə': 'A'
+    }
+phoneset_htk = [translation_key_asr2htk.get(i, i) for i in phoneset_short]
+
+## check
+#for i in phoneset_short:
+#    try:
+#        print("{0} --> {1}".format(i, i.encode("ascii")))
+#    except UnicodeEncodeError:
+#        print(">>> {}".format(i))
+
+
+## the list of multi-character phones.
+# for example, the length of 'a:' is 2, but in the code it is treated as one phone.
+
+# original.
+multi_character_phones = [i for i in phoneset if len(i) > 1]
+multi_character_phones.sort(key=len, reverse=True)
+
+# phoneset reduced.
+multi_character_phones_short = [i for i in phoneset_short if len(i) > 1]
+multi_character_phones_short.sort(key=len, reverse=True)
+
+# htk compatible.
+multi_character_phones_htk = [i for i in phoneset_htk if len(i) > 1]
+multi_character_phones_htk.sort(key=len, reverse=True)
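For reference, a minimal sketch (not part of the patch) of how the three new mappings in fame_asr.py chain together on a lex.asr-style pronunciation; the input string here is made up for illustration:

    import fame_asr

    pronunciation = 'g ö: ŋ r:'.split(' ')   # hypothetical lex.asr entry
    # apply the phone reduction, then the ascii-safe htk mapping
    reduced = [fame_asr.reduction_key.get(p, p) for p in pronunciation]
    htk = [fame_asr.translation_key_asr2htk.get(p, p) for p in reduced]
    print(htk)                               # ['g', 'oe:', 'ng', 'r']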
diff --git a/acoustic_model/fame_functions.py b/acoustic_model/fame_functions.py
index eadb879..380f602 100644
--- a/acoustic_model/fame_functions.py
+++ b/acoustic_model/fame_functions.py
@@ -1,5 +1,4 @@
 import os
-os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
 import sys
 
 from collections import Counter
@@ -9,7 +8,7 @@
 import numpy as np
 import pandas as pd
 
 import defaultfiles as default
-import fame_phoneset
+from phoneset import fame_ipa
 import convert_phone_set
@@ -110,14 +109,6 @@ import convert_phone_set
 #	return ipa
 
 
-#def make_filelist(input_dir, output_txt):
-#    """ Make a list of files in the input_dir. """
-#    filenames = os.listdir(input_dir)
-
-#    with open(output_txt, 'w') as fout:
-#        for filename in filenames:
-#            fout.write(input_dir + '\\' + filename + '\n')
-
 #def make_htk_dict(word, pronvar_, fileDic, output_type):
 #    """
@@ -179,10 +170,11 @@ def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_s
             fout.write(wav_file + '\t' + mfc_file + '\n')
 
+    return
 
 
 def load_lexicon(lexicon_file):
-    """ load lexicon file as Data Frame.
+    """ load lexicon file as data frame.
 
     Args:
         lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'.
@@ -196,25 +188,27 @@ def load_lexicon(lexicon_file):
     return lex
 
 
-def get_phoneset_from_lexicon(lexicon_file, phoneset='asr'):
+def get_phoneset_from_lexicon(lexicon_file, phoneset_name='asr'):
     """ Make a list of phones which appears in the lexicon.
 
     Args:
         lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'.
-        phoneset (str): the phoneset with which lexicon_file is written. 'asr'(default) or 'ipa'.
+        phoneset_name (str): the name of the phoneset with which lexicon_file is written. 'asr'(default) or 'ipa'.
 
     Returns:
         (list_of_phones) (set): the set of phones included in the lexicon_file.
 
     """
-    assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
+    assert phoneset_name in ['asr', 'ipa'], 'phoneset_name should be \'asr\' or \'ipa\''
 
     lex = load_lexicon(lexicon_file)
-    if phoneset == 'asr':
+    if phoneset_name == 'asr':
         return set(' '.join(lex['pronunciation']).split(' '))
-    elif phoneset == 'ipa':
+    elif phoneset_name == 'ipa':
         join_pronunciations = ''.join(lex['pronunciation'])
-        return set(convert_phone_set.split_word(join_pronunciations, fame_phoneset.multi_character_phones_ipa))
+        return set(convert_phone_set.split_word(join_pronunciations, fame_ipa.multi_character_phones))
+
+    return
 
 
 def extract_unknown_phones(ipa, known_phones):
@@ -228,7 +222,7 @@ def extract_unknown_phones(ipa, known_phones):
         (list_of_phones) (list): unknown phones not included in 'known_phones'.
 
     """
-    ipa_split = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
+    ipa_split = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones)
 
     return [i for i in ipa_split if not i in known_phones]
@@ -247,14 +241,14 @@
     """
     lex_ipa = load_lexicon(lexicon_file_ipa)
     lex_asr = load_lexicon(lexicon_file_asr)
-    phone_unknown = fame_phoneset.phoneset_ipa[:]
+    phone_unknown = fame_ipa.phoneset[:]
     translation_key = dict()
 
     for word in lex_ipa['word']:
         if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
             ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
             asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
-            ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
+            ipa_list = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones)
             asr_list = asr.split(' ')
 
             # if there are phones which is not in phone_unknown
@@ -268,13 +262,13 @@
     return translation_key, list(phone_unknown)
 
 
-def find_phone(lexicon_file, phone, phoneset='ipa'):
+def find_phone(lexicon_file, phone, phoneset_name='ipa'):
     """ extract rows where the phone is used in the lexicon_file.
 
     Args:
         lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'.
         phone (str): the phone to be searched.
-        phoneset (str): the phoneset with which lexicon_file is written. 'asr' or 'ipa'(default).
+        phoneset_name (str): the name of the phoneset with which lexicon_file is written. 'asr' or 'ipa'(default).
 
     Returns:
         extracted (df): rows where the phone is used.
@@ -283,7 +277,7 @@
     * develop when the phonset == 'asr'.
 
     """
-    assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
+    assert phoneset_name in ['asr', 'ipa'], 'phoneset_name should be \'asr\' or \'ipa\''
 
     lex = load_lexicon(lexicon_file)
 
@@ -292,8 +286,8 @@
     extracted = pd.DataFrame(index=[], columns=['word', 'pronunciation'])
     for index, row in lex_.iterrows():
-        if phoneset == 'ipa':
-            pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_phoneset.multi_character_phones_ipa)
+        if phoneset_name == 'ipa':
+            pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_ipa.multi_character_phones)
             if phone in pronunciation:
                 extracted_ = pd.Series([row['word'], pronunciation], index=extracted.columns)
                 extracted = extracted.append(extracted_, ignore_index=True)
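Taken together, the renamed helpers are used roughly as follows. A sketch (not part of the patch, mirroring fame_test.py; the lexicon paths are the ones this patch uses, and it assumes at least one phone is left unresolved):

    import os
    import fame_functions
    import defaultfiles as default

    lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
    lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
    lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')

    # derive the ipa-to-asr key; phone_unknown lists phones it could not resolve.
    translation_key, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)

    # inspect the lexicon rows where one unresolved phone occurs.
    extracted = fame_functions.find_phone(lexicon_ipa, phone_unknown[0], phoneset_name='ipa')
    print(extracted.head())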
""" - ipa_split = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa) + ipa_split = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones) return [i for i in ipa_split if not i in known_phones] @@ -247,14 +241,14 @@ def get_translation_key(lexicon_file_ipa, lexicon_file_asr): """ lex_ipa = load_lexicon(lexicon_file_ipa) lex_asr = load_lexicon(lexicon_file_asr) - phone_unknown = fame_phoneset.phoneset_ipa[:] + phone_unknown = fame_ipa.phoneset[:] translation_key = dict() for word in lex_ipa['word']: if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1: ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1] asr = lex_asr[lex_asr['word'] == word].iat[0, 1] - ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa) + ipa_list = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones) asr_list = asr.split(' ') # if there are phones which is not in phone_unknown @@ -268,13 +262,13 @@ def get_translation_key(lexicon_file_ipa, lexicon_file_asr): return translation_key, list(phone_unknown) -def find_phone(lexicon_file, phone, phoneset='ipa'): +def find_phone(lexicon_file, phone, phoneset_name='ipa'): """ extract rows where the phone is used in the lexicon_file. Args: lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'. phone (str): the phone to be searched. - phoneset (str): the phoneset with which lexicon_file is written. 'asr' or 'ipa'(default). + phoneset_name (str): the name of phoneset_name with which lexicon_file is written. 'asr' or 'ipa'(default). Returns: extracted (df): rows where the phone is used. @@ -283,7 +277,7 @@ def find_phone(lexicon_file, phone, phoneset='ipa'): * develop when the phonset == 'asr'. """ - assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\'' + assert phoneset_name in ['asr', 'ipa'], 'phoneset_name should be \'asr\' or \'ipa\'' lex = load_lexicon(lexicon_file) @@ -292,8 +286,8 @@ def find_phone(lexicon_file, phone, phoneset='ipa'): extracted = pd.DataFrame(index=[], columns=['word', 'pronunciation']) for index, row in lex_.iterrows(): - if phoneset == 'ipa': - pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_phoneset.multi_character_phones_ipa) + if phoneset_name == 'ipa': + pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_ipa.multi_character_phones) if phone in pronunciation: extracted_ = pd.Series([row['word'], pronunciation], index=extracted.columns) extracted = extracted.append(extracted_, ignore_index=True) diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py index fe319d0..058deaa 100644 --- a/acoustic_model/fame_hmm.py +++ b/acoustic_model/fame_hmm.py @@ -8,8 +8,8 @@ import tempfile #from collections import Counter import time -#import numpy as np -#import pandas as pd +import numpy as np +import pandas as pd import fame_functions import defaultfiles as default @@ -54,6 +54,10 @@ conv_lexicon = 1 #mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl'] #FAME_dir = config['Settings']['FAME_dir'] +#lexicon_dir = os.path.join(default.fame_dir, 'lexicon') +#lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa') +#lexicon_asr = os.path.join(lexicon_dir, 'lex.asr') + #lex_asr = FAME_dir + '\\lexicon\\lex.asr' #lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk' #lex_oov = FAME_dir + '\\lexicon\\lex.oov' @@ -111,71 +115,6 @@ if extract_features: ## ======================= convert lexicon from ipa to fame_htk ======================= if conv_lexicon: print('==== convert lexicon 
diff --git a/acoustic_model/fame_ipa.py b/acoustic_model/fame_ipa.py
new file mode 100644
index 0000000..4d44f0a
--- /dev/null
+++ b/acoustic_model/fame_ipa.py
@@ -0,0 +1,107 @@
+""" definition of the phones to be used. """
+
+phoneset = [
+    # vowels
+    'i̯',
+    'i̯ⁿ',
+    'y',
+    'i',
+    'i.',
+    'iⁿ',
+    'i:',
+    'i:ⁿ',
+    'ɪ',
+    'ɪⁿ',
+    'ɪ.',
+    #'ɪ:', # not included in lex.ipa
+    'ɪ:ⁿ',
+    'e',
+    'e:',
+    'e:ⁿ',
+    'ə',
+    'əⁿ',
+    'ə:',
+    'ɛ',
+    'ɛ.',
+    'ɛⁿ',
+    'ɛ:',
+    'ɛ:ⁿ',
+    'a',
+    'aⁿ',
+    'a.',
+    'a:',
+    'a:ⁿ',
+    'ṷ',
+    'ṷ.',
+    'ṷⁿ',
+    #'ú', # only appears in the words 'feeste'(út) and 'gaste'(út), which are 'f e: s t ə' and 'yn' in lex_asr. The Frisian pronunciations may be mistakes, so I removed this phone.
+    'u',
+    'uⁿ',
+    'u.',
+    'u:',
+    'u:ⁿ',
+    'ü',
+    'ü.',
+    'üⁿ',
+    'ü:',
+    'ü:ⁿ',
+    'o',
+    'oⁿ',
+    'o.',
+    'o:',
+    'o:ⁿ',
+    'ö',
+    'ö.',
+    'öⁿ',
+    'ö:',
+    'ö:ⁿ',
+    'ɔ',
+    'ɔ.',
+    'ɔⁿ',
+    'ɔ:',
+    'ɔ:ⁿ',
+    #'ɔ̈', # not included in lex.ipa
+    'ɔ̈.',
+    'ɔ̈:',
+
+    # plosives
+    'p',
+    'b',
+    't',
+    'tⁿ',
+    'd',
+    'k',
+    'g',
+    'ɡ', # = 'g'
+
+    # nasals
+    'm',
+    'n',
+    'ŋ',
+
+    # fricatives
+    'f',
+    'v',
+    's',
+    's:',
+    'z',
+    'zⁿ',
+    'x',
+    'h',
+
+    # taps and flaps
+    'r',
+    'r.', # only appears in the words 'mearpartijestelsel' (which does not exist in lex_asr) and 'tenoarpartij'.
+    'r:', # only appears in the words 'mûsearflearmûs' and 'sjochdêr'.
+
+    # approximants
+    'j',
+    'j.',
+    'l'
+    ]
+
+
+## the list of multi-character phones.
+# for example, the length of 'i̯ⁿ' is 3, but in the code it is treated as one phone.
+multi_character_phones = [i for i in phoneset if len(i) > 1]
+multi_character_phones.sort(key=len, reverse=True)
\ No newline at end of file
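The longest-first sort at the end of fame_ipa.py is what makes greedy splitting safe. A toy splitter in the spirit of convert_phone_set.split_word (the real implementation may differ) shows why:

    import fame_ipa

    def split(word, multi_chars):
        # multi_chars must be sorted longest first, so 'ö:ⁿ' wins over 'ö:'.
        phones, i = [], 0
        while i < len(word):
            for m in multi_chars:
                if word.startswith(m, i):
                    phones.append(m)
                    i += len(m)
                    break
            else:                      # no multi-character phone matched here
                phones.append(word[i])
                i += 1
        return phones

    print(split('ö:ⁿs', fame_ipa.multi_character_phones))   # ['ö:ⁿ', 's'], not ['ö:', 'ⁿ', 's']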
diff --git a/acoustic_model/fame_test.py b/acoustic_model/fame_test.py
new file mode 100644
index 0000000..121f4e5
--- /dev/null
+++ b/acoustic_model/fame_test.py
@@ -0,0 +1,93 @@
+import sys
+import os
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+
+import time
+
+import numpy as np
+import pandas as pd
+
+import fame_functions
+import defaultfiles as default
+sys.path.append(default.toolbox_dir)
+from phoneset import fame_ipa, fame_asr
+
+
+lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
+lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
+lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
+
+
+## check if all the phones in lexicon.ipa are in fame_ipa.py.
+#timer_start = time.time()
+#phoneset_lex = fame_functions.get_phoneset_from_lexicon(lexicon_ipa, phoneset_name='ipa')
+#phoneset_py = fame_ipa.phoneset
+#print("phones which are in lexicon.ipa but not in fame_ipa.py:\n{}".format(
+#    set(phoneset_lex) - set(phoneset_py)))
+#print("elapsed time: {}".format(time.time() - timer_start))
+
+# check which words have the phone.
+#timer_start = time.time()
+#extracted = fame_functions.find_phone(lexicon_ipa, 'ⁿ')
+#print("elapsed time: {}".format(time.time() - timer_start))
+
+
+## get the correspondence between lex_ipa and lex_asr.
+lex_asr = fame_functions.load_lexicon(lexicon_asr)
+lex_ipa = fame_functions.load_lexicon(lexicon_ipa)
+if 0:
+    timer_start = time.time()
+    translation_key_ipa2asr, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)
+    print("elapsed time: {}".format(time.time() - timer_start))
+
+    np.save(os.path.join('phoneset', 'output_get_translation_key_translation_key.npy'), translation_key_ipa2asr)
+    np.save(os.path.join('phoneset', 'output_get_translation_key_phone_unknown.npy'), phone_unknown)
+else:
+    translation_key_ipa2asr = np.load(os.path.join('phoneset', 'output_get_translation_key_translation_key.npy')).item()
+    phone_unknown = np.load(os.path.join('phoneset', 'output_get_translation_key_phone_unknown.npy'))
+    phone_unknown = list(phone_unknown)
+
+# manually check the correspondence for the phones in phone_unknown.
+#p = phone_unknown[0]
+#lex_ipa_ = fame_functions.find_phone(lexicon_ipa, p, phoneset_name='ipa')
+
+#for word in lex_ipa_['word']:
+#    ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
+#    if np.sum(lex_asr['word'] == word) > 0:
+#        asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
+
+#        ipa_list = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones)
+#        asr_list = asr.split(' ')
+#        if p in ipa_list and (len(ipa_list) == len(asr_list)):
+#            print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
+#            for ipa_, asr_ in zip(ipa_list, asr_list):
+#                if ipa_ in phone_unknown:
+#                    translation_key_ipa2asr[ipa_] = asr_
+#                    phone_unknown.remove(ipa_)
+translation_key_ipa2asr['ə:'] = 'ə'
+translation_key_ipa2asr['r.'] = 'r'
+translation_key_ipa2asr['r:'] = 'r'
+np.save(os.path.join('phoneset', 'fame_ipa2asr.npy'), translation_key_ipa2asr)
+
+
+## check if all the phones in lexicon.asr are in translation_key_ipa2asr.
+timer_start = time.time()
+phoneset_lex = fame_functions.get_phoneset_from_lexicon(lexicon_asr, phoneset_name='asr')
+phoneset_lex.remove("")
+phoneset_asr = list(set(translation_key_ipa2asr.values()))
+print("phones which are in lexicon.asr but not in translation_key_ipa2asr:\n{}".format(
+    set(phoneset_lex) - set(phoneset_asr)))
+print("elapsed time: {}".format(time.time() - timer_start))
+
+## make the translation key from asr to htk.
+#multi_character_phones = [i for i in phoneset_asr if len(i) > 1]
+#multi_character_phones.sort(key=len, reverse=True)
+
+#lexicon_ipa = pd.read_table(lex_ipa, names=['word', 'pronunciation'])
+#with open(lex_ipa_, "w", encoding="utf-8") as fout:
+#    for word, pronunciation in zip(lexicon_ipa['word'], lexicon_ipa['pronunciation']):
+#        # ignore nasalization and '.'
+#        pronunciation_ = pronunciation.replace(u'ⁿ', '')
+#        pronunciation_ = pronunciation_.replace('.', '')
+#        pronunciation_split = convert_phone_set.split_ipa_fame(pronunciation_)
+#        fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split)))
\ No newline at end of file
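A sketch that could be appended to fame_test.py (not part of the patch; it reuses the lex_ipa and lex_asr frames defined above): count how many words satisfy the occurs-exactly-once-in-both-lexicons condition that get_translation_key relies on.

    counts_ipa = lex_ipa['word'].value_counts()
    counts_asr = lex_asr['word'].value_counts()
    # words usable as anchors: exactly one pronunciation in each lexicon
    anchors = set(counts_ipa[counts_ipa == 1].index) & set(counts_asr[counts_asr == 1].index)
    print('{} words can anchor the ipa-to-asr correspondence'.format(len(anchors)))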
diff --git a/acoustic_model/fame_phoneset.py b/acoustic_model/phoneset/fame_asr.py
similarity index 88%
rename from acoustic_model/fame_phoneset.py
rename to acoustic_model/phoneset/fame_asr.py
index 2c2387a..01b3ab4 100644
--- a/acoustic_model/fame_phoneset.py
+++ b/acoustic_model/phoneset/fame_asr.py
@@ -1,7 +1,6 @@
 """ definition of the phones to be used. """
 
-## phones in IPA.
-phoneset_ipa = [
+phoneset = [
     # vowels
     'i̯',
     'i̯ⁿ',
@@ -103,5 +102,5 @@ phoneset_ipa = [
 
 ## the list of multi character phones.
 # for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter.
-multi_character_phones_ipa = [i for i in phoneset_ipa if len(i) > 1]
-multi_character_phones_ipa.sort(key=len, reverse=True)
\ No newline at end of file
+multi_character_phones = [i for i in phoneset if len(i) > 1]
+multi_character_phones.sort(key=len, reverse=True)
\ No newline at end of file
""" + +phoneset = [ + # vowels + 'i̯', + 'i̯ⁿ', + 'y', + 'i', + 'i.', + 'iⁿ', + 'i:', + 'i:ⁿ', + 'ɪ', + 'ɪⁿ', + 'ɪ.', + #'ɪ:', # not included in lex.ipa + 'ɪ:ⁿ', + 'e', + 'e:', + 'e:ⁿ', + 'ə', + 'əⁿ', + 'ə:', + 'ɛ', + 'ɛ.', + 'ɛⁿ', + 'ɛ:', + 'ɛ:ⁿ', + 'a', + 'aⁿ', + 'a.', + 'a:', + 'a:ⁿ', + 'ṷ', + 'ṷ.', + 'ṷⁿ', + #'ú', # only appears in word 'feeste'(út) and 'gaste'(út) which are 'f e: s t ə' and 'yn' in lex_asr. + 'u', + 'uⁿ', + 'u.', + 'u:', + 'u:ⁿ', + 'ü', + 'ü.', + 'üⁿ', + 'ü:', + 'ü:ⁿ', + 'o', + 'oⁿ', + 'o.', + 'o:', + 'o:ⁿ', + 'ö', + 'ö.', + 'öⁿ', + 'ö:', + 'ö:ⁿ', + 'ɔ', + 'ɔ.', + 'ɔⁿ', + 'ɔ:', + 'ɔ:ⁿ', + #'ɔ̈', # not included in lex.ipa + 'ɔ̈.', + 'ɔ̈:', + + # plosives + 'p', + 'b', + 't', + 'tⁿ', + 'd', + 'k', + 'g', + 'ɡ', # = 'g' + + # nasals + 'm', + 'n', + 'ŋ', + + # fricatives + 'f', + 'v', + 's', + 's:', + 'z', + 'zⁿ', + 'x', + 'h', + + # tap and flip + 'r', + 'r.', # only appears in word 'mearpartijestelsel'(does not exist in lex_asr) and 'tenoarpartij'. + 'r:', # only appears in word 'mûsearflearmûs' and 'sjochdêr'. + + # approximant + 'j', + 'j.', + 'l' + ] + +## the list of multi character phones. +# for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter. +multi_character_phones = [i for i in phoneset if len(i) > 1] +multi_character_phones.sort(key=len, reverse=True) \ No newline at end of file diff --git a/acoustic_model/phoneset/fame_ipa2asr.npy b/acoustic_model/phoneset/fame_ipa2asr.npy new file mode 100644 index 0000000..b8852ba Binary files /dev/null and b/acoustic_model/phoneset/fame_ipa2asr.npy differ diff --git a/acoustic_model/phoneset/output_get_translation_key_phone_unknown.npy b/acoustic_model/phoneset/output_get_translation_key_phone_unknown.npy new file mode 100644 index 0000000..3faddd7 Binary files /dev/null and b/acoustic_model/phoneset/output_get_translation_key_phone_unknown.npy differ diff --git a/acoustic_model/phoneset/output_get_translation_key_translation_key.npy b/acoustic_model/phoneset/output_get_translation_key_translation_key.npy new file mode 100644 index 0000000..4eaf1b7 Binary files /dev/null and b/acoustic_model/phoneset/output_get_translation_key_translation_key.npy differ