Add the fame_asr phoneset, including a reduced version and an HTK-compatible version.
This commit is contained in:
@ -8,8 +8,8 @@ import tempfile
|
||||
#from collections import Counter
|
||||
import time
|
||||
|
||||
#import numpy as np
|
||||
#import pandas as pd
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import fame_functions
|
||||
import defaultfiles as default
|
||||
@ -54,6 +54,10 @@ conv_lexicon = 1
|
||||
#mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl']
|
||||
#FAME_dir = config['Settings']['FAME_dir']
|
||||
|
||||
#lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
|
||||
#lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
|
||||
#lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
|
||||
|
||||
#lex_asr = FAME_dir + '\\lexicon\\lex.asr'
|
||||
#lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'
|
||||
#lex_oov = FAME_dir + '\\lexicon\\lex.oov'
|
||||
@ -111,71 +115,6 @@ if extract_features:
|
||||
## ======================= convert lexicon from ipa to fame_htk =======================
|
||||
if conv_lexicon:
|
||||
print('==== convert lexicon from ipa 2 fame ====\n')
|
||||
|
||||
#dir_out = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
|
||||
lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
|
||||
lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
|
||||
lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
|
||||
|
||||
# get the correspondence between lex_ipa and lex_asr.
|
||||
lex_asr = fame_functions.load_lexicon(lexicon_asr)
|
||||
lex_ipa = fame_functions.load_lexicon(lexicon_ipa)
|
||||
if 1:
|
||||
timer_start = time.time()
|
||||
translation_key, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)
|
||||
print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
np.save('translation_key_ipa2asr.npy', translation_key)
|
||||
np.save('phone_unknown.npy', phone_unknown)
|
||||
else:
|
||||
translation_key = np.load('translation_key_ipa2asr.npy').item()
|
||||
phone_unknown = np.load('phone_unknown.npy')
|
||||
phone_unknown = list(phone_unknown)
|
||||
|
||||
|
||||
## manually check the correspondence for each phone in phone_unknown.
|
||||
#p = phone_unknown[0]
|
||||
#lex_ipa_ = find_phone(lexicon_ipa, p, phoneset='ipa')
|
||||
|
||||
#for word in lex_ipa_['word']:
|
||||
# ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
|
||||
# if np.sum(lex_asr['word'] == word) > 0:
|
||||
# asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
|
||||
|
||||
# ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
|
||||
# asr_list = asr.split(' ')
|
||||
# if p in ipa_list and (len(ipa_list) == len(asr_list)):
|
||||
# print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
|
||||
# for ipa_, asr_ in zip(ipa_list, asr_list):
|
||||
# if ipa_ in phone_unknown:
|
||||
# translation_key[ipa_] = asr_
|
||||
# phone_unknown.remove(ipa_)
|
||||
|
||||
|
||||
## check if all the phones in lexicon_ipa are in fame_phoneset.py.
|
||||
#timer_start = time.time()
|
||||
#phoneset_lex = get_phoneset_from_lexicon(lexicon_ipa, phoneset='ipa')
|
||||
#print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
#phoneset_py = fame_phoneset.phoneset_ipa
|
||||
#set(phoneset_lex) - set(phoneset_py)
|
||||
|
||||
##timer_start = time.time()
|
||||
##extracted = find_phone(lexicon_ipa, 'ⁿ')
|
||||
##print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
|
||||
# lex.asr is the Kaldi-compatible version of lex.ipa.
|
||||
# to check...
|
||||
#lexicon_ipa = pd.read_table(lex_ipa, names=['word', 'pronunciation'])
|
||||
#with open(lex_ipa_, "w", encoding="utf-8") as fout:
|
||||
# for word, pronunciation in zip(lexicon_ipa['word'], lexicon_ipa['pronunciation']):
|
||||
# # ignore nasalization and '.'
|
||||
# pronunciation_ = pronunciation.replace(u'ⁿ', '')
|
||||
# pronunciation_ = pronunciation_.replace('.', '')
|
||||
# pronunciation_split = convert_phone_set.split_ipa_fame(pronunciation_)
|
||||
# fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split)))
|
||||
|
||||
# convert each lexicon from ipa description to fame_htk phoneset.
|
||||
#am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
|
||||
#am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)
|
||||
|
Reference in New Issue
Block a user