lexicon is made.
@@ -5,7 +5,6 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import tempfile
#import configparser
#import subprocess
#from collections import Counter
import time

import numpy as np
@@ -29,44 +28,21 @@ dataset_list = ['devel', 'test', 'train']

# procedure
extract_features = 0
conv_lexicon = 1
#check_lexicon = 0
#make_mlf = 0
#combine_files = 0
#flat_start = 0
#train_model = 1


#sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir))
#sys.path.append(forced_alignment_module)
#from forced_alignment import convert_phone_set

make_lexicon = 0
make_mlf = 0
combine_files = 0
flat_start = 0
train_model = 0


## ======================= load variables =======================

#config = configparser.ConfigParser()
#config.sections()
#config.read(config_ini)

#config_hcopy = config['Settings']['config_hcopy']
#config_train = config['Settings']['config_train']
#mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl']
#FAME_dir = config['Settings']['FAME_dir']

#lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
#lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
#lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')

#lex_asr = FAME_dir + '\\lexicon\\lex.asr'
#lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'
#lex_oov = FAME_dir + '\\lexicon\\lex.oov'
#lex_oov_htk = FAME_dir + '\\lexicon\\lex.oov_htk'
##lex_ipa = FAME_dir + '\\lexicon\\lex.ipa'
##lex_ipa_ = FAME_dir + '\\lexicon\\lex.ipa_'
##lex_ipa_htk = FAME_dir + '\\lexicon\\lex.ipa_htk'
#lex_htk = FAME_dir + '\\lexicon\\lex_original.htk'
#lex_htk_ = FAME_dir + '\\lexicon\\lex.htk'
lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
lexicon_oov = os.path.join(lexicon_dir, 'lex.oov')
lexicon_htk_asr = os.path.join(default.htk_dir, 'lexicon', 'lex.htk_asr')
lexicon_htk_oov = os.path.join(default.htk_dir, 'lexicon', 'lex.htk_oov')
lexicon_htk = os.path.join(default.htk_dir, 'lexicon', 'lex.htk')

#hcompv_scp = output_dir + '\\scp\\combined.scp'
#combined_mlf = output_dir + '\\label\\combined.mlf'
@@ -88,8 +64,10 @@ if not os.path.exists(tmp_dir):

## ======================= extract features =======================
if extract_features:
    print('==== extract features ====\n')

    for dataset in dataset_list:
        print('==== {} ===='.format(dataset))
        print('==== dataset: {} ===='.format(dataset))

        # a script file for HCopy
        print(">>> making a script file for HCopy... \n")
@@ -112,48 +90,28 @@ if extract_features:
    fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')
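
# NOTE: illustrative sketch, not part of this commit. HCopy reads a script
# (scp) file in which each line maps one source wav to one target feature
# file. The helper below and its arguments are hypothetical; the actual
# script builds its file lists elsewhere.
def make_hcopy_scp_sketch(wav_dir, feature_dir, hcopy_scp):
    import glob
    with open(hcopy_scp, 'w') as f:
        for wav in sorted(glob.glob(os.path.join(wav_dir, '*.wav'))):
            mfc = os.path.join(
                feature_dir, os.path.basename(wav).replace('.wav', '.mfc'))
            # HCopy scp format: "<source> <target>" per line.
            f.write('{0} {1}\n'.format(wav, mfc))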


## ======================= convert lexicon from ipa to fame_htk =======================
if conv_lexicon:
    print('==== convert lexicon from ipa 2 fame ====\n')
    # convert each lexicon from ipa description to fame_htk phoneset.
    #am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
    #am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)
## ======================= make lexicon for HTK =======================
if make_lexicon:
    print('==== make lexicon for HTK ====\n')

    # convert each lexicon from fame_asr phoneset to fame_htk phoneset.
    print('>>> converting each lexicon from fame_asr phoneset to fame_htk phoneset... \n')
    fame_functions.lexicon_asr2htk(lexicon_asr, lexicon_htk_asr)
    fame_functions.lexicon_asr2htk(lexicon_oov, lexicon_htk_oov)
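
# NOTE: illustrative sketch, not part of this commit. A plausible shape for
# lexicon_asr2htk: read a tab-separated word/pronunciation lexicon and map
# each fame_asr phone onto its fame_htk counterpart. The mapping dict
# (asr2htk_phone) is assumed, not taken from fame_functions.
def lexicon_asr2htk_sketch(lexicon_in, lexicon_out, asr2htk_phone):
    with open(lexicon_in, encoding='utf-8') as fin, \
            open(lexicon_out, 'w', encoding='utf-8') as fout:
        for line in fin:
            word, pronunciation = line.rstrip('\n').split('\t')
            phones = [asr2htk_phone.get(p, p) for p in pronunciation.split()]
            fout.write('{0}\t{1}\n'.format(word, ' '.join(phones)))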

    # combine lexicon
    print('>>> combining lexicon files into one lexicon... \n')
    # pronunciations which are not found in lex.asr are generated using G2P and listed in lex.oov.
    # therefore there is no overlap between lex_asr and lex_oov.
    #am_func.combine_lexicon(lex_asr_htk, lex_oov_htk, lex_htk)
    fame_functions.combine_lexicon(lexicon_htk_asr, lexicon_htk_oov, lexicon_htk)
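
# NOTE: illustrative sketch, not part of this commit. Because lex.oov holds
# only G2P pronunciations for words missing from lex.asr, the two files are
# disjoint, so combining them can be a plain concatenation sorted by word
# (the order an HTK dictionary expects).
def combine_lexicon_sketch(lexicon_a, lexicon_b, lexicon_out):
    entries = []
    for lexicon in (lexicon_a, lexicon_b):
        with open(lexicon, encoding='utf-8') as fin:
            entries.extend(line.rstrip('\n') for line in fin if line.strip())
    with open(lexicon_out, 'w', encoding='utf-8') as fout:
        fout.write('\n'.join(sorted(entries)) + '\n')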


## ======================= check if all the phones are successfully converted =======================
if check_lexicon:
    print("==== check if all the phones are successfully converted. ====\n")

    # the phones used in the lexicon.
    phonelist_asr = am_func.get_phonelist(lex_asr)
    phonelist_oov = am_func.get_phonelist(lex_oov)
    phonelist_htk = am_func.get_phonelist(lex_htk)

    phonelist = phonelist_asr.union(phonelist_oov)

    # the lines which include a specific phone.
    lines = am_func.find_phone(lex_asr, 'g')

    # statistics over the lexicon
    lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation'])
    pronunciation = lexicon_htk['pronunciation']
    phones_all = []
    for word in pronunciation:
        phones_all = phones_all + word.split()
    c = Counter(phones_all)
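
# NOTE: illustrative sketch, not part of this commit. Plausible shapes for
# the two am_func helpers used above: get_phonelist collects the set of
# phones occurring in a lexicon; find_phone returns the lines whose
# pronunciation contains a given phone. Both signatures are assumptions.
def get_phonelist_sketch(lexicon_file):
    phones = set()
    with open(lexicon_file, encoding='utf-8') as fin:
        for line in fin:
            pronunciation = line.rstrip('\n').split('\t')[-1]
            phones.update(pronunciation.split())
    return phones

def find_phone_sketch(lexicon_file, phone):
    with open(lexicon_file, encoding='utf-8') as fin:
        return [line for line in fin
                if phone in line.rstrip('\n').split('\t')[-1].split()]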


## =======================
## manually make changes to the pronunciation dictionary and save it as lex.htk
## =======================
# (1) Replace all tabs with a single space;
# (2) Put a '\' before any dictionary entry beginning with a single quote
# http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
fame_functions.fix_single_quote(lexicon_htk)
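
# NOTE: illustrative sketch, not part of this commit. A plausible shape for
# fix_single_quote, implementing the two manual fixes described above: tabs
# become single spaces, and any entry starting with a single quote gets a
# leading backslash so HTK does not misparse it.
def fix_single_quote_sketch(lexicon_file):
    fixed = []
    with open(lexicon_file, encoding='utf-8') as fin:
        for line in fin:
            line = line.rstrip('\n').replace('\t', ' ')
            if line.startswith("'"):
                line = '\\' + line
            fixed.append(line)
    with open(lexicon_file, 'w', encoding='utf-8') as fout:
        fout.write('\n'.join(fixed) + '\n')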


## ======================= make label file =======================