novoapi_functions.py is adjusted to use convert_phoneset.py.

This commit is contained in:
yemaozi88
2019-04-22 00:59:53 +02:00
parent b444b70af9
commit 2004399179
5 changed files with 283 additions and 132 deletions

View File

@ -16,50 +16,53 @@ import defaultfiles as default
sys.path.append(default.toolbox_dir)
import file_handling as fh
from htk import pyhtk
#from scripts import run_command
## ======================= user define =======================
# procedure
combine_all = 1
make_lexicon = 0
make_label = 0 # it takes roughly 4800 sec on Surface pro 2.
make_mlf = 0
extract_features = 0
flat_start = 0
train_monophone_without_sp = 0
add_sp = 0
train_monophone_with_re_aligned_mlf = 0
flat_start = 1
train_monophone_without_sp = 1
add_sp = 1
train_monophone_with_re_aligned_mlf = 1
increase_mixture = 1
train_triphone = 0
train_triphone_tied = 1
train_triphone_tied = 0
# pre-defined values.
dataset_list = ['devel', 'test', 'train']
feature_size = 39
feature_size = 30
improvement_threshold = 0.3
hmmdefs_name = 'hmmdefs'
proto_name = 'proto'
lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')
config_dir = os.path.join(default.htk_dir, 'config')
phonelist_full_txt = os.path.join(config_dir, 'phonelist_full.txt')
tree_hed = os.path.join(config_dir, 'tree.hed')
quest_hed = os.path.join(config_dir, 'quests.hed')
tree_hed = os.path.join(config_dir, 'tree.hed')
quests_hed = os.path.join(config_dir, 'quests.hed')
model_dir = os.path.join(default.htk_dir, 'model')
model_mono0_dir = os.path.join(model_dir, 'mono0')
model_mono1_dir = os.path.join(model_dir, 'mono1')
model_mono1sp_dir = os.path.join(model_dir, 'mono1sp')
model_mono1sp2_dir = os.path.join(model_dir, 'mono1sp2')
model_tri1_dir = os.path.join(model_dir, 'tri1')
model_tri1_dir = os.path.join(model_dir, 'tri1')
model_tri1tied_dir = os.path.join(model_dir, 'tri1tied')
# directories / files to be made.
lexicon_dir = os.path.join(default.htk_dir, 'lexicon')
lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr')
lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov')
lexicon_htk = os.path.join(lexicon_dir, 'lex.htk')
lexicon_htk_with_sp = os.path.join(lexicon_dir, 'lex_with_sp.htk')
lexicon_htk_triphone = os.path.join(lexicon_dir, 'lex_triphone.htk')
feature_dir = os.path.join(default.htk_dir, 'mfc')
@ -71,10 +74,20 @@ fh.make_new_directory(label_dir, existing_dir='leave')
## training
hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
mlf_file_train_with_sp = os.path.join(label_dir, 'train_phone_with_sp.mlf')
mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
if combine_all:
hcompv_scp_train = os.path.join(tmp_dir, 'all.scp')
mlf_file_train = os.path.join(label_dir, 'all_phone.mlf')
mlf_file_train_word = os.path.join(label_dir, 'all_word.mlf')
mlf_file_train_with_sp = os.path.join(label_dir, 'all_phone_with_sp.mlf')
mlf_file_train_aligned = os.path.join(label_dir, 'all_phone_aligned.mlf')
triphone_mlf = os.path.join(label_dir, 'all_triphone.mlf')
else:
hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
mlf_file_train_word = os.path.join(label_dir, 'train_word.mlf')
mlf_file_train_with_sp = os.path.join(label_dir, 'train_phone_with_sp.mlf')
mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
triphone_mlf = os.path.join(label_dir, 'train_triphone.mlf')
hcompv_scp_train_updated = hcompv_scp_train.replace('.scp', '_updated.scp')
## testing
@ -104,19 +117,18 @@ if make_lexicon:
print('>>> fixing the lexicon...')
fame_functions.fix_lexicon(lexicon_htk)
## add sp to the end of each line.
#print('>>> adding sp...')
#with open(lexicon_htk) as f:
# lines = f.read().split('\n')
#lines = [line + ' sp' for line in lines]
#with open(lexicon_htk_with_sp, 'wb') as f:
# f.write(bytes('\n'.join(lines), 'ascii'))
## adding sp to the lexicon for HTK.
print('>>> adding sp to the lexicon...')
with open(lexicon_htk) as f:
lines = f.read().split('\n')
with open(lexicon_htk_with_sp, 'wb') as f:
f.write(bytes(' sp\n'.join(lines), 'ascii'))
print("elapsed time: {}".format(time.time() - timer_start))
## intialize the instance for HTK.
chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk, feature_size)
chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk_with_sp, feature_size)
## ======================= make label files =======================
@ -152,7 +164,7 @@ if make_label:
shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic'))
label_file = os.path.join(label_dir_, filename + '.lab')
chtk.create_label_file(sentence_htk, label_file)
chtk.make_label_file(sentence_htk, label_file)
else:
os.remove(dictionary_file)
@ -174,7 +186,6 @@ if make_mlf:
os.remove(empty_dic_file)
for dataset in dataset_list:
#wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
feature_dir_ = os.path.join(feature_dir, dataset)
label_dir_ = os.path.join(label_dir, dataset)
mlf_word = os.path.join(label_dir, dataset + '_word.mlf')
@ -183,11 +194,11 @@ if make_mlf:
print(">>> generating a word level mlf file for {}...".format(dataset))
chtk.label2mlf(label_dir_, mlf_word)
print(">>> generating a phone level mlf file for {}...".format(dataset))
chtk.mlf_word2phone(mlf_phone, mlf_word, with_sp=False)
chtk.mlf_word2phone(mlf_phone_with_sp, mlf_word, with_sp=True)
print("elapsed time: {}".format(time.time() - timer_start))
@ -197,7 +208,7 @@ if extract_features:
timer_start = time.time()
print('==== extract features on dataset {} ===='.format(dataset))
wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
label_dir_ = os.path.join(label_dir, dataset)
feature_dir_ = os.path.join(feature_dir, dataset)
fh.make_new_directory(feature_dir_, existing_dir='delete')
@ -217,8 +228,8 @@ if extract_features:
+ os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc'))
for lab_file in lab_list]
if os.path.exists(empty_mfc_file):
os.remove(empty_mfc_file)
#if os.path.exists(empty_mfc_file):
# os.remove(empty_mfc_file)
with open(hcopy_scp.name, 'wb') as f:
f.write(bytes('\n'.join(feature_list), 'ascii'))
@ -235,9 +246,64 @@ if extract_features:
with open(hcompv_scp, 'wb') as f:
f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))
print(">>> extracting features on stimmen...")
chtk.wav2mfc(os.path.join(htk_stimmen_dir, 'hcopy.scp'))
print("elapsed time: {}".format(time.time() - timer_start))
## ======================= flat start monophones =======================
if combine_all:
# script files.
fh.concatenate(
os.path.join(tmp_dir, 'devel.scp'),
os.path.join(tmp_dir, 'test.scp'),
hcompv_scp_train
)
fh.concatenate(
hcompv_scp_train,
os.path.join(tmp_dir, 'train.scp'),
hcompv_scp_train
)
# phone level mlfs.
fh.concatenate(
os.path.join(label_dir, 'devel_phone.mlf'),
os.path.join(label_dir, 'test_phone.mlf'),
mlf_file_train
)
fh.concatenate(
mlf_file_train,
os.path.join(label_dir, 'train_phone.mlf'),
mlf_file_train
)
# phone level mlfs with sp.
fh.concatenate(
os.path.join(label_dir, 'devel_phone_with_sp.mlf'),
os.path.join(label_dir, 'test_phone_with_sp.mlf'),
mlf_file_train_with_sp
)
fh.concatenate(
mlf_file_train_with_sp,
os.path.join(label_dir, 'train_phone_with_sp.mlf'),
mlf_file_train_with_sp
)
# word level mlfs.
fh.concatenate(
os.path.join(label_dir, 'devel_word.mlf'),
os.path.join(label_dir, 'test_word.mlf'),
mlf_file_train_word
)
fh.concatenate(
mlf_file_train_word,
os.path.join(label_dir, 'train_word.mlf'),
mlf_file_train_word
)
## ======================= flat start monophones =======================
if flat_start:
timer_start = time.time()
@ -246,17 +312,14 @@ if flat_start:
chtk.flat_start(hcompv_scp_train, model_mono0_dir)
# create macros.
# make macros.
vFloors = os.path.join(model_mono0_dir, 'vFloors')
if os.path.exists(vFloors):
chtk.create_macros(vFloors)
chtk.make_macros(vFloors)
# allocate mean & variance to all phones in the phone list
print('>>> allocating mean & variance to all phones in the phone list...')
chtk.create_hmmdefs(
os.path.join(model_mono0_dir, proto_name),
os.path.join(model_mono0_dir, 'hmmdefs')
)
chtk.make_hmmdefs(model_mono0_dir)
print("elapsed time: {}".format(time.time() - timer_start))
@ -320,8 +383,9 @@ if train_monophone_with_re_aligned_mlf:
os.path.join(modeln_dir, 'macros'),
os.path.join(modeln_dir, 'hmmdefs'),
mlf_file_train_aligned,
os.path.join(label_dir, 'train_word.mlf'),
mlf_file_train_word,
hcompv_scp_train)
chtk.fix_mlf(mlf_file_train_aligned)
print('>>> updating the script file... ')
chtk.update_script_file(
@ -349,24 +413,55 @@ if train_monophone_with_re_aligned_mlf:
print("elapsed time: {}".format(time.time() - timer_start))
## ======================= train triphone =======================
if train_triphone:
print('==== traina triphone model ====')
## ======================= increase mixture =======================
if increase_mixture:
print('==== increase mixture ====')
timer_start = time.time()
for nmix in [2, 4, 8, 16]:
if nmix == 2:
modeln_dir_ = model_mono1sp2_dir
else:
modeln_dir_ = os.path.join(model_dir, 'mono'+str(nmix_))
modeln_dir = os.path.join(model_dir, 'mono'+str(nmix))
triphonelist_txt = os.path.join(config_dir, 'triphonelist.txt')
triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')
print('mixture: {}'.format(nmix))
fh.make_new_directory(modeln_dir, existing_dir='delete')
niter = chtk.get_niter_max(modeln_dir_)
chtk.increase_mixture(
os.path.join(modeln_dir_, 'iter'+str(niter), 'hmmdefs'),
nmix,
os.path.join(modeln_dir, 'iter0'),
model_type='monophone_with_sp')
shutil.copy2(os.path.join(modeln_dir_, 'iter'+str(niter), 'macros'),
os.path.join(modeln_dir, 'iter0', 'macros'))
print('>>> making triphone list... ')
chtk.make_triphonelist(
triphonelist_txt,
triphone_mlf,
mlf_file_train_aligned)
#improvement_threshold = -10
niter = chtk.re_estimation_until_saturated(
modeln_dir,
os.path.join(modeln_dir_, 'iter0'),
improvement_threshold,
hcompv_scp_train_updated,
os.path.join(htk_stimmen_dir, 'mfc'),
'mfc',
os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
mlf_file=mlf_file_train_aligned,
lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
model_type='monophone_with_sp'
)
nmix_ = nmix
print('>>> making triphone header... ')
chtk.make_tri_hed(
os.path.join(config_dir, 'mktri.hed')
)
print("elapsed time: {}".format(time.time() - timer_start))
## ======================= train triphone =======================
print('>>> making triphone list... ')
chtk.make_triphonelist(
mlf_file_train_aligned,
triphone_mlf)
if train_triphone:
print('==== train triphone model ====')
timer_start = time.time()
print('>>> init triphone model... ')
niter = chtk.get_niter_max(model_mono1sp2_dir)
@ -377,8 +472,8 @@ if train_triphone:
)
print('>>> re-estimation... ')
# I wanted to train until satulated:
# #niter = chtk.re_estimation_until_saturated(
## I wanted to train until satulated:
#niter = chtk.re_estimation_until_saturated(
# model_tri1_dir,
# os.path.join(model_tri1_dir, 'iter0'),
# improvement_threshold,
@ -395,7 +490,6 @@ if train_triphone:
# ERROR [+8231] GetHCIModel: Cannot find hmm [i:-]r[+???]
# therefore only two times re-estimation is performed.
output_dir = model_tri1_dir
for niter in range(1, 4):
hmm_n = 'iter' + str(niter)
hmm_n_pre = 'iter' + str(niter-1)
@ -414,18 +508,59 @@ if train_triphone:
print("elapsed time: {}".format(time.time() - timer_start))
## ======================= train triphone =======================
## ======================= train tied-state triphones =======================
if train_triphone_tied:
print('==== traina tied-state triphone ====')
print('==== train tied-state triphones ====')
timer_start = time.time()
print('>>> making lexicon for triphone... ')
chtk.make_triphone_full(phonelist_full_txt, lexicon_htk_triphone)
chtk.make_lexicon_triphone(phonelist_full_txt, lexicon_htk_triphone)
chtk.combine_phonelists(phonelist_full_txt)
print('>>> making headers... ')
chtk.make_tree_header(tree_hed)
fame_phonetics.make_quests_hed(quest_hed)
print('>>> making a tree header... ')
fame_phonetics.make_quests_hed(quests_hed)
stats = os.path.join(r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk\model\tri1\iter3', 'stats')
chtk.make_tree_header(tree_hed, quests_hed, stats, config_dir)
print("elapsed time: {}".format(time.time() - timer_start))
print('>>> init triphone model... ')
niter = chtk.get_niter_max(model_tri1_dir)
fh.make_new_directory(os.path.join(model_tri1tied_dir, 'iter0'), existing_dir='leave')
chtk.init_triphone(
os.path.join(model_tri1_dir, 'iter'+str(niter)),
os.path.join(model_tri1tied_dir, 'iter0'),
tied=True)
# I wanted to train until satulated:
#niter = chtk.re_estimation_until_saturated(
# model_tri1tied_dir,
# os.path.join(model_tri1tied_dir, 'iter0'),
# improvement_threshold,
# hcompv_scp_train_updated,
# os.path.join(htk_stimmen_dir, 'mfc'),
# 'mfc',
# os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
# mlf_file=triphone_mlf,
# lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
# model_type='triphone'
# )
#
# but because the data size is limited, some triphone cannot be trained and received the error:
# ERROR [+8231] GetHCIModel: Cannot find hmm [i:-]r[+???]
# therefore only 3 times re-estimation is performed.
output_dir = model_tri1tied_dir
for niter in range(1, 4):
hmm_n = 'iter' + str(niter)
hmm_n_pre = 'iter' + str(niter-1)
_modeln_dir = os.path.join(output_dir, hmm_n)
_modeln_dir_pre = os.path.join(output_dir, hmm_n_pre)
fh.make_new_directory(_modeln_dir, 'leave')
chtk.re_estimation(
os.path.join(_modeln_dir_pre, 'hmmdefs'),
_modeln_dir,
hcompv_scp_train_updated,
mlf_file=triphone_mlf,
macros=os.path.join(_modeln_dir_pre, 'macros'),
model_type='triphone')
print("elapsed time: {}".format(time.time() - timer_start))