@@ -16,50 +16,53 @@ import defaultfiles as default
sys.path.append(default.toolbox_dir)
import file_handling as fh
from htk import pyhtk
#from scripts import run_command
## ======================= user define =======================
# procedure
combine_all = 1
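# when combine_all is set, the devel and test sets are concatenated to the train set
# and the models are trained on all data (see the fh.concatenate calls further down).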
make_lexicon = 0
make_label = 0  # it takes roughly 4800 sec on a Surface Pro 2.
make_mlf = 0
extract_features = 0
flat_start = 1
train_monophone_without_sp = 1
add_sp = 1
train_monophone_with_re_aligned_mlf = 1
increase_mixture = 1
train_triphone = 0
train_triphone_tied = 0
# pre-defined values.
dataset_list = ['devel', 'test', 'train']
feature_size = 30
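# stop criterion for re_estimation_until_saturated: iteration presumably ends once
# the per-iteration gain in the model score drops below this value.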
improvement_threshold = 0.3
hmmdefs_name = 'hmmdefs'
proto_name = 'proto'

lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')

config_dir = os.path.join(default.htk_dir, 'config')
phonelist_full_txt = os.path.join(config_dir, 'phonelist_full.txt')
tree_hed = os.path.join(config_dir, 'tree.hed')
quests_hed = os.path.join(config_dir, 'quests.hed')

model_dir = os.path.join(default.htk_dir, 'model')
model_mono0_dir = os.path.join(model_dir, 'mono0')
model_mono1_dir = os.path.join(model_dir, 'mono1')
model_mono1sp_dir = os.path.join(model_dir, 'mono1sp')
model_mono1sp2_dir = os.path.join(model_dir, 'mono1sp2')
model_tri1_dir = os.path.join(model_dir, 'tri1')
model_tri1tied_dir = os.path.join(model_dir, 'tri1tied')
# directories / files to be made.
lexicon_dir = os.path.join(default.htk_dir, 'lexicon')
lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr')
lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov')
lexicon_htk = os.path.join(lexicon_dir, 'lex.htk')
lexicon_htk_with_sp = os.path.join(lexicon_dir, 'lex_with_sp.htk')
lexicon_htk_triphone = os.path.join(lexicon_dir, 'lex_triphone.htk')

feature_dir = os.path.join(default.htk_dir, 'mfc')
@@ -71,10 +74,20 @@ fh.make_new_directory(label_dir, existing_dir='leave')
## training
if combine_all:
    hcompv_scp_train = os.path.join(tmp_dir, 'all.scp')
    mlf_file_train = os.path.join(label_dir, 'all_phone.mlf')
    mlf_file_train_word = os.path.join(label_dir, 'all_word.mlf')
    mlf_file_train_with_sp = os.path.join(label_dir, 'all_phone_with_sp.mlf')
    mlf_file_train_aligned = os.path.join(label_dir, 'all_phone_aligned.mlf')
    triphone_mlf = os.path.join(label_dir, 'all_triphone.mlf')
else:
    hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
    mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
    mlf_file_train_word = os.path.join(label_dir, 'train_word.mlf')
    mlf_file_train_with_sp = os.path.join(label_dir, 'train_phone_with_sp.mlf')
    mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
    triphone_mlf = os.path.join(label_dir, 'train_triphone.mlf')

hcompv_scp_train_updated = hcompv_scp_train.replace('.scp', '_updated.scp')

## testing
@@ -104,19 +117,18 @@ if make_lexicon:
    print('>>> fixing the lexicon...')
    fame_functions.fix_lexicon(lexicon_htk)
    ## adding sp to the lexicon for HTK.
    print('>>> adding sp to the lexicon...')
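    # joining on ' sp\n' appends the short-pause phone sp to every pronunciation;
    # assuming the file ends in a newline, the trailing empty element is harmless.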
    with open(lexicon_htk) as f:
        lines = f.read().split('\n')
    with open(lexicon_htk_with_sp, 'wb') as f:
        f.write(bytes(' sp\n'.join(lines), 'ascii'))
    print("elapsed time: {}".format(time.time() - timer_start))

## initialize the instance for HTK.
chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk_with_sp, feature_size)

## ======================= make label files =======================
@@ -152,7 +164,7 @@ if make_label:
            shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic'))

            label_file = os.path.join(label_dir_, filename + '.lab')
            chtk.make_label_file(sentence_htk, label_file)
        else:
            os.remove(dictionary_file)
@@ -174,7 +186,6 @@ if make_mlf:
        os.remove(empty_dic_file)

    for dataset in dataset_list:
        #wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
        feature_dir_ = os.path.join(feature_dir, dataset)
        label_dir_ = os.path.join(label_dir, dataset)
        mlf_word = os.path.join(label_dir, dataset + '_word.mlf')
@@ -183,11 +194,11 @@ if make_mlf:
        print(">>> generating a word level mlf file for {}...".format(dataset))
        chtk.label2mlf(label_dir_, mlf_word)

        print(">>> generating a phone level mlf file for {}...".format(dataset))
        chtk.mlf_word2phone(mlf_phone, mlf_word, with_sp=False)
        chtk.mlf_word2phone(mlf_phone_with_sp, mlf_word, with_sp=True)

    print("elapsed time: {}".format(time.time() - timer_start))
@@ -197,7 +208,7 @@ if extract_features:
        timer_start = time.time()

        print('==== extract features on dataset {} ===='.format(dataset))

        wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
        label_dir_ = os.path.join(label_dir, dataset)
        feature_dir_ = os.path.join(feature_dir, dataset)
        fh.make_new_directory(feature_dir_, existing_dir='delete')
@@ -217,8 +228,8 @@ if extract_features:
            + os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc'))
            for lab_file in lab_list]

        #if os.path.exists(empty_mfc_file):
        #    os.remove(empty_mfc_file)

        with open(hcopy_scp.name, 'wb') as f:
            f.write(bytes('\n'.join(feature_list), 'ascii'))
@@ -235,9 +246,64 @@ if extract_features:
        with open(hcompv_scp, 'wb') as f:
            f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))

    print(">>> extracting features on stimmen...")
    chtk.wav2mfc(os.path.join(htk_stimmen_dir, 'hcopy.scp'))

    print("elapsed time: {}".format(time.time() - timer_start))
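
## ======================= combine datasets =======================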
if combine_all:
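    # each combined list/MLF is built in two fh.concatenate steps: devel + test are
    # written to the 'all' file first, then the train file is appended to it.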
    # script files.
    fh.concatenate(
        os.path.join(tmp_dir, 'devel.scp'),
        os.path.join(tmp_dir, 'test.scp'),
        hcompv_scp_train
    )
    fh.concatenate(
        hcompv_scp_train,
        os.path.join(tmp_dir, 'train.scp'),
        hcompv_scp_train
    )
    # phone level mlfs.
    fh.concatenate(
        os.path.join(label_dir, 'devel_phone.mlf'),
        os.path.join(label_dir, 'test_phone.mlf'),
        mlf_file_train
    )
    fh.concatenate(
        mlf_file_train,
        os.path.join(label_dir, 'train_phone.mlf'),
        mlf_file_train
    )
    # phone level mlfs with sp.
    fh.concatenate(
        os.path.join(label_dir, 'devel_phone_with_sp.mlf'),
        os.path.join(label_dir, 'test_phone_with_sp.mlf'),
        mlf_file_train_with_sp
    )
    fh.concatenate(
        mlf_file_train_with_sp,
        os.path.join(label_dir, 'train_phone_with_sp.mlf'),
        mlf_file_train_with_sp
    )
    # word level mlfs.
    fh.concatenate(
        os.path.join(label_dir, 'devel_word.mlf'),
        os.path.join(label_dir, 'test_word.mlf'),
        mlf_file_train_word
    )
    fh.concatenate(
        mlf_file_train_word,
        os.path.join(label_dir, 'train_word.mlf'),
        mlf_file_train_word
    )

## ======================= flat start monophones =======================
if flat_start:
    timer_start = time.time()
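    # flat start: chtk.flat_start presumably wraps HTK's HCompV, computing a global
    # mean and variance over the training data to initialise the prototype model.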
@@ -246,17 +312,14 @@ if flat_start:
    chtk.flat_start(hcompv_scp_train, model_mono0_dir)

    # make macros.
    vFloors = os.path.join(model_mono0_dir, 'vFloors')
    if os.path.exists(vFloors):
        chtk.make_macros(vFloors)

    # allocate mean & variance to all phones in the phone list
    print('>>> allocating mean & variance to all phones in the phone list...')
    chtk.make_hmmdefs(model_mono0_dir)

    print("elapsed time: {}".format(time.time() - timer_start))
@@ -320,8 +383,9 @@ if train_monophone_with_re_aligned_mlf:
        os.path.join(modeln_dir, 'macros'),
        os.path.join(modeln_dir, 'hmmdefs'),
        mlf_file_train_aligned,
        mlf_file_train_word,
        hcompv_scp_train)
    chtk.fix_mlf(mlf_file_train_aligned)

    print('>>> updating the script file...')
    chtk.update_script_file(
@@ -349,24 +413,55 @@ if train_monophone_with_re_aligned_mlf:
    print("elapsed time: {}".format(time.time() - timer_start))

## ======================= increase mixture =======================
if increase_mixture:
    print('==== increase mixture ====')
    timer_start = time.time()
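    # the number of Gaussians per state is doubled stepwise (2 -> 4 -> 8 -> 16);
    # chtk.increase_mixture presumably wraps HTK's HHEd mixture splitting, and each
    # enlarged model is re-estimated until the score gain saturates.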
    for nmix in [2, 4, 8, 16]:
        if nmix == 2:
            modeln_dir_ = model_mono1sp2_dir
        else:
            modeln_dir_ = os.path.join(model_dir, 'mono' + str(nmix_))
        modeln_dir = os.path.join(model_dir, 'mono' + str(nmix))

        print('mixture: {}'.format(nmix))
        fh.make_new_directory(modeln_dir, existing_dir='delete')

        niter = chtk.get_niter_max(modeln_dir_)
        chtk.increase_mixture(
            os.path.join(modeln_dir_, 'iter' + str(niter), 'hmmdefs'),
            nmix,
            os.path.join(modeln_dir, 'iter0'),
            model_type='monophone_with_sp')
        shutil.copy2(os.path.join(modeln_dir_, 'iter' + str(niter), 'macros'),
                     os.path.join(modeln_dir, 'iter0', 'macros'))
        #improvement_threshold = -10
        # re-estimation starts from the freshly split model in modeln_dir/iter0
        # (not the previous modeln_dir_, whose iter0 is the unsplit model).
        niter = chtk.re_estimation_until_saturated(
            modeln_dir,
            os.path.join(modeln_dir, 'iter0'),
            improvement_threshold,
            hcompv_scp_train_updated,
            os.path.join(htk_stimmen_dir, 'mfc'),
            'mfc',
            os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
            mlf_file=mlf_file_train_aligned,
            lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
            model_type='monophone_with_sp'
        )

        nmix_ = nmix
    print("elapsed time: {}".format(time.time() - timer_start))

## ======================= train triphone =======================
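# make_triphonelist presumably expands the aligned monophone MLF into a triphone-level
# MLF and collects the list of triphones seen in the training data.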
print('>>> making triphone list...')
chtk.make_triphonelist(
    mlf_file_train_aligned,
    triphone_mlf)

if train_triphone:
    print('==== train triphone model ====')
    timer_start = time.time()

    print('>>> init triphone model...')
    niter = chtk.get_niter_max(model_mono1sp2_dir)
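    # init_triphone presumably clones the best monophone-with-sp iteration into
    # untied triphone models (compare the tied=True call in the tied-state section).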
@@ -377,8 +472,8 @@ if train_triphone:
    )
    print('>>> re-estimation...')

    ## I wanted to train until saturated:
    #niter = chtk.re_estimation_until_saturated(
    #    model_tri1_dir,
    #    os.path.join(model_tri1_dir, 'iter0'),
    #    improvement_threshold,
@@ -395,7 +490,6 @@ if train_triphone:
    # ERROR [+8231] GetHCIModel: Cannot find hmm [i:-]r[+???]
    # therefore re-estimation is only performed three times.
    output_dir = model_tri1_dir
    for niter in range(1, 4):
        hmm_n = 'iter' + str(niter)
        hmm_n_pre = 'iter' + str(niter - 1)
@@ -414,18 +508,59 @@ if train_triphone:
    print("elapsed time: {}".format(time.time() - timer_start))

## ======================= train tied-state triphones =======================
if train_triphone_tied:
    print('==== train tied-state triphones ====')
    timer_start = time.time()
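    # tied-state training presumably follows the standard HTK recipe: triphone states
    # are clustered with a phonetic decision tree driven by tree.hed, the questions
    # in quests.hed and the state-occupation stats of the last untied tri1 model.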
    print('>>> making lexicon for triphone...')
    chtk.make_lexicon_triphone(phonelist_full_txt, lexicon_htk_triphone)
    chtk.combine_phonelists(phonelist_full_txt)
    print('>>> making a tree header...')
    fame_phonetics.make_quests_hed(quests_hed)
    stats = os.path.join(r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk\model\tri1\iter3', 'stats')
    chtk.make_tree_header(tree_hed, quests_hed, stats, config_dir)
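    # NOTE: the stats path above is hard-coded to one machine; it presumably points
    # at the state-occupation statistics written by the last tri1 iteration (iter3).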
    print("elapsed time: {}".format(time.time() - timer_start))
    print('>>> init triphone model...')
    niter = chtk.get_niter_max(model_tri1_dir)
    fh.make_new_directory(os.path.join(model_tri1tied_dir, 'iter0'), existing_dir='leave')
    chtk.init_triphone(
        os.path.join(model_tri1_dir, 'iter' + str(niter)),
        os.path.join(model_tri1tied_dir, 'iter0'),
        tied=True)

    ## I wanted to train until saturated:
    #niter = chtk.re_estimation_until_saturated(
    #    model_tri1tied_dir,
    #    os.path.join(model_tri1tied_dir, 'iter0'),
    #    improvement_threshold,
    #    hcompv_scp_train_updated,
    #    os.path.join(htk_stimmen_dir, 'mfc'),
    #    'mfc',
    #    os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
    #    mlf_file=triphone_mlf,
    #    lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
    #    model_type='triphone'
    #    )
    #
    # but because the data size is limited, some triphones cannot be trained, which raises:
    # ERROR [+8231] GetHCIModel: Cannot find hmm [i:-]r[+???]
    # therefore re-estimation is only performed three times.
    output_dir = model_tri1tied_dir
    for niter in range(1, 4):
        hmm_n = 'iter' + str(niter)
        hmm_n_pre = 'iter' + str(niter - 1)
        _modeln_dir = os.path.join(output_dir, hmm_n)
        _modeln_dir_pre = os.path.join(output_dir, hmm_n_pre)

        fh.make_new_directory(_modeln_dir, 'leave')
        chtk.re_estimation(
            os.path.join(_modeln_dir_pre, 'hmmdefs'),
            _modeln_dir,
            hcompv_scp_train_updated,
            mlf_file=triphone_mlf,
            macros=os.path.join(_modeln_dir_pre, 'macros'),
            model_type='triphone')

    print("elapsed time: {}".format(time.time() - timer_start))