From bf586fcde5145083f96b44fb35f8ddf214d78ee8 Mon Sep 17 00:00:00 2001
From: yemaozi88 <428968@gmail.com>
Date: Sat, 23 Mar 2019 21:52:48 +0100
Subject: [PATCH] triphone training is added.

---
 .vs/acoustic_model/v15/.suo      | Bin 97280 -> 94720 bytes
 acoustic_model/fame_functions.py |   1 +
 acoustic_model/fame_hmm.py       | 156 +++++++++++++++++++++---------
 3 files changed, 109 insertions(+), 48 deletions(-)

diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo
index 7c3c3f1759b95cd36dd61c15cb362819a689f880..95b20ba880bf7179f2f25d238bed7cb7d8e4b881 100644
GIT binary patch
(binary deltas omitted)
diff --git a/acoustic_model/fame_functions.py b/acoustic_model/fame_functions.py
(hunk not recovered; 1 insertion per the diffstat above)
diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py
--- a/acoustic_model/fame_hmm.py
+++ b/acoustic_model/fame_hmm.py
@@ ... @@
     print('>>> fixing the lexicon...')
     fame_functions.fix_lexicon(lexicon_htk)
-    print("elapsed time: {}".format(time.time() - timer_start))
{}".format(time.time() - timer_start)) + ## add sp to the end of each line. + #print('>>> adding sp...') + #with open(lexicon_htk) as f: + # lines = f.read().split('\n') + #lines = [line + ' sp' for line in lines] + #with open(lexicon_htk_with_sp, 'wb') as f: + # f.write(bytes('\n'.join(lines), 'ascii')) + + print("elapsed time: {}".format(time.time() - timer_start)) + ## intialize the instance for HTK. chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk, feature_size) @@ -164,12 +176,15 @@ if make_mlf: label_dir_ = os.path.join(label_dir, dataset) mlf_word = os.path.join(label_dir, dataset + '_word.mlf') mlf_phone = os.path.join(label_dir, dataset + '_phone.mlf') + mlf_phone_with_sp = os.path.join(label_dir, dataset + '_phone_with_sp.mlf') print(">>> generating a word level mlf file for {}...".format(dataset)) chtk.label2mlf(label_dir_, mlf_word) print(">>> generating a phone level mlf file for {}...".format(dataset)) - chtk.mlf_word2phone(mlf_phone, mlf_word) + chtk.mlf_word2phone(mlf_phone, mlf_word, with_sp=False) + chtk.mlf_word2phone(mlf_phone_with_sp, mlf_word, with_sp=True) + print("elapsed time: {}".format(time.time() - timer_start)) @@ -224,33 +239,33 @@ if extract_features: if flat_start: timer_start = time.time() print('==== flat start ====') - fh.make_new_directory(model0_dir, existing_dir='leave') + fh.make_new_directory(model_mono0_dir, existing_dir='leave') - chtk.flat_start(hcompv_scp_train, model0_dir) + chtk.flat_start(hcompv_scp_train, model_mono0_dir) # create macros. - vFloors = os.path.join(model0_dir, 'vFloors') + vFloors = os.path.join(model_mono0_dir, 'vFloors') if os.path.exists(vFloors): chtk.create_macros(vFloors) # allocate mean & variance to all phones in the phone list print('>>> allocating mean & variance to all phones in the phone list...') chtk.create_hmmdefs( - os.path.join(model0_dir, proto_name), - os.path.join(model0_dir, 'hmmdefs') + os.path.join(model_mono0_dir, proto_name), + os.path.join(model_mono0_dir, 'hmmdefs') ) print("elapsed time: {}".format(time.time() - timer_start)) ## ======================= train model without short pause ======================= -if train_model_without_sp: - print('==== train model without sp ====') +if train_monophone_without_sp: + print('==== train monophone without sp ====') timer_start = time.time() niter = chtk.re_estimation_until_saturated( - model1_dir, - model0_dir, improvement_threshold, hcompv_scp_train, + model_mono1_dir, + model_mono0_dir, improvement_threshold, hcompv_scp_train, os.path.join(htk_stimmen_dir, 'mfc'), 'mfc', os.path.join(htk_stimmen_dir, 'word_lattice.ltc'), @@ -270,32 +285,34 @@ if add_sp: # make model with sp. 
     print('>>> adding sp state to the last model in the previous step...')
-    fh.make_new_directory(model1sp_dir, existing_dir='leave')
-    niter = chtk.get_niter_max(model1_dir)
-    modeln_dir_pre = os.path.join(model1_dir, 'iter'+str(niter))
-    modeln_dir = os.path.join(model1sp_dir, 'iter0')
+    fh.make_new_directory(model_mono1sp_dir, existing_dir='leave')
+    niter = chtk.get_niter_max(model_mono1_dir)
+    modeln_dir_pre = os.path.join(model_mono1_dir, 'iter'+str(niter))
+    modeln_dir = os.path.join(model_mono1sp_dir, 'iter0')
+
+    #hmmdefs_pre = os.path.join(modeln_dir_pre, 'hmmdefs')
     chtk.add_sp(modeln_dir_pre, modeln_dir)
     print("elapsed time: {}".format(time.time() - timer_start))

     niter = chtk.re_estimation_until_saturated(
-        model1sp_dir, modeln_dir, improvement_threshold, hcompv_scp_train,
+        model_mono1sp_dir, modeln_dir, improvement_threshold, hcompv_scp_train,
         os.path.join(htk_stimmen_dir, 'mfc'),
         'mfc',
         os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
-        mlf_file=mlf_file_train,
+        mlf_file=mlf_file_train_with_sp,
         lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
         model_type='monophone_with_sp'
     )


 ## ======================= train model with re-aligned mlf =======================
-if train_model_with_re_aligned_mlf:
-    print('==== traina model with re-aligned mlf ====')
+if train_monophone_with_re_aligned_mlf:
+    print('==== train monophone with re-aligned mlf ====')
     print('>>> re-aligning the training data... ')
     timer_start = time.time()
-    niter = chtk.get_niter_max(model1sp_dir)
-    modeln_dir = os.path.join(model1sp_dir, 'iter'+str(niter))
+    niter = chtk.get_niter_max(model_mono1sp_dir)
+    modeln_dir = os.path.join(model_mono1sp_dir, 'iter'+str(niter))
     chtk.make_aligned_label(
         os.path.join(modeln_dir, 'macros'),
         os.path.join(modeln_dir, 'hmmdefs'),
@@ -306,18 +323,18 @@ if train_model_with_re_aligned_mlf:
     print('>>> updating the script file... ')
     chtk.update_script_file(
         mlf_file_train_aligned,
-        mlf_file_train,
+        mlf_file_train_with_sp,
         hcompv_scp_train,
         hcompv_scp_train_updated)
     print("elapsed time: {}".format(time.time() - timer_start))

     print('>>> re-estimation... ')
     timer_start = time.time()
-    fh.make_new_directory(model1sp2_dir, existing_dir='leave')
-    niter = chtk.get_niter_max(model1sp_dir)
+    fh.make_new_directory(model_mono1sp2_dir, existing_dir='leave')
+    niter = chtk.get_niter_max(model_mono1sp_dir)
     niter = chtk.re_estimation_until_saturated(
-        model1sp2_dir,
-        os.path.join(model1sp_dir, 'iter'+str(niter)),
+        model_mono1sp2_dir,
+        os.path.join(model_mono1sp_dir, 'iter'+str(niter)),
         improvement_threshold,
         hcompv_scp_train_updated,
         os.path.join(htk_stimmen_dir, 'mfc'),
@@ -332,25 +349,68 @@

 ## ======================= train triphone =======================
 if train_triphone:
-    model_out_dir = os.path.join(model_dir, 'hmm1_tri', 'iter1')
+    print('==== train triphone model ====')
+    #model_out_dir = os.path.join(model_dir, 'hmm1_tri', 'iter1')

-    triphonelist_txt = os.path.join(config_dir, 'triphonelist_txt')
+    triphonelist_txt = os.path.join(config_dir, 'triphonelist.txt')
     triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')

+    print('>>> making triphone list... ')
     chtk.make_triphonelist(
         triphonelist_txt,
         triphone_mlf,
         mlf_file_train_aligned)

-    #run_command([
-    #    'HERest', '-B',
-    #    '-C', config_train,
-    #    '-I', triphone_mlf,
-    #    '-t', '250.0', '150.0', '1000.0',
-    #    '-s', 'stats'
-    #    '-S', hcompv_scp_train,
-    #    '-H', macros,
-    #    '-H', hmmdefs,
-    #    '-M', model_out_dir,
-    #    os.path.join(config_dir, 'triphonelist.txt')
-    #])
+    print('>>> making triphone header... ')
+    chtk.make_tri_hed(
+        os.path.join(config_dir, 'mktri.hed')
+    )
+
+    print('>>> init triphone model... ')
+    niter = chtk.get_niter_max(model_mono1sp2_dir)
+    fh.make_new_directory(os.path.join(model_tri1_dir, 'iter0'), existing_dir='leave')
+    chtk.init_triphone(
+        os.path.join(model_mono1sp2_dir, 'iter'+str(niter)),
+        os.path.join(model_tri1_dir, 'iter0')
+    )
+
+    print('>>> re-estimation... ')
+    # I wanted to train until saturated:
+    #
+    # niter = chtk.re_estimation_until_saturated(
+    #     model_tri1_dir,
+    #     os.path.join(model_tri1_dir, 'iter0'),
+    #     improvement_threshold,
+    #     hcompv_scp_train_updated,
+    #     os.path.join(htk_stimmen_dir, 'mfc'),
+    #     'mfc',
+    #     os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
+    #     mlf_file=triphone_mlf,
+    #     lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
+    #     model_type='triphone'
+    # )
+    #
+    # but because the data size is limited, some triphones cannot be trained and
+    # training fails with:
+    #     ERROR [+8231] GetHCIModel: Cannot find hmm [i:-]r[+???]
+    # therefore re-estimation is performed for a fixed three iterations instead.
+    output_dir = model_tri1_dir
+
+    for niter in range(1, 4):
+        hmm_n = 'iter' + str(niter)
+        hmm_n_pre = 'iter' + str(niter-1)
+        _modeln_dir = os.path.join(output_dir, hmm_n)
+        _modeln_dir_pre = os.path.join(output_dir, hmm_n_pre)
+
+        fh.make_new_directory(_modeln_dir, 'leave')
+        chtk.re_estimation(
+            os.path.join(_modeln_dir_pre, 'hmmdefs'),
+            _modeln_dir,
+            hcompv_scp_train_updated,
+            mlf_file=triphone_mlf,
+            macros=os.path.join(_modeln_dir_pre, 'macros'),
+            model_type='triphone')
+
+
+## ======================= train tied-state triphone =======================
+if train_triphone_tied:
+    print('==== train tied-state triphone ====')
\ No newline at end of file
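-- 
The run_command block deleted in the train_triphone hunk encoded a direct HERest
invocation, which the new chtk.re_estimation() call appears to wrap. For reference,
a minimal runnable sketch of that call is below. It is not part of the patch: the
function and its arguments (config_train, triphone_mlf, hcompv_scp_train, macros,
hmmdefs, model_out_dir, triphonelist_txt) are hypothetical stand-ins for values
defined elsewhere in fame_hmm.py, and the comma missing after 'stats' in the
original comment is restored. The '-s stats' output matters for the tied-state
step sketched at the end of the patch, since HTK's decision-tree state clustering
(HHEd) typically reads these state-occupation statistics.

import subprocess

def run_herest(config_train, triphone_mlf, hcompv_scp_train,
               macros, hmmdefs, model_out_dir, triphonelist_txt):
    # One HERest (Baum-Welch) re-estimation pass over the training set.
    subprocess.run([
        'HERest',
        '-B',                              # write updated models in binary
        '-C', config_train,                # HTK configuration file
        '-I', triphone_mlf,                # triphone-level label MLF
        '-t', '250.0', '150.0', '1000.0',  # pruning thresholds
        '-s', 'stats',                     # state-occupation statistics file
        '-S', hcompv_scp_train,            # script file listing feature files
        '-H', macros,                      # input macros
        '-H', hmmdefs,                     # input model definitions
        '-M', model_out_dir,               # output directory for new models
        triphonelist_txt,                  # list of HMMs to re-estimate
    ], check=True)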