From fdd165ce6a95b6aa4737975a5b593d95760f7f1d Mon Sep 17 00:00:00 2001 From: yemaozi88 <428968@gmail.com> Date: Fri, 8 Mar 2019 23:13:08 +0100 Subject: [PATCH] re-aligned mlf file include less files than original mlf file. Therefore the scp file should also be updated accordingly, when re-estimation is performed. this bug is fixed. --- .vs/acoustic_model/v15/.suo | Bin 103424 -> 97280 bytes acoustic_model/fame_hmm.py | 55 +++++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index b545ca0d012cfbacdf8292681333d1af94215c62..7c3c3f1759b95cd36dd61c15cb362819a689f880 100644 GIT binary patch delta 7086 zcmeI0dvMd&mB)2{7RDF{gi(wGjxle7jV;NNCAmDTr;RPkdRX$KE=87P$+m0>J#1Ma zSkR^+g|N0R;nFk(qNaqTO$$~U10~d1NnA>CEornvc(Y|L#5Cdyjwo;wQ7?x95zAR&cy@67YEVkH<&;G&MEFYO|2C)8=K| zf|7Nq zb~RF%F2~UpAfk5)JPLjf;@!GL%xdq5&O?YyACNpDv;&KJ;(z* z-hKGPADTG+Dw=?BNn6>ks#Z6yix0#MGnknhIeAVi#$5dTC_1;tDM!k3G?*`xeeh3 zg8x*$#f11gR(w>5Y>L(WYsvd_i-#L9yBLunnFle;6q&6?BbH-GMh(^{8_P_M2r^p@ zJC-wQL^03$?n&DZsnN;17aP)YOsl~<72AzSJ5r8Jt3~?eY;Eqt66@(t z+l)+^(HXgzRGB&^8OuM2cK59MVazW;61mO%!+^53=B^)p3NuOMo5<71w~!OaGssEg zS>!q7+sJp2=aKIrY}u#Ko<_!y6!HS{edI-C965`ye&^8s06C9bNZ0)k?F8U%S%-^Q z_!07B4 z@0Uqr@o?S)xc#5cNA)lz7jTLE@JBmk&vZ(?za*PW7IKNA+uR;r>iwc@pqw5g^vq*W zoHEs11+uc>=kV*)dj)f0tX{&=;%>;N#THnd@@kJjaxH&sX$hAqDV_wX)`Ku5+L>^*XF!3)_nmehGOvJ@F%H`)JyZjGC9h;}}pD9mX2A$0KOJh8)GpIma;b z?5KGidAeV)`7{>ToS8I+&}02+0oagLSeERHdqR zT%MoYz$G{ElXV;oYM?q*9lit!>5uuF@W_!@r0a7wW9D&IxGfFvr*EE5b|`7*l0j~4 z-vs2bKTr!18A2f+q{-df*!E4pe+dVrYQA-54xA+KjZEdRjw~e(enC+AA4mAh_e^}R zZ{@#AM{`pX$BsgtWTq$fa=Xcm zPyvZQ6;F=w$4=hnW;_GB)O+8ayCC@@M5+QjngNL9M;B5TUQq+Fyjhegdh=~aUgop;|K(%mADf&2 z+``K@w%*la{$R*T{u`Syd6P@LQbL7#UjuEm2!9@|)M|7psZzC3q1VbcN+mMcMwLWTxlty^7Oh04l<7nd?XvoP zk#M)azhqn3-52_pLF4gih;#F2jU|hIw`@h1Kj?J3zUc6Ed;0s_{o#%eSJ2~&L7ao$ z2c`g-I2_0Qo!A1fbpyVfe7*!#w4k+5pHda~h)Op*TgDRCa)QCm7R^@aJ z?5t%2>fBbFWms+3dQAa)W7HuwTJ$ElyR+S3^?4k=urXk@Ylf^ATht&js=QKteW0!- z5OubzY-*jUwnEiVrw!TFMwz=dsOk5&HE6VcZMRL+(4g)Kx2tXK;d+U`r>#CH6-C^o zrBZosr9Gw;vtcCP#~**=bfrqMCr+5Rm#xdqt%_ok=aL7$o~|%7!D( zTF+2Vt47`#t5Znbrm(nfDAZY3QR{8-8$;okO5EhP>b>oesClQ}qX>0Hr20r$=dJig33?W^)a+##{koOPxrq?NQWs8=8Wq)~-Q!Q$*Qk@mRb~ ztu~!0DBsqt?rL*8oN8sALv6GSnCqlnRi!>pqs}r|8?86VJwcs+r!wU2QQBgq_S!(- zwr=S_t8>s5^EurnPt2f-w41uEwS!YrbniUaQlqF+)_Y332gQv8gEEodQ0t8Ny>ju8 z!Yi&DREz6F3R{=n&_8H(8vE5=SDW3?5*iY38*GrZHp*)29G`O3l0Y2ngUU!mWN=$G zjV@Qas#4;M$zwfD=4hi+J=7q!nrxL#6_%cUN4KP_x5pS$RLBf+dGnCDz5;JRmE2(L zsx-S>*o9E(eEQ~IYsB2A(NI$X6u_(OZxIzQhM%!eH;+yTAXw0kF>NO0Ot{l|?Mr0m zdnG3}0elis^so?W=&ywE9E&n`!B8$fPJitpynWy0i{gJbP}992A~$XZ-od*SUq!9Ax_HtNy;y!@GMG(jT;LaqvXc4TS&lbTFTC*9})20Gg0v=kk74s$? z6bi1NU9bM`tk6qF-Z=&B}|}hYEDKNudjh z=!g>n)Tf1f?l6zJavWV*1kg@zG=PpMuL;rDW;(hLmQ#@n479Zt^l*@ELq<25K}+7> zA;dTLLiCQ0-q3;V(|-tC*gux9#iusmQ@=}h*1==M_<9MhG>i7t!SgiigdlyT9?D@a zTj@?(Q;+-oaszbGKrfz@*P?lJcLQY8Lyb^Ef6;*3_~a&Z(L?*tvD4l%HiDjB)ZuE5 z8=#21){#vo4fx#22Jq3a2@)_c9_u#0>2%JlcM&}5DNy0w0KFMa^LyKCV1WwRj>G7nxT0zTtYCulp$5x`#mNI!G zGb=p!g{AaL8)QRl`T)>hwnFvH2Jp*Q&bVniTm`z?0Y_#|XOO3&{AFY{y-z1|7} z>fHgw%-2P9x|P+DhvkCt{7&cuI>rtWp1h+>q~kEXz|2WHzLGxShAzUrTR<;2Lit@f za_#~A5MIse7@u;34#wa1KsC_7K3vKr4=!c;FyQLT=?h)xsr5Zj3@*0+RWxaa9{R5) zbj~LwgB41M2gzs0lRk4}|1XO_j^aOe*ZQ$2 b@b~fZPWCx?E&0mHEo9`R^e-~`aM7Ou^MD3& delta 7012 zcmeHL3sjX=7XJ6o6%Y{a<>eI+0RaJ#_bUXw;uQo5L5FKgruBm0{v{ua;b81|*(3xbub6;XKSu@sZX3f#>e*3)7 z*?XUT&X2KwnDL(7#ILa1f)G%5-59`Yr1P<-aAM0P+~N& z7P*D?urK6?sERc^wT^fS#TO#*gQRAp8rv}gk`};7mXtC`&a%lt;J`uU(;(Laqitnk zz&U^(7zZ>0sel{M1h@cV@Dgx55DSa|q9v(Ang~HuhJl9z%We6Q;3yy&NCZNF*?2Vf$_G&BJfNg*p{CR zz84V6E(Om5<^t(J5|9tv2b8hp!za&~07(pemyI)QEXqVGu;o_|D&J<4TfuEW$Dq2k zkh|nM&%+=8zB?L*twMgNVV!rQ_(BA`?h@P)1JMWY1;mV6u&_iNAgt>Q2nC8HlxVl^ zSjdHdu*vxCOu25{4t$`6d3Sh@o z+T}V3Vib|=rd)tBVM=T1z(B1{&IJn#h-5eAE|dvV76Y#Vc1wyjnaB^eWMyYbx<-RS zQ7MwebnKB~v%{NhrR{@iMT-ZKx0|!eZH`Jz&*r*Cv10cOHpe{*NEvksXYV!OG+WVNgQnzP_%tAuW(;KF1 zAugn7b`RVDSuFGs@T0(Ez%PNFfY`~ez?*9wv0Ac&FKPJOuMS!B8a6L180rL$!6h*u%l zwe?VE$)CEs4MX?p)`C{ny|NqktfDMaQ|V)*M5>e(s;;cRs7HTt`I+9X!0Q9;lMcnN zE1AJg6!5@7wS8dO_hCsy~f%}SaxGX{m3!lqv!t`w2C z$$QcjVme-IDh@tNrZ6qa;h8ZAgP{mttvJDv;PoMrR4YqT)mZl6jJ1hEvm*ThU4%V_{IHF;fWL0f0aoHZ zcEncHa6l;HiO!Nh8iOB|#bi6@Yw2ib$(Xx;hx#7i81NkMJn#Z=9C#6U33wSW11Er9 z;3RMg5UUVrkX1kIDmB=R)*>qgSy?M9DKf05JSgAZLOdshd`-S_mr41JMdxj1@)E(# z3kU!ztLu5Oibe)ZAX%!aUtPA?u&k`18NYE2%|1|AYfAU-)kF(e^PY#59Jy=#{ox;L zeTx=NteaP#(X{h=^*;_{7{}6`AZG!lHS0U5Yh6D*#jdRzu*6f*Olln|Noz$tSX>HY z5m^@?`W*y|SP_rF_7*q~i;F|#Towow!QGDFa6AKk0bGvy*Fz*p9Im&)BABvJT&Jk- z2a5oF13U%1AWVcdI719Vvkyt!=uE~AsT;X9s6HN!bB$ewB0%p(ejHew&kw*NK1WAd zFVCcLn3^yx*m4k~thaDgG;SJ{`#`6A(S8(o8`vH3x(T`%56(e857+fYw0k>FlD-gk z)AV@jI*Y(l(BLX~DLSl4wq9QY;4@g5D^{@#^{;10(iYTTLL?FqATe8#R>QAL!TI3f znB`NH&zK7Rpx*(#B-J2q$cN8^o5AzI7XOk529(Xt%A<0(axbyZ+FV)R-Xyo=*p#;1 z%DTpyc2?37&&z$ugBkWtqs7PuTUuRd_hzO0qTSPDlhUkBtL=6CvYJM)s@8GAsTs+! ziFaA6e})+&?rE%9S-i5oc1c}LL|NC9MsD<_bW_ih2goBiAvre5+O&pUZ1rcx14d=T z&697^tvNQ#>OvG_y7r8Vr*cYGdSzw1%#u#-H|;+?-$^;BQr?i+?&B4vig$K9bwBSU zo1D&lMQUlabO!T7>lRe7DFwMP^>}pEK_m63jguOrW?&_9OD%2YPL(pHGCY48r5cm* zyped~M2hC`OeZya^@cAi{GgP37f=8{Ttv>MlOJT0@`}u6^#z%N`aUAXEGwVLtm{R+ zN#A>%x-XjvYd`VnWaYH8@;8|&y_O}C@};btRdx02*px^u*CkUMKNm)IBE;#K@tTh7 zB1v~gmhUDaKmJRu68}&`p=??Id;FN17Sj&qb}dbaDX_geyVO69_SuUv*z|pBOVLT* zr=bw^YG5sUM_7tZ*o&62^gY&Ad+Z%kSbv-5YppyQ4NI0UuUS!UsA)8?3;p9=U=RZM zXl0x z42^NH%1vW9H8jVvimL%EYoG9YH@YRqO@1_0yqz4$6OwqE>4)x|jV&0>>aP0oO@5?> zI=mS^o2(ydc#YS>n%c^yI)ga0jkpO?Z5j(Chh!O0;}{PD z!nhQ`+tm~Z4`(6QLV7U2H)-w~q=ZS33;FvRihx&bWA9!YXLq*mJg0aTJCX};R?{E0 z`jU>yc&LWb`NJBT>Uq)80I`_(c@0HjzFqtRf*MV=8)@uKvDW{6N0+JQFClva`k@El zfG&c_M^jt5yr#UiX+^bRN&O0;p?v$Gqj6L`=w`O(kC7Bfw>j-wqsYkjXb@|jilULj zB}95l;$}ZeqeZs{il6$aKJ{Hr=l5~1}D9-2x7wdxZuTCMHC`UJaI~)RPI_y z9$R!|zRk{PUJu34hs4h=pfsM4Pldd17P+xEay)oQGCA|UL^>#Ml|%^d!CNng_>pPU z!md~NVq`6!l0(aQcq+VlKEIfVdF_u)+@>X1Qr*o`zH27AvNyAW(QzgZok+js4ViR= z)`}(%@V+ear|ni*mql@WRyJ*LNH7*|7#9C&hdj_27zsX{qdTOE#*1a#)Q;5*?`^Tkf)$Lp;3~ zb8|JR_*=Pf*qll<@5`f*!8Q%y-gEIbubWPBw9`7lJF`g5+w#%wrpaQgaymB_P!#PK zN?*iP1*FF#)nQh?a|UHvS5+~Wg4o9U19uir`Yg!gb-0dxT&4@i9fT172fav@d%k-J{35n5xL`T-bk{=o7>%a^UV! zv*7rK5-POnCzbC`B|W>oKdfhaJw5H{)ldPgZJbNNJ^Fholf;7JXrW^sH5--_%ykQ( zjU%P7!%Oq53)^Ki%drLc>JkQI8&f@ae>r7Y4$6;kBxsd&jb=U0=$&%0=YLRNvN-+! zjJ11U?E>OqMeqt;6<#$sr;+?pA~kUM)Y)4rHprP0U*k*}e0LS?r%l3k5Af+N=<$Sx z!g$hL5uovhMSiT3{J*`i>p=YMIhl#c34EptUgmi&GV7xqt#YsQ8E diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py index 4cffa2f..99ac65e 100644 --- a/acoustic_model/fame_hmm.py +++ b/acoustic_model/fame_hmm.py @@ -27,8 +27,8 @@ extract_features = 0 flat_start = 0 train_model_without_sp = 0 add_sp = 0 -train_model_with_re_aligned_mlf = 1 -train_triphone = 0 +train_model_with_re_aligned_mlf = 0 +train_triphone = 1 @@ -45,9 +45,6 @@ lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov') config_dir = os.path.join(default.htk_dir, 'config') -sil_hed = os.path.join(config_dir, 'sil.hed') -prototype = os.path.join(config_dir, proto_name) - model_dir = os.path.join(default.htk_dir, 'model') model0_dir = os.path.join(model_dir, 'hmm0') model1_dir = os.path.join(model_dir, 'hmm1') @@ -72,6 +69,7 @@ fh.make_new_directory(label_dir, existing_dir='leave') hcompv_scp_train = os.path.join(tmp_dir, 'train.scp') mlf_file_train = os.path.join(label_dir, 'train_phone.mlf') mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf') +hcompv_scp_train_updated = hcompv_scp_train.replace('.scp', '_updated.scp') ## testing htk_stimmen_dir = os.path.join(default.htk_dir, 'stimmen') @@ -304,6 +302,13 @@ if train_model_with_re_aligned_mlf: mlf_file_train_aligned, os.path.join(label_dir, 'train_word.mlf'), hcompv_scp_train) + + print('>>> updating the script file... ') + chtk.update_script_file( + mlf_file_train_aligned, + mlf_file_train, + hcompv_scp_train, + hcompv_scp_train_updated) print("elapsed time: {}".format(time.time() - timer_start)) print('>>> re-estimation... ') @@ -314,11 +319,11 @@ if train_model_with_re_aligned_mlf: model1sp2_dir, os.path.join(model1sp_dir, 'iter'+str(niter)), improvement_threshold, - hcompv_scp_train, + hcompv_scp_train_updated, os.path.join(htk_stimmen_dir, 'mfc'), 'mfc', os.path.join(htk_stimmen_dir, 'word_lattice.ltc'), - mlf_file=mlf_file_train, + mlf_file=mlf_file_train_aligned, lexicon_file=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'), model_type='monophone_with_sp' ) @@ -327,19 +332,25 @@ if train_model_with_re_aligned_mlf: ## ======================= train triphone ======================= if train_triphone: - triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf') - macros = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'macros') - hmmdefs = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'hmmdefs') model_out_dir = os.path.join(model_dir, 'hmm1_tri', 'iter1') - run_command([ - 'HERest', '-B', - '-C', config_train, - '-I', triphone_mlf, - '-t', '250.0', '150.0', '1000.0', - '-s', 'stats' - '-S', hcompv_scp_train, - '-H', macros, - '-H', hmmdefs, - '-M', model_out_dir, - os.path.join(config_dir, 'triphonelist.txt') - ]) \ No newline at end of file + + triphonelist_txt = os.path.join(config_dir, 'triphonelist_txt') + triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf') + + chtk.make_triphonelist( + triphonelist_txt, + triphone_mlf, + mlf_file_train_aligned) + + #run_command([ + # 'HERest', '-B', + # '-C', config_train, + # '-I', triphone_mlf, + # '-t', '250.0', '150.0', '1000.0', + # '-s', 'stats' + # '-S', hcompv_scp_train, + # '-H', macros, + # '-H', hmmdefs, + # '-M', model_out_dir, + # os.path.join(config_dir, 'triphonelist.txt') + #])