From b1b1942fa0372fec88a7e0f06a98699ebffc5277 Mon Sep 17 00:00:00 2001 From: yemaozi88 <428968@gmail.com> Date: Sun, 3 Mar 2019 02:05:37 +0100 Subject: [PATCH] test on stimmen data is added. --- .vs/acoustic_model/v15/.suo | Bin 96256 -> 102400 bytes acoustic_model/acoustic_model.pyproj | 2 +- acoustic_model/fame_functions.py | 27 ++++- acoustic_model/fame_hmm.py | 143 ++++++++++++++------------- acoustic_model/htk_vs_kaldi.py | 16 ++- acoustic_model/phoneset/fame_asr.py | 6 +- acoustic_model/stimmen_test.py | 15 ++- 7 files changed, 133 insertions(+), 76 deletions(-) diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index 1238af17ce169a064f0f0a3453aecbdaee497dce..712c255714ef6ffda4e8bb7262540e56cc859336 100644 GIT binary patch delta 6626 zcmdT}d3aM*7SDO##5aE1xcQ*-@nxK1fM!XQ&5#`d)fAa*ClSOpmt-`I4 zjc0{Bpcb3dx8>eP=3Rs7he|BrM{*D-?2Mh>K~U3JFsQYY0((;}mIA#IiXKC6eC^DGjA_6bxLXz_cWrS%2D`6sGF#$J?Gis_yVTWAsiGHToRf-{#2r29FeLo%3foJN0UKe_&OiM?=r)|d&#^D&xOXfuum?`CG$FnpeGC@Fdb=xegvkL zS3)G&ms)<8CtXiqYEuYIZ5AOwZ3F4tE{CLvz|`_WgsD_5D&{tlFp|KH`f|&wQ%CkZ z!o}2T$d*oEY6}P%1OtJo#Zi?pRd4?)1k!5@Na-g4d{>iqyN~YHchm|J$ZlD|`+n)%> zlxJ_UY%hP_<#++pB6LAMm&Y8Y#J~$KYG5N2DlZh1oPyew=^Ac2jcQc{w5{CffA{1{ zlSktOcmo@>8{Im=<>=51qZY#nKIe})jJ{X7($ia@az(v)3{C+?V}rhhvl{)5z=qhC zklSbp)AaJc28IY8`(e;=#5xIqKJndgLp{T3$gg0-h>fneR7_1e9$ZA@eM~-=6bjn4 z)cAJ79fWnX+hW&~vVpLX@LK|}6)j>imQFYO$YH~S%7a|IXL>R`-9BSbVJDfMAv~`< z;$|<9{vu&N;Q+x)c!_Y3@Ct$3aCwz<>f;Ny4bWR?LtCNfI+KS-O<_WJw|j`-P7_>) zR5^9h$KcXC!RlKEi*Ff*@iIi*Ne+^DhZ9@fvl_rEHii)r?{Ult8%d8=GTX! zbB+kB@a&uxXuyUWHoEgd+(m+WjNpzFTtg4xp*V|tW~c@{ONs#x-EbX>l^wDv=Onlq zi-Uoa1{RiN$j-)1YDvfft7M)Bk+N;37u-_?ceCKO3v9Q^w$Wa2EnM4Ps*~Fnt^gD@ zG=aG^0kIg1qbACB}c#9+|!-Fw+R-76PoD8|6@Uhg4myxRHQ_?VXQmSve zXxTv13EoQ2SIg&a3Kd+pe-9twx!b!#vwiJV5@bQXoL6>4jp8!u{D~nD)TF$J&L=l{ zmI^_#PXh|Y}})U1aUU<&Ezh6b{C#o zf25y^qnfi4+g&??)YkU7Kbe_;(dA3VFhvTl0alFFs!4b{#?&KgIK zc~q_(lMReDlCRO{4*X;48ii7u^YTWZ|Oj8EFWYWjJPuYD^2ytb39ijvJh@qqj#iyCJrVljTWupyYzxQeDR8gdZ(*(KzMn5U6!M%nvM+l+bvqa!aWX64!IcVcTs`O?XA5# z-Q%7IxUVY~E85c4){*)6Z?nQWX> zwvgOPXeYP`NVuEuJ4zi|ZjehA4v-zVa`LtM$CVme_L2Ss0lky;vCsB-zz?6J`XNdq z!7r0Sso)zlx5jF$d3gcGM5ef3Yowaif!PnvH)rJbFf1hugfKk6dw}b~?cqHvH@e!B zpb-mAc4_n!$dN}q6bhJfDhZFB(O^lt0mX+!_lE+eH7B5MR~p(T>U5cP`w|y?8d}r% z$;-RnyF>dFmQ6IFde?Q-!2A;!I17<`Nh!oh*lN=@=)44SVyzR7z2uK>;#O zpvxGgkFWboJmBBI|Mt&^7->T>q~Wqh-jB~Dl6uPOW@}DfhDD(o7d*Nft@rMc_v~B% z2qkH{-9Jv$Ui;{a_x~_r_V$D$IW7v(SX}s!;gSJ&B$yIDes*Zl375??>Zr>l z1o!q3_et8*{;8{R-s3crzZI#``#>0$JrgM#_U{Fzo&w>3Ui&bS%!TDR%si0nI=BIM z9E=3O!~^NF_n-|t`(OB(7fex&zKaP3kBYH_Qj0?tzh4XJnV*Ww_iMmKrrV@3SzyE& zZ_GyH(V;*m#?i5!f7?Lz9=E|oeSTLA+P4*V?6Qpb9glMkC&3cA;qXTAoS?(kcHAb5 zZ;>J%x1pKfeOuIiL^+@HQ9rs1SJIZEyb^{#Y}QH^3k;L8W`GWZOH4G1IZ{y;#LzIN zzcI#r6g=;WD1_gg)0d*P>@i=pt8rk3uUm8x30 z(#)sH4KYf#Ojw4t&qVZ`&`T$hAR{7;j{W{Lx-iutsoe-0@mO07Zu&YMUR0`QvT<_P z{jdk8bk)*HkdOU7%g4+i{iS3rG{FO8zJ+EJjf1<=`#vfPkg~1`ETHms0qw1Yi zH8qvBi=379PH9CvB|s$=he~zvFsipEBtWr&cv4EXH=x-sSz1lFvd(niWk{z7Kpcl$ z#N~`3exr0g9wsD41YD{tUJeO5lU4RTVD~X0i37nVwWz`NbO_jlKr}UT6jgIbNw#Dt zh_0eYtM}a{{Xz9BqR1Z=8=*NcbPb1HbRe-Tt*Wl9rm)7Oc79?|a+7nKb<$1J>Ui&oQI5 zjyKQuMGXxhD=-ATv|2;-eB$@*`;FivLB-y?0@A=lc*pR8-%h{9-tXv_zd!94?mbCN z1`!9Y(M$R?SgxNQy|lcha?#R;+QrVQ6}3G1iQMx@+A-14t4b5%AWpIk265Xe;>tT1 zY(e~{v?hu+g%Vdky&Z3sY^z{`G+`BmU3nY~Gu^CAfd{-m(fJEo%?2t`R08C|%@>A( zeGzS3QML1Kx~F}I7BmDet*uByG67Qh-wMDykAQ(cul>d7ovCQ{p7eH=;w+dP=MsA2>=8p*pXFReI^AY2jt&jj6=Kgs z2*Znebm713wtOWzuWO1p34m}tKQ z`sz*QeKxO@m<)qL=l(gITS*g=Av>hm_p!8XFbtI}S}iL{faLw0g z?!SO!$^bfFzw-Lt+N=g+%*EH^6^&Ag7U(;&OM~8RVtDT^=41EjZ0!FgYBM+;-~(Hd zaqO(5E1o#a?WEKfEBL=z_Lk8t@{~EOvn~TRt0m1qs-E9{xfA?qZ;#~5o!Nb8#Hp5dO61BU}?1^ru2cg}KEoeH;}G5P~Zi858- zi|I0fgmp8QbuP2QcHbXUa-@gKK=X^@>SqWEmh2WVbQ(s&S0H_9hg&*tDS~a#+vVPo zAV{tx$m-0R1Jwe|lUAA`*LO3IzudjR{~sMYrBCdzQgW1nPD;2DKF5>$lR8ZmAVx?+ ziPDLB+g#Z3Yy3@w*57yB1VK{Ajr1j{cuwk%A-f9bqjaeBsT~}hx;n^G2TcC~r$EGg delta 5306 zcmeI0dr;KZ7037dSf1I{I!OhmfP~82~*J+zs-; z5D*QLKmh3L*8n{T_=9-xb8tT}fK^}}7zRZDharaool*}|sivxKqG1dSBY_D-DUB;3 zjbIR%4no00KnwaFJ`cK|QXkx!8``kYt1%n;y(!3iKcb ztOlZ8l(CAQ3|Xk?A|3@|52C|-#W)($tmq|*oB+9`HFr=>Ig|=er8J0v)zE8{`ZCDn zie9J4Mu@X8e7(!8(E6@E_7J9-4TV2)vM@h`?Kb z2)d~E1wI4yzQ7B+C@HY{f2n~Ef9fE17e&K_=cWw3qzM%ybmtqk6-30w9uo*lJwt%fbv~7pnq3CUpJ3%}6 z1(3lm@C)X!u;dyX z$Kv$w`#yj%d)+tm{=~8;=-j$`BQdAl$y*XBAW3#r8BaZT@QA%4bwQ)tB2N76lk`j+ z9wznI5#yc`JDA<7n5q_cBis04Jabg8rB-&Maw^LTHu*N=aVJ>q(rIdk=#bf#aM$B7 zJuYTOc6N^UuljND=pQ~-ez<2i8eCkREHy-$S_?EjKOJ`@s4`b~g z>Y1(WZDw`pbf2_e{=>l=CRN+9p^tu=awUV=T>7wCSZ;!!N;TPQ?|eomCD434Dnq_U zqDH*t=*iaHQQ`)q>W=q$Pr`<3!&-$?7{V;cd1K?GloS5tK zbSX#7BW|EIU@dqI2%GSqxP+R(dawaBgD1d7&ZQ{wkHYLEKGq9wFJL?wpiGfg~0r(yBLACco&`mt68;k|pzwb5GK{ z^^=Bub67KTW+8VMT_u<15IYnSpfTe~V?i2Vr#1S!)1gcN8NdoM!9xyLz9{8t{}nv%tiC#JidQ=C5Xeo0;_xema&;tf-$ z_{O&ki#f|`=S>S(4wF_f6K7w{o8EJ>wxPpbmfJRsQ`C9*xD-g@YeT%NMXX(K zsomE-0Y0wPo(N-aSy4YWF(n~MnXCj0h+zdc1Gr63I`&3wyvAfnNJ{Q~)N*EB~c5z{#X+yR@e`^YE!*vvL7ZDg6lGuSiv(eUU6u$H-6C%Grdhp2Pi zJYt@v7`CrASu&Z})~3n4B^67}u8qITl3+^hUFj;Z3egb2(i$!Bu_<9}VsBHC*djj? zLKbhYAA%Sf0%}|(rtfaYnmh2XSIQ>aQjYX-qqpmI62RUITzqmE*ME_@l&DqtQ9vi z^9zUFhhLsZ@?JHQWt-d9*-k%d=J_j`=kEU9>*Q0aa6w`dp;j^MRFIRmbm~KstPZpTq>n{vK>Bvp$>6xg4X( z%fhE-9=;H(RmE=SpPb|!am!>=N>9YO>3$l`Puobx+ZIwJJAQsPU$KFrpk(fHenj#n zNp?zX=3k=S(N`|}xIXpl@_bI(i};!qsTYXvYZ2nH@-3+&h$NC&tQ6`Fd+Rgb%^I=> zoyJKGML%B^U!|rae1MLUne|I2%e+1tkxXVAZ|tNK{7e{ybtDqqj+lt7g$FbtH>^qI z;VlvLNM9Cdx!p{o#8eN+@2mOIV9G!e>i8=@ux=ef3;9YP3g<0GyzZ7z=kE`F57#r# zh@_xTvCs@`th<|AC9>+o<=owU_847`dgqwwuQ|NM0uOm^M)EczWl2FQzSNgqqb=f$ z+%a&MvPk>Ag0qUa`F@K3mN>YbA6dz5z2fwfoH> zbwMj?+0pti4KBS>$Krxz)dh9!~EL%@>zOPceDy(8VuR@e9_c z*T~K|Oe}L`B)4WzJwK}2.0 4d8c8573-32f0-4a62-9e62-3ce5cc680390 . - htk_vs_kaldi.py + fame_hmm.py . diff --git a/acoustic_model/fame_functions.py b/acoustic_model/fame_functions.py index 9d3992b..9ca7e0d 100644 --- a/acoustic_model/fame_functions.py +++ b/acoustic_model/fame_functions.py @@ -12,6 +12,10 @@ import defaultfiles as default import convert_phoneset from phoneset import fame_ipa, fame_asr +sys.path.append(default.toolbox_dir) +from htk import pyhtk + + #def read_fileFA(fileFA): # """ # read the result file of HTK forced alignment. @@ -371,4 +375,25 @@ def ipa2htk(ipa): asr_splitted = convert_phoneset.convert_phoneset(ipa_splitted, translation_key_ipa2asr) asr_splitted = fame_asr.phone_reduction(asr_splitted) htk_splitted = convert_phoneset.convert_phoneset(asr_splitted, fame_asr.translation_key_asr2htk) - return ''.join(htk_splitted) \ No newline at end of file + return ''.join(htk_splitted) + + +def performance_on_stimmen(stimmen_dir, hmmdefs): + #hmmdefs = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk\model_\hmm1\iter20\hmmdefs' + #stimmen_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk\stimmen' + lattice_file = os.path.join(stimmen_dir, 'word_lattice.ltc') + hvite_scp = os.path.join(stimmen_dir, 'hvite.scp') + #fh.make_filelist(os.path.join(stimmen_dir, 'mfc'), hvite_scp, file_type='mfc') + hresult_scp = os.path.join(stimmen_dir, 'hresult.scp') + #fh.make_filelist(os.path.join(stimmen_dir, 'mfc'), hresult_scp, file_type='rec') + lexicon_file = os.path.join(stimmen_dir, 'lexicon_recognition.dic') + chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_file) + + result = chtk.recognition( + lattice_file, + hmmdefs, + hvite_scp + ) + per_sentence, per_word = chtk.calc_recognition_performance(hresult_scp) + + return per_sentence['accuracy'] \ No newline at end of file diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py index 7228c00..4e2f430 100644 --- a/acoustic_model/fame_hmm.py +++ b/acoustic_model/fame_hmm.py @@ -22,30 +22,27 @@ from htk import pyhtk # procedure make_lexicon = 0 make_label = 0 # it takes roughly 4800 sec on Surface pro 2. -make_htk_files = 0 +make_mlf = 0 extract_features = 0 flat_start = 0 train_model_without_sp = 0 add_sp = 0 train_model_with_sp = 0 -train_model_with_sp_align_mlf = 1 +train_model_with_sp_align_mlf = 0 +train_triphone = 0 # pre-defined values. - dataset_list = ['devel', 'test', 'train'] hmmdefs_name = 'hmmdefs' -proto_name = 'proto39' +proto_name = 'proto' lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr') lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov') config_dir = os.path.join(default.htk_dir, 'config') -config_hcopy = os.path.join(config_dir, 'config.HCopy') -config_train = os.path.join(config_dir, 'config.train') -global_ded = os.path.join(config_dir, 'global.ded') -mkphones_led = os.path.join(config_dir, 'mkphones.led') + sil_hed = os.path.join(config_dir, 'sil.hed') prototype = os.path.join(config_dir, proto_name) @@ -53,25 +50,20 @@ model_dir = os.path.join(default.htk_dir, 'model') # directories / files to be made. - lexicon_dir = os.path.join(default.htk_dir, 'lexicon') lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr') lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov') lexicon_htk = os.path.join(lexicon_dir, 'lex.htk') -phonelist_txt = os.path.join(config_dir, 'phonelist.txt') -model0_dir = os.path.join(model_dir, 'hmm0') -model1_dir = os.path.join(model_dir, 'hmm1') + +#model1_dir = os.path.join(model_dir, 'hmm1') feature_dir = os.path.join(default.htk_dir, 'mfc') -if not os.path.exists(feature_dir): - os.makedirs(feature_dir) +fh.make_new_directory(feature_dir, existing_dir='leave') tmp_dir = os.path.join(default.htk_dir, 'tmp') -if not os.path.exists(tmp_dir): - os.makedirs(tmp_dir) +fh.make_new_directory(tmp_dir, existing_dir='leave') label_dir = os.path.join(default.htk_dir, 'label') -if not os.path.exists(label_dir): - os.makedirs(label_dir) +fh.make_new_directory(label_dir, existing_dir='leave') ## training hcompv_scp_train = os.path.join(tmp_dir, 'train.scp') @@ -98,20 +90,21 @@ if make_lexicon: # therefore there is no overlap between lex_asr and lex_oov. fame_functions.combine_lexicon(lexicon_htk_asr, lexicon_htk_oov, lexicon_htk) - ## ======================= - ## manually make changes to the pronunciation dictionary and save it as lex.htk - ## ======================= + ## fixing the lexicon for HTK. # (1) Replace all tabs with single space; # (2) Put a '\' before any dictionary entry beginning with single quote - #http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html + # http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html print('>>> fixing the lexicon...') fame_functions.fix_lexicon(lexicon_htk) print("elapsed time: {}".format(time.time() - timer_start)) +## intialize the instance for HTK. +chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk) + + ## ======================= make label files ======================= if make_label: - # train_2002_gongfansaken_10347.lab is empty. should be removed. for dataset in dataset_list: timer_start = time.time() print("==== making label files on dataset {}".format(dataset)) @@ -120,7 +113,7 @@ if make_label: wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset) label_dir_ = os.path.join(label_dir, dataset) dictionary_file = os.path.join(label_dir_, 'temp.dic') - fh.make_new_directory(label_dir_) + fh.make_new_directory(label_dir_, existing_dir='leave') # list of scripts with open(script_list, "rt", encoding="utf-8") as fin: @@ -135,56 +128,48 @@ if make_label: sentence_htk = fame_functions.word2htk(sentence) wav_file = os.path.join(wav_dir_, filename + '.wav') - if os.path.exists(wav_file) and pyhtk.can_be_ascii(sentence_htk) == 0: - if pyhtk.create_dictionary_without_log( - sentence_htk, global_ded, dictionary_file, lexicon_htk) == 0: + if os.path.exists(wav_file) and chtk.can_be_ascii(sentence_htk) == 0: + if chtk.get_number_of_missing_words( + sentence_htk, dictionary_file) == 0: # when the file name is too long, HDMan command does not work. # therefore first temporary dictionary_file is made, then renamed. shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic')) label_file = os.path.join(label_dir_, filename + '.lab') - pyhtk.create_label_file(sentence_htk, label_file) + chtk.create_label_file(sentence_htk, label_file) else: os.remove(dictionary_file) + print("elapsed time: {}".format(time.time() - timer_start)) -## ======================= make other required files ======================= -if make_htk_files: +## ======================= make master label files ======================= +if make_mlf: timer_start = time.time() - print("==== making files required for HTK ====") + print("==== making master label files ====") - print(">>> making a phonelist...") - pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt) + # train_2002_gongfansaken_10347.lab is empty. should be removed. + empty_lab_file = os.path.join(label_dir, 'train', 'train_2002_gongfansaken_10347.lab') + empty_dic_file = empty_lab_file.replace('.lab', '.dic') + + if os.path.exists(empty_lab_file): + os.remove(empty_lab_file) + if os.path.exists(empty_dic_file): + os.remove(empty_dic_file) for dataset in dataset_list: - wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset) + #wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset) feature_dir_ = os.path.join(feature_dir, dataset) label_dir_ = os.path.join(label_dir, dataset) mlf_word = os.path.join(label_dir, dataset + '_word.mlf') mlf_phone = os.path.join(label_dir, dataset + '_phone.mlf') - #print(">>> making a script file for {}...".format(dataset)) - #listdir = glob.glob(os.path.join(wav_dir_, '*.dic')) - #mfc_list = [filename.replace(wav_dir_, feature_dir_).replace('.dic', '.mfc') for filename in listdir] - #hcompv_scp = os.path.join(tmp_dir, dataset + '.scp') - #with open(hcompv_scp, 'wb') as f: - # f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii')) - - print(">>> making a mlf file for {}...".format(dataset)) - lab_list = glob.glob(os.path.join(label_dir_, '*.lab')) - with open(mlf_word, 'wb') as fmlf: - fmlf.write(bytes('#!MLF!#\n', 'ascii')) - for label_file in lab_list: - filename = os.path.basename(label_file) - fmlf.write(bytes('\"*/{}\"\n'.format(filename), 'ascii')) - with open(label_file) as flab: - lines = flab.read() - fmlf.write(bytes(lines + '.\n', 'ascii')) - - print(">>> generating phone level transcription for {}...".format(dataset)) - pyhtk.mlf_word2phone(lexicon_htk, mlf_phone, mlf_word, mkphones_led) - print("elapsed time: {}".format(time.time() - timer_start)) + print(">>> generating a word level mlf file for {}...".format(dataset)) + chtk.label2mlf(label_dir_, mlf_word) + print(">>> generating a phone level mlf file for {}...".format(dataset)) + chtk.mlf_word2phone(mlf_phone, mlf_word) + + print("elapsed time: {}".format(time.time() - timer_start)) ## ======================= extract features ======================= @@ -196,7 +181,7 @@ if extract_features: wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset) label_dir_ = os.path.join(label_dir, dataset) feature_dir_ = os.path.join(feature_dir, dataset) - fh.make_new_directory(feature_dir_) + fh.make_new_directory(feature_dir_, existing_dir='delete') # a script file for HCopy print(">>> making a script file for HCopy...") @@ -212,12 +197,15 @@ if extract_features: os.path.join(wav_dir_, os.path.basename(lab_file).replace('.lab', '.wav')) + '\t' + os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc')) for lab_file in lab_list] + + if os.path.exists(empty_mfc_file): + os.remove(empty_mfc_file) with open(hcopy_scp.name, 'wb') as f: f.write(bytes('\n'.join(feature_list), 'ascii')) # extract features. print(">>> extracting features on {}...".format(dataset)) - pyhtk.wav2mfc(config_hcopy, hcopy_scp.name) + chtk.wav2mfc(hcopy_scp.name) os.remove(hcopy_scp.name) # make hcompv.scp. @@ -235,21 +223,18 @@ if extract_features: if flat_start: timer_start = time.time() print('==== flat start ====') - pyhtk.flat_start(config_train, hcompv_scp_train, model0_dir, prototype) + feature_size = 39 + model0_dir = os.path.join(model_dir, 'hmm0') + fh.make_new_directory(model0_dir, existing_dir='leave') + + chtk.flat_start(hcompv_scp_train, model0_dir, feature_size) # allocate mean & variance to all phones in the phone list print('>>> allocating mean & variance to all phones in the phone list...') - pyhtk.create_hmmdefs( + chtk.create_hmmdefs( os.path.join(model0_dir, proto_name), - os.path.join(model0_dir, 'hmmdefs'), - phonelist_txt) - - # make macros - print('>>> making macros...') - with open(os.path.join(model0_dir, 'vFloors')) as f: - lines = f.read() - with open(os.path.join(model0_dir, 'macros'), 'wb') as f: - f.write(bytes('~o 39\n' + lines, 'ascii')) + os.path.join(model0_dir, 'hmmdefs') + ) print("elapsed time: {}".format(time.time() - timer_start)) @@ -362,4 +347,24 @@ if train_model_with_sp_align_mlf: hcompv_scp_train, phonelist_txt, mlf_file=mlf_file_train_aligned, macros=os.path.join(modeln_dir_pre, 'macros')) - print("elapsed time: {}".format(time.time() - timer_start)) \ No newline at end of file + print("elapsed time: {}".format(time.time() - timer_start)) + + +# train triphone. +if train_triphone: + triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf') + macros = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'macros') + hmmdefs = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'hmmdefs') + model_out_dir = os.path.join(model_dir, 'hmm1_tri', 'iter1') + run_command([ + 'HERest', '-B', + '-C', config_train, + '-I', triphone_mlf, + '-t', '250.0', '150.0', '1000.0', + '-s', 'stats' + '-S', hcompv_scp_train, + '-H', macros, + '-H', hmmdefs, + '-M', model_out_dir, + os.path.join(config_dir, 'triphonelist.txt') + ]) \ No newline at end of file diff --git a/acoustic_model/htk_vs_kaldi.py b/acoustic_model/htk_vs_kaldi.py index c35a42f..5297b79 100644 --- a/acoustic_model/htk_vs_kaldi.py +++ b/acoustic_model/htk_vs_kaldi.py @@ -53,7 +53,7 @@ from htk import pyhtk # procedure make_dic_file = 0 -make_HTK_files = 1 +make_HTK_files = 0 extract_features = 0 #make_htk_dict_files = 0 #do_forced_alignment_htk = 0 @@ -171,7 +171,7 @@ if make_HTK_files: filename = row['filename'].replace('.wav', '.lab') label_file = os.path.join(feature_dir, filename) with open(label_file, 'wb') as f: - label_string = 'START\n' + row['word'].upper() + '\nEND\n' + label_string = 'SILENCE\n' + row['word'].upper() + '\nSILENCE\n' f.write(bytes(label_string, 'ascii')) @@ -249,7 +249,7 @@ with open(hresult_scp, 'wb') as f: # calculate result performance = np.zeros((1, 2)) -for niter in range(1, 50): +for niter in range(50, 60): output = pyhtk.recognition( os.path.join(config_dir, 'config.rec'), lattice_file, @@ -265,6 +265,16 @@ for niter in range(1, 50): + #output = run_command_with_output([ + # 'HVite', '-T', '1', + # '-C', config_rec, + # '-w', lattice_file, + # '-H', hmm, + # dictionary_file, phonelist_txt, + # '-S', HVite_scp + #]) + + ## ======================= forced alignment using HTK ======================= if do_forced_alignment_htk: diff --git a/acoustic_model/phoneset/fame_asr.py b/acoustic_model/phoneset/fame_asr.py index 6165d5c..398d2b3 100644 --- a/acoustic_model/phoneset/fame_asr.py +++ b/acoustic_model/phoneset/fame_asr.py @@ -128,7 +128,11 @@ translation_key_word2htk = { 'ä': 'ao', 'ë': 'ee', 'ï': 'ie', 'ö': 'oe', 'ü': 'ue', } #[translation_key_word2htk.get(i, i) for i in not_in_ascii] - +#Stop: p, b, t, d, k, g +#Nasal: m, n, ng(ŋ) +#Fricative: s, z, f, v, h, x +#Liquid: l, r +#Vowel: a, a:, e:, i, i:, i_(i̯), o, o:, u, u:, u_(ṷ), oe(ö), oe:(ö:), ue(ü), ue:(ü:), O(ɔ), O:(ɔ:), Oe(ɔ̈), A(ə), E(ɛ), E:(ɛ:), I(ɪ), I:(ɪ:) ## the list of multi character phones. diff --git a/acoustic_model/stimmen_test.py b/acoustic_model/stimmen_test.py index 93546ca..f7911a7 100644 --- a/acoustic_model/stimmen_test.py +++ b/acoustic_model/stimmen_test.py @@ -77,4 +77,17 @@ for word in word_list: for key, value in zip(c.keys(), c.values()): if value > 3: pronunciations[key] = value - print(pronunciations) \ No newline at end of file + print(pronunciations) + + +monophone_mlf = os.path.join(default.htk_dir, 'label', 'train_phone_aligned.mlf') +triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf') +def filenames_in_mlf(file_mlf): + with open(file_mlf) as f: + lines_ = f.read().split('\n') + lines = [line for line in lines_ if len(line.split(' ')) == 1 and line != '.'] + filenames = [line.replace('"', '').replace('*/', '') for line in lines[1:-1]] + return filenames +filenames_mono = filenames_in_mlf(monophone_mlf) +filenames_tri = filenames_in_mlf(triphone_mlf) +