From bbed3402288e37370cf31e78645460f9e7859132 Mon Sep 17 00:00:00 2001 From: yemaozi88 <428968@gmail.com> Date: Wed, 25 Apr 2018 09:07:46 +0200 Subject: [PATCH] based on the recommendation from linguists, the total number of phones is reduced. --- .vs/acoustic_model/v15/.suo | Bin 38912 -> 44544 bytes acoustic_model.sln | 18 +- .../acoustic_model_functions.cpython-36.pyc | Bin 2062 -> 3362 bytes acoustic_model/acoustic_model.py | 339 +++++++++++++++--- acoustic_model/acoustic_model_functions.py | 40 ++- acoustic_model/config.ini | 1 + acoustic_model/performance_check.py | 22 ++ 7 files changed, 357 insertions(+), 63 deletions(-) create mode 100644 acoustic_model/performance_check.py diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index 0c5501e676ccbc5d6f1c8271c43cbd2ad62459ea..67b05b0096282eae98c5f71ba2fc9630420f18dc 100644 GIT binary patch delta 5070 zcmeHL3v83u75=Yd2j@v1CJl*`I4?-36Kp3Q3B)0ZO-f!QAqljFL7c>4hF~YLt%VS9 z*w$pFTSiP_rmAKs4wWU@@Qq z)IbW54Dg(|aGe;gxvw7d!f^YcaIHn18K!AyNuIFKb4zl?N;2w_Cp$HZD#By@%}UU# zf%ec~>Y@fT8i6KY9ncKSOtvPgIzYsSW_>-NXbd01yT5yTw-JY6#_0VW!h&3p6k@Bk z@NRimEEyL~xV;EWN4*Ky415-d2g37cl`d+s zb0`q4R`N`Wjn*rtFiaJ(75U*gay54dbA8{#q~`%>!xQ%kh@SzkhB|)#pxMQ*RkGQ! z35#vyi}#COx*6ZBX4}dR4@7PSv0l*d!r@r9`E*tnO5h$B7uI6Y+~O8#P;yJ#C5O~4 zd7&SNqD6JSuv_qA4?8hPPB4bvvIteQ| zZ1Pze&w-c)xXfqw?`-BVp+rSGO{Xi>eEJIju+i(;GPz(+vpJA;Gw^-b$wl-{sU*oO z5a}>`BWmnXOI4b&gj0vdmiZgb@1LB?8u+1V+(ZVQhThQ}E39ZQtKBrS>DtWIKff@x z@k?w=a}GQ!L9;2~z6SyN3jJh}HIQbdvkD_sl`Cl|ZG@)Fl=SzshA6?ZEW^r>&7?Q8 zVP!l33)kD!5n#(fo>{BD0^&UF(l@B(-OqjThQ6K7$Lq;k)S}Ks=fiXJ4}myGx0b(l zFOHI8Q%7T6OqhvqkS}E&on5z&Y|Bfg$)Z8-OPl1nPwWw zYEiN`@>vz|!EyND@}G{vZJx+AeKOouYg2MwAn&-Ed|JP#qMKUmTiZkv89kBgAMeJ| z?L_1spJ4aSn{)3TN3Y%2=iTbv*s4ez7JYJ8(>vg)edFXPI9!?lZ@nsiKYk<^n z&2$JgR()`^yh?FTPD}5t%nM#$bv=gGA1n*3y+WEgeV{IzEOpw%XOCXATs_cHb0l}8 z<(K&f-x7$JTH0QhlJFYNo^L5hLd=PfwA5O!r5EeE{n|D;Wcm;mouFKj4q1m6;c+Lz zsE|H%m_sGl$!Odyl_0!Yk;7CYK`O_)NGg+R(YFFrsbrGOc#6<(!czjujFE>zi)`|6 zIl9(J25FBZIF?c6L>DzT7-_aXL1RGw{D-$fha0kmM5h~82S=S%3R>UM-^{t${fTl{ zNoclpDV^!4pstQgdg+;}Xig3|9Z^a2?Y*ybUlX>K20Xh=|iu_xZd|w@X*&b?o$fv_;fEo=&Gmx6mcm32GY53Puc6 zD8vG)@^nIktD>F;#>?o0KT#~B)Bdme7qd#_qMyzLtpjcQ7~~wWuH4+=anR?-HNjJ( z4rO9aND90Qra9Z$OT|z8K;D5ETOd+6ou4b5UvO4q`!P-JeTohYnCOl1eCi)4q{97& z#KPe8{=a1H*nS}17ifpjD_ERkGXQl%6pMbpV=l4-QDfJ_sqBAUe4z9EiM>%nq*S+^Zx zMPcr7xvj{rBE#gbNH<N4Ip!aGjgEQ6(5ub*!Ps7;V!WakHP|;6wg3f<6e9U1C zQTO?mbfbcTV;w2{yL_(LL2z;>hpUbR&UCqm>x(a(G5)cUbd6g0kcK*4pP3)aVroJm za?rE+%XE6tYFuzDX-i{5f}!@Y;x{+nDLK@ib!p@a5l?a&ndF_h_RiBiFGOs5jz7!% zX`g>N_(fCS{x&%#zO1B2^2>2At)ql|{9=IH26V-K`9hWj;*}_|_=zA5H95Nn-5z(Z zw|IloVww>k8>4URz%ZY+z@Tx{S+ zud^cQ@eMjGE{D%MXz$bOT6|r7&h7>WcZ}J>?*#IEFbXs zc4SR=|8{Yk&t2>6bJ*-&J9Egc;JIz#42wr9cWm8S13{5-)jA!0J%aZct%xzu<9g`$mwhUUJOr3ynbkmBX#7E{M zK6)0mu&_tAFG4h6x45|&QCZ^LL*n*fE@s(>MKeEa(_n@s)1C8gVVL1FF4=DL>phQq z&%Ni|bI<*|FY)d%{y;GcA1?{vnFNw~mBMOH7B4#YEsN(kZXyF*yo9UYsf%=StwwTHyWR@a|5haLah*E?RQHCf-m=H9v z5_{S=ExOn*tHwUmXKc*F*Z6}l^+u6sE(qyD5QZ=ab<#(QL13z7HU{KBZGl=8(zE^( zPyVMPM2J`f$!LodFQTbY-0qGBjiC1LMI%Tm+J?)5JSba4t3h#4Hf#?H9k;#CJPRm` zeacPDqgW}T1EoY^>%gh#XKUexqE1RVmF!nYY)J4txI$cV1ClOs9owT$md#CuKjW9f z8D1+MLNjNfUTOADh8Hur5GMa0$Td1JChiQoo?JX&NgFux*&or)5@q*wjuH;8CXR$1 zZYrs~ygO!B@Zi~+E_3@OK|{HT4xLa{crr~=%G?0r$-y3QX4b+Pg%+kW>tu`;qKqY- zcn1<+!R9Qpln$42P7v=xVgznRSIZa~j`qu$37j$gtO9Pt;B^Z`DWwn1fZkYlFr{(? z0vp^>nP^FmK?IwU`ar{rU?#~--a~IP?t=1#0_vZf0HHda=qVEH?$2gMaO(P@Hy_3M zd14kmD_;iA{Qh7GvcHo7T4%{5RA}>IMuWx`S{49KZG$L*RJ1L(K_qxaV~-BHS?G_S zW|kjW{@rQzbL_d(+FxqmK$c!K>vyo=H;u*VODszvXfO(HjgFyJcKAw6O0(?z^h>?B zf4I7@<50l!!!(Sd>B(seOC5XbvegoNJSF|j(Tnx_XM{uX?#ywq1WL{MpokJr@q>i8 z@2&HdR1)U%m41E&_A3!p2r3C?L=9qgY#q{8L_Gpb@w%0*ex)7zHbgt31K~h)B3?wS zLChMUem(MCi0*lEn20xeq9&!OD5Jv_!a5ywUdEKjQK|GvBB!K@_Z{N9qBE&AQe5c{ zjirg=y-0ilo72ovDlxoY4AW_iqKsLlcd->F6&jd!Snr+M+u6MnS^1?@g~q6;(V~)C zBwiE2t%z!2Gf93qvw$;lk0+^Ofz*`=5#RzZh37P9#;7%@jF z_M^X4|2U3e$l)9Fz@jgF3<hqKe9I%{d__Z$ujIPWWM`3|OuT?u7I=fn{?a-+0j$DUYqxoRsH6~Pq zpH>x$4>41b#xv+%;AUoiB%~DG7a>)4AVFtbH+NyhbA?K{{1088yeDf21@l8|5NGXHP#yWC+6q>;uPc>t_+_&DxDGzt~GrQqYYz8 z?K*W{L8n2j)8*%>4H~;$oonc{>pF5B`GuN-tz%$la>?6!x?i!b>FIX7?C6E@=4aSA zii;rJ76+Dw7O$(Zm}gQrw#vq6d}}a$ulUya8cw$E0c(o_x>}-C4_J&gC0KZf6<=lS zF53@sM26dI)t0Lp8jCde+mfd*Y;V)*JJbbQbX=#+FHjqD^9$7mdwX7|Ay-q_*5=r1 zTG;)tkof`0*|50|-m@D+7ZS?mgx)pJtqK6ET`r@~2P)v z@&7p8$8}G{;C!GFZ02uo&fTwTSW0!1+P_kt( Z;Ch=Kf#eZ`*X?#Q*q12rN=80pe*x>t@;d+k diff --git a/acoustic_model.sln b/acoustic_model.sln index 7a60eb3..69850c8 100644 --- a/acoustic_model.sln +++ b/acoustic_model.sln @@ -5,7 +5,21 @@ VisualStudioVersion = 15.0.26730.12 MinimumVisualStudioVersion = 10.0.40219.1 Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "acoustic_model", "acoustic_model\acoustic_model.pyproj", "{4D8C8573-32F0-4A62-9E62-3CE5CC680390}" EndProject -Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "forced_alignment", "..\forced_alignment\forced_alignment\forced_alignment.pyproj", "{92E4D819-38D0-467A-ABEE-09662EEAA084}" +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{3DCEA49A-8FD7-4255-A223-573DCD2595E0}" + ProjectSection(SolutionItems) = preProject + ..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py + ..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py + ..\forced_alignment\forced_alignment\defaultfiles.py = ..\forced_alignment\forced_alignment\defaultfiles.py + ..\forced_alignment\forced_alignment\forced_alignment.pyproj = ..\forced_alignment\forced_alignment\forced_alignment.pyproj + ..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py + ..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py + ..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py + ..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py + ..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py + ..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py + ..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py + ..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py + EndProjectSection EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -15,8 +29,6 @@ Global GlobalSection(ProjectConfigurationPlatforms) = postSolution {4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Release|Any CPU.ActiveCfg = Release|Any CPU - {92E4D819-38D0-467A-ABEE-09662EEAA084}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {92E4D819-38D0-467A-ABEE-09662EEAA084}.Release|Any CPU.ActiveCfg = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/acoustic_model/__pycache__/acoustic_model_functions.cpython-36.pyc b/acoustic_model/__pycache__/acoustic_model_functions.cpython-36.pyc index 7e0973a8b8867e9c90853d2194f9f6d0416aede5..99ef1fa1494729ba8ab09edd98170be0e7edef36 100644 GIT binary patch literal 3362 zcmZ`*O>Z056`k)dk`gUDmK3<{GEIk@w z%C0h36LYT3RaIH6v8avjnB&%YJ7}YAG5fDd&|wa9QExDhwNP)eHtV47vJJM0`WEZ5 zTd22~`c&y{y@GYh@$`cyANRj36JGWopQZgWpYS;8^Q6ejeiY}EvP$FdJZH&xx_NWp z!P{RQ!hGd&QNHgIqhlDZ6Csp85Kf`&#W{R4D z1LH!P2pkwFTmMw}HqP<->Dw8#PR1KlywME@lPs>%JS+E#D{=oEP6=0bhXbCU%UucA zFwH^&oak5HMWJY2)l(hyj@CxqM!BPQHNK0PSfhFTD8gHKBwvrvsFaafgBLTc(rT5d z;NXLqe)(RlfrIbPj9MS*;2XGH8`n5=?+p%_(+`#$lKjXAk}D+poScC69}eioh;FcmroNrW;(sD7uv?9bF%mEo(^8p0PL4Z!qM0#3jD zSb}v3unf$Z7&8#ASi>xY>wcxz2D5gRkv6CGX06xer^-zKW?zD6Hxf|m2t;T4tD7K7 z%_;qhVHywps3?+%uP_}amnpPq&lei6rskv?eDrJRm6K#~&JeK@UQUO8Mdv*3nGLoc zI-E``bc6{<8c+~w?ZxLL!}x8QHpgj}lz}UaAomD^MntFJc?PDY5pnem;7^l9lpxzAJ2@I_PS}|d_@tpaSlKv1hmx5MfTvK~ z%SW{i-u_amt;UdVW+vc=OPCE;JgiM-?~(?qwRL8pZ;T9@g(^OL4u(q??E6mvGSI#l zCOnbkmFM@<5<-};C8Jm9^B=<075w~1=rzQa&~qRCH;|X03yU0@v7XJ}!=k{H`$*{d zPcR@{s3ni$Dq)=XD-6Q@CfpwMi#56F5f-hqfRw;kVC?|)4V6E@{0eR21?4Y^_TSKG zw17x|O0?G&nqV-KSqOp?P(1*e8dG-_lo~QTsl;b}ZDvTs4Q4Y3*~+TS5%L_+x0zep zXEuMxyrzcdY}Oh%K;E4zHFDb$eR#6nc#^f@htBk||D?&ge&jE(UgRanp#S?nJ@%{I zKYI2D%2VeNau(9ndP)+;Ma~(2A7?{Yn55yQ6rXixy;glr!YK4O849f&PMvS}XYRKL zGw-47NN|&%2tAsg$i+RrpHm`>l z#5?G&4N%hYmLqA_$l8ar0hu4%U5mId%fqrj4h-H|A77e4Iq8}d-o9pDxY*>6SW=S8 zD*6^;@g5@5L(LyzB-UvhKaye+Sjmxpp>ZQ3DH16n8|foiG`^2WtVKpyNa&7?T3Smt zp(c9GYio5xqP!1+dk(l~RWt$#W%gpk#)w&4HTu|xZbueoUFKmm^=xpmg}+9($L~zs zE=_s2*#nQxPas=D&Z4xdefwZ>hIX4uT@8d)K0EMt1<#B{t3mFu?h{lto6jqgo zarvU>2Aw52D5D*$cMpTsauz(20uoqKyAKxiVPIgzfh-T{=5%>eMmFOHr6oVUf=!K< zNZq~zZjhpEsasGIXvrs-@7?C_lIePR1yc*48I|%x!l8?}ymvYAgmc;2Mo6sPt?7C` z87Gjns2cL0(U*)-j@y`xYXC*7=-zxqhMT<8$owtyhm{x?)?Y+e%B#!Bas*M66H`f<^G;T$ZpiYqMsP@R9}f;2zei zf}Q`sga3g4fP#1L@unwF-aPwW7D};!eDA$)=6&&0p&o z@0lm|8y=YN_JH@zANyq2Mz7!zSO`b4(^y5-_*q)js0D&nBJ;_)ac70nRf66rT*9o0 zrpXIE9H*J4uB%8@)W)pP>hh4oni1~LJ(34}GWMGz9L4TZJ0LnlXl%3$U9;--A6=l@ z&C*;~GANIWyj5--Vq{c8k ' + model0_dir + '\\' + hmmdefs_name + subprocess.call(subprocessStr, shell=True) + + +## ======================= estimate monophones ======================= +if train_model: + iter_num_max = 3 + for mix_num in [16, 32, 64, 128]: + for iter_num in range(1, iter_num_max+1): + print("===== mix{}, iter{} =====".format(mix_num, iter_num)) + iter_num_pre = iter_num - 1 + modelN_dir = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num) + if not os.path.exists(modelN_dir): + os.makedirs(modelN_dir) + + if iter_num == 1 and mix_num == 1: + modelN_dir_pre = model0_dir + else: + modelN_dir_pre = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num_pre) + + ## re-estimation + subprocessStr = 'HERest -T 1 -C ' + config_train + ' -v 0.01 -I ' + combined_mlf + ' -H ' + modelN_dir_pre + '\\' + hmmdefs_name + ' -M ' + modelN_dir + ' ' + phonelist + ' -S ' + hcompv_scp + subprocess.call(subprocessStr, shell=True) + + mix_num_next = mix_num * 2 + modelN_dir_next = model_dir + '\\hmm' + str(mix_num_next) + '-0' + if not os.path.exists(modelN_dir_next): + os.makedirs(modelN_dir_next) + + header_file = modelN_dir + '\\mix' + str(mix_num_next) + '.hed' + with open(header_file, 'w') as fout: + fout.write("MU %d {*.state[2-4].mix}" % (mix_num_next)) + + subprocessStr = 'HHEd -T 1 -H ' + modelN_dir + '\\' + hmmdefs_name + ' -M ' + modelN_dir_next + ' ' + header_file + ' ' + phonelist + subprocess.call(subprocessStr, shell=True) + + +### ======================= forced alignment ======================= +#if forced_alignment: +# try: +# scripts.run_command([ +# 'HVite','-T', '1', '-a', '-C', configHVite, +# '-H', AcousticModel, '-m', '-I', +# mlf_file, '-i', fa_file, '-S', +# script_file, htk_dict_file, filePhoneList +# ]) +# except: +# print("\033[91mHVite command failed with these input files:\033[0m") +# print(_debug_show_file('HVite config', configHVite)) +# print(_debug_show_file('Accoustic model', AcousticModel)) +# print(_debug_show_file('Master Label file', mlf_file)) +# print(_debug_show_file('Output', fa_file)) +# print(_debug_show_file('Script file', script_file)) +# print(_debug_show_file('HTK dictionary', htk_dict_file)) +# print(_debug_show_file('Phoneme list', filePhoneList)) +# raise + + +##os.remove(hcopy_scp.name) diff --git a/acoustic_model/acoustic_model_functions.py b/acoustic_model/acoustic_model_functions.py index c742dfd..04e19c3 100644 --- a/acoustic_model/acoustic_model_functions.py +++ b/acoustic_model/acoustic_model_functions.py @@ -1,9 +1,18 @@ import os import sys +import pandas as pd + + +## ======================= user define ======================= repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model' curr_dir = repo_dir + '\\acoustic_model' +forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment' + + sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir)) +sys.path.append(forced_alignment_module) +from forced_alignment import convert_phone_set def make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp): @@ -61,4 +70,33 @@ def find_phone(lexicon_file, phone): pron = line[1] if phone in pron: extracted.append(line) - return extracted \ No newline at end of file + return extracted + + +def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out): + """ Convert a lexicon file from IPA to HTK format for FAME! corpus. """ + + lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation']) + with open(lexicon_file_out, "w", encoding="utf-8") as fout: + for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']): + pronunciation_no_space = pronunciation.replace(' ', '') + pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space) + if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk: + fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk)) + + +def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out): + """ Combine two lexicon files and sort by words. """ + + with open(lexicon_file1, "rt", encoding="utf-8") as fin: + lines1 = fin.read() + lines1 = lines1.split('\n') + with open(lexicon_file2, "rt", encoding="utf-8") as fin: + lines2 = fin.read() + lines2 = lines2.split('\n') + + lex1 = pd.read_table(lexicon_file1, names=['word', 'pronunciation']) + lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation']) + lex = pd.concat([lex1, lex2]) + lex = lex.sort_values(by='word', ascending=True) + lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t') \ No newline at end of file diff --git a/acoustic_model/config.ini b/acoustic_model/config.ini index dd7b4fc..e11c611 100644 --- a/acoustic_model/config.ini +++ b/acoustic_model/config.ini @@ -1,4 +1,5 @@ [Settings] config_hcopy = c:\cygwin64\home\Aki\acoustic_model\config\config.HCopy config_train = c:\cygwin64\home\Aki\acoustic_model\config\config.train +mkhmmdefs_pl = c:\cygwin64\home\Aki\acoustic_model\src\acoustic_model\mkhmmdefs.pl FAME_dir = d:\OneDrive\Research\rug\experiments\friesian\corpus \ No newline at end of file diff --git a/acoustic_model/performance_check.py b/acoustic_model/performance_check.py new file mode 100644 index 0000000..a3e66d9 --- /dev/null +++ b/acoustic_model/performance_check.py @@ -0,0 +1,22 @@ +### ======================= forced alignment ======================= +#if forced_alignment: +# try: +# scripts.run_command([ +# 'HVite','-T', '1', '-a', '-C', configHVite, +# '-H', AcousticModel, '-m', '-I', +# mlf_file, '-i', fa_file, '-S', +# script_file, htk_dict_file, filePhoneList +# ]) +# except: +# print("\033[91mHVite command failed with these input files:\033[0m") +# print(_debug_show_file('HVite config', configHVite)) +# print(_debug_show_file('Accoustic model', AcousticModel)) +# print(_debug_show_file('Master Label file', mlf_file)) +# print(_debug_show_file('Output', fa_file)) +# print(_debug_show_file('Script file', script_file)) +# print(_debug_show_file('HTK dictionary', htk_dict_file)) +# print(_debug_show_file('Phoneme list', filePhoneList)) +# raise + + +##os.remove(hcopy_scp.name)