From df046ffc264e171e3914c9f559a9e4008d917e97 Mon Sep 17 00:00:00 2001 From: yemaozi88 <428968@gmail.com> Date: Sun, 30 Dec 2018 23:30:33 +0100 Subject: [PATCH] forced_alignment_novo.py is removed (.gitignore). commit history is cleaned up. --- .gitignore | 3 ++ .vs/acoustic_model/v15/.suo | Bin 79360 -> 69120 bytes .../__pycache__/defaultfiles.cpython-36.pyc | Bin 897 -> 995 bytes acoustic_model/check_novoapi.py | 32 ++++++++++++++---- acoustic_model/defaultfiles.py | 10 ++++-- 5 files changed, 35 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 3c4efe2..b1d3894 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ ## Ignore Visual Studio temporary files, build results, and ## files generated by popular Visual Studio add-ons. +## important ## +.acoustic_model/forced_alignment_novo.py + # User-specific files *.suo *.user diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index b0dbc237cecd5fcbc4b9412f889b0c8ae84ae80c..c60d6d48c2b45e516a9b0efbb6ed4fd01b885260 100644 GIT binary patch delta 6627 zcmds5dvH|M8NX+<**w@hAPIydB%6eU5VFbMeJ2F+BnpN{APALJ*lac-H6cq_7zu=| zXcdXlWIu z$6bD$@0|19bHDdFCxMgNz?0hH>Wnf;l45{wCnhG)NCK#MsU=&J?3*@J`s1gMibjmo zE~kD?zp&C*n)@_08tRYf*ZDAhD4Wj5WW_p}Wuzr-FdIY_fW;Ifnr$$>|b2?~QlG>zMAQl4a03GlRCd<(p0cPYUX9{fs zT?r%ub$}LNo_e%wl0|4201bcxSWHPN*|yt2H3Q3m6~Ibh70?2#0eFR0v{t|n?R!RA zD#T_ODWMA^KLma>(lSfzpjTqEwfCS)$70jEVvz(`3ff9wymGiKM|7Ep)S4R}}#e&Rv1A*Md8Ts6jX0jBwt1YicheDlyw zdM*WY62P8Y4e(m*ISXI_*mJW1c&<%#T^>4#0K3i%Om$rb`s}()fbA1?o$5LMunl9} zPCo3pIG_k%&)rS#lI)RH>r>e+lT6&w`UGc@7lWNZ55N|h%FvB|2VkdPCgh1dBQ1q= zHlcPf1tFgbOb2EFX+S!V0n7wu0hz#TAPdL_a)3F20muW`g89+55ba!`2q=z@@%$tf zV5|%<0_6gcR{_ET$bgyT#9KyuXM%dam`~gFgPIaSU+ee87GSc9b|x)L5Udlsi9Mg_ z1MvYzx)lC4V~5a{g6}Co{j(kjCOU7JZY(=XFjconeI{G}@^^Hbsp`fIs%~IVnO|~S z1S?nq1y|7F8M0;`E4f;mAJrSZ%xIspvmX{Y?4(w8V^(#+yt&?%)dPAAnH z*R)mz=LIO?@!F)XQRFzK6Of~RsvWR|?Ank)GurfyNX>M8^FdqB^aoC5nZG#T`?!gY z-C_vcr5U~Tu!vQwGX>U`C2Jt~`&faWE)Uh(>cvhhw2%VUaoRQDOf;35b~m}Zw|Vc! za5epTO(qTASE8|+N0T=T{RX@?88zfjF~xH=@#5!|jB3~dc z!-gW%TkhN0E!9Y(vC4$sF6-zWLePj!j_c^^EbrRV#jA8j1C3OAA|-M6E>rXg<}^@)ER#QH6}`616h4zvhIm;42e!Qy&!wC< z3e|G~pEi89End*pU=c4u-FK$bCQ7H0$C7E}#KA)`LWEj1lyA+|TPlOW#*{82e9x()w{h#t~#Hu(d~D;ntMI&Zda?f$LDHoTBEi*JpLY^Y?f{I zjTVc=Av??M4v)j^k?lR+zD{rO-hj4z+xA|zf_}=v_fXyJn$UU;S(8)4smbd_Dm&B# zTyW|=DP=kA)LwwgE|+88nJzGlrx^0C;pp(h2B8V&G-|o>bJ&IbIS-4Bz36f9--njN zL0$4CH|I$+@`McK&qT77Hy^ff_)NK8jP9)!OArj#ZxO4#(oy*xi%eQ$niG%5YP8rp zhDjJ|g6U_q$;gTDKhm`#Sbq3Q0(87Da!yV|zJTu_s?$0Rsy#Ns)o4Q~=LdBneN3+t zelh4UTHPHUqtk4&7-dJL$!Ryc+nk=tf%*!hv22k!SY_GXZZ=n%jb4kR-Ds6P?MAoB z;V^pJ+#a{b>T!BJ*2x#t=)SHO8s8R;nyL;G_u7@(B#~28j_tHd@&ldlV6W6Cd8K_n zS(x(f8w=NE==0zG(;G)g!l{N{-fm0%@K+`EFE$;0ReU`Atykqwo5Sz)nbe|%`o6>T z_S72H^%gbUwTJ@MesLFlRXr%~r0t7_C{T+^SBpehc2;nmpAZ#vE^T6pb*s;Sm0xi2 z3#0W04H?YGu3h>^LyfWVCd5Q@G%m=$@!_lH`$ZN{W?_+%u6JOYC=OX z!Qt9x#)k#f4j0f*TXRFVYZ7@+=bb|%k?Z(tbnu`#^tmASKo$)?T6y(XNuhngo<@U>Wf_9BQgn;A(<0lf3l=pbU zqJJy)LR<7xt@b7qyToeKrdW-VANr2D==J?oLbX+JXuRmU;+QrZe@`r>{GU5%Fpw%r zN&f3nntQULqLj@ZeeBUTs+#!9Mu-8Uk&y)mAWeq?w8{OSqe0G)S%MYuQ z*B?n_T577ZHky~v$U!r;{Oxu^%@00Sf_UgeJY-%u>l&=;V+p=AWoFz`@1|F zdwScw-L8u}Gr!C}=Npy71^5%l7W>#mt6F6d1C|_fY|z97v^iix*H}o?-B| zhAJ0t>)Tv)cXwUq#&)$b&R@CYlhm!7NivxOZRd?*NZqos)6@lqH*Fmp0=5Zo!R{e@>~KWY?TI zR>>r_W0elTj#bOC8lOrye-E$#Hsy4oXr%L*>9`p1iT*_%t%yiDA(YkAuzRf-F(Aof zl)P!;1@QzkY*Th8i83Xi7yE98A~J+!(H;2HMMM!V)bywdnnAT)P=u@9qb$)@WxQDA z&>uT=W7WyxeT_K8quJ|jq>kcl`;S60J|7JpZvGR&sbBT(A|_uXGIaQLL@~RwUooT!t>QEw z0Z$u*E9qJMg|-Si!CGXjcPyzyd={g$Ef6z1`!L{=>ZH4IgW>kZZ72UPMppI~<1Y1g zS1z`=VmM;|6h~B*rjG3`6RqNA)D#N>#Do|%aiFOC@zrVy|E+FQWJAa)7tf2$EaIB% zE8R^cX&#vGkLd7iIYhq+CkYFJ+l4jm-nC9m)BEx- LyLeOTw$uLvi!1O) delta 11175 zcmeHN32>9g71sVfz+i)^jbk7InTFtE3+wo0Nz@5Ce#x>e%Q`Gybpo<1Te4+I(QRRn zn!8Oxv0hTzPFjf4bW-<>)%zMJ>15iJK-(|`GA*Tdo6t_%X&cBiao-BbaZI3;1e#8~ znosZT{`>xaci;QpzJGU<4;3a)7w&6XUCiV03J|Z&&dws?5k$q~=5@@9osaIQ9sA@_ zBBI55h%<4r68^#5&U}JEydYkDI~I4YhF1#KCay!a1cCRMEk#<0*n(JxxE`?#aRZ_f zLF?2bU4wWP`L(&SGNe@q+K^hL8xik8(E1yYZbGa_2yE(px3q|RUwoJ0T6A)re@^m zl?2ijL@S~VVL%uW^qO_jT(PPK6EzfJ=L%F&&^JJ(N8W2qg)1-ObDk(B_ zxT&BLsbJE44e7)y*HdlNQVUWYf-1N@S0+K)g@B$CIoSh~+H1;2G3!B$A-wR_k_|<) z8&ME#-2i7wlu*7?S+oJo{k6yKx06(9h^&BcX@ZIK;Gd;?ibhas2F5FPmFSVXn_3=- zxQa|U*F%=l8&QD2q#EwgUYjlO(JG(UvLNS5E5D5yT4N7VY9e*gh=r&tbULO@s)S&pDrQFjIw&BAax5@NM8y_+Uhf*Tkaz%BZ>=8 zk@Pm^!p&dqM3HRsQS#wdMMi%84+sQKRY4Uy1x3#*Q_>`8pXbA-=lNxS#TS<kO!)guRc}7BOhNKZCR3S@Ksn8<{1t%-O^eCT(Pjs1n4J<_$1YR4BL?>)(gCAF&s) z4?!zTB26J4$mJhI`cc%-=SWI7Q=Ap8sCfi>~?k&umF)bzO4>QvvKO@CPoK`{vUKm~f=7wb_ug7)tv-Y3%^Do&fk#EvYTu{XTTKj4Y@teP5nV%I1mHX8V;U*nawB$mk_WY z-wfZ=DA$S1JIDN zFu?a7F%)2qUV#@6+F;+)>q~?<=h7b!o_KouBguyg(i03Q4-G*2)9XRiSqSBc?-@8awG$eq>ab~RKx)~XcqnfiDzQ#0W3?iITEPJ!R<7_|>OgZ*@L z>EI4vfzvzKA9MzTZl9NqZGI@=M$3l8ck^wKa2lYQX8et=77fQ>uu9;KF)2^w6&>6LVapmU#L~= z8|iHgu)?sju8x(BO1k2l&}9xN*p3e8pj%^@UovK-i}Z>Ugv zOlIlQD8lE@r-|%*imr>Mn5Nx7CbV@9w)rh>Zeh1uV;+}8Z6Uc%6N$>Ytj5@Y#qMur zBR(z~b~F!J?Icg1?$21!F<(I3f4rX;n z|7vC)!+$=4qpqha6*)66N@^{>>Y%%3o_{a&3fqrM? zQxX|Ig1gMQBz57yrts#sYofW}Jd;>KYd=( zZoZP9jX(`kk*VB!y8NxiT8T?=l2^({QYV+wO!c>uXJ0s?WbHyc06^XYqVqQ2`T*6eb z{Vhh!bgsBkLGl|GYyLw(oS=*E=TglK-9DgO43`@aUq;Nk<*!EBRXKmjv#K(`os(bV58(%{33iUH%~h); zZ^#udF3`*Hn7(g8NSB z=&_r*RD>Me51ebs08G7b41`C^s^~GFbE$l0Q;y>n?)B7Wesp{Z;nTwb`BaMxJ&{9W zM0(bUru{!zMmE6Ek#hLetLq;i_36(Nm^@Yk{Far#KQa9u<>4n&YoYwaO4xd$3|DBE z<~AZ-Fmw1@#eQF~-|k3#c9w)7`CJl`ZP&fqH&Wgue)@v$Y4O8v(LZJPqA$@uW$r#X zjwc$}N;rBrJ{kkTekLTw(@f6}syt3?3$8#Re$MeAw^l0VBtIKUC=D*&q%;~em%z|n+e1-gtlzDj)JRB;(e=)B=GdufI!Pk?; zLh;jd4mXTsOny8;UWtKl}R{Y7zuNN}pcjR*K HfO`5j20xY; diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc index 1057f0f82ea34feb6ac08c2606e0d64785a9860e..b05a22104e5312286a75a49d6eca1e47cfca325a 100644 GIT binary patch delta 524 zcmYk1PfNov7{-${TmP=DbN&YrVZy+95d}R7iU$!r>>!50HulEZP216Kie5x|7o<18 zfnUO#ch9?e@>}?>*0Dg6=l8sM^CZvexB9eQC>XutN_|ZyvwkKSKS$s?>&Iu>;G?>GC+>Bh9hXq)KC0LI0Vg_i9R91fQTjcEZ zqnudua?3f~ub=z!DD>`R{i1gIX!cq+;8h3V(C-OFcA^o#YTZh~q8*V>Ug+D2(|;IT z2Y%HL?t*Ho?+JCR#zJXDq0stRl#v{anDig*_U&+YZ|mQy6h|zGgwcNJ+=LD(Ng$tSCCSmU?`=B4GofX08oI|)tvUuY%=N_l-1rN%jT ROPt!&W}Fr$yGu*VroVydhA{vD delta 421 zcma)2O-sW-5Zz5SpKa1wDu{>{L?jf8AmYbK(4**~2XR@5!C7nCm`JwP>O~}XQQ}Gb z1Ntxg8(#Y-1TSt&DLpzc!+Vc;Z(s(-qw%oeIQH*XjlG%(4bW8S67R_t&vF0)93hH8 zp%_%EaMV`YgkvC}Wh&J`r}|ja!N?S9OqeN|2^O`$ruJC1p#Vj2GD3F>(~^uTPPhvt z8DGvc>P)m58Ri0RBCqoDQ}^yy_jlQG%n#DA$Jt5qkadCx!nJOC6L%PI33d@&aV9qN zWqj&2{<+NZPyDvMc*KFHQ4oXRwd>0y=7M*HA9TV-KN5Yf^sOX<+UIwz+c5SaOsO?G zok`mu#*3L-z+tE&0~e%HP$Mg*gdLLNHOwRf1jYDnuc; Jg3Gvy{{R@(Z)*Sm diff --git a/acoustic_model/check_novoapi.py b/acoustic_model/check_novoapi.py index 40defe6..35da212 100644 --- a/acoustic_model/check_novoapi.py +++ b/acoustic_model/check_novoapi.py @@ -25,14 +25,32 @@ mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_ stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx) -phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx) -df = pd.read_excel(phonelist_novo70_, 'list') +## novo phoneset translation_key = dict() -for ipa, novo70 in zip(df['IPA_simple'], df['novo70_simple']): - if not pd.isnull(ipa): - print('{0}:{1}'.format(ipa, novo70)) - translation_key[ipa] = novo70 -#df = pd.read_excel(stimmen_transcription, 'check') +#phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx) +#df = pd.read_excel(phonelist_novo70_, 'list') +## *_simple includes columns which has only one phone in. +#for ipa, novo70 in zip(df['IPA_simple'], df['novo70_simple']): +# if not pd.isnull(ipa): +# print('{0}:{1}'.format(ipa, novo70)) +# translation_key[ipa] = novo70 +#phonelist_novo70 = np.unique(list(df['novo70_simple'])) + +phoneset_ipa = [] +phoneset_novo70 = [] +with open(default.cmu69_phoneset, "rt", encoding="utf-8") as fin: + lines = fin.read() + lines = lines.split('\n') + for line in lines: + words = line.split('\t') + if len(words) > 1: + novo70 = words[0] + ipa = words[1] + phoneset_ipa.append(ipa) + phoneset_novo70.append(novo70) + translation_key[ipa] = novo70 +phoneset_ipa = np.unique(phoneset_ipa) +phoneset_novo70 = np.unique(phoneset_novo70) diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py index 9f4d4fa..4f98e6c 100644 --- a/acoustic_model/defaultfiles.py +++ b/acoustic_model/defaultfiles.py @@ -3,7 +3,7 @@ import os #default_hvite_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'htk', 'config.HVite') cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model' -kaldi_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5' + #config_hcopy = os.path.join(cygwin_dir, 'config', 'config.HCopy') #config_train = os.path.join(cygwin_dir, 'config', 'config.train') config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite') @@ -30,11 +30,15 @@ repo_dir = r'C:\Users\Aki\source\repos' ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter') forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment') -fame_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus' +WSL_dir = r'C:\OneDrive\WSL' +fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') +fame_s5_dir = os.path.join(fame_dir, 's5') +fame_corpus_dir = os.path.join(fame_dir, 'corpus') experiments_dir = r'c:\OneDrive\Research\rug\experiments' stimmen_transcription_xlsx = os.path.join(experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx') stimmen_data_dir = os.path.join(experiments_dir, 'stimmen', 'data') phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') -phonelist_novo70_xlsx = os.path.join(experiments_dir, 'Nederlandse phonesets_aki.xlsx') +novo_api_dir = os.path.join(WSL_dir, 'python-novo-api') +cmu69_phoneset = os.path.join(novo_api_dir, 'novoapi', 'asr', 'phoneset', 'en', 'cmu69.phoneset') \ No newline at end of file