From 24ac56ac0e903ffa9a4353e3b9ed7ac2c7f79855 Mon Sep 17 00:00:00 2001 From: yemaozi88 <428968@gmail.com> Date: Mon, 21 Jan 2019 21:56:55 +0100 Subject: [PATCH] to transfer working environment to McRoberts laptop. --- .vs/acoustic_model/v15/.suo | Bin 91648 -> 89600 bytes acoustic_model.sln | 1 + .../__pycache__/defaultfiles.cpython-36.pyc | Bin 1221 -> 1489 bytes acoustic_model/acoustic_model.pyproj | 3 +- ...unctions.py => acoustic_model_function.py} | 0 acoustic_model/defaultfiles.py | 6 ++++ acoustic_model/forced_aligner_comparison.py | 34 +++++++++++++++++- .../{acoustic_model.py => train_hmm_fame.py} | 0 reus-test/reus1008-reus.dic | 3 ++ reus-test/reus1008-reus.lab | 1 + reus-test/reus1008-reus.txt | 6 ++++ reus-test/reus1167-man.dic | 3 ++ reus-test/reus1167-man.lab | 1 + reus-test/reus1167-man.txt | 10 ++++++ reus-test/reus3768-mantsje.dic | 3 ++ reus-test/reus3768-mantsje.lab | 1 + reus-test/reus3768-mantsje.txt | 10 ++++++ 17 files changed, 79 insertions(+), 3 deletions(-) rename acoustic_model/{acoustic_model_functions.py => acoustic_model_function.py} (100%) rename acoustic_model/{acoustic_model.py => train_hmm_fame.py} (100%) create mode 100644 reus-test/reus1008-reus.dic create mode 100644 reus-test/reus1008-reus.lab create mode 100644 reus-test/reus1008-reus.txt create mode 100644 reus-test/reus1167-man.dic create mode 100644 reus-test/reus1167-man.lab create mode 100644 reus-test/reus1167-man.txt create mode 100644 reus-test/reus3768-mantsje.dic create mode 100644 reus-test/reus3768-mantsje.lab create mode 100644 reus-test/reus3768-mantsje.txt diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index d1781336b5b5753275939cd558826245edaf3f75..a64dccd2b8f51b535d34ee38bbad6b0adb2f7cd9 100644 GIT binary patch delta 6854 zcmcgw4RqAimH*$#%w#5C3K7`~1hmJqr|ogoWxI+;K5Xy+i)4TIKQj^myW6eq zoAaA@-`9Qj-Fx4C@BN*}P0l{k?y{H=P16E^>%+st$TR{{4$T{5uznDk`uUrOeoM2s z!!XyXHK4%=@UA!0X*yprbeWc;ct39n*c|$OQ0MNzA5tunF|JGa`O|9D3LSxbI`FWs zEOk>rn|qd3l`*+g2D|*gaGba^ptEZi`o;T<;-5;2qOy_Z5xNrfVZazb%q0EAOs@m+ zzH%UP3m|5S0VV;`F9-Poz;7n;5}2t=x07@bJ4r6K5j)v@CWw&j0bbyb`^NvWOJGmpO#?cmcVk~wWK zfga_p=j+ti#XpJ8+DoMF6A zXkAG=YuO%?V~WCb=lIIbS(b%pk}czHezM!0zKtxRRN=wh8Hp*fS#HPNKQSN6DDFu# zu8;|Gm^QirK7o79i2{- z!hak&dvWBy9zK6@&i+Mz7+s?S7WrSJc9dc})XzdcN6M*rq;?kBUC zYpqy$8xEOvtqB{e6*|a9d6RY2)R>^kX4(CCOI@@s*uM)yaGsaB~@A}8-&C)Y4tdgy2&*cf{x1Np}B)rJ-kZT zl~(gO5EnxEYFsDTxg)-dD)|!r@8XWpIG4rcB>3rF#d|Krc&^@QHnrQ_RKAHle^*wnMjUWS)2KfAo{+(UXNo! zH>uPp%6q1Q!aTMGyD9wH3D?`lg)RQ&(%Lr;&$DK=!{#aD#Kk%={s+KzK%8cWFBhde zwP#)wx$ws`GG~wbM_{gOvQ;u3) zct^3*EN9A)k*^ka(mL)adD8u$fr9E#X~U-55y;U?t9jwlgWUgknWt>h0J-u~IHfM# zc<)zC>h$=@Lv@4Rz$b%+z3DX&uke+75XwYnGkx_Q$BY-qld|Xoa#cZBqjEitDtKbj zM%vDslHO+f!ZLOyJA29ux~bKvTCsI~KdE*z#d7R#4zlxYn{q``DVM%o#m*D9&}fsU zL0-hgAZ-U<+@9aF%)rjXJoO;rpX!dF3A}2xJMHY4*Eetcbl-=OuimxOec;H;81Yq5 zNrV+X!!E71ux(AhEA6sD-?`VjWJ^hE)iwYP2oO!aFmdF$oxvwBKM=S3$J>9{XnrSek6jz@5A_r*v~ckCXV#rm5Wa-W`1RV=3(1 z>g-uzaBs6wq|SWHBe_L>&g;NAi+{h?%0JqX{*}<%P@ANNgD5q8d27pt#&*ZjwpFby zjZ4=z>MFqEjKzhSdHl(CyK;`Ea6Z#KF*tupX6_An_3<&T9Znwir&wt>=H}&Wq5P`o-hkLIfV28y;eX-o9$oyPmtCiAp=lL8kF>W7^Aavmk?eaux_4jvK87C`Co zC7`#!73=V3X$BfFx)Boap0lY?b9!MxZf2o(+7xXeI_KcRpRNBxIy{P|XqEUCYQ?C_ z*Zl01Gk|OotAlRDYR~n13TOH0A1$!y1AY)dM+=X zW#A(vd2**Y$D1BNOpx2V+>?cqXXx1wL(c(o&t5~1+w59A@PA|QiyJ)Luk{hF;9tHT zr%p|$RI4`#A{z4W>x&~^I)AN0@wLjWPrR>w*4z0H#HYhjcxPW>*ilF%ZmB;?jl8Sx z@5X3akX#B@X%1}-j;#>|8y|nOfcL(ZqW-OrGE^H;BA=XX<~6T{ux<9P=lchUGY9VH z`;Xp18gD&1t>-8`?|h3~z6kIB1+Hzk6H=`+Q=A^^>W=1c+gsry!rigkggf_fY#E#z z5*b}5`wmTjGAwSSZJy*^q^qGV2+fw$s zGgnmAum1Bz-rDz5IQGA2eB#agZ*YgF`^H-3%tVOSX8gb1c{zKqAjEqa#=JOuxV#`< zU8trO)oG?M6>Y%nVd}taUOh0DD^|zjQg(VeC3D_DvN*5%DBQ|<3~u$7Gx!po-*V>& z)Q)iOyH9fcxiH@I-sH;tWJlCS+%B3@RXM$?yeg|CzoaOuAit_It2no~B+EX%q^R6p zQB+i3K4r@z+1X3lHm!238TqelqbW6bM78P~O~sWz`7Su81XC`reWzQ(ei8kSpZ@$O z5jUR^4p|sq{*RgZzUn_=!)B6wJfSPJ1sKmOC#x=oIXXjtCb zvEJdo3uL!#n#sR@r&tVT99iIoYgLOv#4qYE#w@+~b9t@k$^KoLbOudfdM6=qgr%1} z)VQLdquH^dskw1Iq|-Z$bOmlXH=k#H8uOibLn&p8P%$Hg(rHMew?y31BASbpH~1}@ z`!oI&8-0tLVTM}FT^93=Ef#+Zcdst!4GE(m@`Qd-KoN2V`;)xIqkAvJ(?NE2w`doWdD=e7 zBR+IKRnGY&WecHpdQhDXq$!hcTo0D$-w(W3g^{%Py8>QSy)R9o&kQ{0>LclLh1w+ znno{Cr_Sz~JgUB#lYX7sH75amE$V?#>JZ+R-hl#oht!!o@2m;BAP0(Q0=c|GT8bzQ zixkHI*Kpom6yBR!Ow05!t5k6*MYD5GdhYMk<_ks*|B(?f{Kl*xe9td=oJB{5v}WKs3AD6zL^E-fT-dO2Q~clgUClxUiU&h=kuN8MdZFR0Wy zYN2gXIbQwHLX&h$zFI-g5%;W3lBbYz8Ym5hi14~v-8=}Zskj>YFwiNUqb*SvBWZar v<}p}aeUPeeotdV2$GU4Uwpo`T=C&JGB~PdECb{a~kh2$ZE-!1CcyO7n$R$;xPM z4a(4>woI;~$y7oP!9?&7Y6$lbSkYSAQSGKl6TYcVRkss2fM0@Bx8k?uJ*z1S3%)%H z?YroW;V2ASW*`~+ll{(vgT(YEqOn1q=R7zrOgS!du3F=`@EVVMiD!MoabX$7rG4Z9 z7I0vS2^<)nYl%`|0(?Eu8wea1UP}s6=10ec=R7zrP6Ee;=ktgb5cpskqWOf$0{NG- zLPg-Xl@P8KH#5oS6Y>bGj0Y#gZG=UmWVesv%ZaZbEWrxhB&hAM&X~D6xLikcbW|Uv z`F(^2!u^CmK#8s)L||88s-$|U960F9g`ZSs7RfFo|)vn-`Sfl{!z2PSvX(k8-Y!UF`3 z*MmXIHc1`UR4gf60MB(;$4-2nBrgztMmRwDIpIYDD{kzv&H>5dhUwT;dW;=|?pt=C zbi}NfO@5HdlQ1bV`G0d^u1fOs1>CVR1TQUI2Tp7%+s?bv~buS4IVgIuBKp>>0MQo_zUb+LkJ4LCIY)qN*RVR0E%7o|>)LlLj z1>q9WHd67Ia5QiBdv}TLPAWX7U>96MEwvw)&?0=+^pD-Js!^#Cyg z*dmwm-fEHFe^lwGJ`g>RPW)~-l_ov#i%3COETG<6ig#JPa&#f&%9%FME9Mn`@K%f7 zM?^I2(JI=t7XhRC;&6(~Sdf#Go;$0p*k0G@;BHxzXDh3BSQ={AIBXVYbA6rDUfpPG za5Ob$xE+n|Ojk==t0_1#OeXZW{Fvp651*Bto;9SU9?RM?@c4b9I@|K)we_{`<;!uh zYdZd>Rfo~d)4Es1LDbPLarYEn-1PRDzgcfgP@g#^P!))^0-EpkOKv~D`(-ljX*23) zW~JwthO}-Vt<%ul)jxJ-)inC zH=v{!|8tU^z- zhl)&&o(-`|>^dXpS3BHabA3KHtr2xOvodnBuc5KtxyEUC)!J6l7LNLATce|~vDR77 z@e^-M$9ZiQ`NViI;PU3lw+516eOuPBz1$r~Hn^s}-=aM{XGG5$igdEH-h8^`L(c;qDYgV(kEEZ(*hShqXnVeB2I^}p706-ikDagO z>c2B6M)!mrQZfD9u#m3_Lxo)&{5@SVEa@v=q~%*f<*>j#uS^YG9f4jKL(1P!`X8Pj zf|i{2uU0np;fZV1vlhI2RNb9L^z-!1Wv%qd`YQ$MeL_Xa>-DO#xZ z;gQ5KAJUvqfqkA=0X=^(EB+EQNGtu=^Gcz-I}K)IUR5#rSDGU_=?cl6 zK_`R>>nV$Jx{rB)j^vR}_|g34D(rnPAK!~f#JRb#a-s%m!ub-y|DrU)G1;z$?fB$5 z3%-5!C>+BlK6nxzTCc-{@1&AshkRZQF&OvZ$J(A?bHSwBQ?OvM{<|FkH})Pv^|^N- z9LHa9{8$GHD;YTuK6ROnF>z7k3C08RoPwdTOfY-s}?`Frv3*^}}8bA6+9 z%1@2~yZrnZsNkbmwc71B%lAQTFlA9GZ&`eGZ(79YDAdU)l%bL zO)cAk(3?-**1`xU{!)YX=1Iz({WC&X=T+NIgbM8Va5lv&0n+5C7Oo%~QE!DkS?(r@$0S(#=NMm<#!`TT3}}@=xjb;*nU{WTXyn-j)}( zzRh0cY~q)2?&*AOn=F%l@&DD4Pw?Am=WH8hJw$^zN(d7twsG&PRR31hOd&;(T!l&6 zJd5CL=>7r_ebtpv3UcYKaKKl-4EBiFX-FIO*Jt=I(6y;aRG~TMLA1A}aR!euyz?t# k;eq0lJ#coy?|QO6T7XB+p8k5d7k8Q=fo;gt{l9nqHw|Y#=l}o! diff --git a/acoustic_model.sln b/acoustic_model.sln index 7d8fcbe..406d9e5 100644 --- a/acoustic_model.sln +++ b/acoustic_model.sln @@ -16,6 +16,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution ..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py ..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py ..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py + reus-test\reus-test.py = reus-test\reus-test.py ..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py ..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc index afdf53bce6459c0c3a963fab68861c4f9d009276..ef367cd734c67a0e96e15b9eed496d2a540e3efb 100644 GIT binary patch delta 503 zcmZXRze~eF6vuN(6m63>{oVSrY8MBKRzYzP9RzU`aXC*y%)7Q}o2KMa5Eq9IE-sSM z!BxafT;2QwTwVP~d@Z30-reK9@8`WQZ@7=?&v<21*JF=whwMa?q;KiZnZ$Y@LzKyV z<@`n+$W(+7=5z07I1ac~^ zE`=5Hz%6^1oeAy>cF=a1@cO>Zna_KkV08~`Fs+W$=(@Zc{0CqK7HbA=*7LdGf$4g{ zJAzql$Ao@hH_h7m#`g7*9j=k(EjH@1OUuWHHrL&#?X1;?1@OA4+Me6++5x zf@*LTgzw~;(9n_h$XECzUyxEbsobt4aHnZ|-MZ5-!SVeG{2u!2G|~BbrzAdc delta 285 zcmcb}eUwwhn3tC;rpP5`FBb#DV+JI^24p(`aq%C9i7L_cTq)csJk2aoT&WCMOj*pS zTq(TGOi|o$9v_g$lfoax3l-r_5deztL3w;Bf-Gk!WU(5(bH>0j=arkpycIfhrP7kxG$Hkx7vSDiTd$3}(=jo9w|jd-8my zm5jG0dotTGMosQuj%5^^{EWGM^F$UtMlCg2.0 4d8c8573-32f0-4a62-9e62-3ce5cc680390 . - - + forced_aligner_comparison.py . diff --git a/acoustic_model/acoustic_model_functions.py b/acoustic_model/acoustic_model_function.py similarity index 100% rename from acoustic_model/acoustic_model_functions.py rename to acoustic_model/acoustic_model_function.py diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py index 42ba4e1..f53100f 100644 --- a/acoustic_model/defaultfiles.py +++ b/acoustic_model/defaultfiles.py @@ -31,6 +31,12 @@ ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter') forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment') accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification') +htk_config_dir = r'c:\Users\Aki\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017' +config_hvite = os.path.join(htk_config_dir, 'config.HVite') +#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo') +acoustic_model = r'c:\cygwin64\home\Aki\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo' +phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt') + WSL_dir = r'C:\OneDrive\WSL' fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') fame_s5_dir = os.path.join(fame_dir, 's5') diff --git a/acoustic_model/forced_aligner_comparison.py b/acoustic_model/forced_aligner_comparison.py index 9243f95..d9d34a4 100644 --- a/acoustic_model/forced_aligner_comparison.py +++ b/acoustic_model/forced_aligner_comparison.py @@ -1,10 +1,42 @@ import os os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') +import sys import defaultfiles as default +sys.path.append(default.forced_alignment_module_dir) +from forced_alignment import pyhtk, convert_phone_set, scripts -wav_dir = r'c:\Users\Aki\source\repos\acoustic_model\reus-test' +reus_dir = r'c:\Users\Aki\source\repos\acoustic_model\reus-test' +wav_dir = reus_dir wav_files = ['reus1008-reus.wav', 'reus1167-man.wav', 'reus3768-mantsje.wav'] +word = 'reus' +pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə'] + +for wav_file in wav_files: + file_lab = os.path.join(reus_dir, wav_file.replace('.wav', '.lab')) + file_dic = os.path.join(reus_dir, wav_file.replace('.wav', '.dic')) + file_txt = os.path.join(reus_dir, wav_file.replace('.wav', '.txt')) + + # output htk dict file + with open(file_dic, 'w', encoding="utf-8") as f: + for ipa in pronunciation_ipa: + cgn = convert_phone_set.ipa2cgn([ipa.replace(':', 'ː')]) + barbara = convert_phone_set.cgn2barbara(cgn) + f.write(word.upper() + '\t' + barbara + '\n') + + # output htk label file. + pyhtk._create_label_file(word, file_lab) + + scripts.run_command([ + 'HVite','-T', '1', + '-a', + '-C', default.config_hvite, + '-H', default.acoustic_model, + '-m', + '-i', file_txt, + #'-S', script_file, + file_dic, default.phonelist_txt, os.path.join(wav_dir, wav_file) + ]) \ No newline at end of file diff --git a/acoustic_model/acoustic_model.py b/acoustic_model/train_hmm_fame.py similarity index 100% rename from acoustic_model/acoustic_model.py rename to acoustic_model/train_hmm_fame.py diff --git a/reus-test/reus1008-reus.dic b/reus-test/reus1008-reus.dic new file mode 100644 index 0000000..4d22a33 --- /dev/null +++ b/reus-test/reus1008-reus.dic @@ -0,0 +1,3 @@ +REUS r eu s +REUS m ac n +REUS m ac n t s j @ diff --git a/reus-test/reus1008-reus.lab b/reus-test/reus1008-reus.lab new file mode 100644 index 0000000..0475f18 --- /dev/null +++ b/reus-test/reus1008-reus.lab @@ -0,0 +1 @@ +REUS diff --git a/reus-test/reus1008-reus.txt b/reus-test/reus1008-reus.txt new file mode 100644 index 0000000..9726c94 --- /dev/null +++ b/reus-test/reus1008-reus.txt @@ -0,0 +1,6 @@ +#!MLF!# +"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1008-reus.rec" +0 9700000 r -12463.852539 REUS +9700000 12800000 eu -3622.108887 +12800000 26250001 s -17303.216797 +. diff --git a/reus-test/reus1167-man.dic b/reus-test/reus1167-man.dic new file mode 100644 index 0000000..4d22a33 --- /dev/null +++ b/reus-test/reus1167-man.dic @@ -0,0 +1,3 @@ +REUS r eu s +REUS m ac n +REUS m ac n t s j @ diff --git a/reus-test/reus1167-man.lab b/reus-test/reus1167-man.lab new file mode 100644 index 0000000..0475f18 --- /dev/null +++ b/reus-test/reus1167-man.lab @@ -0,0 +1 @@ +REUS diff --git a/reus-test/reus1167-man.txt b/reus-test/reus1167-man.txt new file mode 100644 index 0000000..06ad7b8 --- /dev/null +++ b/reus-test/reus1167-man.txt @@ -0,0 +1,10 @@ +#!MLF!# +"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1167-man.rec" +0 150000 m -230.057571 REUS +150000 300000 ac -250.994858 +300000 450000 n -202.377716 +450000 4600000 t -5128.984375 +4600000 5050000 s -711.338501 +5050000 5450000 j -564.730591 +5450000 16049999 @ -13249.787109 +. diff --git a/reus-test/reus3768-mantsje.dic b/reus-test/reus3768-mantsje.dic new file mode 100644 index 0000000..4d22a33 --- /dev/null +++ b/reus-test/reus3768-mantsje.dic @@ -0,0 +1,3 @@ +REUS r eu s +REUS m ac n +REUS m ac n t s j @ diff --git a/reus-test/reus3768-mantsje.lab b/reus-test/reus3768-mantsje.lab new file mode 100644 index 0000000..0475f18 --- /dev/null +++ b/reus-test/reus3768-mantsje.lab @@ -0,0 +1 @@ +REUS diff --git a/reus-test/reus3768-mantsje.txt b/reus-test/reus3768-mantsje.txt new file mode 100644 index 0000000..8e2bc08 --- /dev/null +++ b/reus-test/reus3768-mantsje.txt @@ -0,0 +1,10 @@ +#!MLF!# +"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus3768-mantsje.rec" +0 150000 m -217.347229 REUS +150000 1150000 ac -1266.293579 +1150000 1650000 n -583.382568 +1650000 11100000 t -11259.270508 +11100000 11250000 s -247.939255 +11250000 11550000 j -445.511444 +11550000 24150000 @ -16769.048828 +.