From da0242b0e1e9df758bf3f5dcfabe99fa7d9b8ba1 Mon Sep 17 00:00:00 2001 From: yemaozi88 <428968@gmail.com> Date: Wed, 6 Feb 2019 00:00:14 +0100 Subject: [PATCH] make sure all the phones in stimmen transcription can be treated correctly. --- .vs/acoustic_model/v15/.suo | Bin 97792 -> 106496 bytes .../__pycache__/defaultfiles.cpython-36.pyc | Bin 1282 -> 1051 bytes acoustic_model/acoustic_model.pyproj | 1 + acoustic_model/check_novoapi.py | 48 +---- acoustic_model/defaultfiles.py | 67 +++---- acoustic_model/fame_functions.py | 11 ++ acoustic_model/fame_hmm.py | 5 +- acoustic_model/fame_test.py | 4 + acoustic_model/htk_vs_kaldi.py | 165 ++++++++++++------ acoustic_model/novoapi_forced_alignment.py | 2 +- acoustic_model/novoapi_functions.py | 2 +- acoustic_model/phoneset/fame_asr.py | 6 +- acoustic_model/phoneset/fame_ipa.py | 33 +++- acoustic_model/phoneset/fame_ipa2asr.npy | Bin 1559 -> 1582 bytes acoustic_model/stimmen_functions.py | 38 ++++ 15 files changed, 232 insertions(+), 150 deletions(-) create mode 100644 acoustic_model/stimmen_functions.py diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index ce9095444dca9c05b39c00500ddc4d81ab0ec4d8..ef753d55acaf9cacc87dd7fb1e4d5794c0656e11 100644 GIT binary patch delta 5607 zcmd^D3s6+&6~5>0@{pG+f-X@J5fMRgK|~}z7F_UAqhOR6vk{15bW|X+BtBBv7>&<} zyT8d*o2c0u(snY5;U?Kh$Kd+HsIev;?d0hg>zKqejZW2yDBJHWcM*IvX{OV5rvLKo zIsdu;fB*BJ^PT_R#eO5e?g`vv>LW{9Ge}Z5sCuBq>-Dl#2c*7L*h7vm>|Q?X)%Q=* zqp*19i>2~X+AOZf&(h0ciEdf=c9@-m@r4#DNZ(aN1siI{qlY_r2S9cPehCT!{J>1Sd8M+ndui3gl-VYQ5B|s_g7%&T%4U_?o1AL!(np~hswk>Ma z9S?QMuxaK&?Jf`3^nsAQfv16RU@Nc!5I3kYq9kMa1BAnr?v{1Na%i*O?J4&5)Hq9N=dJ+rfw$A0=vW!B2@Q z=zdB(0i7w41n`7FP40m7Gok`IJIjoCUb{XRG6Y~6WdY*=o}UTHj0gaj5h+r+Brzol z03MJH$&~m7z&oD+*`5-)=<~RVkQsnK!EEDJ2-_53D!~0BO+Ezq2p|dqvc#5vxB;9- zPM<$@Jh&QtmYl*3nqK25?4h@#;|z%!z#cKykR&;Pb-;RnZMBdaHGPwKA*8RPayqrV zRXUb_{>*w~ysX9>PLngjMnY6a)R0qL!FP_R?pzbL7;eqrYDKFz#>r}2jK;y^^2xU5Or;K`kc1HvUBHXcX{Sb@#e zs!b_DvMW)pk2*;Mn0kC3_`|XkIy0b;76x9l23V=y6gt@z_b<;Htdr|wuaO)~;`;sL zeg4@-Ox1%kK9|Jh5e30aKc=bA$F*2_k7yb+K*VpI5@zEbE^%{Z%3Y8pUt5VjE9;S4 z+Jy6ExPZk8YzAHcct-4-xu;%KrE09IDk4wVQ;Zz!R(mxY+qpXB9D9-*(Aug)Mt{-mpUtBc1T z2``H~XL7g@-xJ4q${yADN>2eqy)qE3#WeAzT=bJ=(m1n z?kOE^@=e)b*_c>ew_$CM#oGqD-HBgRZDos8Ixf)mltOeEqOc-u`p>G8IBw?GMSgf}=N>h3=b6x>lz^{Q_z^lM( zfY_2?r2~$_?%{{f<0*}he*n)2a}T>p^sX!)W72t(5vqBPN9T`5jodyCF1udXM-jDBanK_kZz%(r55KT!a&k^x@n|3*11I!JrWaKb#IcHk_4UUBR9KnLTrsK^#)$;Zpa#hMA{hd0s?%f%>de=eyonAR{a;3l5pf>zI zm+H4HCU)_+s{{Scg$mc(C2GkYt4=ppvWdNWqoZ4kM3VTeh8xRJ@zvh$^$mOI$45Cm z>x4S?sg!rW+9?MKWc?2TrT(AZO0k0gkKVhcSMZB#qb+{YqJL^ z%R?#S&KtJHujuX`YJu-BzP?%HPew%8oYIA5#kR64TWaN!JZHu45#(x!>;Za$#A9zvbPnC7Go8@gwpyGL3p=T?-LRh8MS zK0kbsZb=J*ggXC(A99kKX0EZ*fdTk=>KJFunA57AeSk}VyQteosh10!y+1UH?4n2Jwp!{p@gnx?#B zq}~W_LboD5GqhF$ogd+C(Y;4XyG7l>MJ;a8_qq_IT~Ia~X`pXmya8z~%@Yp|Z%geW z)s|AFM+p5nx_!9uTH$M!TiI^;q<3E1KKghjLdVOL?^l+tb+CB&>(`=(3>t3A$r+SB zJU=JHlAbqom?`s@VLp=k>C~=@+Vi2W^E}=x5{-(vKRvBPA;7+8usb`50!cA{Nf*LyEmCWe4JQq@ub?>61?V)PC1{kb$55+_Y8qqbyDAnli>n{Y2?`BRwW^hK?3F z=c9j&d*QQNw}kIB&mwndFUlq{xjC8cO*0Bnhu;Vy5qm1WwccAk>BeVwYfJM?h0Z@U z+n6*`oVnKhzd`00%K@@voA6ndVaXbkHFVgJK||8>O@lIU2F8NK;dkM;pVj17+UOl_ zYTG6LZy*Z1S&BV|%t~zt^%T>OCkRLLl6&0r|Db64{YcTY(nB$X&|l;3T7mE1%GH0W zLY#T`88+)qet2D{CCyEoRx(maN7{l}v zA-=7&T|n_EW+NpDx@t6>elIEajr^0hrX8v6#ZSsx4>nX}blB6&vV3h!#-OQgiYG7q zkGPUR-hjGP3RKSarzy&rVbC^ArYQG|`NgGlwRQc4^jXNeOKFWP0(XoOh8-s5v2^S>V;M~)zv&-SnyU%` delta 4270 zcmd6q4Nz3q702Is?5>Lf@+BfHxFAyMCm3ToRSwe^~3&#owBOW5X3CSWy%U_|oU|!6*bGP8P+hVXo>sk!D*5 z&4S0ZMr=|y(Ocq{`U*M4#-PoiZn$g5-~-Jj$P!iKL@yeIfH05(Fsr|1p#Kt>0IUK3 zU0iXXp9&Je2*49ogCy`BFb-sb=^zHg1Ag3*pa%2iT=A$zZq=UeHeUG4@406B* zz|}S*NUClQ0H26}bll;}xiY& z0Q={_7J_WRQuPNb0k56DO8I;>J|9b!3wkFkMxQHE1p5Sd5=;VqDPiY=`GDgWz%B$o z0Hxq5um~&$OTbdF4D^+>SIX7!`@%6vD0c#_++8#{cv!%((Onef*o49J0mlolFM=(B zvEDzrW-BSmHc$(;gB@Ter~|vfF2L#3!@7YE1lR-if(9T#BiIM_g9G3ocpV%9Z-B$# z2;jMzV4J~FQdFfyOzA%?l-(!bI>{j?mrf>6n5sus(2qrBWDOk?xA5r^l~FaeQHW@> z8($2xsruk#BRQs%TGKk{yF3F^jpQ~0PW&~H#52OriSrkKi?|y+_*B-Zq$~yd~%yH5vQIR}boQ=s4Jpo%WY<94J(sG1|UB5{Sei_p7 zBPzz-u|cXwQ$=AymdMKL5g+8+`z=?>luG3}kv-+KXM*Zkp~~`Xnj~FC6m1ktY9)RA zH2?e+<4Y`Mfhk4L_(D5>b9nRQd(d%F`Dg*hSu1#98Mn(&@T#{hTq1FzO0(p^Eqba- zzw(r9sI^qH=gJ1BXOk+`88lA5w~8WVVlkzJhmX#WdiTbh_62wH7oQlGy-J+Uj}~=# zyHfuW|IX&>zaHs|JT+xU>#?&ZaC?8QTto-vyJ=IDnRY{)ZdyEtEP5>}R&a${XE{~F z8H3J4uJFxw)NVz5QIIB*U+9DY_@2z~wCc5ac0MV1xp4BLqPE1sM+gtBbMP2$;Z7VY z4ktRKmPB^3U_?9y&HxF0okKYF3C4sa=Yu@XKpoOoP?({uGJ`zDs+ch|JfXHvmYnI% zKI1x(F)?c5o<1Iq*o$!kkf0Il1N*@N@H*g_L$F8PMe&vvbVOt3imz5+rYBeR>`*;= zU?FsGr*LCt#VHCb%fg-HsaExpo_}AQ4IxHILl1dgR6Xk~o-JQj{JCXH6&B!99L1~g z^06L!&e?-I2K?&trv}#_cy-T8+de)hmACPETUC_nID2+mV1uyJTe0h78*fFyIUjCS z81q(Y%$@lbVPSddY=@OAQQ^l&TIALDZ471av>en>G)uoY$>>MN3 zT`Lyp*EC`N0T-{CB+MHwQbm#)WkffBO8O1c&~78vpJ7WW3D>8l=ypVk$j_2va&RQ` zJ##=C-U6fW3_*(Q8cq|0cGDF4(L5vXTw9P-H>oicD#BWP^+y>7WajG~9~t82SItss+s z>~QOomK-^qru3$jCS4=>dp5UG>|f@y;hzZ~(j7--_Y@#> zHQvejbo4iwP)HLWB%>EHP9kyT`r!XT-9!C#|Nj(^8pP>@Y%2EBRPqtqT=H4v)#+3$ zuRKXMS+|s;y=(GlPjJgR46i;;dEUJF)MgdslMB6t&(MhhEsMxxOs$ diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc index ef57dca2e2702e60280cdc7eb03df48c0b4899cb..545949dc9f1ff2f1c8bc72b5858babe7242ef981 100644 GIT binary patch delta 622 zcmYk2%Wl&^6ozN)aePU9X<9-GNU#7TBvK(Efyx4b3aq#YbwPC$WSN{vGi_o=wi`+| zmB=G_gT#s@OJ0O6QpAoGZ^5x^ta{|p|9|Jqncx2Ge6`N*yRP%|%fawhi?KiK!b6@t zp*Q-qPG&zX^2mCx9JXWgGPAR1;1u4ZsHcPpgD1gru6Xg%7BfU+1STzynRKBiMtKC{#nV^Mivw-f z5T!4^X%B9%Tcac%tHWh)PItHp!@Vr8CD$%YZRmLi#o*(#+KSqn#5No=Gx|V_P%hkb2vn z;1~EMZ&~pJED#H>A2BH^u`)B~+;iu8?iu-Et5yH>^BjL|YT9>g=~sjGQ~1l@Fi?$F zJPm17M-^H{I@2zy7Y5U*aiyb`60HELm8eE*S5;(|*o3H6pbE9-g9`!f$-`T<{sg@0esnxMM7$5oYs{2|i`M zl9;$bBo%j^X+W8Whh)m|SWV*HXGi#hWJ2PT3CA*sgv+pCVIboT9+LiCk}2uCfj?tH zF@fx101E05kB|IxYt$vK3$x>TL`pv7E>S%2k*R{f`y9-Z8YB<-C~k%0z-JzpYM|yS z?m*%4wa?F9#&`aE4u=v&@af5`7f6?f2sRX9B;&jPO;|cOGU3!yc+td2cZnY@CqY>} zAs*#@B_e-<*+}M{k%u@hf*08}!DQqqWctA@AR&+Mp9(HH@p~VMfJw>TTkfhzu-R|54IkJX=vs8@Q*L{e$%BDCAP2QdGQbD`=X|+C-OR7YQ#LCo?sYLg^xRVAS zKNu{?gY85QBr?HPhjM|e)W&p`7$H&P#JC8!k9NRSCv_F3($-AB|F5;_U5PxF1$S?B zISVxkr!(-^Wd~eFCbT%^6Dz%Oxjb9R!whsXkoP(#mb}a5g##I|O8{Qx`UUfa%ratc zLF9%xg + diff --git a/acoustic_model/check_novoapi.py b/acoustic_model/check_novoapi.py index 96d8e8f..2a3ae59 100644 --- a/acoustic_model/check_novoapi.py +++ b/acoustic_model/check_novoapi.py @@ -20,12 +20,12 @@ from forced_alignment import convert_phone_set #import acoustic_model_functions as am_func import convert_xsampa2ipa import novoapi_functions +import stimmen_functions sys.path.append(default.accent_classification_dir) import output_confusion_matrix ## procedure forced_alignment_novo70 = True -balance_sample_numbers = False ## ===== load novo phoneset ===== @@ -98,36 +98,7 @@ def search_phone_ipa(x, phone_list): ## ===== load all transcriptions (df) ===== -df = pd.read_excel(stimmen_transcription_, 'original') - -# mapping from ipa to xsampa -mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) -#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): -# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) -# if not ipa_converted == ipa: -# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) - -ipas = [] -famehtks = [] -for xsampa in df['Self Xsampa']: - if not isinstance(xsampa, float): # 'NaN' - # typo? - xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t') - xsampa = xsampa.replace(';', ':') - - ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) - ipa = ipa.replace('ː', ':') - ipa = ipa.replace(' ', '') - ipas.append(ipa) - else: - ipas.append('') - -# extract interesting cols. -df = pd.DataFrame({'filename': df['Filename'], - 'word': df['Word'], - 'xsampa': df['Self Xsampa'], - 'ipa': pd.Series(ipas)}) - +df = stimmen_functions.load_transcriptions() word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)] word_list = sorted(word_list) @@ -183,21 +154,6 @@ if forced_alignment_novo70: # samples in which all pronunciations are written in novo70. samples = df_.query("ipa in @pronunciation_ipa") - - - ## ===== balance sample numbers ===== - if balance_sample_numbers: - c = Counter(samples['ipa']) - sample_num_list = [c[key] for key in c.keys()] - sample_num = np.min(sample_num_list) - - samples_balanced = pd.DataFrame(index=[], columns=list(samples.keys())) - for key in c.keys(): - samples_ = samples[samples['ipa'] == key] - samples_balanced = samples_balanced.append(samples_.sample(sample_num), ignore_index = True) - - samples = samples_balanced - results = pd.DataFrame(index=[], columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh']) diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py index 2188c97..ef0dfd4 100644 --- a/acoustic_model/defaultfiles.py +++ b/acoustic_model/defaultfiles.py @@ -2,63 +2,40 @@ import os # add path of the parent directory #os.path.dirname(os.path.realpath(__file__)) -#cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model' - -#htk_dir = r'C:\Aki\htk_fame' -htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk' - - -#config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite') -#mkhmmdefs_pl = os.path.join(cygwin_dir, 'src', 'acoustic_model', 'mkhmmdefs.pl') - -#dbLexicon = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\lexicon.accdb -#scriptBarbara = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\pronvars_barbara.perl -#exeG2P = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\string2phon.exe - -#[pyHTK] -#configHVite = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\config.HVite -#filePhoneList = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\phonelist_barbara.txt -#AcousticModel = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\hmmdefs_16-2_barbara.compo - -#dbLexicon = config['cLexicon']['dbLexicon'] -#scriptBarbara = config['cLexicon']['scriptBarbara'] -#exeG2P = config['cLexicon']['exeG2P'] - -#configHVite = config['pyHTK']['configHVite'] -#filePhoneList = config['pyHTK']['filePhoneList'] -#AcousticModel = config['pyHTK']['AcousticModel'] - +# repos repo_dir = r'C:\Users\Aki\source\repos' ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter') forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment') accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification') toolbox_dir = os.path.join(repo_dir, 'toolbox') -#htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017' -#config_hvite = os.path.join(htk_config_dir, 'config.HVite') -#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo') -#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo' -phonelist_txt = os.path.join(htk_dir, 'config', 'phonelist.txt') - WSL_dir = r'C:\OneDrive\WSL' -#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') -fame_dir = r'c:\OneDrive\Research\rug\_data\FAME' +novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi') +#novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi' -fame_s5_dir = os.path.join(fame_dir, 's5') -fame_corpus_dir = os.path.join(fame_dir, 'corpus') - -experiments_dir = r'c:\OneDrive\Research\rug\experiments' +# working directories +rug_dir = r'c:\OneDrive\Research\rug' +experiments_dir = os.path.join(rug_dir, 'experiments') +htk_dir = os.path.join(experiments_dir, 'acoustic_model', 'fame', 'htk') stimmen_dir = os.path.join(experiments_dir, 'stimmen') -stimmen_data_dir = os.path.join(stimmen_dir, 'data') + +# data +fame_dir = os.path.join(rug_dir, '_data', 'FAME') +#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') # 44.1 kHz #stimmen_wav_dir = os.path.join(stimmen_dir, 'wav') # 16 kHz stimmen_wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' -stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi') - -stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx') +stimmen_transcription_xlsx = os.path.join(stimmen_dir, 'data', 'Frisian Variants Picture Task Stimmen.xlsx') phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') +novo70_phoneset = os.path.join(novo_api_dir, 'asr', 'phoneset', 'nl', 'novo70.phoneset') + + + +#phonelist_txt = os.path.join(htk_dir, 'config', 'phonelist.txt') +#fame_s5_dir = os.path.join(fame_dir, 's5') +#fame_corpus_dir = os.path.join(fame_dir, 'corpus') +#stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi') +# novoapi_functions + -novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi') -#novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi' -novo70_phoneset = os.path.join(novo_api_dir, 'asr', 'phoneset', 'nl', 'novo70.phoneset') \ No newline at end of file diff --git a/acoustic_model/fame_functions.py b/acoustic_model/fame_functions.py index cb87620..295ed79 100644 --- a/acoustic_model/fame_functions.py +++ b/acoustic_model/fame_functions.py @@ -341,3 +341,14 @@ def fix_single_quote(lexicon_file): def word2htk(word): return ''.join([fame_asr.translation_key_word2htk.get(i, i) for i in word]) + + +def ipa2htk(ipa): + curr_dir = os.path.dirname(os.path.abspath(__file__)) + translation_key_ipa2asr = np.load(os.path.join(curr_dir, 'phoneset', 'fame_ipa2asr.npy')).item(0) + + ipa_splitted = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones) + ipa_splitted = fame_ipa.phone_reduction(ipa_splitted) + asr_splitted = convert_phoneset.convert_phoneset(ipa_splitted, translation_key_ipa2asr) + htk_splitted = convert_phoneset.convert_phoneset(asr_splitted, fame_asr.translation_key_asr2htk) + return ''.join(htk_splitted) \ No newline at end of file diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py index 19d5f56..b3d1070 100644 --- a/acoustic_model/fame_hmm.py +++ b/acoustic_model/fame_hmm.py @@ -27,7 +27,7 @@ extract_features = 0 flat_start = 0 train_model_without_sp = 0 add_sp = 0 -train_model_with_sp = 0 +train_model_with_sp = 1 @@ -321,7 +321,8 @@ if add_sp: ## ======================= train model with short pause ======================= if train_model_with_sp: print('==== train model with sp ====') - for niter in range(niter_max+1, niter_max*2+1): + #for niter in range(niter_max+1, niter_max*2+1): + for niter in range(20, 50): timer_start = time.time() hmm_n = 'iter' + str(niter) hmm_n_pre = 'iter' + str(niter-1) diff --git a/acoustic_model/fame_test.py b/acoustic_model/fame_test.py index c7b2e59..c1a432e 100644 --- a/acoustic_model/fame_test.py +++ b/acoustic_model/fame_test.py @@ -69,6 +69,10 @@ else: translation_key_ipa2asr['ə:'] = 'ə' translation_key_ipa2asr['r.'] = 'r' translation_key_ipa2asr['r:'] = 'r' +# added for stimmen. +translation_key_ipa2asr['ɪ:'] = 'ɪ:' +translation_key_ipa2asr['y:'] = 'y' + np.save(os.path.join('phoneset', 'fame_ipa2asr.npy'), translation_key_ipa2asr) diff --git a/acoustic_model/htk_vs_kaldi.py b/acoustic_model/htk_vs_kaldi.py index ca7f6af..c1e5c97 100644 --- a/acoustic_model/htk_vs_kaldi.py +++ b/acoustic_model/htk_vs_kaldi.py @@ -1,84 +1,145 @@ import os os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') - import sys -import csv -import subprocess -from collections import Counter -import re -import numpy as np +#import csv +#import subprocess +#from collections import Counter +#import re +import shutil +import glob + +#import numpy as np import pandas as pd -import matplotlib.pyplot as plt -from sklearn.metrics import confusion_matrix +#import matplotlib.pyplot as plt +#from sklearn.metrics import confusion_matrix -import acoustic_model_functions as am_func -import convert_xsampa2ipa +#import acoustic_model_functions as am_func +#import convert_xsampa2ipa import defaultfiles as default -from forced_alignment import pyhtk +#from forced_alignment import pyhtk +#sys.path.append(default.forced_alignment_module_dir) +#from forced_alignment import convert_phone_set +#import acoustic_model_functions as am_func +import convert_xsampa2ipa +import stimmen_functions +import fame_functions +import convert_phoneset +from phoneset import fame_ipa, fame_asr +sys.path.append(default.toolbox_dir) +import file_handling as fh +from htk import pyhtk ## ======================= user define ======================= -excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx') -data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data') +#excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx') +#data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data') -wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k +#wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k -acoustic_model_dir = os.path.join(default.experiments_dir, 'friesian', 'acoustic_model', 'model') -htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short') -fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k') -result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result') +#acoustic_model_dir = os.path.join(default.experiments_dir, 'friesian', 'acoustic_model', 'model') +#htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short') +#fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k') +#result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result') -kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme') -kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict') -lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt') +#kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme') +#kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict') +#lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt') #lex_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr') #lex_asr_htk = os.path.join(default.fame_dir, 'lexicon', 'lex.asr_htk') - -# procedure -make_htk_dict_files = 0 -do_forced_alignment_htk = 0 -eval_forced_alignment_htk = 0 -make_kaldi_data_files = 0 -make_kaldi_lexicon_txt = 0 -load_forced_alignment_kaldi = 1 -eval_forced_alignment_kaldi = 1 +## procedure +#make_htk_dict_files = 0 +#do_forced_alignment_htk = 0 +#eval_forced_alignment_htk = 0 +#make_kaldi_data_files = 0 +#make_kaldi_lexicon_txt = 0 +#load_forced_alignment_kaldi = 1 +#eval_forced_alignment_kaldi = 1 -## ======================= add paths ======================= -sys.path.append(os.path.join(default.repo_dir, 'forced_alignment')) -from forced_alignment import convert_phone_set -from forced_alignment import pyhtk +### ======================= add paths ======================= +#sys.path.append(os.path.join(default.repo_dir, 'forced_alignment')) +#from forced_alignment import convert_phone_set +#from forced_alignment import pyhtk -sys.path.append(os.path.join(default.repo_dir, 'toolbox')) -from evaluation import plot_confusion_matrix +#sys.path.append(os.path.join(default.repo_dir, 'toolbox')) +#from evaluation import plot_confusion_matrix + +config_dir = os.path.join(default.htk_dir, 'config') +model_dir = os.path.join(default.htk_dir, 'model') +lattice_file = os.path.join(config_dir, 'stimmen.ltc') +#pyhtk.create_word_lattice_file( +# os.path.join(config_dir, 'stimmen.net'), +# lattice_file) +hvite_scp = os.path.join(default.htk_dir, 'tmp', 'stimmen_test.scp') + +## ======================= make test data ====================== +# copy wav files which is in the stimmen data. +stimmen_test_dir = r'c:\OneDrive\Research\rug\_data\stimmen_test' +fh.make_filelist(stimmen_test_dir, hvite_scp, file_type='wav') + +df = stimmen_functions.load_transcriptions() +word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)] +word_list = sorted(word_list) + +#for index, row in df.iterrows(): +# filename = row['filename'] +# if isinstance(filename, str): +# wav_file = os.path.join(default.stimmen_wav_dir, filename) +# if os.path.exists(wav_file): +# shutil.copy(wav_file, os.path.join(stimmen_test_dir, filename)) +# pyhtk.create_label_file( +# row['word'], +# os.path.join(stimmen_test_dir, filename.replace('.wav', '.lab'))) -## ======================= convert phones ====================== -mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) +# after manually removed files which does not contain clear sound, +# update df as df_test. +#wav_file_list = glob.glob(os.path.join(stimmen_test_dir, '*.wav')) +#df_test = pd.DataFrame(index=[], columns=list(df.keys())) +#for wav_file in wav_file_list: +# filename = os.path.basename(wav_file) +# df_ = df[df['filename'].str.match(filename)] +# df_test = pd.concat([df_test, df_]) -xls = pd.ExcelFile(excel_file) - -## check conversion -#df = pd.read_excel(xls, 'frequency') -#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): -# #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_) -# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) -# if not ipa_converted == ipa: -# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) +#output = pyhtk.recognition( +# os.path.join(default.htk_dir, 'config', 'config.rec', +# lattice_file, +# os.path.join(model_dir, 'hmm1', 'iter13'), +# dictionary_file, +# os.path.join(config_dir, 'phonelist.txt'), +# hvite_scp) -## check phones included in FAME! -# the phones used in the lexicon. -#phonelist = am_func.get_phonelist(lex_asr) +## check phones included in stimmen but not in FAME! +splitted_ipas = [' '.join( + convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones)) + for ipa in df['ipa']] +stimmen_phones = set(' '.join(splitted_ipas)) +stimmen_phones = list(stimmen_phones) +#stimmen_phones = list(set(fame_asr.phone_reduction(list(stimmen_phones)))) +#fame_phones = fame_asr.phoneset_short +fame_phones = fame_ipa.phoneset +stimmen_phones.sort() +fame_phones.sort() +print('phones which are used in stimmen transcription but not in FAME corpus are:\n{}'.format( + set(stimmen_phones) - set(fame_phones) + )) +for ipa in df['ipa']: + ipa_splitted = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones) + if ':' in ipa_splitted: + print(ipa_splitted) + +htk = [fame_functions.ipa2htk(ipa) for ipa in df['ipa']] + +ipa = 'e:χ' +fame_functions.ipa2htk(ipa) -# the lines which include a specific phone. -#lines = am_func.find_phone(lex_asr, 'x') # Filename, Word, Self Xsampa diff --git a/acoustic_model/novoapi_forced_alignment.py b/acoustic_model/novoapi_forced_alignment.py index 932d7c1..3fd32b9 100644 --- a/acoustic_model/novoapi_forced_alignment.py +++ b/acoustic_model/novoapi_forced_alignment.py @@ -52,7 +52,7 @@ p = argparse.ArgumentParser() #p.add_argument("--user", default=None) #p.add_argument("--password", default=None) p.add_argument("--user", default='martijn.wieling') -p.add_argument("--password", default='fa0Thaic') +p.add_argument("--password", default='xxxxxx') args = p.parse_args() #wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav' diff --git a/acoustic_model/novoapi_functions.py b/acoustic_model/novoapi_functions.py index 0c72b45..3cd502e 100644 --- a/acoustic_model/novoapi_functions.py +++ b/acoustic_model/novoapi_functions.py @@ -173,7 +173,7 @@ def forced_alignment(wav_file, word, pronunciation_ipa): # username / password cannot be passed as artuments... p = argparse.ArgumentParser() p.add_argument("--user", default='martijn.wieling') - p.add_argument("--password", default='fa0Thaic') + p.add_argument("--password", default='xxxxxx') args = p.parse_args() rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir) diff --git a/acoustic_model/phoneset/fame_asr.py b/acoustic_model/phoneset/fame_asr.py index 22e9d65..b11359b 100644 --- a/acoustic_model/phoneset/fame_asr.py +++ b/acoustic_model/phoneset/fame_asr.py @@ -73,12 +73,14 @@ reduction_key = { # already removed beforehand in phoneset. Just to be sure. phones_to_be_removed = ['ú', 's:', 'ɔ̈:'] -phoneset_short = [reduction_key.get(i, i) for i in phoneset +def phone_reduction(phones): + return [reduction_key.get(i, i) for i in phones if not i in phones_to_be_removed] -phoneset_short = list(set(phoneset_short)) +phoneset_short = list(set(phone_reduction(phoneset))) phoneset_short.sort() + ## translation_key to htk format (ascii). # phones which gives UnicodeEncodeError when phone.encode("ascii") # are replaced with other characters. diff --git a/acoustic_model/phoneset/fame_ipa.py b/acoustic_model/phoneset/fame_ipa.py index 4d44f0a..8859b9f 100644 --- a/acoustic_model/phoneset/fame_ipa.py +++ b/acoustic_model/phoneset/fame_ipa.py @@ -5,6 +5,7 @@ phoneset = [ 'i̯', 'i̯ⁿ', 'y', + 'y:', # not included in lex.ipa, but in stimmen. 'i', 'i.', 'iⁿ', @@ -13,7 +14,7 @@ phoneset = [ 'ɪ', 'ɪⁿ', 'ɪ.', - #'ɪ:', # not included in lex.ipa + 'ɪ:', # not included in lex.ipa, but in stimmen. 'ɪ:ⁿ', 'e', 'e:', @@ -100,7 +101,37 @@ phoneset = [ 'l' ] +## reduce the number of phones. +# the phones which are used in stimmen transcription but not in FAME corpus. +# replacements are based on the advice from Jelske Dijkstra on 2018/06/21. +stimmen_replacement = { + 'æ': 'ɛ', + 'ø': 'ö', # or 'ö:' + 'ø:': 'ö:', # Aki added. + 'œ': 'ɔ̈', # or 'ɔ̈:' + 'œ:': 'ɔ̈:', # Aki added. + 'ɐ': 'a', # or 'a:' + 'ɐ:': 'a:', # Aki added. + 'ɑ': 'a', # or 'a:' + 'ɑ:': 'a:', # Aki added + 'ɒ': 'ɔ', # or 'ɔ:' + 'ɒ:': 'ɔ:', # Aki added. + 'ɾ': 'r', + 'ʁ': 'r', + 'ʊ': 'u', + 'χ': 'x', + # aki guessed. + 'ʀ': 'r', + 'ɹ': 'r', + 'w': 'ö' + } +phoneset.extend(list(stimmen_replacement.keys())) + +def phone_reduction(phones): + return [stimmen_replacement.get(i, i) for i in phones] + + ## the list of multi character phones. # for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter. multi_character_phones = [i for i in phoneset if len(i) > 1] diff --git a/acoustic_model/phoneset/fame_ipa2asr.npy b/acoustic_model/phoneset/fame_ipa2asr.npy index b8852ba0b63ce511930af4ffc0c9b8cf6e5f11c6..687111dbefc9a0c10d11e33d9ce37689b6e48491 100644 GIT binary patch delta 38 scmbQvvyNwj0GnI{GXn#|$yHW`y&1g`Oh9&}RbgL7YiVLhVSkby0ND%-5C8xG delta 15 WcmZ3-Go5FH02_O0Vo70dk{$pj+yw&w diff --git a/acoustic_model/stimmen_functions.py b/acoustic_model/stimmen_functions.py new file mode 100644 index 0000000..9d28093 --- /dev/null +++ b/acoustic_model/stimmen_functions.py @@ -0,0 +1,38 @@ +import os +os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') + +import pandas as pd + +import convert_xsampa2ipa +import defaultfiles as default + + +def load_transcriptions(): + stimmen_transcription = pd.ExcelFile(default.stimmen_transcription_xlsx) + df = pd.read_excel(stimmen_transcription, 'original') + + # mapping from ipa to xsampa + mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) + #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): + # ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) + # if not ipa_converted == ipa: + # print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) + + ipas = [] + for xsampa in df['Self Xsampa']: + if not isinstance(xsampa, float): # 'NaN' + # typo? + xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t').replace(';', ':') + + ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) + ipa = ipa.replace('ː', ':').replace(' ', '') + ipas.append(ipa) + else: + ipas.append('') + + df_ = pd.DataFrame({'filename': df['Filename'], + 'word': df['Word'], + 'xsampa': df['Self Xsampa'], + 'ipa': pd.Series(ipas)}) + df_ = df_[~df_['ipa'].str.contains('/')] + return df_.dropna()