From df0e96c4f1c7e129b9e1a0d9f1f37c7ad7c371a7 Mon Sep 17 00:00:00 2001 From: yemaozi88 <428968@gmail.com> Date: Fri, 24 Aug 2018 23:42:32 +0200 Subject: [PATCH] when convert_xsampa2ipa.conversion is used, '/' is recognized as escape sequence. to fix this bug, xsampa2ipa function is made. --- .vs/acoustic_model/v15/.suo | Bin 34304 -> 60416 bytes .vs/config/applicationhost.config | 1031 ++++++++++++++++++++++ acoustic_model/acoustic_model.pyproj | 4 +- acoustic_model/phone_conversion_check.py | 54 ++ acoustic_model/pyKaldi.py | 26 - 5 files changed, 1087 insertions(+), 28 deletions(-) create mode 100644 .vs/config/applicationhost.config create mode 100644 acoustic_model/phone_conversion_check.py delete mode 100644 acoustic_model/pyKaldi.py diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index fea19a8bbf4d409361bcc61ea5e2f0c8856e8da6..08f86848888a22747bde3877c133438504588205 100644 GIT binary patch delta 5762 zcmeHKeN5En9lz(eljB7=;N)P_isd zm-R_L_kDTZzt8jiJiqgeDt$L1&UY!}toMEq^^fJ(#R7#w5iB9E(knipyUJnpW;ohu zFv3p9n-5y?inr!p7^w8%4S-^_P6}FH^trKe}4Nd+!)YJg$fek<{U+(T)w*pnM4Y9^e+&A%6?}PPT7A z{sH(H_#^Nq;LpHK;4i=};I9Dpf|VV&+CNa5iS0Zr(`atq{E zAapy(m88>z8q#P|>UPc~&X~~Ib2f1U&Y%H+Gsq3_Rbc;qSss8Kl=XvN8M2r~Aq4q7+OP8M0sW z(LlTG@lrz0bQ+H%nG#_*Ig4& zUqg+Gm%zhF({H4!+Qnp2s_0%y&D2xOv(sg*nLJrO(MET(hQ&@A$UfoMd7d(Jtng=5 z%FP{`XE7Ty8*e78?}TWifgHO=gR00-bpzdn3)f;IU3QUYzAu)qw+`UqH(a4A!*wIMuSGh>*FAWh44!8YE>u#@$_0*OQ2=_&y+$?rhQ=~pV=Gea>`xmx2IFB zF)Onj9N2^Rx>?aCY5Sn}gZDbvq9evRY5c{&g{1>n4G2I%+4c-hv9^AAm{~}cdncXDOBU~w&2g5xYIU(; z!>pk%9B0L8vMt_Eqr2<13o(44tBrJ-XSwsNasZZPFb7YFrV+uWb^nNX7eP-F-sq zYVYlFHn;Wm>~r?JJ9>NQ%x1F~qubk7ka}y1qr=r|Jm7BaZZ%Fe@i_ap!`I9{i0)Qb zL8rU7=g^^pr6!xDVu`KTSZTH`F&3FjrAAv}Nr`b)VRdy$>8esowaqm2OP(`P9!egh z5aUr@%Od)4$JM|`o6iXPV%s|i{FJMUqP`t1U#)T1e|e2bPuZW1TKA;q&2S>A@xmE4 zrn-(imDy5MNJAYdVh&$MW!2Q1&q#axz?$3Ioo#yyT!ZykV@bx8)rLB*fAYM`o7X4v zvpZSzL1rN?RnwhS%A6<95KGv=QzTDa^|X_(?Woq(Pfm&7Tkm$`HwFk?+i^i8@JDp0 z{1A$kUTtn1TdAbnEyn|gTJ)+w`R;RK4tJU?l|lY!W7e#~JvvL9IzJn;Dg)NNSrL>q z@Wvwqu9gTqaDQQ>h}`@^G!Hd?TdOE&+ z_!`(}$Z0cu4IX9uX_xmM=Z8Pi4>@1r^zo22|bq3IP1g8X-y z%M_F!c(wVTU&r%-O`S0+O4yoDFSh@DEbrUF=LIw7b7}i@vnfaW1d%&j=cRwZm3=lz z9_t%?+v^h)?Dxqs7_c6I!{@e=)ol$KKDzBK!V|;)Wln$fN#9Rdbv^X?!FoFH^-NRk z_f^oP!Ic4@&nm_aD@kjQrrNq?^y~a|x_M+L`9^|NKb%aB-!ouY%ZZLIqH{;<|I=8KlDgt@eC3HE_t*XlNzxw6Igz;Ft-*IMzVzkm=WjfJk4#=2 Zy;YP(Yky4V;@%X*aSP*DtSd8Hy5IXG-j4Ra%t zriLf-gqjFsXK*Qz>JW1!nRG_vH3$P@K0=Kk{R*Udi6_~L`~pdSiZZlB#VuNbidB*_ z1?f^mDq-a%se_u3`HX|E$k!Dr(s_Q$(554^nM#C9kSlHLA zL1XPOy)ft&oN>ME3_Omz`WJJHE7hC{kJ1d$ad0Mk*hZL6?qP3(YuOB|1Xo%mj2cX= z0;WwfbIS1`wK*=zyfr0aM8%_s(A8k5`a5S+i~38)&$Fr6 zi|5ThzXAHR`WKZLj&GFB*F4|Bbz;ZX zV#n5TTe!{m+laIuSp6xCl84Lnr-1h!h3JMiVWgoDt+B1Ip71xwz;E{e-^_!*xesL7 z2@uuFpYS*0w`k%uG?etfhH-G$eL>WA9A|6@?zs=<);Zc5>|3+!ZT9Z&ow-F;Q;s<& zH?!2TdUdUiY| + + + + + + +
+
+
+
+
+
+
+
+ + + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ +
+
+ +
+
+
+
+
+
+ +
+
+
+
+
+ +
+
+
+ +
+
+ +
+
+ +
+
+
+ + +
+
+
+
+
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj index 2230f18..74793ee 100644 --- a/acoustic_model/acoustic_model.pyproj +++ b/acoustic_model/acoustic_model.pyproj @@ -4,7 +4,7 @@ 2.0 4d8c8573-32f0-4a62-9e62-3ce5cc680390 . - performance_check.py + phone_conversion_check.py . @@ -31,7 +31,7 @@ Code - + Code diff --git a/acoustic_model/phone_conversion_check.py b/acoustic_model/phone_conversion_check.py new file mode 100644 index 0000000..743cdd0 --- /dev/null +++ b/acoustic_model/phone_conversion_check.py @@ -0,0 +1,54 @@ +import os +os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') + +import sys + +import pandas as pd + + +## ======================= user define ======================= + +forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment' +ipa_xsampa_converter_dir = r'C:\Users\Aki\source\repos\ipa-xsama-converter' + +experiments_dir = r'c:\OneDrive\Research\rug\experiments' +excel_file = experiments_dir + '\\stimmen\\data\\Frisian Variants Picture Task Stimmen.xlsx' + + +## ======================= add paths ======================= + +sys.path.append(forced_alignment_module) +from forced_alignment import convert_phone_set + +import convert_xsampa2ipa + + +xls = pd.ExcelFile(excel_file) +df = pd.read_excel(xls, 'frequency') + +mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir) + +def xsampa2ipa(mapping, xsampa): + # make a multi_character_list to split 'xsampa'. + multi_character_list = [] + for i in list(mapping): + if len(i) > 1: + multi_character_list.append(i) + + # conversion + ipa = [] + for phone in convert_phone_set.multi_character_tokenize(xsampa, multi_character_list): + ipa.append(mapping.get(phone, phone)) + ipa = ''.join(ipa) + + # strange conversion. + ipa = ipa.replace('ɡ', 'g') + + return ipa + + +for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): + #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_) + ipa_converted = xsampa2ipa(mapping, xsampa) + if not ipa_converted == ipa: + print('{0}: {1} - {2}'.format(xsampa_, ipa_converted, ipa)) \ No newline at end of file diff --git a/acoustic_model/pyKaldi.py b/acoustic_model/pyKaldi.py deleted file mode 100644 index c65a99b..0000000 --- a/acoustic_model/pyKaldi.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -import sys - -forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment' - -## ======================= add paths ======================= - -sys.path.append(forced_alignment_module) -from forced_alignment import convert_phone_set - - -htk_dict_file = r'C:\OneDrive\Research\rug\experiments\stimmen\dic_top3\Reus.dic' -#kaldi_lexicon = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\data\lang\phones\' -alignment_txt = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\exp\tri1_alignme\merged_alignment.txt' -phones_txt = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\exp\tri1_alignme\phones.txt' -phone_map_txt = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\data\local\lang\phone_map.txt' - -with open(phone_map_txt, 'r', encoding="utf-8") as f: - lines = f.read() - lines = lines.split('\n') - -with open(alignment_txt, 'r', encoding="utf-8") as f: - lines = -#phone_in = [line for line in lines if 'SIL' in line] -#if len(phone_in) == 1: - \ No newline at end of file