When convert_xsampa2ipa.conversion is used, '/' is recognized as an escape sequence. To fix this bug, the xsampa2ipa function was added.

This commit is contained in:
yemaozi88 2018-08-24 23:42:32 +02:00
parent 22b9ae966b
commit df0e96c4f1
5 changed files with 1087 additions and 28 deletions

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -4,7 +4,7 @@
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
<ProjectHome>.</ProjectHome>
<StartupFile>performance_check.py</StartupFile>
<StartupFile>phone_conversion_check.py</StartupFile>
<SearchPath>
</SearchPath>
<WorkingDirectory>.</WorkingDirectory>
@ -31,7 +31,7 @@
<Compile Include="performance_check.py">
<SubType>Code</SubType>
</Compile>
<Compile Include="pyKaldi.py">
<Compile Include="phone_conversion_check.py">
<SubType>Code</SubType>
</Compile>
</ItemGroup>

View File

@ -0,0 +1,54 @@
# Setup for the phone-conversion check: configures machine-specific paths,
# loads the 'frequency' sheet of the stimuli workbook and the X-SAMPA -> IPA
# conversion mapping.
import os
# Switch the working directory to the project folder (hard-coded, machine-specific).
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import sys
import pandas as pd
## ======================= user define =======================
# Folder added to sys.path below so that `forced_alignment` can be imported.
forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment'
# Folder handed to convert_xsampa2ipa.load_converter().
# NOTE(review): the folder name reads 'xsama', not 'xsampa' -- confirm it matches the directory on disk.
ipa_xsampa_converter_dir = r'C:\Users\Aki\source\repos\ipa-xsama-converter'
experiments_dir = r'c:\OneDrive\Research\rug\experiments'
# Excel workbook that provides the 'X-SAMPA' and 'IPA' columns read further down.
excel_file = experiments_dir + '\\stimmen\\data\\Frisian Variants Picture Task Stimmen.xlsx'
## ======================= add paths =======================
# Must run before the `forced_alignment` import on the next line.
sys.path.append(forced_alignment_module)
from forced_alignment import convert_phone_set
# NOTE(review): presumably resolved from the script/project folder -- confirm where this module lives.
import convert_xsampa2ipa
# Read only the 'frequency' sheet of the workbook.
xls = pd.ExcelFile(excel_file)
df = pd.read_excel(xls, 'frequency')
# Conversion table from X-SAMPA to IPA; used via iteration and .get() in xsampa2ipa().
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir)
def xsampa2ipa(mapping, xsampa):
    """Convert an X-SAMPA string to IPA using *mapping*.

    Args:
        mapping: Lookup table whose keys are X-SAMPA symbols and whose values
            are the corresponding IPA symbols. It is iterated for its keys
            and queried with ``.get()``.
        xsampa: The X-SAMPA string to convert.

    Returns:
        The converted string. Tokens that have no entry in *mapping* are
        copied through unchanged.
    """
    # Keys longer than one character are handed to the tokenizer as the
    # multi-character list used to split `xsampa`.
    multi_char_symbols = [symbol for symbol in mapping if len(symbol) > 1]

    # Tokenize, then translate token by token (unknown tokens pass through).
    tokens = convert_phone_set.multi_character_tokenize(xsampa, multi_char_symbols)
    converted = ''.join(mapping.get(token, token) for token in tokens)

    # Strange conversion: replace the 'ɡ' character produced by the mapping with 'g'.
    return converted.replace('ɡ', 'g')
# Convert every X-SAMPA entry and report the rows whose converted IPA differs
# from the reference 'IPA' column.
# xsampa2ipa() is used instead of convert_xsampa2ipa.conversion() because the
# latter mishandles escape-like characters in the X-SAMPA input.
for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
    ipa_converted = xsampa2ipa(mapping, xsampa)
    if ipa_converted != ipa:
        # Print: source X-SAMPA, converted IPA, expected IPA.
        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))

View File

@ -1,26 +0,0 @@
# Scratch script (removed in this commit): sets up paths to HTK/Kaldi artefacts
# and starts reading the phone map and the merged alignment file.
# NOTE(review): the script is unfinished -- see the incomplete assignment below.
import os
import sys
forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment'
## ======================= add paths =======================
# Must run before the `forced_alignment` import on the next line.
sys.path.append(forced_alignment_module)
from forced_alignment import convert_phone_set
# Hard-coded, machine-specific input files.
htk_dict_file = r'C:\OneDrive\Research\rug\experiments\stimmen\dic_top3\Reus.dic'
#kaldi_lexicon = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\data\lang\phones\'
alignment_txt = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\exp\tri1_alignme\merged_alignment.txt'
phones_txt = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\exp\tri1_alignme\phones.txt'
phone_map_txt = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\data\local\lang\phone_map.txt'
# Read the whole phone map as UTF-8 text and split it into lines.
with open(phone_map_txt, 'r', encoding="utf-8") as f:
lines = f.read()
lines = lines.split('\n')
# Open the merged alignment file as UTF-8 text.
with open(alignment_txt, 'r', encoding="utf-8") as f:
# NOTE(review): assignment has no right-hand side -- this is a syntax error as written.
lines =
#phone_in = [line for line in lines if 'SIL' in line]
#if len(phone_in) == 1: