When convert_xsampa2ipa.conversion is used, '/' is recognized as an escape sequence. To fix this bug, the xsampa2ipa function was added.
This commit is contained in:
parent
22b9ae966b
commit
df0e96c4f1
Binary file not shown.
1031
.vs/config/applicationhost.config
Normal file
1031
.vs/config/applicationhost.config
Normal file
File diff suppressed because it is too large
Load Diff
@ -4,7 +4,7 @@
|
|||||||
<SchemaVersion>2.0</SchemaVersion>
|
<SchemaVersion>2.0</SchemaVersion>
|
||||||
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
||||||
<ProjectHome>.</ProjectHome>
|
<ProjectHome>.</ProjectHome>
|
||||||
<StartupFile>performance_check.py</StartupFile>
|
<StartupFile>phone_conversion_check.py</StartupFile>
|
||||||
<SearchPath>
|
<SearchPath>
|
||||||
</SearchPath>
|
</SearchPath>
|
||||||
<WorkingDirectory>.</WorkingDirectory>
|
<WorkingDirectory>.</WorkingDirectory>
|
||||||
@ -31,7 +31,7 @@
|
|||||||
<Compile Include="performance_check.py">
|
<Compile Include="performance_check.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
<Compile Include="pyKaldi.py">
|
<Compile Include="phone_conversion_check.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
54
acoustic_model/phone_conversion_check.py
Normal file
54
acoustic_model/phone_conversion_check.py
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
import os
|
||||||
|
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
## ======================= user define =======================
|
||||||
|
|
||||||
|
forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment'
|
||||||
|
ipa_xsampa_converter_dir = r'C:\Users\Aki\source\repos\ipa-xsama-converter'
|
||||||
|
|
||||||
|
experiments_dir = r'c:\OneDrive\Research\rug\experiments'
|
||||||
|
excel_file = experiments_dir + '\\stimmen\\data\\Frisian Variants Picture Task Stimmen.xlsx'
|
||||||
|
|
||||||
|
|
||||||
|
## ======================= add paths =======================
|
||||||
|
|
||||||
|
sys.path.append(forced_alignment_module)
|
||||||
|
from forced_alignment import convert_phone_set
|
||||||
|
|
||||||
|
import convert_xsampa2ipa
|
||||||
|
|
||||||
|
|
||||||
|
xls = pd.ExcelFile(excel_file)
|
||||||
|
df = pd.read_excel(xls, 'frequency')
|
||||||
|
|
||||||
|
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir)
|
||||||
|
|
||||||
|
def xsampa2ipa(mapping, xsampa):
    """Convert the X-SAMPA string ``xsampa`` to IPA using ``mapping``.

    Every key of ``mapping`` that is longer than one character is handed to
    ``convert_phone_set.multi_character_tokenize`` as the list of
    multi-character symbols used to split ``xsampa`` into phones. Each phone
    is then replaced by ``mapping[phone]`` when the key exists and is kept
    unchanged otherwise, and the pieces are concatenated.

    Args:
        mapping: dict-like object whose keys are X-SAMPA symbols and whose
            values are the corresponding IPA symbols.
        xsampa: the X-SAMPA transcription to convert.

    Returns:
        The converted string, with every 'ɡ' replaced by 'g'.
    """
    # Symbols longer than one character, needed by the tokenizer to split
    # 'xsampa'.
    multi_character_list = [symbol for symbol in mapping if len(symbol) > 1]

    # Conversion: phones without an entry in the mapping pass through as-is.
    phones = convert_phone_set.multi_character_tokenize(
        xsampa, multi_character_list)
    converted = ''.join(mapping.get(phone, phone) for phone in phones)

    # Strange conversion: the mapping yields 'ɡ'; replace it with plain 'g'.
    return converted.replace('ɡ', 'g')
|
||||||
|
|
||||||
|
|
||||||
|
# Convert every X-SAMPA entry of the sheet and print the rows whose converted
# form differs from the IPA column, as "<xsampa>: <converted> - <expected>".
for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
    # xsampa2ipa() is used here in place of
    # convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa).
    ipa_converted = xsampa2ipa(mapping, xsampa)
    if ipa_converted != ipa:
        # The loop variable is 'xsampa'; the previous 'xsampa_' was never
        # defined and raised NameError on the first mismatch.
        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
|
@ -1,26 +0,0 @@
|
|||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment'
|
|
||||||
|
|
||||||
## ======================= add paths =======================
|
|
||||||
|
|
||||||
sys.path.append(forced_alignment_module)
|
|
||||||
from forced_alignment import convert_phone_set
|
|
||||||
|
|
||||||
|
|
||||||
htk_dict_file = r'C:\OneDrive\Research\rug\experiments\stimmen\dic_top3\Reus.dic'
|
|
||||||
#kaldi_lexicon = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\data\lang\phones\'
|
|
||||||
alignment_txt = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\exp\tri1_alignme\merged_alignment.txt'
|
|
||||||
phones_txt = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\exp\tri1_alignme\phones.txt'
|
|
||||||
phone_map_txt = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\data\local\lang\phone_map.txt'
|
|
||||||
|
|
||||||
with open(phone_map_txt, 'r', encoding="utf-8") as f:
|
|
||||||
lines = f.read()
|
|
||||||
lines = lines.split('\n')
|
|
||||||
|
|
||||||
with open(alignment_txt, 'r', encoding="utf-8") as f:
|
|
||||||
lines =
|
|
||||||
#phone_in = [line for line in lines if 'SIL' in line]
|
|
||||||
#if len(phone_in) == 1:
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user