54 lines
1.6 KiB
Python
54 lines
1.6 KiB
Python
|
import os
|
|||
|
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
|||
|
|
|||
|
import sys
|
|||
|
|
|||
|
import pandas as pd
|
|||
|
|
|||
|
|
|||
|
## ======================= user define =======================
# Machine-specific locations; edit these to match the local checkout layout.

# Directory appended to sys.path so that `forced_alignment` can be imported.
forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment'
# Directory handed to convert_xsampa2ipa.load_converter().
ipa_xsampa_converter_dir = r'C:\Users\Aki\source\repos\ipa-xsama-converter'

# Root directory of the experiment data.
experiments_dir = r'c:\OneDrive\Research\rug\experiments'
# Workbook holding the transcriptions. Built with os.path.join rather than
# hand-concatenated '\\' separators; on Windows the resulting path is the same.
excel_file = os.path.join(
    experiments_dir,
    'stimmen',
    'data',
    'Frisian Variants Picture Task Stimmen.xlsx',
)
|
|||
|
|
|||
|
|
|||
|
## ======================= add paths =======================
|
|||
|
|
|||
|
sys.path.append(forced_alignment_module)
|
|||
|
from forced_alignment import convert_phone_set
|
|||
|
|
|||
|
import convert_xsampa2ipa
|
|||
|
|
|||
|
|
|||
|
# Read the 'frequency' sheet of the workbook. The context manager closes the
# workbook handle as soon as the DataFrame has been loaded, instead of leaving
# it open for the rest of the script.
with pd.ExcelFile(excel_file) as xls:
    df = pd.read_excel(xls, 'frequency')

# Conversion table passed to xsampa2ipa() for every row.
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir)
|
|||
|
|
|||
|
def xsampa2ipa(mapping, xsampa):
    """Convert the X-SAMPA string *xsampa* to IPA using *mapping*.

    Args:
        mapping: Conversion table. It is iterated for its keys and queried
            with ``.get``, so a dict of X-SAMPA symbol -> IPA symbol fits.
        xsampa: The X-SAMPA transcription to convert.

    Returns:
        The converted string. Tokens without an entry in *mapping* are
        copied through unchanged.
    """
    # Symbols longer than one character have to be kept together when
    # 'xsampa' is split into tokens.
    multi_character_list = [symbol for symbol in mapping if len(symbol) > 1]

    # conversion
    tokens = convert_phone_set.multi_character_tokenize(xsampa, multi_character_list)
    converted = ''.join(mapping.get(token, token) for token in tokens)

    # strange conversion: swap the script 'ɡ' for a plain 'g'
    # (presumably to match the reference transcriptions - TODO confirm).
    return converted.replace('ɡ', 'g')
|
|||
|
|
|||
|
|
|||
|
# Convert every X-SAMPA entry and report the rows whose result differs from
# the IPA column of the sheet.
for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
    # Alternative kept for reference:
    # convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa)
    ipa_converted = xsampa2ipa(mapping, xsampa)
    if ipa_converted != ipa:
        # Report the loop variable `xsampa`; the previously used `xsampa_`
        # was never defined and raised NameError on the first mismatch.
        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
|