"""Check the X-SAMPA -> IPA conversion against the 'frequency' sheet.

Every row of the sheet provides an X-SAMPA transcription and the IPA
transcription it is expected to convert to; rows whose converted result
differs from the IPA column are printed.
"""
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import sys

import pandas as pd


## ======================= user define =======================

forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment'
ipa_xsampa_converter_dir = r'C:\Users\Aki\source\repos\ipa-xsama-converter'
experiments_dir = r'c:\OneDrive\Research\rug\experiments'

excel_file = experiments_dir + '\\stimmen\\data\\Frisian Variants Picture Task Stimmen.xlsx'


## ======================= add paths =======================

# The project-local imports below can only be resolved after this path
# has been appended, so they cannot be moved to the top of the file.
sys.path.append(forced_alignment_module)
from forced_alignment import convert_phone_set

import convert_xsampa2ipa


xls = pd.ExcelFile(excel_file)
df = pd.read_excel(xls, 'frequency')

mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir)


def xsampa2ipa(mapping, xsampa):
    """Convert an X-SAMPA transcription into IPA.

    Args:
        mapping: dict-like lookup table from X-SAMPA symbols to IPA
            symbols (it is iterated for its keys and queried with
            ``mapping.get``).
        xsampa: the X-SAMPA transcription to convert.

    Returns:
        The converted transcription as a single string. Symbols that are
        missing from ``mapping`` are copied through unchanged.
    """
    # Symbols longer than one character have to be known to the tokenizer,
    # otherwise 'xsampa' would be split inside them.
    multi_character_list = [symbol for symbol in mapping if len(symbol) > 1]

    # conversion
    phones = convert_phone_set.multi_character_tokenize(
        xsampa, multi_character_list)
    ipa = ''.join(mapping.get(phone, phone) for phone in phones)

    # strange conversion: the converted text contains 'ɡ' where a plain
    # 'g' is wanted (presumably to match the spelling in the spreadsheet).
    ipa = ipa.replace('ɡ', 'g')

    return ipa


for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
    # Alternative provided by the converter module itself:
    # ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa)
    ipa_converted = xsampa2ipa(mapping, xsampa)
    if ipa_converted != ipa:
        # Report the mismatch using the loop variable; the previous name
        # 'xsampa_' was never defined and raised NameError here.
        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))