import os

# NOTE(review): hard-coded, machine-specific working directory that is changed
# at import time. It runs before the project-local imports below, so they
# presumably rely on it -- confirm before removing or parameterizing.
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import pandas as pd

import convert_xsampa2ipa
import defaultfiles as default


def _xsampa_to_ipa(mapping, xsampa):
    """Convert one 'Self Xsampa' cell to an IPA string ('' for empty cells)."""
    # Empty spreadsheet cells are read by pandas as float NaN.
    if isinstance(xsampa, float):
        return ''

    # Normalize the transcription before conversion: 'r2:z@rA:\t' (with a
    # literal backslash) is presumably a typo in the sheet, and ';' is used
    # where ':' is meant.
    xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t').replace(';', ':')
    ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)

    # Use an ASCII colon instead of the IPA length mark and drop spaces.
    return ipa.replace('ː', ':').replace(' ', '')


def load_transcriptions():
    """Load the transcription sheet and add an IPA column.

    Reads the 'original' sheet of the workbook at
    ``default.stimmen_transcription_xlsx`` and converts every value in its
    'Self Xsampa' column to IPA with ``convert_xsampa2ipa.xsampa2ipa``.

    Returns:
        pandas.DataFrame: columns 'filename', 'word', 'xsampa' and 'ipa'.
        Rows whose IPA string contains '/' are removed, and after that every
        row with a missing value in any column is dropped (this includes the
        rows whose 'Self Xsampa' cell was empty).
    """
    # Close the workbook as soon as the sheet is read; leaving the ExcelFile
    # open keeps a handle on the .xlsx until garbage collection.
    with pd.ExcelFile(default.stimmen_transcription_xlsx) as workbook:
        df = pd.read_excel(workbook, 'original')

    # Converter loaded for the ('xsampa', 'ipa') pair; it is passed to
    # xsampa2ipa below.
    mapping = convert_xsampa2ipa.load_converter(
        'xsampa', 'ipa', default.ipa_xsampa_converter_dir)

    # Disabled consistency check of the sheet's own 'X-SAMPA'/'IPA' columns:
    # for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
    #     ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
    #     if not ipa_converted == ipa:
    #         print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))

    ipas = [_xsampa_to_ipa(mapping, xsampa) for xsampa in df['Self Xsampa']]

    df_ = pd.DataFrame({
        'filename': df['Filename'],
        'word': df['Word'],
        'xsampa': df['Self Xsampa'],
        # Pin the index so the IPA values line up row-by-row with the sheet.
        'ipa': pd.Series(ipas, index=df.index),
    })

    # Discard entries whose IPA still contains '/'; plain substring match.
    df_ = df_[~df_['ipa'].str.contains('/', regex=False)]
    return df_.dropna()