39 lines
1.2 KiB
Python
39 lines
1.2 KiB
Python
|
import os
|
|||
|
# NOTE(review): hard-coded, machine-specific working directory. Presumably set
# so that the project-local imports below (convert_xsampa2ipa, defaultfiles)
# and any relative paths resolve -- confirm before running on another machine.
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
|||
|
|
|||
|
import pandas as pd
|
|||
|
|
|||
|
import convert_xsampa2ipa
|
|||
|
import defaultfiles as default
|
|||
|
|
|||
|
|
|||
|
def load_transcriptions():
    """Load the Stimmen transcriptions and add an IPA column.

    Reads the 'original' sheet of the workbook at
    ``default.stimmen_transcription_xlsx`` and converts every
    'Self Xsampa' entry to IPA with ``convert_xsampa2ipa``.

    Returns:
        A pandas DataFrame with the columns 'filename', 'word', 'xsampa'
        (the value exactly as stored in the sheet) and 'ipa' (the
        converted transcription, with the length mark written as ':' and
        spaces removed). Rows whose IPA contains '/' and rows with any
        missing value are left out.
    """
    # Open the workbook as a context manager so the file handle is released
    # as soon as the sheet has been read.
    with pd.ExcelFile(default.stimmen_transcription_xlsx) as stimmen_transcription:
        df = pd.read_excel(stimmen_transcription, 'original')

    # mapping from xsampa to ipa
    mapping = convert_xsampa2ipa.load_converter(
        'xsampa', 'ipa', default.ipa_xsampa_converter_dir)
    # NOTE: the sheet's 'X-SAMPA' / 'IPA' columns were previously used here to
    # sanity-check the converter output by hand.

    ipas = []
    for xsampa in df['Self Xsampa']:
        if isinstance(xsampa, float):  # 'NaN' (empty cell)
            ipas.append('')
            continue

        # typo? -- repair a stray backslash and ';' written instead of ':'
        xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t').replace(';', ':')

        ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
        ipa = ipa.replace('ː', ':').replace(' ', '')
        ipas.append(ipa)

    df_ = pd.DataFrame({
        'filename': df['Filename'],
        'word': df['Word'],
        'xsampa': df['Self Xsampa'],
        # Reuse df's index so the column aligns row-by-row with the others.
        'ipa': pd.Series(ipas, index=df.index),
    })
    # Discard entries whose transcription contains '/'.
    df_ = df_[~df_['ipa'].str.contains('/')]
    # Rows without a 'Self Xsampa' value still hold NaN in 'xsampa' and are
    # removed here together with any other incomplete row.
    return df_.dropna()
|