Make sure all the phones in the stimmen transcription can be treated correctly.

yemaozi88 2019-02-06 00:00:14 +01:00
parent ab3887c6ca
commit da0242b0e1
15 changed files with 232 additions and 150 deletions

Binary file not shown.

View File

@@ -51,6 +51,7 @@
    <Compile Include="fame_hmm.py" />
    <Compile Include="phoneset\fame_asr.py" />
    <Compile Include="phoneset\fame_ipa.py" />
+   <Compile Include="stimmen_functions.py" />
  </ItemGroup>
  <ItemGroup>
    <Content Include="config.ini" />

View File

@@ -20,12 +20,12 @@ from forced_alignment import convert_phone_set
#import acoustic_model_functions as am_func
import convert_xsampa2ipa
import novoapi_functions
+import stimmen_functions
sys.path.append(default.accent_classification_dir)
import output_confusion_matrix

## procedure
forced_alignment_novo70 = True
-balance_sample_numbers = False

## ===== load novo phoneset =====
@@ -98,36 +98,7 @@ def search_phone_ipa(x, phone_list):
## ===== load all transcriptions (df) =====
-df = pd.read_excel(stimmen_transcription_, 'original')
+df = stimmen_functions.load_transcriptions()

-# mapping from ipa to xsampa
-mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
-
-#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
-#    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
-#    if not ipa_converted == ipa:
-#        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
-
-ipas = []
-famehtks = []
-for xsampa in df['Self Xsampa']:
-    if not isinstance(xsampa, float):  # 'NaN'
-        # typo?
-        xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t')
-        xsampa = xsampa.replace(';', ':')
-        ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
-        ipa = ipa.replace('ː', ':')
-        ipa = ipa.replace(' ', '')
-        ipas.append(ipa)
-    else:
-        ipas.append('')
-
-# extract interesting cols.
-df = pd.DataFrame({'filename': df['Filename'],
-                   'word': df['Word'],
-                   'xsampa': df['Self Xsampa'],
-                   'ipa': pd.Series(ipas)})

word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)]
word_list = sorted(word_list)
@@ -183,21 +154,6 @@ if forced_alignment_novo70:
        # samples in which all pronunciations are written in novo70.
        samples = df_.query("ipa in @pronunciation_ipa")

-        ## ===== balance sample numbers =====
-        if balance_sample_numbers:
-            c = Counter(samples['ipa'])
-            sample_num_list = [c[key] for key in c.keys()]
-            sample_num = np.min(sample_num_list)
-
-            samples_balanced = pd.DataFrame(index=[], columns=list(samples.keys()))
-            for key in c.keys():
-                samples_ = samples[samples['ipa'] == key]
-                samples_balanced = samples_balanced.append(samples_.sample(sample_num), ignore_index = True)
-
-            samples = samples_balanced

        results = pd.DataFrame(index=[],
                               columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])

View File

@@ -2,63 +2,40 @@ import os
# add path of the parent directory
#os.path.dirname(os.path.realpath(__file__))

-#cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
-#htk_dir = r'C:\Aki\htk_fame'
-htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk'
-#config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
-#mkhmmdefs_pl = os.path.join(cygwin_dir, 'src', 'acoustic_model', 'mkhmmdefs.pl')
-#dbLexicon = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\lexicon.accdb
-#scriptBarbara = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\pronvars_barbara.perl
-#exeG2P = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\string2phon.exe
-#[pyHTK]
-#configHVite = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\config.HVite
-#filePhoneList = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\phonelist_barbara.txt
-#AcousticModel = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\hmmdefs_16-2_barbara.compo
-#dbLexicon = config['cLexicon']['dbLexicon']
-#scriptBarbara = config['cLexicon']['scriptBarbara']
-#exeG2P = config['cLexicon']['exeG2P']
-#configHVite = config['pyHTK']['configHVite']
-#filePhoneList = config['pyHTK']['filePhoneList']
-#AcousticModel = config['pyHTK']['AcousticModel']
+# repos
repo_dir = r'C:\Users\Aki\source\repos'
ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
toolbox_dir = os.path.join(repo_dir, 'toolbox')

-#htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
-#config_hvite = os.path.join(htk_config_dir, 'config.HVite')
-#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
-#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
-phonelist_txt = os.path.join(htk_dir, 'config', 'phonelist.txt')
WSL_dir = r'C:\OneDrive\WSL'
-#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
-fame_dir = r'c:\OneDrive\Research\rug\_data\FAME'
-fame_s5_dir = os.path.join(fame_dir, 's5')
-fame_corpus_dir = os.path.join(fame_dir, 'corpus')
-experiments_dir = r'c:\OneDrive\Research\rug\experiments'
+novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi')
+#novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi'

+# working directories
+rug_dir = r'c:\OneDrive\Research\rug'
+experiments_dir = os.path.join(rug_dir, 'experiments')
+htk_dir = os.path.join(experiments_dir, 'acoustic_model', 'fame', 'htk')
stimmen_dir = os.path.join(experiments_dir, 'stimmen')
-stimmen_data_dir = os.path.join(stimmen_dir, 'data')

+# data
+fame_dir = os.path.join(rug_dir, '_data', 'FAME')
+#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')

# 44.1 kHz
#stimmen_wav_dir = os.path.join(stimmen_dir, 'wav')
# 16 kHz
stimmen_wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen'

-stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi')
-stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx')
+stimmen_transcription_xlsx = os.path.join(stimmen_dir, 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
+novo70_phoneset = os.path.join(novo_api_dir, 'asr', 'phoneset', 'nl', 'novo70.phoneset')

+#phonelist_txt = os.path.join(htk_dir, 'config', 'phonelist.txt')
+#fame_s5_dir = os.path.join(fame_dir, 's5')
+#fame_corpus_dir = os.path.join(fame_dir, 'corpus')
+#stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi')

-# novoapi_functions
-novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi')
-#novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi'
-novo70_phoneset = os.path.join(novo_api_dir, 'asr', 'phoneset', 'nl', 'novo70.phoneset')

View File

@@ -341,3 +341,14 @@ def fix_single_quote(lexicon_file):
def word2htk(word):
    return ''.join([fame_asr.translation_key_word2htk.get(i, i) for i in word])
+
+
+def ipa2htk(ipa):
+    curr_dir = os.path.dirname(os.path.abspath(__file__))
+    translation_key_ipa2asr = np.load(os.path.join(curr_dir, 'phoneset', 'fame_ipa2asr.npy')).item(0)
+
+    ipa_splitted = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones)
+    ipa_splitted = fame_ipa.phone_reduction(ipa_splitted)
+    asr_splitted = convert_phoneset.convert_phoneset(ipa_splitted, translation_key_ipa2asr)
+    htk_splitted = convert_phoneset.convert_phoneset(asr_splitted, fame_asr.translation_key_asr2htk)
+    return ''.join(htk_splitted)
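
A minimal usage sketch of the new ipa2htk helper (the input string below is illustrative, and phoneset/fame_ipa2asr.npy must already have been generated):

# illustrative call to the helper added above (made-up IPA input)
import fame_functions

ipa = 'sɔ:n'                          # hypothetical IPA pronunciation from stimmen
htk = fame_functions.ipa2htk(ipa)     # split -> phone_reduction -> ipa2asr -> asr2htk
print(htk)                            # ASCII-only phone string usable in an HTK lexicon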

View File

@@ -27,7 +27,7 @@ extract_features = 0
flat_start = 0
train_model_without_sp = 0
add_sp = 0
-train_model_with_sp = 0
+train_model_with_sp = 1
@@ -321,7 +321,8 @@ if add_sp:
## ======================= train model with short pause =======================
if train_model_with_sp:
    print('==== train model with sp ====')
-    for niter in range(niter_max+1, niter_max*2+1):
+    #for niter in range(niter_max+1, niter_max*2+1):
+    for niter in range(20, 50):
        timer_start = time.time()
        hmm_n = 'iter' + str(niter)
        hmm_n_pre = 'iter' + str(niter-1)

View File

@@ -69,6 +69,10 @@ else:
translation_key_ipa2asr['ə:'] = 'ə'
translation_key_ipa2asr['r.'] = 'r'
translation_key_ipa2asr['r:'] = 'r'
+
+# added for stimmen.
+translation_key_ipa2asr['ɪ:'] = 'ɪ:'
+translation_key_ipa2asr['y:'] = 'y'

np.save(os.path.join('phoneset', 'fame_ipa2asr.npy'), translation_key_ipa2asr)

View File

@@ -1,84 +1,145 @@
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import sys
-import csv
-import subprocess
-from collections import Counter
-import re
-import numpy as np
+#import csv
+#import subprocess
+#from collections import Counter
+#import re
+import shutil
+import glob
+#import numpy as np
import pandas as pd
-import matplotlib.pyplot as plt
-from sklearn.metrics import confusion_matrix
-import acoustic_model_functions as am_func
-import convert_xsampa2ipa
+#import matplotlib.pyplot as plt
+#from sklearn.metrics import confusion_matrix
+#import acoustic_model_functions as am_func
+#import convert_xsampa2ipa
import defaultfiles as default
-from forced_alignment import pyhtk
+#from forced_alignment import pyhtk
+#sys.path.append(default.forced_alignment_module_dir)
+#from forced_alignment import convert_phone_set
+#import acoustic_model_functions as am_func
+import convert_xsampa2ipa
+import stimmen_functions
+import fame_functions
+import convert_phoneset
+from phoneset import fame_ipa, fame_asr
+sys.path.append(default.toolbox_dir)
+import file_handling as fh
+from htk import pyhtk

## ======================= user define =======================
-excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
-data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data')
-wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k
-acoustic_model_dir = os.path.join(default.experiments_dir, 'friesian', 'acoustic_model', 'model')
-htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short')
-fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k')
-result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result')
-kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme')
-kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict')
-lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt')
+#excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
+#data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data')
+#wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k
+#acoustic_model_dir = os.path.join(default.experiments_dir, 'friesian', 'acoustic_model', 'model')
+#htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short')
+#fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k')
+#result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result')
+#kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme')
+#kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict')
+#lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt')
#lex_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
#lex_asr_htk = os.path.join(default.fame_dir, 'lexicon', 'lex.asr_htk')

-# procedure
-make_htk_dict_files = 0
-do_forced_alignment_htk = 0
-eval_forced_alignment_htk = 0
-make_kaldi_data_files = 0
-make_kaldi_lexicon_txt = 0
-load_forced_alignment_kaldi = 1
-eval_forced_alignment_kaldi = 1
+## procedure
+#make_htk_dict_files = 0
+#do_forced_alignment_htk = 0
+#eval_forced_alignment_htk = 0
+#make_kaldi_data_files = 0
+#make_kaldi_lexicon_txt = 0
+#load_forced_alignment_kaldi = 1
+#eval_forced_alignment_kaldi = 1

-## ======================= add paths =======================
-sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
-from forced_alignment import convert_phone_set
-from forced_alignment import pyhtk
-sys.path.append(os.path.join(default.repo_dir, 'toolbox'))
-from evaluation import plot_confusion_matrix
+### ======================= add paths =======================
+#sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
+#from forced_alignment import convert_phone_set
+#from forced_alignment import pyhtk
+#sys.path.append(os.path.join(default.repo_dir, 'toolbox'))
+#from evaluation import plot_confusion_matrix

+config_dir = os.path.join(default.htk_dir, 'config')
+model_dir = os.path.join(default.htk_dir, 'model')
+lattice_file = os.path.join(config_dir, 'stimmen.ltc')
+#pyhtk.create_word_lattice_file(
+#    os.path.join(config_dir, 'stimmen.net'),
+#    lattice_file)
+hvite_scp = os.path.join(default.htk_dir, 'tmp', 'stimmen_test.scp')

+## ======================= make test data ======================
+# copy wav files which is in the stimmen data.
+stimmen_test_dir = r'c:\OneDrive\Research\rug\_data\stimmen_test'
+fh.make_filelist(stimmen_test_dir, hvite_scp, file_type='wav')
+
+df = stimmen_functions.load_transcriptions()
+word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)]
+word_list = sorted(word_list)
+
+#for index, row in df.iterrows():
+#    filename = row['filename']
+#    if isinstance(filename, str):
+#        wav_file = os.path.join(default.stimmen_wav_dir, filename)
+#        if os.path.exists(wav_file):
+#            shutil.copy(wav_file, os.path.join(stimmen_test_dir, filename))
+#            pyhtk.create_label_file(
+#                row['word'],
+#                os.path.join(stimmen_test_dir, filename.replace('.wav', '.lab')))

-## ======================= convert phones ======================
-mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
-
-xls = pd.ExcelFile(excel_file)
-
-## check conversion
-#df = pd.read_excel(xls, 'frequency')
-#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
-#    #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_)
-#    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
-#    if not ipa_converted == ipa:
-#        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
-
-## check phones included in FAME!
-# the phones used in the lexicon.
-#phonelist = am_func.get_phonelist(lex_asr)
-
-# the lines which include a specific phone.
-#lines = am_func.find_phone(lex_asr, 'x')
+# after manually removed files which does not contain clear sound,
+# update df as df_test.
+#wav_file_list = glob.glob(os.path.join(stimmen_test_dir, '*.wav'))
+#df_test = pd.DataFrame(index=[], columns=list(df.keys()))
+#for wav_file in wav_file_list:
+#    filename = os.path.basename(wav_file)
+#    df_ = df[df['filename'].str.match(filename)]
+#    df_test = pd.concat([df_test, df_])
+
+#output = pyhtk.recognition(
+#    os.path.join(default.htk_dir, 'config', 'config.rec',
+#    lattice_file,
+#    os.path.join(model_dir, 'hmm1', 'iter13'),
+#    dictionary_file,
+#    os.path.join(config_dir, 'phonelist.txt'),
+#    hvite_scp)
+
+## check phones included in stimmen but not in FAME!
+splitted_ipas = [' '.join(
+    convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones))
+    for ipa in df['ipa']]
+stimmen_phones = set(' '.join(splitted_ipas))
+stimmen_phones = list(stimmen_phones)
+#stimmen_phones = list(set(fame_asr.phone_reduction(list(stimmen_phones))))
+#fame_phones = fame_asr.phoneset_short
+fame_phones = fame_ipa.phoneset
+stimmen_phones.sort()
+fame_phones.sort()
+print('phones which are used in stimmen transcription but not in FAME corpus are:\n{}'.format(
+    set(stimmen_phones) - set(fame_phones)
+))
+
+for ipa in df['ipa']:
+    ipa_splitted = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones)
+    if ':' in ipa_splitted:
+        print(ipa_splitted)
+
+htk = [fame_functions.ipa2htk(ipa) for ipa in df['ipa']]
+ipa = 'e:χ'
+fame_functions.ipa2htk(ipa)

# Filename, Word, Self Xsampa
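
A self-contained toy version of the coverage check above, with made-up phone sets (the real script uses the stimmen transcriptions and fame_ipa.phoneset):

# toy illustration of the set difference used above (hypothetical phone sets)
stimmen_phones = {'e', 'y:', 'ɪ:', 'χ'}       # phones seen in stimmen transcriptions
fame_phones = {'e', 'y:', 'x'}                # phones available in the FAME phoneset
print(sorted(stimmen_phones - fame_phones))   # ['ɪ:', 'χ'] -> these need a replacement/mapping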

View File

@@ -52,7 +52,7 @@ p = argparse.ArgumentParser()
#p.add_argument("--user", default=None)
#p.add_argument("--password", default=None)
p.add_argument("--user", default='martijn.wieling')
-p.add_argument("--password", default='fa0Thaic')
+p.add_argument("--password", default='xxxxxx')
args = p.parse_args()

#wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'

View File

@@ -173,7 +173,7 @@ def forced_alignment(wav_file, word, pronunciation_ipa):
    # username / password cannot be passed as artuments...
    p = argparse.ArgumentParser()
    p.add_argument("--user", default='martijn.wieling')
-    p.add_argument("--password", default='fa0Thaic')
+    p.add_argument("--password", default='xxxxxx')
    args = p.parse_args()

    rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)

View File

@@ -73,12 +73,14 @@ reduction_key = {
# already removed beforehand in phoneset. Just to be sure.
phones_to_be_removed = ['ú', 's:', 'ɔ̈:']

-phoneset_short = [reduction_key.get(i, i) for i in phoneset
+def phone_reduction(phones):
+    return [reduction_key.get(i, i) for i in phones
            if not i in phones_to_be_removed]
-phoneset_short = list(set(phoneset_short))
+
+phoneset_short = list(set(phone_reduction(phoneset)))
phoneset_short.sort()

## translation_key to htk format (ascii).
# phones which gives UnicodeEncodeError when phone.encode("ascii")
# are replaced with other characters.

View File

@@ -5,6 +5,7 @@ phoneset = [
    '',
    'i̯ⁿ',
    'y',
+   'y:', # not included in lex.ipa, but in stimmen.
    'i',
    'i.',
    'iⁿ',
@@ -13,7 +14,7 @@ phoneset = [
    'ɪ',
    'ɪⁿ',
    'ɪ.',
-   #'ɪ:', # not included in lex.ipa
+   'ɪ:', # not included in lex.ipa, but in stimmen.
    'ɪ:ⁿ',
    'e',
    'e:',
@@ -100,7 +101,37 @@ phoneset = [
    'l'
]

+## reduce the number of phones.
+# the phones which are used in stimmen transcription but not in FAME corpus.
+# replacements are based on the advice from Jelske Dijkstra on 2018/06/21.
+stimmen_replacement = {
+    'æ': 'ɛ',
+    'ø': 'ö', # or 'ö:'
+    'ø:': 'ö:', # Aki added.
+    'œ': 'ɔ̈', # or 'ɔ̈:'
+    'œ:': 'ɔ̈:', # Aki added.
+    'ɐ': 'a', # or 'a:'
+    'ɐ:': 'a:', # Aki added.
+    'ɑ': 'a', # or 'a:'
+    'ɑ:': 'a:', # Aki added
+    'ɒ': 'ɔ', # or 'ɔ:'
+    'ɒ:': 'ɔ:', # Aki added.
+    'ɾ': 'r',
+    'ʁ': 'r',
+    'ʊ': 'u',
+    'χ': 'x',
+    # aki guessed.
+    'ʀ': 'r',
+    'ɹ': 'r',
+    'w': 'ö'
+}
+phoneset.extend(list(stimmen_replacement.keys()))
+
+def phone_reduction(phones):
+    return [stimmen_replacement.get(i, i) for i in phones]
+
## the list of multi character phones.
# for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter.
multi_character_phones = [i for i in phoneset if len(i) > 1]
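
A minimal sketch of how these helpers combine (the input transcription is made up; the printed results are what the mapping table above would suggest):

# illustrative use of split_word + phone_reduction with the new replacement table
from phoneset import fame_ipa
import convert_phoneset

ipa = 'χɪ:p'                                  # hypothetical stimmen-style transcription
phones = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones)
print(phones)                                 # expected to give something like ['χ', 'ɪ:', 'p']
print(fame_ipa.phone_reduction(phones))       # 'χ' mapped to 'x'; 'ɪ:' kept as-is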

View File

@@ -0,0 +1,38 @@
+import os
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+
+import pandas as pd
+
+import convert_xsampa2ipa
+import defaultfiles as default
+
+
+def load_transcriptions():
+    stimmen_transcription = pd.ExcelFile(default.stimmen_transcription_xlsx)
+    df = pd.read_excel(stimmen_transcription, 'original')
+
+    # mapping from ipa to xsampa
+    mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
+
+    #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
+    #    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
+    #    if not ipa_converted == ipa:
+    #        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
+
+    ipas = []
+    for xsampa in df['Self Xsampa']:
+        if not isinstance(xsampa, float):  # 'NaN'
+            # typo?
+            xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t').replace(';', ':')
+            ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
+            ipa = ipa.replace('ː', ':').replace(' ', '')
+            ipas.append(ipa)
+        else:
+            ipas.append('')
+
+    df_ = pd.DataFrame({'filename': df['Filename'],
+                        'word': df['Word'],
+                        'xsampa': df['Self Xsampa'],
+                        'ipa': pd.Series(ipas)})
+    df_ = df_[~df_['ipa'].str.contains('/')]
+    return df_.dropna()
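
A minimal usage sketch of the new module (paths come from defaultfiles.py; the printed values depend on the local Excel file):

# hypothetical quick check of the loader's output
import stimmen_functions

df = stimmen_functions.load_transcriptions()
print(len(df))                           # number of usable transcriptions
print(df[['filename', 'word', 'ipa']].head())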