diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index ce90954..ef753d5 100644 Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc index ef57dca..545949d 100644 Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj index 5319301..11ca83b 100644 --- a/acoustic_model/acoustic_model.pyproj +++ b/acoustic_model/acoustic_model.pyproj @@ -51,6 +51,7 @@ + diff --git a/acoustic_model/check_novoapi.py b/acoustic_model/check_novoapi.py index 96d8e8f..2a3ae59 100644 --- a/acoustic_model/check_novoapi.py +++ b/acoustic_model/check_novoapi.py @@ -20,12 +20,12 @@ from forced_alignment import convert_phone_set #import acoustic_model_functions as am_func import convert_xsampa2ipa import novoapi_functions +import stimmen_functions sys.path.append(default.accent_classification_dir) import output_confusion_matrix ## procedure forced_alignment_novo70 = True -balance_sample_numbers = False ## ===== load novo phoneset ===== @@ -98,36 +98,7 @@ def search_phone_ipa(x, phone_list): ## ===== load all transcriptions (df) ===== -df = pd.read_excel(stimmen_transcription_, 'original') - -# mapping from ipa to xsampa -mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) -#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): -# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) -# if not ipa_converted == ipa: -# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) - -ipas = [] -famehtks = [] -for xsampa in df['Self Xsampa']: - if not isinstance(xsampa, float): # 'NaN' - # typo? - xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t') - xsampa = xsampa.replace(';', ':') - - ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) - ipa = ipa.replace('ː', ':') - ipa = ipa.replace(' ', '') - ipas.append(ipa) - else: - ipas.append('') - -# extract interesting cols. -df = pd.DataFrame({'filename': df['Filename'], - 'word': df['Word'], - 'xsampa': df['Self Xsampa'], - 'ipa': pd.Series(ipas)}) - +df = stimmen_functions.load_transcriptions() word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)] word_list = sorted(word_list) @@ -183,21 +154,6 @@ if forced_alignment_novo70: # samples in which all pronunciations are written in novo70. samples = df_.query("ipa in @pronunciation_ipa") - - - ## ===== balance sample numbers ===== - if balance_sample_numbers: - c = Counter(samples['ipa']) - sample_num_list = [c[key] for key in c.keys()] - sample_num = np.min(sample_num_list) - - samples_balanced = pd.DataFrame(index=[], columns=list(samples.keys())) - for key in c.keys(): - samples_ = samples[samples['ipa'] == key] - samples_balanced = samples_balanced.append(samples_.sample(sample_num), ignore_index = True) - - samples = samples_balanced - results = pd.DataFrame(index=[], columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh']) diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py index 2188c97..ef0dfd4 100644 --- a/acoustic_model/defaultfiles.py +++ b/acoustic_model/defaultfiles.py @@ -2,63 +2,40 @@ import os # add path of the parent directory #os.path.dirname(os.path.realpath(__file__)) -#cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model' - -#htk_dir = r'C:\Aki\htk_fame' -htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk' - - -#config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite') -#mkhmmdefs_pl = os.path.join(cygwin_dir, 'src', 'acoustic_model', 'mkhmmdefs.pl') - -#dbLexicon = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\lexicon.accdb -#scriptBarbara = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\pronvars_barbara.perl -#exeG2P = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\string2phon.exe - -#[pyHTK] -#configHVite = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\config.HVite -#filePhoneList = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\phonelist_barbara.txt -#AcousticModel = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\hmmdefs_16-2_barbara.compo - -#dbLexicon = config['cLexicon']['dbLexicon'] -#scriptBarbara = config['cLexicon']['scriptBarbara'] -#exeG2P = config['cLexicon']['exeG2P'] - -#configHVite = config['pyHTK']['configHVite'] -#filePhoneList = config['pyHTK']['filePhoneList'] -#AcousticModel = config['pyHTK']['AcousticModel'] - +# repos repo_dir = r'C:\Users\Aki\source\repos' ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter') forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment') accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification') toolbox_dir = os.path.join(repo_dir, 'toolbox') -#htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017' -#config_hvite = os.path.join(htk_config_dir, 'config.HVite') -#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo') -#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo' -phonelist_txt = os.path.join(htk_dir, 'config', 'phonelist.txt') - WSL_dir = r'C:\OneDrive\WSL' -#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') -fame_dir = r'c:\OneDrive\Research\rug\_data\FAME' +novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi') +#novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi' -fame_s5_dir = os.path.join(fame_dir, 's5') -fame_corpus_dir = os.path.join(fame_dir, 'corpus') - -experiments_dir = r'c:\OneDrive\Research\rug\experiments' +# working directories +rug_dir = r'c:\OneDrive\Research\rug' +experiments_dir = os.path.join(rug_dir, 'experiments') +htk_dir = os.path.join(experiments_dir, 'acoustic_model', 'fame', 'htk') stimmen_dir = os.path.join(experiments_dir, 'stimmen') -stimmen_data_dir = os.path.join(stimmen_dir, 'data') + +# data +fame_dir = os.path.join(rug_dir, '_data', 'FAME') +#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') # 44.1 kHz #stimmen_wav_dir = os.path.join(stimmen_dir, 'wav') # 16 kHz stimmen_wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' -stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi') - -stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx') +stimmen_transcription_xlsx = os.path.join(stimmen_dir, 'data', 'Frisian Variants Picture Task Stimmen.xlsx') phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') +novo70_phoneset = os.path.join(novo_api_dir, 'asr', 'phoneset', 'nl', 'novo70.phoneset') + + + +#phonelist_txt = os.path.join(htk_dir, 'config', 'phonelist.txt') +#fame_s5_dir = os.path.join(fame_dir, 's5') +#fame_corpus_dir = os.path.join(fame_dir, 'corpus') +#stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi') +# novoapi_functions + -novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi') -#novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi' -novo70_phoneset = os.path.join(novo_api_dir, 'asr', 'phoneset', 'nl', 'novo70.phoneset') \ No newline at end of file diff --git a/acoustic_model/fame_functions.py b/acoustic_model/fame_functions.py index cb87620..295ed79 100644 --- a/acoustic_model/fame_functions.py +++ b/acoustic_model/fame_functions.py @@ -341,3 +341,14 @@ def fix_single_quote(lexicon_file): def word2htk(word): return ''.join([fame_asr.translation_key_word2htk.get(i, i) for i in word]) + + +def ipa2htk(ipa): + curr_dir = os.path.dirname(os.path.abspath(__file__)) + translation_key_ipa2asr = np.load(os.path.join(curr_dir, 'phoneset', 'fame_ipa2asr.npy')).item(0) + + ipa_splitted = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones) + ipa_splitted = fame_ipa.phone_reduction(ipa_splitted) + asr_splitted = convert_phoneset.convert_phoneset(ipa_splitted, translation_key_ipa2asr) + htk_splitted = convert_phoneset.convert_phoneset(asr_splitted, fame_asr.translation_key_asr2htk) + return ''.join(htk_splitted) \ No newline at end of file diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py index 19d5f56..b3d1070 100644 --- a/acoustic_model/fame_hmm.py +++ b/acoustic_model/fame_hmm.py @@ -27,7 +27,7 @@ extract_features = 0 flat_start = 0 train_model_without_sp = 0 add_sp = 0 -train_model_with_sp = 0 +train_model_with_sp = 1 @@ -321,7 +321,8 @@ if add_sp: ## ======================= train model with short pause ======================= if train_model_with_sp: print('==== train model with sp ====') - for niter in range(niter_max+1, niter_max*2+1): + #for niter in range(niter_max+1, niter_max*2+1): + for niter in range(20, 50): timer_start = time.time() hmm_n = 'iter' + str(niter) hmm_n_pre = 'iter' + str(niter-1) diff --git a/acoustic_model/fame_test.py b/acoustic_model/fame_test.py index c7b2e59..c1a432e 100644 --- a/acoustic_model/fame_test.py +++ b/acoustic_model/fame_test.py @@ -69,6 +69,10 @@ else: translation_key_ipa2asr['ə:'] = 'ə' translation_key_ipa2asr['r.'] = 'r' translation_key_ipa2asr['r:'] = 'r' +# added for stimmen. +translation_key_ipa2asr['ɪ:'] = 'ɪ:' +translation_key_ipa2asr['y:'] = 'y' + np.save(os.path.join('phoneset', 'fame_ipa2asr.npy'), translation_key_ipa2asr) diff --git a/acoustic_model/htk_vs_kaldi.py b/acoustic_model/htk_vs_kaldi.py index ca7f6af..c1e5c97 100644 --- a/acoustic_model/htk_vs_kaldi.py +++ b/acoustic_model/htk_vs_kaldi.py @@ -1,84 +1,145 @@ import os os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') - import sys -import csv -import subprocess -from collections import Counter -import re -import numpy as np +#import csv +#import subprocess +#from collections import Counter +#import re +import shutil +import glob + +#import numpy as np import pandas as pd -import matplotlib.pyplot as plt -from sklearn.metrics import confusion_matrix +#import matplotlib.pyplot as plt +#from sklearn.metrics import confusion_matrix -import acoustic_model_functions as am_func -import convert_xsampa2ipa +#import acoustic_model_functions as am_func +#import convert_xsampa2ipa import defaultfiles as default -from forced_alignment import pyhtk +#from forced_alignment import pyhtk +#sys.path.append(default.forced_alignment_module_dir) +#from forced_alignment import convert_phone_set +#import acoustic_model_functions as am_func +import convert_xsampa2ipa +import stimmen_functions +import fame_functions +import convert_phoneset +from phoneset import fame_ipa, fame_asr +sys.path.append(default.toolbox_dir) +import file_handling as fh +from htk import pyhtk ## ======================= user define ======================= -excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx') -data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data') +#excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx') +#data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data') -wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k +#wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k -acoustic_model_dir = os.path.join(default.experiments_dir, 'friesian', 'acoustic_model', 'model') -htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short') -fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k') -result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result') +#acoustic_model_dir = os.path.join(default.experiments_dir, 'friesian', 'acoustic_model', 'model') +#htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short') +#fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k') +#result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result') -kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme') -kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict') -lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt') +#kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme') +#kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict') +#lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt') #lex_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr') #lex_asr_htk = os.path.join(default.fame_dir, 'lexicon', 'lex.asr_htk') - -# procedure -make_htk_dict_files = 0 -do_forced_alignment_htk = 0 -eval_forced_alignment_htk = 0 -make_kaldi_data_files = 0 -make_kaldi_lexicon_txt = 0 -load_forced_alignment_kaldi = 1 -eval_forced_alignment_kaldi = 1 +## procedure +#make_htk_dict_files = 0 +#do_forced_alignment_htk = 0 +#eval_forced_alignment_htk = 0 +#make_kaldi_data_files = 0 +#make_kaldi_lexicon_txt = 0 +#load_forced_alignment_kaldi = 1 +#eval_forced_alignment_kaldi = 1 -## ======================= add paths ======================= -sys.path.append(os.path.join(default.repo_dir, 'forced_alignment')) -from forced_alignment import convert_phone_set -from forced_alignment import pyhtk +### ======================= add paths ======================= +#sys.path.append(os.path.join(default.repo_dir, 'forced_alignment')) +#from forced_alignment import convert_phone_set +#from forced_alignment import pyhtk -sys.path.append(os.path.join(default.repo_dir, 'toolbox')) -from evaluation import plot_confusion_matrix +#sys.path.append(os.path.join(default.repo_dir, 'toolbox')) +#from evaluation import plot_confusion_matrix + +config_dir = os.path.join(default.htk_dir, 'config') +model_dir = os.path.join(default.htk_dir, 'model') +lattice_file = os.path.join(config_dir, 'stimmen.ltc') +#pyhtk.create_word_lattice_file( +# os.path.join(config_dir, 'stimmen.net'), +# lattice_file) +hvite_scp = os.path.join(default.htk_dir, 'tmp', 'stimmen_test.scp') + +## ======================= make test data ====================== +# copy wav files which is in the stimmen data. +stimmen_test_dir = r'c:\OneDrive\Research\rug\_data\stimmen_test' +fh.make_filelist(stimmen_test_dir, hvite_scp, file_type='wav') + +df = stimmen_functions.load_transcriptions() +word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)] +word_list = sorted(word_list) + +#for index, row in df.iterrows(): +# filename = row['filename'] +# if isinstance(filename, str): +# wav_file = os.path.join(default.stimmen_wav_dir, filename) +# if os.path.exists(wav_file): +# shutil.copy(wav_file, os.path.join(stimmen_test_dir, filename)) +# pyhtk.create_label_file( +# row['word'], +# os.path.join(stimmen_test_dir, filename.replace('.wav', '.lab'))) -## ======================= convert phones ====================== -mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) +# after manually removed files which does not contain clear sound, +# update df as df_test. +#wav_file_list = glob.glob(os.path.join(stimmen_test_dir, '*.wav')) +#df_test = pd.DataFrame(index=[], columns=list(df.keys())) +#for wav_file in wav_file_list: +# filename = os.path.basename(wav_file) +# df_ = df[df['filename'].str.match(filename)] +# df_test = pd.concat([df_test, df_]) -xls = pd.ExcelFile(excel_file) - -## check conversion -#df = pd.read_excel(xls, 'frequency') -#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): -# #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_) -# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) -# if not ipa_converted == ipa: -# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) +#output = pyhtk.recognition( +# os.path.join(default.htk_dir, 'config', 'config.rec', +# lattice_file, +# os.path.join(model_dir, 'hmm1', 'iter13'), +# dictionary_file, +# os.path.join(config_dir, 'phonelist.txt'), +# hvite_scp) -## check phones included in FAME! -# the phones used in the lexicon. -#phonelist = am_func.get_phonelist(lex_asr) +## check phones included in stimmen but not in FAME! +splitted_ipas = [' '.join( + convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones)) + for ipa in df['ipa']] +stimmen_phones = set(' '.join(splitted_ipas)) +stimmen_phones = list(stimmen_phones) +#stimmen_phones = list(set(fame_asr.phone_reduction(list(stimmen_phones)))) +#fame_phones = fame_asr.phoneset_short +fame_phones = fame_ipa.phoneset +stimmen_phones.sort() +fame_phones.sort() +print('phones which are used in stimmen transcription but not in FAME corpus are:\n{}'.format( + set(stimmen_phones) - set(fame_phones) + )) +for ipa in df['ipa']: + ipa_splitted = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones) + if ':' in ipa_splitted: + print(ipa_splitted) + +htk = [fame_functions.ipa2htk(ipa) for ipa in df['ipa']] + +ipa = 'e:χ' +fame_functions.ipa2htk(ipa) -# the lines which include a specific phone. -#lines = am_func.find_phone(lex_asr, 'x') # Filename, Word, Self Xsampa diff --git a/acoustic_model/novoapi_forced_alignment.py b/acoustic_model/novoapi_forced_alignment.py index 932d7c1..3fd32b9 100644 --- a/acoustic_model/novoapi_forced_alignment.py +++ b/acoustic_model/novoapi_forced_alignment.py @@ -52,7 +52,7 @@ p = argparse.ArgumentParser() #p.add_argument("--user", default=None) #p.add_argument("--password", default=None) p.add_argument("--user", default='martijn.wieling') -p.add_argument("--password", default='fa0Thaic') +p.add_argument("--password", default='xxxxxx') args = p.parse_args() #wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav' diff --git a/acoustic_model/novoapi_functions.py b/acoustic_model/novoapi_functions.py index 0c72b45..3cd502e 100644 --- a/acoustic_model/novoapi_functions.py +++ b/acoustic_model/novoapi_functions.py @@ -173,7 +173,7 @@ def forced_alignment(wav_file, word, pronunciation_ipa): # username / password cannot be passed as artuments... p = argparse.ArgumentParser() p.add_argument("--user", default='martijn.wieling') - p.add_argument("--password", default='fa0Thaic') + p.add_argument("--password", default='xxxxxx') args = p.parse_args() rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir) diff --git a/acoustic_model/phoneset/fame_asr.py b/acoustic_model/phoneset/fame_asr.py index 22e9d65..b11359b 100644 --- a/acoustic_model/phoneset/fame_asr.py +++ b/acoustic_model/phoneset/fame_asr.py @@ -73,12 +73,14 @@ reduction_key = { # already removed beforehand in phoneset. Just to be sure. phones_to_be_removed = ['ú', 's:', 'ɔ̈:'] -phoneset_short = [reduction_key.get(i, i) for i in phoneset +def phone_reduction(phones): + return [reduction_key.get(i, i) for i in phones if not i in phones_to_be_removed] -phoneset_short = list(set(phoneset_short)) +phoneset_short = list(set(phone_reduction(phoneset))) phoneset_short.sort() + ## translation_key to htk format (ascii). # phones which gives UnicodeEncodeError when phone.encode("ascii") # are replaced with other characters. diff --git a/acoustic_model/phoneset/fame_ipa.py b/acoustic_model/phoneset/fame_ipa.py index 4d44f0a..8859b9f 100644 --- a/acoustic_model/phoneset/fame_ipa.py +++ b/acoustic_model/phoneset/fame_ipa.py @@ -5,6 +5,7 @@ phoneset = [ 'i̯', 'i̯ⁿ', 'y', + 'y:', # not included in lex.ipa, but in stimmen. 'i', 'i.', 'iⁿ', @@ -13,7 +14,7 @@ phoneset = [ 'ɪ', 'ɪⁿ', 'ɪ.', - #'ɪ:', # not included in lex.ipa + 'ɪ:', # not included in lex.ipa, but in stimmen. 'ɪ:ⁿ', 'e', 'e:', @@ -100,7 +101,37 @@ phoneset = [ 'l' ] +## reduce the number of phones. +# the phones which are used in stimmen transcription but not in FAME corpus. +# replacements are based on the advice from Jelske Dijkstra on 2018/06/21. +stimmen_replacement = { + 'æ': 'ɛ', + 'ø': 'ö', # or 'ö:' + 'ø:': 'ö:', # Aki added. + 'œ': 'ɔ̈', # or 'ɔ̈:' + 'œ:': 'ɔ̈:', # Aki added. + 'ɐ': 'a', # or 'a:' + 'ɐ:': 'a:', # Aki added. + 'ɑ': 'a', # or 'a:' + 'ɑ:': 'a:', # Aki added + 'ɒ': 'ɔ', # or 'ɔ:' + 'ɒ:': 'ɔ:', # Aki added. + 'ɾ': 'r', + 'ʁ': 'r', + 'ʊ': 'u', + 'χ': 'x', + # aki guessed. + 'ʀ': 'r', + 'ɹ': 'r', + 'w': 'ö' + } +phoneset.extend(list(stimmen_replacement.keys())) + +def phone_reduction(phones): + return [stimmen_replacement.get(i, i) for i in phones] + + ## the list of multi character phones. # for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter. multi_character_phones = [i for i in phoneset if len(i) > 1] diff --git a/acoustic_model/phoneset/fame_ipa2asr.npy b/acoustic_model/phoneset/fame_ipa2asr.npy index b8852ba..687111d 100644 Binary files a/acoustic_model/phoneset/fame_ipa2asr.npy and b/acoustic_model/phoneset/fame_ipa2asr.npy differ diff --git a/acoustic_model/stimmen_functions.py b/acoustic_model/stimmen_functions.py new file mode 100644 index 0000000..9d28093 --- /dev/null +++ b/acoustic_model/stimmen_functions.py @@ -0,0 +1,38 @@ +import os +os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') + +import pandas as pd + +import convert_xsampa2ipa +import defaultfiles as default + + +def load_transcriptions(): + stimmen_transcription = pd.ExcelFile(default.stimmen_transcription_xlsx) + df = pd.read_excel(stimmen_transcription, 'original') + + # mapping from ipa to xsampa + mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) + #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): + # ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) + # if not ipa_converted == ipa: + # print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) + + ipas = [] + for xsampa in df['Self Xsampa']: + if not isinstance(xsampa, float): # 'NaN' + # typo? + xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t').replace(';', ':') + + ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) + ipa = ipa.replace('ː', ':').replace(' ', '') + ipas.append(ipa) + else: + ipas.append('') + + df_ = pd.DataFrame({'filename': df['Filename'], + 'word': df['Word'], + 'xsampa': df['Self Xsampa'], + 'ipa': pd.Series(ipas)}) + df_ = df_[~df_['ipa'].str.contains('/')] + return df_.dropna()