Compare commits
3 Commits: ab3887c6ca ... 8f89f60538

| Author | SHA1 | Date |
| --- | --- | --- |
|  | 8f89f60538 |  |
|  | f6e563ecd3 |  |
|  | da0242b0e1 |  |

Binary file not shown.
Binary file not shown.
```diff
@@ -51,6 +51,8 @@
     <Compile Include="fame_hmm.py" />
     <Compile Include="phoneset\fame_asr.py" />
     <Compile Include="phoneset\fame_ipa.py" />
+    <Compile Include="stimmen_functions.py" />
+    <Compile Include="stimmen_test.py" />
   </ItemGroup>
   <ItemGroup>
     <Content Include="config.ini" />
```
```diff
@@ -20,12 +20,12 @@ from forced_alignment import convert_phone_set
 #import acoustic_model_functions as am_func
 import convert_xsampa2ipa
 import novoapi_functions
+import stimmen_functions
 sys.path.append(default.accent_classification_dir)
 import output_confusion_matrix

 ## procedure
 forced_alignment_novo70 = True
-balance_sample_numbers = False


 ## ===== load novo phoneset =====
```
```diff
@@ -98,36 +98,7 @@ def search_phone_ipa(x, phone_list):


 ## ===== load all transcriptions (df) =====
-df = pd.read_excel(stimmen_transcription_, 'original')
-
-# mapping from ipa to xsampa
-mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
-#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
-#    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
-#    if not ipa_converted == ipa:
-#        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
-
-ipas = []
-famehtks = []
-for xsampa in df['Self Xsampa']:
-    if not isinstance(xsampa, float): # 'NaN'
-        # typo?
-        xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t')
-        xsampa = xsampa.replace(';', ':')
-
-        ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
-        ipa = ipa.replace('ː', ':')
-        ipa = ipa.replace(' ', '')
-        ipas.append(ipa)
-    else:
-        ipas.append('')
-
-# extract interesting cols.
-df = pd.DataFrame({'filename': df['Filename'],
-                   'word': df['Word'],
-                   'xsampa': df['Self Xsampa'],
-                   'ipa': pd.Series(ipas)})
+df = stimmen_functions.load_transcriptions()

 word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)]
 word_list = sorted(word_list)

```
```diff
@@ -184,21 +155,6 @@ if forced_alignment_novo70:
     # samples in which all pronunciations are written in novo70.
     samples = df_.query("ipa in @pronunciation_ipa")

-    ## ===== balance sample numbers =====
-    if balance_sample_numbers:
-        c = Counter(samples['ipa'])
-        sample_num_list = [c[key] for key in c.keys()]
-        sample_num = np.min(sample_num_list)
-
-        samples_balanced = pd.DataFrame(index=[], columns=list(samples.keys()))
-        for key in c.keys():
-            samples_ = samples[samples['ipa'] == key]
-            samples_balanced = samples_balanced.append(samples_.sample(sample_num), ignore_index = True)
-
-        samples = samples_balanced
-
     results = pd.DataFrame(index=[],
         columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])

```
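For context, the deleted balancing block downsampled every pronunciation variant to the count of the rarest one. A minimal sketch of the same idea with pandas `groupby` (hypothetical, not part of this commit):

```python
# Sketch only: behaviour of the removed balance_sample_numbers block.
# Downsample each IPA variant to the size of the smallest group.
sample_num = samples['ipa'].value_counts().min()
samples_balanced = (samples.groupby('ipa', group_keys=False)
                           .apply(lambda g: g.sample(sample_num))
                           .reset_index(drop=True))
```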
```diff
@@ -2,63 +2,40 @@ import os
 # add path of the parent directory
 #os.path.dirname(os.path.realpath(__file__))

-#cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
-
-#htk_dir = r'C:\Aki\htk_fame'
-htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk'
-
-
-#config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
-#mkhmmdefs_pl = os.path.join(cygwin_dir, 'src', 'acoustic_model', 'mkhmmdefs.pl')
-
-#dbLexicon = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\lexicon.accdb
-#scriptBarbara = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\pronvars_barbara.perl
-#exeG2P = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\string2phon.exe
-
-#[pyHTK]
-#configHVite = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\config.HVite
-#filePhoneList = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\phonelist_barbara.txt
-#AcousticModel = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\hmmdefs_16-2_barbara.compo
-
-#dbLexicon = config['cLexicon']['dbLexicon']
-#scriptBarbara = config['cLexicon']['scriptBarbara']
-#exeG2P = config['cLexicon']['exeG2P']
-
-#configHVite = config['pyHTK']['configHVite']
-#filePhoneList = config['pyHTK']['filePhoneList']
-#AcousticModel = config['pyHTK']['AcousticModel']
-
+# repos
 repo_dir = r'C:\Users\Aki\source\repos'
 ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
 forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
 accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
 toolbox_dir = os.path.join(repo_dir, 'toolbox')

-#htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
-#config_hvite = os.path.join(htk_config_dir, 'config.HVite')
-#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
-#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
-phonelist_txt = os.path.join(htk_dir, 'config', 'phonelist.txt')
-
 WSL_dir = r'C:\OneDrive\WSL'
-#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
-fame_dir = r'c:\OneDrive\Research\rug\_data\FAME'
+novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi')
+#novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi'

-fame_s5_dir = os.path.join(fame_dir, 's5')
-fame_corpus_dir = os.path.join(fame_dir, 'corpus')
-
-experiments_dir = r'c:\OneDrive\Research\rug\experiments'
+# working directories
+rug_dir = r'c:\OneDrive\Research\rug'
+experiments_dir = os.path.join(rug_dir, 'experiments')
+htk_dir = os.path.join(experiments_dir, 'acoustic_model', 'fame', 'htk')
 stimmen_dir = os.path.join(experiments_dir, 'stimmen')
-stimmen_data_dir = os.path.join(stimmen_dir, 'data')

+# data
+fame_dir = os.path.join(rug_dir, '_data', 'FAME')
+#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
 # 44.1 kHz
 #stimmen_wav_dir = os.path.join(stimmen_dir, 'wav')
 # 16 kHz
 stimmen_wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen'
-stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi')
-
-stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx')
+stimmen_transcription_xlsx = os.path.join(stimmen_dir, 'data', 'Frisian Variants Picture Task Stimmen.xlsx')

 phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')

-novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi')
-#novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi'
 novo70_phoneset = os.path.join(novo_api_dir, 'asr', 'phoneset', 'nl', 'novo70.phoneset')


+#phonelist_txt = os.path.join(htk_dir, 'config', 'phonelist.txt')
+#fame_s5_dir = os.path.join(fame_dir, 's5')
+#fame_corpus_dir = os.path.join(fame_dir, 'corpus')
+#stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi')
+# novoapi_functions
```
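With this reorganisation every path in defaultfiles.py hangs off `rug_dir`, `experiments_dir`, or `WSL_dir` instead of being hard-coded. A hedged sketch of how the other scripts in this changeset consume it (the `config_dir` line is taken from the rewritten script further below):

```python
import os
import defaultfiles as default

# htk_dir now resolves under experiments_dir, so downstream scripts
# derive their own subdirectories from it:
config_dir = os.path.join(default.htk_dir, 'config')
lexicon_dir = os.path.join(default.htk_dir, 'lexicon')
```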
```diff
@@ -341,3 +341,24 @@ def fix_single_quote(lexicon_file):

 def word2htk(word):
     return ''.join([fame_asr.translation_key_word2htk.get(i, i) for i in word])
+
+
+def ipa2asr(ipa):
+    curr_dir = os.path.dirname(os.path.abspath(__file__))
+    translation_key_ipa2asr = np.load(os.path.join(curr_dir, 'phoneset', 'fame_ipa2asr.npy')).item(0)
+
+    ipa_splitted = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones)
+    ipa_splitted = fame_ipa.phone_reduction(ipa_splitted)
+    asr_splitted = convert_phoneset.convert_phoneset(ipa_splitted, translation_key_ipa2asr)
+    return ''.join(asr_splitted)
+
+
+def ipa2htk(ipa):
+    curr_dir = os.path.dirname(os.path.abspath(__file__))
+    translation_key_ipa2asr = np.load(os.path.join(curr_dir, 'phoneset', 'fame_ipa2asr.npy')).item(0)
+
+    ipa_splitted = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones)
+    ipa_splitted = fame_ipa.phone_reduction(ipa_splitted)
+    asr_splitted = convert_phoneset.convert_phoneset(ipa_splitted, translation_key_ipa2asr)
+    htk_splitted = convert_phoneset.convert_phoneset(asr_splitted, fame_asr.translation_key_asr2htk)
+    return ''.join(htk_splitted)
```
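A short usage sketch of the two new converters (the IPA input is hypothetical): ipa2htk is ipa2asr plus one extra lookup through fame_asr.translation_key_asr2htk, which turns the ASR phones into ascii-safe HTK labels.

```python
import fame_functions

ipa = 'pɔsk'  # hypothetical IPA transcription
print(fame_functions.ipa2asr(ipa))  # IPA -> FAME ASR phone set
print(fame_functions.ipa2htk(ipa))  # IPA -> ASR -> ascii-safe HTK labels
```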
```diff
@@ -27,7 +27,7 @@ extract_features = 0
 flat_start = 0
 train_model_without_sp = 0
 add_sp = 0
-train_model_with_sp = 0
+train_model_with_sp = 1



```
```diff
@@ -321,7 +321,8 @@ if add_sp:
 ## ======================= train model with short pause =======================
 if train_model_with_sp:
     print('==== train model with sp ====')
-    for niter in range(niter_max+1, niter_max*2+1):
+    #for niter in range(niter_max+1, niter_max*2+1):
+    for niter in range(20, 50):
         timer_start = time.time()
         hmm_n = 'iter' + str(niter)
         hmm_n_pre = 'iter' + str(niter-1)
```
```diff
@@ -69,6 +69,10 @@ else:
     translation_key_ipa2asr['ə:'] = 'ə'
     translation_key_ipa2asr['r.'] = 'r'
     translation_key_ipa2asr['r:'] = 'r'
+    # added for stimmen.
+    translation_key_ipa2asr['ɪ:'] = 'ɪ:'
+    translation_key_ipa2asr['y:'] = 'y'
+
 np.save(os.path.join('phoneset', 'fame_ipa2asr.npy'), translation_key_ipa2asr)


```
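The mapping is persisted as a .npy file holding a pickled dict, and the new fame_functions code loads it back with `np.load(...).item(0)`. A round-trip sketch; note that NumPy 1.16.3 and later additionally require `allow_pickle=True` for this load:

```python
import numpy as np

np.save('fame_ipa2asr.npy', translation_key_ipa2asr)  # dict -> 0-d object array
loaded = np.load('fame_ipa2asr.npy', allow_pickle=True).item(0)  # back to a dict
assert loaded['y:'] == 'y'
```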
```diff
@@ -1,131 +1,120 @@
 import os
 os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

 import sys
-import csv
-import subprocess
-from collections import Counter
-import re
+
+#import csv
+#import subprocess
+#from collections import Counter
+#import re
+import shutil
+import glob

 import numpy as np
 import pandas as pd
-import matplotlib.pyplot as plt
-from sklearn.metrics import confusion_matrix
+#import matplotlib.pyplot as plt
+#from sklearn.metrics import confusion_matrix

-import acoustic_model_functions as am_func
-import convert_xsampa2ipa
+#import acoustic_model_functions as am_func
+#import convert_xsampa2ipa
 import defaultfiles as default

-from forced_alignment import pyhtk
+#from forced_alignment import pyhtk
+#sys.path.append(default.forced_alignment_module_dir)
+#from forced_alignment import convert_phone_set
+#import acoustic_model_functions as am_func
+import convert_xsampa2ipa
+import stimmen_functions
+import fame_functions
+import convert_phoneset
+from phoneset import fame_ipa, fame_asr
+sys.path.append(default.toolbox_dir)
+import file_handling as fh
+from htk import pyhtk


 ## ======================= user define =======================
-excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
-data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data')
+#excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
+#data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data')

-wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k
+#wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k

-acoustic_model_dir = os.path.join(default.experiments_dir, 'friesian', 'acoustic_model', 'model')
-htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short')
-fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k')
-result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result')
+#acoustic_model_dir = os.path.join(default.experiments_dir, 'friesian', 'acoustic_model', 'model')
+#htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short')
+#fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k')
+#result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result')

-kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme')
-kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict')
-lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt')
+#kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme')
+#kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict')
+#lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt')

 #lex_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
 #lex_asr_htk = os.path.join(default.fame_dir, 'lexicon', 'lex.asr_htk')

+## procedure
+#make_htk_dict_files = 0
+#do_forced_alignment_htk = 0
+#eval_forced_alignment_htk = 0
+#make_kaldi_data_files = 0
+#make_kaldi_lexicon_txt = 0
+#load_forced_alignment_kaldi = 1
+#eval_forced_alignment_kaldi = 1

-# procedure
-make_htk_dict_files = 0
-do_forced_alignment_htk = 0
-eval_forced_alignment_htk = 0
-make_kaldi_data_files = 0
-make_kaldi_lexicon_txt = 0
-load_forced_alignment_kaldi = 1
-eval_forced_alignment_kaldi = 1
-
-
-## ======================= add paths =======================
-sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
-from forced_alignment import convert_phone_set
-from forced_alignment import pyhtk
+#sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
+#from forced_alignment import convert_phone_set
+#from forced_alignment import pyhtk

-sys.path.append(os.path.join(default.repo_dir, 'toolbox'))
-from evaluation import plot_confusion_matrix
+#sys.path.append(os.path.join(default.repo_dir, 'toolbox'))
+#from evaluation import plot_confusion_matrix

+config_dir = os.path.join(default.htk_dir, 'config')
+model_dir = os.path.join(default.htk_dir, 'model')
+lattice_file = os.path.join(config_dir, 'stimmen.ltc')
+#pyhtk.create_word_lattice_file(
+#    os.path.join(config_dir, 'stimmen.net'),
+#    lattice_file)
+hvite_scp = os.path.join(default.htk_dir, 'tmp', 'stimmen_test.scp')


-## ======================= convert phones ======================
-mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
-
-xls = pd.ExcelFile(excel_file)
-
-## check conversion
-#df = pd.read_excel(xls, 'frequency')
-#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
-#    #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_)
-#    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
-#    if not ipa_converted == ipa:
-#        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
-
-
-## check phones included in FAME!
-# the phones used in the lexicon.
-#phonelist = am_func.get_phonelist(lex_asr)
-
-# the lines which include a specific phone.
-#lines = am_func.find_phone(lex_asr, 'x')
+## ======================= load test data ======================
+stimmen_test_dir = r'c:\OneDrive\Research\rug\_data\stimmen_test'
+
+df = stimmen_functions.load_transcriptions_clean(stimmen_test_dir)
+df = stimmen_functions.add_row_asr(df)
+df = stimmen_functions.add_row_htk(df)
+
+word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)]
+word_list = sorted(word_list)
+
+
+# pronunciation variants
+for word in word_list:
+    df_ = df[df['word']==word]
+    print('{0} has {1} variants'.format(word, len(np.unique(df_['htk']))))
+
+#fh.make_filelist(stimmen_test_dir, hvite_scp, file_type='wav')
+
+#output = pyhtk.recognition(
+#    os.path.join(default.htk_dir, 'config', 'config.rec',
+#    lattice_file,
+#    os.path.join(model_dir, 'hmm1', 'iter13'),
+#    dictionary_file,
+#    os.path.join(config_dir, 'phonelist.txt'),
+#    hvite_scp)
+
+#pyhtk.create_label_file(
+#    row['word'],
+#    os.path.join(stimmen_test_dir, filename.replace('.wav', '.lab')))

-
-# Filename, Word, Self Xsampa
-df = pd.read_excel(xls, 'original')
-
-ipas = []
-famehtks = []
-for xsampa in df['Self Xsampa']:
-    if not isinstance(xsampa, float): # 'NaN'
-        # typo?
-        xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t')
-        xsampa = xsampa.replace(';', ':')
-
-        ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
-        ipa = ipa.replace('ː', ':')
-        ipa = ipa.replace(' ', '')
-        ipas.append(ipa)
-        famehtk = convert_phone_set.ipa2famehtk(ipa)
-        famehtks.append(famehtk)
-    else:
-        ipas.append('')
-        famehtks.append('')
-
-# extract interesting cols.
-df = pd.DataFrame({'filename': df['Filename'],
-                   'word': df['Word'],
-                   'xsampa': df['Self Xsampa'],
-                   'ipa': pd.Series(ipas),
-                   'famehtk': pd.Series(famehtks)})
-# cleansing.
-df = df[~df['famehtk'].isin(['/', ''])]
-
-word_list = np.unique(df['word'])
-
-
-## ======================= make dict files used for HTK. ======================
-if make_htk_dict_files:
-    output_type = 3
-
-    for word in word_list:
-        htk_dict_file = htk_dict_dir + '\\' + word + '.dic'
-
-        # pronunciation variant of the target word.
-        pronvar_ = df['famehtk'][df['word'].str.match(word)]
-
-        # make dic file.
-        am_func.make_htk_dict(word, pronvar_, htk_dict_file, output_type)
+## ======================= make a HTK dic file ======================
+#if make_htk_dic_file:
+#    output_type = 3
+dictionary_txt = os.path.join(default.htk_dir, 'lexicon', 'stimmen.dic')
+#for word in word_list:
+word = word_list[2]
+# pronunciation variant of the target word.
+pronunciations = df_test['asr'][df_test['word'].str.match(word)]
+
+# make dic file.
+#am_func.make_htk_dict(word, pronvar_, htk_dict_file, output_type)


 ## ======================= forced alignment using HTK =======================
```
```diff
@@ -52,7 +52,7 @@ p = argparse.ArgumentParser()
 #p.add_argument("--user", default=None)
 #p.add_argument("--password", default=None)
 p.add_argument("--user", default='martijn.wieling')
-p.add_argument("--password", default='fa0Thaic')
+p.add_argument("--password", default='xxxxxx')
 args = p.parse_args()

 #wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'
```
```diff
@@ -173,7 +173,7 @@ def forced_alignment(wav_file, word, pronunciation_ipa):
     # username / password cannot be passed as arguments...
     p = argparse.ArgumentParser()
     p.add_argument("--user", default='martijn.wieling')
-    p.add_argument("--password", default='fa0Thaic')
+    p.add_argument("--password", default='xxxxxx')
     args = p.parse_args()

     rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)
```
```diff
@@ -73,12 +73,14 @@ reduction_key = {
 # already removed beforehand in phoneset. Just to be sure.
 phones_to_be_removed = ['ú', 's:', 'ɔ̈:']

-phoneset_short = [reduction_key.get(i, i) for i in phoneset
+def phone_reduction(phones):
+    return [reduction_key.get(i, i) for i in phones
             if not i in phones_to_be_removed]
-phoneset_short = list(set(phoneset_short))
+phoneset_short = list(set(phone_reduction(phoneset)))
 phoneset_short.sort()


 ## translation_key to htk format (ascii).
 # phones which gives UnicodeEncodeError when phone.encode("ascii")
 # are replaced with other characters.
```
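The refactor turns the one-off comprehension into a reusable phone_reduction(), while phoneset_short keeps its old value. A hedged example with a hypothetical phone sequence:

```python
# 's:' is in phones_to_be_removed and is dropped; phones without an entry
# in reduction_key pass through unchanged.
print(phone_reduction(['s:', 'a', 'r']))  # e.g. ['a', 'r']
assert sorted(set(phone_reduction(phoneset))) == phoneset_short
```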
```diff
@@ -5,6 +5,7 @@ phoneset = [
     'i̯',
     'i̯ⁿ',
     'y',
+    'y:', # not included in lex.ipa, but in stimmen.
     'i',
     'i.',
     'iⁿ',
```
```diff
@@ -13,7 +14,7 @@ phoneset = [
     'ɪ',
     'ɪⁿ',
     'ɪ.',
-    #'ɪ:', # not included in lex.ipa
+    'ɪ:', # not included in lex.ipa, but in stimmen.
     'ɪ:ⁿ',
     'e',
     'e:',
```
```diff
@@ -100,6 +101,36 @@ phoneset = [
     'l'
 ]

+## reduce the number of phones.
+# the phones which are used in stimmen transcription but not in FAME corpus.
+# replacements are based on the advice from Jelske Dijkstra on 2018/06/21.
+stimmen_replacement = {
+    'æ': 'ɛ',
+    'ø': 'ö', # or 'ö:'
+    'ø:': 'ö:', # Aki added.
+    'œ': 'ɔ̈', # or 'ɔ̈:'
+    'œ:': 'ɔ̈:', # Aki added.
+    'ɐ': 'a', # or 'a:'
+    'ɐ:': 'a:', # Aki added.
+    'ɑ': 'a', # or 'a:'
+    'ɑ:': 'a:', # Aki added
+    'ɒ': 'ɔ', # or 'ɔ:'
+    'ɒ:': 'ɔ:', # Aki added.
+    'ɾ': 'r',
+    'ʁ': 'r',
+    'ʊ': 'u',
+    'χ': 'x',
+
+    # aki guessed.
+    'ʀ': 'r',
+    'ɹ': 'r',
+    'w': 'ö'
+}
+phoneset.extend(list(stimmen_replacement.keys()))
+
+def phone_reduction(phones):
+    return [stimmen_replacement.get(i, i) for i in phones]
+
+
 ## the list of multi character phones.
 # for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter.
```
Binary file not shown.
acoustic_model/stimmen_functions.py (new file, 83 lines)
```diff
@@ -0,0 +1,83 @@
+import os
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+import glob
+
+import pandas as pd
+
+import convert_xsampa2ipa
+import defaultfiles as default
+import fame_functions
+
+
+def _load_transcriptions():
+    stimmen_transcription = pd.ExcelFile(default.stimmen_transcription_xlsx)
+    df = pd.read_excel(stimmen_transcription, 'original')
+
+    # mapping from ipa to xsampa
+    mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
+    #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
+    #    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
+    #    if not ipa_converted == ipa:
+    #        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
+
+    ipas = []
+    for xsampa in df['Self Xsampa']:
+        if not isinstance(xsampa, float): # 'NaN'
+            # typo?
+            xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t').replace(';', ':')
+
+            ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
+            ipa = ipa.replace('ː', ':').replace(' ', '')
+            ipas.append(ipa)
+        else:
+            ipas.append('')
+
+    df_ = pd.DataFrame({'filename': df['Filename'],
+                        'word': df['Word'],
+                        'xsampa': df['Self Xsampa'],
+                        'ipa': pd.Series(ipas)})
+
+    # not valid inputs, but separator.
+    df_ = df_[~df_['ipa'].str.contains('/')]
+    return df_.dropna()
+
+
+def load_transcriptions():
+    """ return the rows of default.stimmen_transcription_xlsx
+    whose wav files can be found. """
+    df = _load_transcriptions()
+    df_ = pd.DataFrame(index=[], columns=list(df.keys()))
+    for index, row in df.iterrows():
+        filename = row['filename']
+        if isinstance(filename, str):
+            wav_file = os.path.join(default.stimmen_wav_dir, filename)
+            if os.path.exists(wav_file):
+                df_ = df_.append(row, ignore_index=True)
+    return df_
+
+
+def load_transcriptions_clean(clean_wav_dir):
+    df = _load_transcriptions()
+    wav_file_list = glob.glob(os.path.join(clean_wav_dir, '*.wav'))
+    df_clean = pd.DataFrame(index=[], columns=list(df.keys()))
+    for wav_file in wav_file_list:
+        filename = os.path.basename(wav_file)
+        df_ = df[df['filename'].str.match(filename)]
+        df_clean = pd.concat([df_clean, df_])
+    return df_clean
+
+
+def add_row_htk(df):
+    """ df['htk'] is made from df['ipa'] and added. """
+    htk = []
+    for index, row in df.iterrows():
+        htk.append(fame_functions.ipa2htk(row['ipa']))
+    return df.assign(htk=htk)
+
+
+def add_row_asr(df):
+    """ df['asr'] is made from df['ipa'] and added. """
+    asr = []
+    for index, row in df.iterrows():
+        asr.append(fame_functions.ipa2asr(row['ipa']))
+    return df.assign(asr=asr)
```
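A sketch of the intended call order for the new module, mirroring the rewritten alignment script above. One caveat: load_transcriptions() relies on DataFrame.append, which was removed in pandas 2.0, so on a current pandas the row collection would need pd.concat instead.

```python
import stimmen_functions

stimmen_test_dir = r'c:\OneDrive\Research\rug\_data\stimmen_test'
df = stimmen_functions.load_transcriptions_clean(stimmen_test_dir)
df = stimmen_functions.add_row_asr(df)  # adds df['asr'] derived from df['ipa']
df = stimmen_functions.add_row_htk(df)  # adds df['htk'] derived from df['ipa']
```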
acoustic_model/stimmen_test.py (new file, 64 lines)
```diff
@@ -0,0 +1,64 @@
+import os
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+import sys
+import shutil
+
+#import numpy as np
+import pandas as pd
+
+import defaultfiles as default
+import convert_xsampa2ipa
+import stimmen_functions
+import fame_functions
+import convert_phoneset
+from phoneset import fame_ipa, fame_asr
+sys.path.append(default.toolbox_dir)
+import file_handling as fh
+from htk import pyhtk
+
+
+## ======================= user define =======================
+
+
+
+## ======================= make test data ======================
+stimmen_test_dir = r'c:\OneDrive\Research\rug\_data\stimmen_test'
+
+## copy wav files which are in the stimmen data.
+df = stimmen_functions.load_transcriptions()
+#for index, row in df.iterrows():
+#    filename = row['filename']
+#    wav_file = os.path.join(default.stimmen_wav_dir, filename)
+#    shutil.copy(wav_file, os.path.join(stimmen_test_dir, filename))
+
+# after manually removing files which have too much noise or multiple words...
+# update the info.
+df_clean = stimmen_functions.load_transcriptions_clean(stimmen_test_dir)
+
+# count how many files are removed due to the quality.
+word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)]
+word_list = sorted(word_list)
+for word in word_list:
+    df_ = df[df['word']==word]
+    df_clean_ = df_clean[df_clean['word']==word]
+    print('word {0} has {1} clean files among {2} files ({3:.2f} [%]).'.format(
+        word, len(df_clean_), len(df_), len(df_clean_)/len(df_)*100))
+
+
+## check phones included in stimmen but not in FAME!
+splitted_ipas = [' '.join(
+    convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones))
+    for ipa in df['ipa']]
+stimmen_phones = set(' '.join(splitted_ipas))
+stimmen_phones = list(stimmen_phones)
+fame_phones = fame_ipa.phoneset
+stimmen_phones.sort()
+fame_phones.sort()
+print('phones which are used in stimmen transcription but not in FAME corpus are:\n{}'.format(
+    set(stimmen_phones) - set(fame_phones)
+))
+for ipa in df['ipa']:
+    ipa_splitted = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones)
+    if ':' in ipa_splitted:
+        print(ipa_splitted)
```