|
|
|
## NOTE(review): stray diff hunk header "@@ -9,7 +9,7 @@ import sys" left in
## the file by a bad merge/extraction -- preserved as a comment; safe to delete.
|
|
|
|
|
import shutil |
|
|
|
|
import glob |
|
|
|
|
|
|
|
|
|
#import numpy as np |
|
|
|
|
import numpy as np |
|
|
|
|
import pandas as pd |
|
|
|
|
#import matplotlib.pyplot as plt |
|
|
|
|
#from sklearn.metrics import confusion_matrix |
|
|
|
## NOTE(review): stray diff hunk header
## "@@ -75,24 +75,22 @@ lattice_file = os.path.join(config_dir, 'stimmen.ltc')"
## left by a bad merge/extraction -- preserved as a comment; safe to delete.
|
|
|
|
|
# HTK script file (.scp) listing the wav files to be tested.
hvite_scp = os.path.join(default.htk_dir, 'tmp', 'stimmen_test.scp')


## ======================= make test data ======================
# copy wav files which is in the stimmen data.


## ======================= load test data ======================
stimmen_test_dir = r'c:\OneDrive\Research\rug\_data\stimmen_test'
fh.make_filelist(stimmen_test_dir, hvite_scp, file_type='wav')

# NOTE(review): the result of load_transcriptions() is discarded --- it is
# overwritten immediately by load_transcriptions_clean() below, so the first
# call looks redundant; confirm it has no needed side effect, then remove it.
df = stimmen_functions.load_transcriptions()
df = stimmen_functions.load_transcriptions_clean(stimmen_test_dir)
df = stimmen_functions.add_row_asr(df)
df = stimmen_functions.add_row_htk(df)

# Unique target words (NaN entries dropped), in sorted order.
word_list = sorted(i for i in set(df['word']) if not pd.isnull(i))

# pronunciation variants
for word in word_list:
    df_ = df[df['word'] == word]
    # BUGFIX: the original print call was missing its closing parenthesis,
    # which made the file a syntax error.
    print('{0} has {1} variants'.format(word, len(np.unique(df_['htk']))))

# after manually removed files which does not contain clear sound,
# update df as df_test.
wav_file_list = glob.glob(os.path.join(stimmen_test_dir, '*.wav'))
df_test = pd.DataFrame(index=[], columns=list(df.keys()))
for wav_file in wav_file_list:
    filename = os.path.basename(wav_file)
    # NOTE(review): str.match treats `filename` as a regex, so the '.' before
    # the extension matches any character -- presumably harmless here, but an
    # exact equality test (df['filename'] == filename) would be stricter.
    df_ = df[df['filename'].str.match(filename)]
    df_test = pd.concat([df_test, df_])

#fh.make_filelist(stimmen_test_dir, hvite_scp, file_type='wav')

#output = pyhtk.recognition(
#    os.path.join(default.htk_dir, 'config', 'config.rec',
|
|
|
## NOTE(review): stray diff hunk header
## "@@ -102,58 +100,21 @@ for wav_file in wav_file_list:"
## left by a bad merge/extraction -- preserved as a comment; safe to delete.
|
|
|
|
|
# os.path.join(config_dir, 'phonelist.txt'), |
|
|
|
|
# hvite_scp) |
|
|
|
|
|
|
|
|
|
# Convert every IPA transcription in the loaded dataframe to the FAME/HTK
# phone set.
htk = [fame_functions.ipa2htk(ipa) for ipa in df['ipa']]

# Single-sample spot check of the IPA -> HTK conversion.
# NOTE(review): the return value is discarded; this looks like a leftover
# from an interactive session --- confirm whether these two lines can go.
ipa = 'e:χ'
fame_functions.ipa2htk(ipa)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# The 'original' sheet holds the columns: Filename, Word, Self Xsampa.
df = pd.read_excel(xls, 'original')

# Per-row conversions; empty strings keep NaN rows aligned with the frame.
ipas = []
famehtks = []
for xsampa in df['Self Xsampa']:
    if isinstance(xsampa, float):  # 'NaN' cell
        ipas.append('')
        famehtks.append('')
        continue
    # typo?
    xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t').replace(';', ':')

    # X-SAMPA -> IPA, then normalise the length mark and strip spaces.
    ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
    ipa = ipa.replace('ː', ':').replace(' ', '')
    ipas.append(ipa)

    # IPA -> FAME/HTK phone set.
    famehtk = convert_phone_set.ipa2famehtk(ipa)
    famehtks.append(famehtk)

# extract interesting cols.
interesting_cols = {
    'filename': df['Filename'],
    'word': df['Word'],
    'xsampa': df['Self Xsampa'],
    'ipa': pd.Series(ipas),
    'famehtk': pd.Series(famehtks),
}
df = pd.DataFrame(interesting_cols)

# cleansing: drop rows whose FAME/HTK transcription is empty or '/'.
df = df[~df['famehtk'].isin(['/', ''])]
#pyhtk.create_label_file(
#    row['word'],
#    os.path.join(stimmen_test_dir, filename.replace('.wav', '.lab')))

word_list = np.unique(df['word'])
|
|
|
|
## ======================= make dict files used for HTK. ======================
# Write one HTK pronunciation dictionary (.dic) per target word.
# NOTE(review): indentation below is reconstructed; make_htk_dict_files,
# htk_dict_dir and am_func are defined elsewhere in this file.
if make_htk_dict_files:
    output_type = 3

    for word in word_list:
        htk_dict_file = htk_dict_dir + '\\' + word + '.dic'

        # pronunciation variant of the target word.
        pronvar_ = df['famehtk'][df['word'].str.match(word)]

## ======================= make a HTK dic file ======================
#if make_htk_dic_file:
#    output_type = 3
dictionary_txt = os.path.join(default.htk_dir, 'lexicon', 'stimmen.dic')
#for word in word_list:
word = word_list[2]
# pronunciation variant of the target word.
pronunciations = df_test['asr'][df_test['word'].str.match(word)]

# make dic file.
# NOTE(review): this call reads pronvar_, htk_dict_file and output_type,
# which are only bound inside the loop above (and not at all when
# make_htk_dict_files is False), while pronunciations and dictionary_txt
# computed just above are never used --- the call probably belongs inside
# that loop, or should use pronunciations/dictionary_txt instead. Confirm
# the intended experiment before running this section.
am_func.make_htk_dict(word, pronvar_, htk_dict_file, output_type)
#am_func.make_htk_dict(word, pronvar_, htk_dict_file, output_type)


## ======================= forced alignment using HTK =======================
|
|
|
|