Merge branch 'master' of https://git.webhosting.rug.nl/p280427/acoustic_model
This commit is contained in:
commit
04a862b2fd
Binary file not shown.
@ -16,6 +16,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
|
|||||||
..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py
|
..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py
|
||||||
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
|
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
|
||||||
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
|
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
|
||||||
|
reus-test\reus-test.py = reus-test\reus-test.py
|
||||||
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
|
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
|
||||||
..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
|
..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
|
||||||
..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
|
..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
|
||||||
|
Binary file not shown.
@ -4,8 +4,7 @@
|
|||||||
<SchemaVersion>2.0</SchemaVersion>
|
<SchemaVersion>2.0</SchemaVersion>
|
||||||
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
||||||
<ProjectHome>.</ProjectHome>
|
<ProjectHome>.</ProjectHome>
|
||||||
<StartupFile>
|
<StartupFile>forced_aligner_comparison.py</StartupFile>
|
||||||
</StartupFile>
|
|
||||||
<SearchPath>
|
<SearchPath>
|
||||||
</SearchPath>
|
</SearchPath>
|
||||||
<WorkingDirectory>.</WorkingDirectory>
|
<WorkingDirectory>.</WorkingDirectory>
|
||||||
@ -36,6 +35,9 @@
|
|||||||
<Compile Include="fa_test.py">
|
<Compile Include="fa_test.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
|
<Compile Include="forced_aligner_comparison.py">
|
||||||
|
<SubType>Code</SubType>
|
||||||
|
</Compile>
|
||||||
<Compile Include="novoapi_forced_alignment.py">
|
<Compile Include="novoapi_forced_alignment.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
|
@ -10,14 +10,13 @@ import shutil
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
from sklearn.metrics import confusion_matrix
|
from sklearn.metrics import confusion_matrix
|
||||||
from sklearn.metrics import accuracy_score
|
from sklearn.metrics import accuracy_score
|
||||||
import novoapi
|
import novoapi
|
||||||
|
|
||||||
import defaultfiles as default
|
import defaultfiles as default
|
||||||
sys.path.append(default.forced_alignment_module_dir)
|
sys.path.append(default.forced_alignment_module_dir)
|
||||||
from forced_alignment import pyhtk, convert_phone_set
|
from forced_alignment import convert_phone_set
|
||||||
#import acoustic_model_functions as am_func
|
#import acoustic_model_functions as am_func
|
||||||
import convert_xsampa2ipa
|
import convert_xsampa2ipa
|
||||||
import novoapi_functions
|
import novoapi_functions
|
||||||
@ -47,10 +46,6 @@ david_suggestion = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'w']
|
|||||||
## read pronunciation variants.
|
## read pronunciation variants.
|
||||||
stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
|
stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
|
||||||
df = pd.read_excel(stimmen_transcription_, 'frequency')
|
df = pd.read_excel(stimmen_transcription_, 'frequency')
|
||||||
#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
|
|
||||||
# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
|
|
||||||
# if not ipa_converted == ipa:
|
|
||||||
# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
|
|
||||||
transcription_ipa = list(df['IPA'])
|
transcription_ipa = list(df['IPA'])
|
||||||
|
|
||||||
# transcription mistake?
|
# transcription mistake?
|
||||||
@ -63,6 +58,7 @@ for ipa in transcription_ipa:
|
|||||||
ipa = ipa.replace(':', 'ː')
|
ipa = ipa.replace(':', 'ː')
|
||||||
ipa = convert_phone_set.split_ipa(ipa)
|
ipa = convert_phone_set.split_ipa(ipa)
|
||||||
|
|
||||||
|
# list of phones not in novo70 phoneset.
|
||||||
not_in_novo70_ = [phone for phone in ipa
|
not_in_novo70_ = [phone for phone in ipa
|
||||||
if not phone in phoneset_ipa and not phone in david_suggestion]
|
if not phone in phoneset_ipa and not phone in david_suggestion]
|
||||||
not_in_novo70_ = [phone.replace('sp', '') for phone in not_in_novo70_]
|
not_in_novo70_ = [phone.replace('sp', '') for phone in not_in_novo70_]
|
||||||
@ -106,6 +102,10 @@ df = pd.read_excel(stimmen_transcription_, 'original')
|
|||||||
|
|
||||||
# mapping from ipa to xsampa
|
# mapping from ipa to xsampa
|
||||||
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
|
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
|
||||||
|
#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
|
||||||
|
# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
|
||||||
|
# if not ipa_converted == ipa:
|
||||||
|
# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
|
||||||
|
|
||||||
ipas = []
|
ipas = []
|
||||||
famehtks = []
|
famehtks = []
|
||||||
@ -153,12 +153,12 @@ for word in word_list:
|
|||||||
|
|
||||||
|
|
||||||
## ===== forced alignment =====
|
## ===== forced alignment =====
|
||||||
reus_dir = r'C:\OneDrive\Desktop\Reus'
|
rozen_dir = r'c:\Users\Aki\source\repos\acoustic_model\rozen-test'
|
||||||
if forced_alignment_novo70:
|
if forced_alignment_novo70:
|
||||||
Results = pd.DataFrame(index=[],
|
Results = pd.DataFrame(index=[],
|
||||||
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
|
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
|
||||||
#for word in word_list:
|
#for word in word_list:
|
||||||
for word in ['Reus']:
|
for word in ['Rozen']:
|
||||||
# pronunciation variants top 3
|
# pronunciation variants top 3
|
||||||
df_per_word_ = df_per_word[df_per_word['word']==word]
|
df_per_word_ = df_per_word[df_per_word['word']==word]
|
||||||
df_per_word_ = df_per_word_.sort_values('frequency', ascending=False)
|
df_per_word_ = df_per_word_.sort_values('frequency', ascending=False)
|
||||||
@ -208,37 +208,35 @@ if forced_alignment_novo70:
|
|||||||
wav_file = os.path.join(default.stimmen_wav_dir, filename)
|
wav_file = os.path.join(default.stimmen_wav_dir, filename)
|
||||||
if os.path.exists(wav_file):
|
if os.path.exists(wav_file):
|
||||||
# for Martijn
|
# for Martijn
|
||||||
#shutil.copy(wav_file, os.path.join(reus_dir, filename))
|
shutil.copy(wav_file, os.path.join(rozen_dir, filename))
|
||||||
|
|
||||||
pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
|
# pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
|
||||||
result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
|
# result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
|
||||||
result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
|
# result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
|
||||||
result_ = pd.Series([
|
# result_ = pd.Series([
|
||||||
sample['filename'],
|
# sample['filename'],
|
||||||
sample['word'],
|
# sample['word'],
|
||||||
sample['xsampa'],
|
# sample['xsampa'],
|
||||||
sample['ipa'],
|
# sample['ipa'],
|
||||||
' '.join(result_ipa),
|
# ' '.join(result_ipa),
|
||||||
' '.join(result_novo70),
|
# ' '.join(result_novo70),
|
||||||
llh
|
# llh
|
||||||
], index=results.columns)
|
# ], index=results.columns)
|
||||||
results = results.append(result_, ignore_index = True)
|
# results = results.append(result_, ignore_index = True)
|
||||||
print('{0}/{1}: answer {2} - prediction {3}'.format(
|
# print('{0}/{1}: answer {2} - prediction {3}'.format(
|
||||||
i+1, len(samples), result_['ipa'], result_['result_ipa']))
|
# i+1, len(samples), result_['ipa'], result_['result_ipa']))
|
||||||
results.to_excel(os.path.join(reus_dir, 'results.xlsx'), encoding="utf-8")
|
# #results.to_excel(os.path.join(default.stimmen_dir, 'results.xlsx'), encoding="utf-8")
|
||||||
if len(results) > 0:
|
#if len(results) > 0:
|
||||||
Results = Results.append(results, ignore_index = True)
|
# Results = Results.append(results, ignore_index = True)
|
||||||
Results.to_excel(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
|
#Results.to_excel(os.path.join(default.stimmen_result_novoapi_dir, 'Results.xlsx'), encoding="utf-8")
|
||||||
else:
|
else:
|
||||||
Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
|
Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_result_novoapi_dir, 'Results.xlsx'), encoding="utf-8")
|
||||||
Results = pd.read_excel(Results_xlsx, 'Sheet1')
|
Results = pd.read_excel(Results_xlsx, 'Sheet1')
|
||||||
|
|
||||||
|
|
||||||
## ===== analysis =====
|
## ===== analysis =====
|
||||||
#result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi')
|
|
||||||
#for word in word_list:
|
#for word in word_list:
|
||||||
# if not word == 'Oog':
|
# if not word == 'Oog':
|
||||||
|
|
||||||
# Results_ = Results[Results['word'] == word]
|
# Results_ = Results[Results['word'] == word]
|
||||||
# y_true = list(Results_['ipa'])
|
# y_true = list(Results_['ipa'])
|
||||||
# y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])]
|
# y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])]
|
||||||
@ -249,4 +247,4 @@ else:
|
|||||||
# plt.figure()
|
# plt.figure()
|
||||||
# output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False)
|
# output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False)
|
||||||
# #plt.show()
|
# #plt.show()
|
||||||
# plt.savefig(os.path.join(result_novoapi_dir, word + '.png'))
|
# plt.savefig(os.path.join(default.stimmen_result_novoapi_dir, word + '.png'))
|
@ -31,6 +31,12 @@ ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
|
|||||||
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
|
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
|
||||||
accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
|
accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
|
||||||
|
|
||||||
|
htk_config_dir = r'c:\Users\Aki\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
|
||||||
|
config_hvite = os.path.join(htk_config_dir, 'config.HVite')
|
||||||
|
#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
|
||||||
|
acoustic_model = r'c:\cygwin64\home\Aki\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
|
||||||
|
phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
|
||||||
|
|
||||||
WSL_dir = r'C:\OneDrive\WSL'
|
WSL_dir = r'C:\OneDrive\WSL'
|
||||||
fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
|
fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
|
||||||
fame_s5_dir = os.path.join(fame_dir, 's5')
|
fame_s5_dir = os.path.join(fame_dir, 's5')
|
||||||
@ -43,6 +49,7 @@ stimmen_data_dir = os.path.join(stimmen_dir, 'data')
|
|||||||
#stimmen_wav_dir = os.path.join(stimmen_dir, 'wav')
|
#stimmen_wav_dir = os.path.join(stimmen_dir, 'wav')
|
||||||
# 16 kHz
|
# 16 kHz
|
||||||
stimmen_wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen'
|
stimmen_wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen'
|
||||||
|
stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi')
|
||||||
|
|
||||||
stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx')
|
stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx')
|
||||||
phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
|
phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
|
||||||
|
42
acoustic_model/forced_aligner_comparison.py
Normal file
42
acoustic_model/forced_aligner_comparison.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import os
|
||||||
|
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import defaultfiles as default
|
||||||
|
sys.path.append(default.forced_alignment_module_dir)
|
||||||
|
from forced_alignment import pyhtk, convert_phone_set, scripts
|
||||||
|
|
||||||
|
# Directory holding the test recordings; the generated HTK files are written
# into the same directory.
reus_dir = r'c:\Users\Aki\source\repos\acoustic_model\reus-test'
wav_dir = reus_dir
wav_files = [
    'reus1008-reus.wav',
    'reus1167-man.wav',
    'reus3768-mantsje.wav',
]

# Every recording is aligned against the same word label and the same list
# of candidate pronunciations.
word = 'reus'
pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']

for wav_file in wav_files:
    # The label, dictionary and result files share the recording's file name
    # and differ only in their extension.
    file_lab, file_dic, file_txt = (
        os.path.join(reus_dir, wav_file.replace('.wav', suffix))
        for suffix in ('.lab', '.dic', '.txt')
    )

    # HTK dictionary: one "<WORD>\t<phones>" line per candidate pronunciation.
    # ':' is replaced by the IPA length mark 'ː' before the IPA -> CGN ->
    # barbara phone-set conversion.
    headword = word.upper()
    with open(file_dic, 'w', encoding="utf-8") as dic:
        dic.writelines(
            headword + '\t'
            + convert_phone_set.cgn2barbara(
                convert_phone_set.ipa2cgn([variant.replace(':', 'ː')]))
            + '\n'
            for variant in pronunciation_ipa
        )

    # HTK label file for the word.
    pyhtk._create_label_file(word, file_lab)

    # Run HVite with the configured config file and acoustic model, writing
    # its output to file_txt. No script file ('-S') is passed; the single wav
    # file is given directly as the last argument.
    hvite_command = [
        'HVite', '-T', '1',
        '-a',
        '-C', default.config_hvite,
        '-H', default.acoustic_model,
        '-m',
        '-i', file_txt,
        file_dic, default.phonelist_txt, os.path.join(wav_dir, wav_file),
    ]
    scripts.run_command(hvite_command)
|
@ -7,7 +7,7 @@ import json
|
|||||||
from novoapi.backend import session
|
from novoapi.backend import session
|
||||||
|
|
||||||
import os
|
import os
|
||||||
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
#os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||||
import defaultfiles as default
|
import defaultfiles as default
|
||||||
|
|
||||||
|
|
||||||
|
BIN
reus-test/check_novoapi.zip
Normal file
BIN
reus-test/check_novoapi.zip
Normal file
Binary file not shown.
3
reus-test/reus1008-reus.dic
Normal file
3
reus-test/reus1008-reus.dic
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
REUS r eu s
|
||||||
|
REUS m ac n
|
||||||
|
REUS m ac n t s j @
|
1
reus-test/reus1008-reus.lab
Normal file
1
reus-test/reus1008-reus.lab
Normal file
@ -0,0 +1 @@
|
|||||||
|
REUS
|
6
reus-test/reus1008-reus.txt
Normal file
6
reus-test/reus1008-reus.txt
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#!MLF!#
|
||||||
|
"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1008-reus.rec"
|
||||||
|
0 9700000 r -12463.852539 REUS
|
||||||
|
9700000 12800000 eu -3622.108887
|
||||||
|
12800000 26250001 s -17303.216797
|
||||||
|
.
|
3
reus-test/reus1167-man.dic
Normal file
3
reus-test/reus1167-man.dic
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
REUS r eu s
|
||||||
|
REUS m ac n
|
||||||
|
REUS m ac n t s j @
|
1
reus-test/reus1167-man.lab
Normal file
1
reus-test/reus1167-man.lab
Normal file
@ -0,0 +1 @@
|
|||||||
|
REUS
|
10
reus-test/reus1167-man.txt
Normal file
10
reus-test/reus1167-man.txt
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#!MLF!#
|
||||||
|
"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1167-man.rec"
|
||||||
|
0 150000 m -230.057571 REUS
|
||||||
|
150000 300000 ac -250.994858
|
||||||
|
300000 450000 n -202.377716
|
||||||
|
450000 4600000 t -5128.984375
|
||||||
|
4600000 5050000 s -711.338501
|
||||||
|
5050000 5450000 j -564.730591
|
||||||
|
5450000 16049999 @ -13249.787109
|
||||||
|
.
|
3
reus-test/reus3768-mantsje.dic
Normal file
3
reus-test/reus3768-mantsje.dic
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
REUS r eu s
|
||||||
|
REUS m ac n
|
||||||
|
REUS m ac n t s j @
|
1
reus-test/reus3768-mantsje.lab
Normal file
1
reus-test/reus3768-mantsje.lab
Normal file
@ -0,0 +1 @@
|
|||||||
|
REUS
|
10
reus-test/reus3768-mantsje.txt
Normal file
10
reus-test/reus3768-mantsje.txt
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#!MLF!#
|
||||||
|
"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus3768-mantsje.rec"
|
||||||
|
0 150000 m -217.347229 REUS
|
||||||
|
150000 1150000 ac -1266.293579
|
||||||
|
1150000 1650000 n -583.382568
|
||||||
|
1650000 11100000 t -11259.270508
|
||||||
|
11100000 11250000 s -247.939255
|
||||||
|
11250000 11550000 j -445.511444
|
||||||
|
11550000 24150000 @ -16769.048828
|
||||||
|
.
|
BIN
rozen-test/pg_rozen_100_jko5r.wav
Normal file
BIN
rozen-test/pg_rozen_100_jko5r.wav
Normal file
Binary file not shown.
BIN
rozen-test/pg_rozen_113_o9kzs.wav
Normal file
BIN
rozen-test/pg_rozen_113_o9kzs.wav
Normal file
Binary file not shown.
BIN
rozen-test/pg_rozen_1296_zbve2.wav
Normal file
BIN
rozen-test/pg_rozen_1296_zbve2.wav
Normal file
Binary file not shown.
BIN
rozen-test/pg_rozen_1709_kq9xr.wav
Normal file
BIN
rozen-test/pg_rozen_1709_kq9xr.wav
Normal file
Binary file not shown.
BIN
rozen-test/pg_rozen_241_bahqi.wav
Normal file
BIN
rozen-test/pg_rozen_241_bahqi.wav
Normal file
Binary file not shown.
BIN
rozen-test/pg_rozen_5502_q79fd.wav
Normal file
BIN
rozen-test/pg_rozen_5502_q79fd.wav
Normal file
Binary file not shown.
BIN
rozen-test/pg_rozen_632_2m04y.wav
Normal file
BIN
rozen-test/pg_rozen_632_2m04y.wav
Normal file
Binary file not shown.
BIN
rozen-test/pg_rozen_911_1zvda.wav
Normal file
BIN
rozen-test/pg_rozen_911_1zvda.wav
Normal file
Binary file not shown.
119
rozen-test/rozen-test.py
Normal file
119
rozen-test/rozen-test.py
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
import os
|
||||||
|
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
|
||||||
|
from novoapi.backend import session
|
||||||
|
|
||||||
|
# Credentials for the recognizer service can be overridden on the command line.
parser = argparse.ArgumentParser()
parser.add_argument("--user", default='martijn.wieling')
parser.add_argument("--password", default='xxxxx')
args = parser.parse_args()

rec = session.Recognizer(
    grammar_version="1.0",
    lang="nl",
    snodeid=101,
    user=args.user,
    password=args.password,
    keepopen=True,
)

# Candidate pronunciations for the word "reus"; the position in this list
# becomes the pronunciation id in the grammar.
candidate_phones = [
    ["r", "eu0", "s"],
    ["m", "a0", "n"],
    ["m", "a0", "n", "t", "s", "y", "ax"],
]
pronunciations = [
    {"phones": phones, "id": index}
    for index, phones in enumerate(candidate_phones)
]
word_element = {
    "kind": "word",
    "pronunciation": pronunciations,
    "label": "reus",
}

# Single-word grammar in which the pronunciations above compete.
grammar = {
    "type": "confusion_network",
    "version": "1.0",
    "data": {
        "kind": "sequence",
        "elements": [word_element],
    },
    "return_objects": ["grammar"],
    "phoneset": "novo70",
}

res = rec.setgrammar(grammar)
|
||||||
|
#print "Set grammar result", res
|
||||||
|
|
||||||
|
|
||||||
|
## === novoapi/backend/session.py ===
|
||||||
|
#import wave
|
||||||
|
#import time
|
||||||
|
#from novoapi.backend.session import rpcid, segmentation
|
||||||
|
|
||||||
|
#wavf = "reus1008-reus.wav"
|
||||||
|
#w = wave.open(wavf, 'r')
|
||||||
|
#nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
|
||||||
|
#buf = w.readframes(nframes)
|
||||||
|
#w.close()
|
||||||
|
|
||||||
|
#buffer_size = 4096
|
||||||
|
#nbytes_sent = 0
|
||||||
|
#start = time.time()
|
||||||
|
#for j in range(0, len(buf), buffer_size):
|
||||||
|
# audio_packet = buf[j:j + buffer_size]
|
||||||
|
# nbytes_sent += len(audio_packet)
|
||||||
|
# rec.conn.send_binary(audio_packet)
|
||||||
|
#rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
|
||||||
|
#print(rpcid.next())
|
||||||
|
#rec.last_message = rec.conn.recv()
|
||||||
|
#message = json.loads(rec.last_message)
|
||||||
|
#result = session.segmentation(message["result"]["words"])
|
||||||
|
#result.export()
|
||||||
|
## ====================================
|
||||||
|
|
||||||
|
def result2pronunciation(result, word):
    """Return the recognised pronunciation and likelihood for *word*.

    Args:
        result: iterable of word entries; each entry is indexed by the keys
            'label', 'llh' and 'phones', and each item of 'phones' is
            indexed by 'label'.
        word: label of the entry to look up.

    Returns:
        A ``(pronunciation, llh)`` tuple: the list of phone labels of the
        first entry whose 'label' equals *word*, and that entry's 'llh'.

    Raises:
        IndexError: if no entry in *result* is labelled *word*.
    """
    # Only the first matching entry is used, so stop scanning once found.
    match = next((entry for entry in result if entry['label'] == word), None)
    if match is None:
        raise IndexError(
            "no entry labelled {0!r} in the recognition result".format(word))

    llh = match['llh']
    pronunciation = [phone['label'] for phone in match['phones']]
    return pronunciation, llh
|
||||||
|
|
||||||
|
|
||||||
|
# Each recording paired with the word that is actually pronounced in it.
recordings = [
    ("reus1008-reus.wav", "REUS"),
    ("reus1167-man.wav", "MAN"),
    ("reus3768-mantsje.wav", "MANTSJE"),
]

for wav_file, spoken_word in recordings:
    recognition = rec.recognize_wav(wav_file)
    # The grammar labels the word 'reus' whichever pronunciation wins, so
    # that label is the one to look up in every result.
    pronunciation, llh = result2pronunciation(recognition.export(), 'reus')
    # Report the outcome instead of silently discarding it.
    print('The pronounced word in {0} is: {1}'.format(wav_file, spoken_word))
    print('Recognition result: {0} (llh: {1})'.format(pronunciation, llh))
|
Loading…
Reference in New Issue
Block a user