Compare commits
No commits in common. "de5c9cecb94ce64206dc409b5d7773f5f529524b" and "6edde06a4fdcf239bae7b01f6e389e00ee270647" have entirely different histories.
de5c9cecb9
...
6edde06a4f
Binary file not shown.
13152
HCompV.scp
Normal file
13152
HCompV.scp
Normal file
File diff suppressed because it is too large
Load Diff
@ -10,6 +10,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
|
|||||||
..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
|
..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
|
||||||
..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
|
..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
|
||||||
..\toolbox\evaluation.py = ..\toolbox\evaluation.py
|
..\toolbox\evaluation.py = ..\toolbox\evaluation.py
|
||||||
|
..\forced_alignment\forced_alignment\forced_alignment.pyproj = ..\forced_alignment\forced_alignment\forced_alignment.pyproj
|
||||||
..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
|
..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
|
||||||
..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
|
..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
|
||||||
..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
|
..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
|
||||||
@ -17,7 +18,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
|
|||||||
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
|
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
|
||||||
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
|
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
|
||||||
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
|
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
|
||||||
..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
|
|
||||||
..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
|
..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
|
||||||
..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
|
..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
|
||||||
EndProjectSection
|
EndProjectSection
|
||||||
|
Binary file not shown.
@ -4,8 +4,7 @@
|
|||||||
<SchemaVersion>2.0</SchemaVersion>
|
<SchemaVersion>2.0</SchemaVersion>
|
||||||
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
||||||
<ProjectHome>.</ProjectHome>
|
<ProjectHome>.</ProjectHome>
|
||||||
<StartupFile>
|
<StartupFile>check_novoapi.py</StartupFile>
|
||||||
</StartupFile>
|
|
||||||
<SearchPath>
|
<SearchPath>
|
||||||
</SearchPath>
|
</SearchPath>
|
||||||
<WorkingDirectory>.</WorkingDirectory>
|
<WorkingDirectory>.</WorkingDirectory>
|
||||||
|
@ -3,31 +3,23 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import csv
|
import csv
|
||||||
from collections import Counter
|
#import subprocess
|
||||||
import random
|
#from collections import Counter
|
||||||
import shutil
|
#import re
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import matplotlib.pyplot as plt
|
#import matplotlib.pyplot as plt
|
||||||
|
#from sklearn.metrics import confusion_matrix
|
||||||
|
|
||||||
from sklearn.metrics import confusion_matrix
|
import acoustic_model_functions as am_func
|
||||||
from sklearn.metrics import accuracy_score
|
|
||||||
import novoapi
|
|
||||||
|
|
||||||
import defaultfiles as default
|
|
||||||
sys.path.append(default.forced_alignment_module_dir)
|
|
||||||
from forced_alignment import pyhtk, convert_phone_set
|
|
||||||
#import acoustic_model_functions as am_func
|
|
||||||
import convert_xsampa2ipa
|
import convert_xsampa2ipa
|
||||||
|
import defaultfiles as default
|
||||||
|
|
||||||
|
from forced_alignment import pyhtk, convert_phone_set
|
||||||
|
|
||||||
|
import novoapi
|
||||||
import novoapi_functions
|
import novoapi_functions
|
||||||
sys.path.append(default.accent_classification_dir)
|
|
||||||
import output_confusion_matrix
|
|
||||||
|
|
||||||
## procedure
|
|
||||||
forced_alignment_novo70 = True
|
|
||||||
balance_sample_numbers = False
|
|
||||||
|
|
||||||
|
|
||||||
## ===== load novo phoneset =====
|
## ===== load novo phoneset =====
|
||||||
phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa = novoapi_functions.load_phonset()
|
phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa = novoapi_functions.load_phonset()
|
||||||
@ -147,106 +139,6 @@ df_per_word = pd.DataFrame(index=[], columns=df_samples.keys())
|
|||||||
|
|
||||||
for word in word_list:
|
for word in word_list:
|
||||||
df_samples_ = df_samples[df_samples['word']==word]
|
df_samples_ = df_samples[df_samples['word']==word]
|
||||||
df_samples_ = df_samples_[df_samples_['frequency']>2]
|
df_samples_ = df_samples_[df_samples_['frequency']>1]
|
||||||
df_per_word = df_per_word.append(df_samples_, ignore_index=True)
|
df_per_word = df_per_word.append(df_samples_, ignore_index=True)
|
||||||
#df_per_word.to_excel(os.path.join(default.stimmen_dir, 'pronunciation_variants_novo70.xlsx'), encoding="utf-8")
|
df_per_word.to_excel(os.path.join(default.stimmen_dir, 'pronunciation_variants_novo70.xlsx'), encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
## ===== forced alignment =====
|
|
||||||
reus_dir = r'C:\OneDrive\Desktop\Reus'
|
|
||||||
if forced_alignment_novo70:
|
|
||||||
Results = pd.DataFrame(index=[],
|
|
||||||
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
|
|
||||||
#for word in word_list:
|
|
||||||
for word in ['Reus']:
|
|
||||||
# pronunciation variants top 3
|
|
||||||
df_per_word_ = df_per_word[df_per_word['word']==word]
|
|
||||||
df_per_word_ = df_per_word_.sort_values('frequency', ascending=False)
|
|
||||||
if len(df_per_word_) < 3: # pauw, rozen
|
|
||||||
pronunciation_ipa = list(df_per_word_['ipa'])
|
|
||||||
elif word=='Reuzenrad':
|
|
||||||
pronunciation_ipa = [
|
|
||||||
df_per_word_.iloc[0]['ipa'],
|
|
||||||
df_per_word_.iloc[1]['ipa'],
|
|
||||||
df_per_word_.iloc[2]['ipa'],
|
|
||||||
df_per_word_.iloc[3]['ipa']]
|
|
||||||
else:
|
|
||||||
# oog, oor, reus, roeiboot
|
|
||||||
pronunciation_ipa = [
|
|
||||||
df_per_word_.iloc[0]['ipa'],
|
|
||||||
df_per_word_.iloc[1]['ipa'],
|
|
||||||
df_per_word_.iloc[2]['ipa']]
|
|
||||||
#print("{0}: {1}".format(word, pronunciation_ipa))
|
|
||||||
|
|
||||||
# samples for the word
|
|
||||||
df_ = df[df['word']==word]
|
|
||||||
|
|
||||||
# samples in which all pronunciations are written in novo70.
|
|
||||||
samples = df_.query("ipa in @pronunciation_ipa")
|
|
||||||
|
|
||||||
|
|
||||||
## ===== balance sample numbers =====
|
|
||||||
if balance_sample_numbers:
|
|
||||||
c = Counter(samples['ipa'])
|
|
||||||
sample_num_list = [c[key] for key in c.keys()]
|
|
||||||
sample_num = np.min(sample_num_list)
|
|
||||||
|
|
||||||
samples_balanced = pd.DataFrame(index=[], columns=list(samples.keys()))
|
|
||||||
for key in c.keys():
|
|
||||||
samples_ = samples[samples['ipa'] == key]
|
|
||||||
samples_balanced = samples_balanced.append(samples_.sample(sample_num), ignore_index = True)
|
|
||||||
|
|
||||||
samples = samples_balanced
|
|
||||||
|
|
||||||
|
|
||||||
results = pd.DataFrame(index=[],
|
|
||||||
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
|
|
||||||
|
|
||||||
for i in range(0, len(samples)):
|
|
||||||
sample = samples.iloc[i]
|
|
||||||
filename = sample['filename']
|
|
||||||
wav_file = os.path.join(default.stimmen_wav_dir, filename)
|
|
||||||
if os.path.exists(wav_file):
|
|
||||||
# for Martijn
|
|
||||||
#shutil.copy(wav_file, os.path.join(reus_dir, filename))
|
|
||||||
|
|
||||||
pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
|
|
||||||
result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
|
|
||||||
result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
|
|
||||||
result_ = pd.Series([
|
|
||||||
sample['filename'],
|
|
||||||
sample['word'],
|
|
||||||
sample['xsampa'],
|
|
||||||
sample['ipa'],
|
|
||||||
' '.join(result_ipa),
|
|
||||||
' '.join(result_novo70),
|
|
||||||
llh
|
|
||||||
], index=results.columns)
|
|
||||||
results = results.append(result_, ignore_index = True)
|
|
||||||
print('{0}/{1}: answer {2} - prediction {3}'.format(
|
|
||||||
i+1, len(samples), result_['ipa'], result_['result_ipa']))
|
|
||||||
results.to_excel(os.path.join(reus_dir, 'results.xlsx'), encoding="utf-8")
|
|
||||||
if len(results) > 0:
|
|
||||||
Results = Results.append(results, ignore_index = True)
|
|
||||||
Results.to_excel(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
|
|
||||||
else:
|
|
||||||
Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
|
|
||||||
Results = pd.read_excel(Results_xlsx, 'Sheet1')
|
|
||||||
|
|
||||||
|
|
||||||
## ===== analysis =====
|
|
||||||
#result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi')
|
|
||||||
#for word in word_list:
|
|
||||||
# if not word == 'Oog':
|
|
||||||
|
|
||||||
# Results_ = Results[Results['word'] == word]
|
|
||||||
# y_true = list(Results_['ipa'])
|
|
||||||
# y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])]
|
|
||||||
# y_pred = [ipa.replace('ː', ':') for ipa in y_pred_]
|
|
||||||
# pronunciation_variants = list(set(y_true))
|
|
||||||
# cm = confusion_matrix(y_true, y_pred, labels=pronunciation_variants)
|
|
||||||
|
|
||||||
# plt.figure()
|
|
||||||
# output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False)
|
|
||||||
# #plt.show()
|
|
||||||
# plt.savefig(os.path.join(result_novoapi_dir, word + '.png'))
|
|
@ -29,7 +29,6 @@ config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
|
|||||||
repo_dir = r'C:\Users\Aki\source\repos'
|
repo_dir = r'C:\Users\Aki\source\repos'
|
||||||
ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
|
ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
|
||||||
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
|
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
|
||||||
accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
|
|
||||||
|
|
||||||
WSL_dir = r'C:\OneDrive\WSL'
|
WSL_dir = r'C:\OneDrive\WSL'
|
||||||
fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
|
fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
|
||||||
|
@ -6,8 +6,6 @@ import json
|
|||||||
|
|
||||||
from novoapi.backend import session
|
from novoapi.backend import session
|
||||||
|
|
||||||
import os
|
|
||||||
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
|
||||||
import defaultfiles as default
|
import defaultfiles as default
|
||||||
|
|
||||||
|
|
||||||
@ -38,29 +36,10 @@ def load_phonset():
|
|||||||
phoneset_novo70.append(novo70)
|
phoneset_novo70.append(novo70)
|
||||||
translation_key_ipa2novo70[ipa] = novo70
|
translation_key_ipa2novo70[ipa] = novo70
|
||||||
translation_key_novo702ipa[novo70] = ipa
|
translation_key_novo702ipa[novo70] = ipa
|
||||||
|
|
||||||
# As per Nederlandse phoneset_aki.xlsx received from David
|
|
||||||
# [ɔː] oh / ohr # from ipa->novo70, only oh is used.
|
|
||||||
# [ɪː] ih / ihr # from ipa->novo70, only ih is used.
|
|
||||||
# [iː] iy
|
|
||||||
# [œː] uh
|
|
||||||
# [ɛː] eh
|
|
||||||
# [w] wv in IPA written as ʋ.
|
|
||||||
extra_ipa = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'ʋ']
|
|
||||||
extra_novo70 = ['oh', 'ih', 'iy', 'uh', 'eh', 'wv']
|
|
||||||
for ipa, novo70 in zip(extra_ipa, extra_novo70):
|
|
||||||
phoneset_ipa.append(ipa)
|
|
||||||
phoneset_novo70.append(novo70)
|
|
||||||
translation_key_ipa2novo70[ipa] = novo70
|
|
||||||
translation_key_novo702ipa[novo70] = ipa
|
|
||||||
|
|
||||||
translation_key_novo702ipa['ohr'] = 'ɔː'
|
|
||||||
translation_key_novo702ipa['ihr'] = 'ɪː'
|
|
||||||
|
|
||||||
phoneset_ipa = np.unique(phoneset_ipa)
|
phoneset_ipa = np.unique(phoneset_ipa)
|
||||||
phoneset_novo70 = np.unique(phoneset_novo70)
|
phoneset_novo70 = np.unique(phoneset_novo70)
|
||||||
|
|
||||||
return phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa
|
return
|
||||||
|
|
||||||
|
|
||||||
def multi_character_tokenize(line, multi_character_tokens):
|
def multi_character_tokenize(line, multi_character_tokens):
|
||||||
@ -191,9 +170,4 @@ def result2pronunciation(result, word):
|
|||||||
phones = result_[0]['phones']
|
phones = result_[0]['phones']
|
||||||
pronunciation_novo70 = [phone['label'] for phone in phones]
|
pronunciation_novo70 = [phone['label'] for phone in phones]
|
||||||
pronunciation_ipa = [novo702ipa(phone) for phone in pronunciation_novo70]
|
pronunciation_ipa = [novo702ipa(phone) for phone in pronunciation_novo70]
|
||||||
return pronunciation_ipa, pronunciation_novo70, llh
|
return pronunciation_ipa, pronunciation_novo70, llh
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Manual smoke test, executed only when this module is run as a script:
    # build a grammar for the word "reus" from three IPA pronunciation
    # variants.  (The guard previously compared against 'main', which never
    # matches, so this code could not run.)
    pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']
    grammar = make_grammar('reus', pronunciation_ipa)
|
|
1600
acoustic_model/script.txt
Normal file
1600
acoustic_model/script.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -28,6 +28,7 @@ def print_info_tier(output, title, begin, end, label):
|
|||||||
print >> output, '\t\t\ttext = "%s"' % label
|
print >> output, '\t\t\ttext = "%s"' % label
|
||||||
|
|
||||||
|
|
||||||
|
#def print_tier(output, title, begin, end, segs, (format, formatter)):
|
||||||
def print_tier(output, title, begin, end, segs, format, formatter):
|
def print_tier(output, title, begin, end, segs, format, formatter):
|
||||||
print >> output, '\titem [%d]:' % 0
|
print >> output, '\titem [%d]:' % 0
|
||||||
print >> output, '\t\tclass = "IntervalTier"'
|
print >> output, '\t\tclass = "IntervalTier"'
|
||||||
@ -69,11 +70,8 @@ def seg2tg(fname, segments):
|
|||||||
|
|
||||||
nr_tiers = 3
|
nr_tiers = 3
|
||||||
print_header(output, begin, end, nr_tiers)
|
print_header(output, begin, end, nr_tiers)
|
||||||
#print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
|
print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
|
||||||
#print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
|
print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
|
||||||
#print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
|
print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
|
||||||
print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
|
|
||||||
print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
|
|
||||||
print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
|
|
||||||
|
|
||||||
output.close()
|
output.close()
|
@ -266,7 +266,7 @@ def test(data=None):
|
|||||||
print("{0} validated not OK {1}".format(data, e.message))
|
print("{0} validated not OK {1}".format(data, e.message))
|
||||||
else:
|
else:
|
||||||
#print data, "validated OK"
|
#print data, "validated OK"
|
||||||
print("{0} validated OK".format(data))
|
print("{} validated OK".format(data))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
@ -188,8 +188,7 @@ class Recognizer(object):
|
|||||||
nbytes_sent = 0
|
nbytes_sent = 0
|
||||||
start = time.time()
|
start = time.time()
|
||||||
for j in range(0, len(buf), buffer_size):
|
for j in range(0, len(buf), buffer_size):
|
||||||
#audio_packet = str(buf[j:j + buffer_size])
|
audio_packet = str(buf[j:j + buffer_size])
|
||||||
audio_packet = buf[j:j + buffer_size]
|
|
||||||
nbytes_sent += len(audio_packet)
|
nbytes_sent += len(audio_packet)
|
||||||
self.conn.send_binary(audio_packet)
|
self.conn.send_binary(audio_packet)
|
||||||
self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
|
self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
|
@ -1,64 +0,0 @@
|
|||||||
novoapi (https://bitbucket.org/novolanguage/python-novo-api) is written in Python 2.7.
|
|
||||||
To install it on Python 3.x, the following points must be modified:
|
|
||||||
- basestring --> str
|
|
||||||
- print xxx --> print("{}".format(xxx))
|
|
||||||
- import xxx --> from . import xxx (for sibling modules inside the novoapi package)
|
|
||||||
- except Exception, e --> except Exception as e
|
|
||||||
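- remove tuple parameters from function signatures, e.g. def f(a, (b, c)) --> def f(a, b, c), and pass the values as separate arguments at every call site.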
|
|
||||||
Concretely...
|
|
||||||
|
|
||||||
=== novoapi\backend\__init__.py
|
|
||||||
#import session
|
|
||||||
from . import session
|
|
||||||
|
|
||||||
|
|
||||||
=== novoapi\backend\session.py
|
|
||||||
#except Exception, e:
|
|
||||||
except Exception as e:
|
|
||||||
|
|
||||||
#print self.last_message
|
|
||||||
print(self.last_message)
|
|
||||||
|
|
||||||
|
|
||||||
=== novoapi\asr\__init__.py
|
|
||||||
#import segments
|
|
||||||
#import spraaklab
|
|
||||||
from . import segments
|
|
||||||
from . import spraaklab
|
|
||||||
|
|
||||||
|
|
||||||
=== novoapi\asr\segments\praat.py
|
|
||||||
#print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
|
|
||||||
#print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
|
|
||||||
#print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
|
|
||||||
print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
|
|
||||||
print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
|
|
||||||
print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
|
|
||||||
|
|
||||||
|
|
||||||
=== novoapi\asr\spraaklab\__init__.py ===
|
|
||||||
#import schema
|
|
||||||
from . import schema
|
|
||||||
|
|
||||||
|
|
||||||
=== novoapi\asr\spraaklab\schema.py ===
|
|
||||||
#if isinstance(object, basestring):
|
|
||||||
if isinstance(object, str):
|
|
||||||
|
|
||||||
except jsonschema.ValidationError as e:
|
|
||||||
#print data, "validated not OK", e.message
|
|
||||||
print("{0} validated not OK {1}".format(data, e.message))
|
|
||||||
else:
|
|
||||||
#print data, "validated OK"
|
|
||||||
print("{0} validated OK".format(data))
|
|
||||||
|
|
||||||
|
|
||||||
Then, to make it work correctly, a few more modifications are needed.
|
|
||||||
When the wav file is read using the wave module, the output (named buf) is a byte string (str) on Python 2.7, whereas it is a bytes object on Python 3.6.
|
|
||||||
Therefore...
|
|
||||||
|
|
||||||
=== novoapi\backend\session.py
|
|
||||||
#audio_packet = str(buf[j:j + buffer_size])
|
|
||||||
audio_packet = buf[j:j + buffer_size]
|
|
||||||
|
|
||||||
Also, because of this difference, Segment.__repr__ (novoapi\asr\segments\segments.py) does not work.
|
|
@ -1,119 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
import os
|
|
||||||
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
|
|
||||||
from novoapi.backend import session
|
|
||||||
|
|
||||||
p = argparse.ArgumentParser()
|
|
||||||
p.add_argument("--user", default='martijn.wieling')
|
|
||||||
p.add_argument("--password", default='xxxxx')
|
|
||||||
args = p.parse_args()
|
|
||||||
|
|
||||||
rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True)
|
|
||||||
|
|
||||||
grammar = {
|
|
||||||
"type": "confusion_network",
|
|
||||||
"version": "1.0",
|
|
||||||
"data": {
|
|
||||||
"kind": "sequence",
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"kind": "word",
|
|
||||||
"pronunciation": [
|
|
||||||
{
|
|
||||||
"phones": [
|
|
||||||
"r",
|
|
||||||
"eu0",
|
|
||||||
"s"
|
|
||||||
],
|
|
||||||
"id": 0
|
|
||||||
}
|
|
||||||
,
|
|
||||||
{
|
|
||||||
"phones": [
|
|
||||||
"m",
|
|
||||||
"a0",
|
|
||||||
"n"
|
|
||||||
],
|
|
||||||
"id": 1
|
|
||||||
}
|
|
||||||
,
|
|
||||||
{
|
|
||||||
"phones": [
|
|
||||||
"m",
|
|
||||||
"a0",
|
|
||||||
"n",
|
|
||||||
"t",
|
|
||||||
"s",
|
|
||||||
"y",
|
|
||||||
"ax"
|
|
||||||
],
|
|
||||||
"id": 2
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"label": "reus"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"return_objects": [
|
|
||||||
"grammar"
|
|
||||||
],
|
|
||||||
"phoneset": "novo70"
|
|
||||||
}
|
|
||||||
|
|
||||||
res = rec.setgrammar(grammar)
|
|
||||||
#print "Set grammar result", res
|
|
||||||
|
|
||||||
|
|
||||||
## === novoapi/backend/session.py ===
|
|
||||||
#import wave
|
|
||||||
#import time
|
|
||||||
#from novoapi.backend.session import rpcid, segmentation
|
|
||||||
|
|
||||||
#wavf = "reus1008-reus.wav"
|
|
||||||
#w = wave.open(wavf, 'r')
|
|
||||||
#nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
|
|
||||||
#buf = w.readframes(nframes)
|
|
||||||
#w.close()
|
|
||||||
|
|
||||||
#buffer_size = 4096
|
|
||||||
#nbytes_sent = 0
|
|
||||||
#start = time.time()
|
|
||||||
#for j in range(0, len(buf), buffer_size):
|
|
||||||
# audio_packet = buf[j:j + buffer_size]
|
|
||||||
# nbytes_sent += len(audio_packet)
|
|
||||||
# rec.conn.send_binary(audio_packet)
|
|
||||||
#rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
|
|
||||||
#print(rpcid.next())
|
|
||||||
#rec.last_message = rec.conn.recv()
|
|
||||||
#message = json.loads(rec.last_message)
|
|
||||||
#result = session.segmentation(message["result"]["words"])
|
|
||||||
#result.export()
|
|
||||||
## ====================================
|
|
||||||
|
|
||||||
def result2pronunciation(result, word):
    """Return the phone labels and ``llh`` value of the first entry labelled *word*.

    Args:
        result: iterable of mappings, each with a ``'label'`` key; the entry
            that matches must also provide ``'llh'`` and ``'phones'`` (an
            iterable of mappings with a ``'label'`` key).  The callers in
            this script pass ``res.export()`` from the recognizer.
        word: label of the entry to extract.

    Returns:
        A ``(pronunciation, llh)`` tuple, where ``pronunciation`` is the list
        of phone labels of the first matching entry and ``llh`` is that
        entry's ``'llh'`` value (presumably a log-likelihood -- confirm
        against the novoapi documentation).

    Raises:
        IndexError: if no entry in *result* is labelled *word*.
    """
    matches = [entry for entry in result if entry['label'] == word]
    if not matches:
        # Same exception type as the original ``result_[0]`` lookup, but with
        # a message that says which label was missing.
        raise IndexError(
            "no entry labelled {0!r} in recognition result".format(word))

    first = matches[0]
    llh = first['llh']
    pronunciation = [phone['label'] for phone in first['phones']]
    return pronunciation, llh
|
|
||||||
|
|
||||||
|
|
||||||
res = rec.recognize_wav("reus1008-reus.wav")
|
|
||||||
#print "\n\n\nThe pronounced word in reus1008-reus.wav is: REUS\n\n"
|
|
||||||
#print "Recognition result:", json.dumps(res.export(), indent=4)
|
|
||||||
result2pronunciation(res.export(), 'reus')
|
|
||||||
|
|
||||||
#print "\n\n\nThe pronounced word in reus1167-man.wav is: MAN\n\n"
|
|
||||||
res2 = rec.recognize_wav("reus1167-man.wav")
|
|
||||||
#print "Recognition result:", json.dumps(res2.export(), indent=4)
|
|
||||||
result2pronunciation(res2.export(), 'reus')
|
|
||||||
|
|
||||||
#print "\n\n\nThe pronounced word in reus3768-mantsje.wav is: MANTSJE\n\n"
|
|
||||||
res3 = rec.recognize_wav("reus3768-mantsje.wav")
|
|
||||||
#print "Recognition result:", json.dumps(res3.export(), indent=4)
|
|
||||||
result2pronunciation(res3.export(), 'reus')
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user