Compare commits
3 Commits
6edde06a4f ... de5c9cecb9

Author | SHA1 | Date
---|---|---
 | de5c9cecb9 | 
 | 8efb091715 | 
 | 05e8a671c1 | 
Binary file not shown.
HCompV.scp (13152 changed lines)
File diff suppressed because it is too large.
@@ -10,7 +10,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
 		..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
 		..\toolbox\evaluation.py = ..\toolbox\evaluation.py
-		..\forced_alignment\forced_alignment\forced_alignment.pyproj = ..\forced_alignment\forced_alignment\forced_alignment.pyproj
 		..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
 		..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
 		..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
@@ -18,6 +17,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
 		..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
 		..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
+		..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
 		..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
 		..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
 	EndProjectSection
Binary file not shown.
@@ -4,7 +4,8 @@
     <SchemaVersion>2.0</SchemaVersion>
     <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
     <ProjectHome>.</ProjectHome>
-    <StartupFile>check_novoapi.py</StartupFile>
+    <StartupFile>
+    </StartupFile>
     <SearchPath>
     </SearchPath>
     <WorkingDirectory>.</WorkingDirectory>
@@ -3,23 +3,31 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
 
 import sys
 import csv
-#import subprocess
-#from collections import Counter
-#import re
+from collections import Counter
+import random
+import shutil
 
 import numpy as np
 import pandas as pd
-#import matplotlib.pyplot as plt
-#from sklearn.metrics import confusion_matrix
+import matplotlib.pyplot as plt
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import accuracy_score
 
-import acoustic_model_functions as am_func
-import convert_xsampa2ipa
-import defaultfiles as default
-
-from forced_alignment import pyhtk, convert_phone_set
-
 import novoapi
 
+import defaultfiles as default
+sys.path.append(default.forced_alignment_module_dir)
+from forced_alignment import pyhtk, convert_phone_set
+#import acoustic_model_functions as am_func
+import convert_xsampa2ipa
 import novoapi_functions
+sys.path.append(default.accent_classification_dir)
+import output_confusion_matrix
+
+## procedure
+forced_alignment_novo70 = True
+balance_sample_numbers = False
+
 
 ## ===== load novo phoneset =====
 phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa = novoapi_functions.load_phonset()
@@ -139,6 +147,106 @@ df_per_word = pd.DataFrame(index=[], columns=df_samples.keys())
 
 for word in word_list:
     df_samples_ = df_samples[df_samples['word']==word]
-    df_samples_ = df_samples_[df_samples_['frequency']>1]
+    df_samples_ = df_samples_[df_samples_['frequency']>2]
     df_per_word = df_per_word.append(df_samples_, ignore_index=True)
-df_per_word.to_excel(os.path.join(default.stimmen_dir, 'pronunciation_variants_novo70.xlsx'), encoding="utf-8")
+#df_per_word.to_excel(os.path.join(default.stimmen_dir, 'pronunciation_variants_novo70.xlsx'), encoding="utf-8")
 
 
+## ===== forced alignment =====
+reus_dir = r'C:\OneDrive\Desktop\Reus'
+if forced_alignment_novo70:
+    Results = pd.DataFrame(index=[],
+                           columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
+    #for word in word_list:
+    for word in ['Reus']:
+        # pronunciation variants top 3
+        df_per_word_ = df_per_word[df_per_word['word']==word]
+        df_per_word_ = df_per_word_.sort_values('frequency', ascending=False)
+        if len(df_per_word_) < 3: # pauw, rozen
+            pronunciation_ipa = list(df_per_word_['ipa'])
+        elif word=='Reuzenrad':
+            pronunciation_ipa = [
+                df_per_word_.iloc[0]['ipa'],
+                df_per_word_.iloc[1]['ipa'],
+                df_per_word_.iloc[2]['ipa'],
+                df_per_word_.iloc[3]['ipa']]
+        else:
+            # oog, oor, reus, roeiboot
+            pronunciation_ipa = [
+                df_per_word_.iloc[0]['ipa'],
+                df_per_word_.iloc[1]['ipa'],
+                df_per_word_.iloc[2]['ipa']]
+        #print("{0}: {1}".format(word, pronunciation_ipa))
+
+        # samples for the word
+        df_ = df[df['word']==word]
+
+        # samples in which all pronunciations are written in novo70.
+        samples = df_.query("ipa in @pronunciation_ipa")
+
+        ## ===== balance sample numbers =====
+        if balance_sample_numbers:
+            c = Counter(samples['ipa'])
+            sample_num_list = [c[key] for key in c.keys()]
+            sample_num = np.min(sample_num_list)
+
+            samples_balanced = pd.DataFrame(index=[], columns=list(samples.keys()))
+            for key in c.keys():
+                samples_ = samples[samples['ipa'] == key]
+                samples_balanced = samples_balanced.append(samples_.sample(sample_num), ignore_index = True)
+
+            samples = samples_balanced
+
+        results = pd.DataFrame(index=[],
+                               columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
+
+        for i in range(0, len(samples)):
+            sample = samples.iloc[i]
+            filename = sample['filename']
+            wav_file = os.path.join(default.stimmen_wav_dir, filename)
+            if os.path.exists(wav_file):
+                # for Martijn
+                #shutil.copy(wav_file, os.path.join(reus_dir, filename))
+
+                pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
+                result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
+                result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
+                result_ = pd.Series([
+                    sample['filename'],
+                    sample['word'],
+                    sample['xsampa'],
+                    sample['ipa'],
+                    ' '.join(result_ipa),
+                    ' '.join(result_novo70),
+                    llh
+                ], index=results.columns)
+                results = results.append(result_, ignore_index = True)
+                print('{0}/{1}: answer {2} - prediction {3}'.format(
+                    i+1, len(samples), result_['ipa'], result_['result_ipa']))
+        results.to_excel(os.path.join(reus_dir, 'results.xlsx'), encoding="utf-8")
+        if len(results) > 0:
+            Results = Results.append(results, ignore_index = True)
+    Results.to_excel(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
+else:
+    Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
+    Results = pd.read_excel(Results_xlsx, 'Sheet1')
+
+
+## ===== analysis =====
+#result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi')
+#for word in word_list:
+#    if not word == 'Oog':
+#        Results_ = Results[Results['word'] == word]
+#        y_true = list(Results_['ipa'])
+#        y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])]
+#        y_pred = [ipa.replace('ː', ':') for ipa in y_pred_]
+#        pronunciation_variants = list(set(y_true))
+#        cm = confusion_matrix(y_true, y_pred, labels=pronunciation_variants)
+#
+#        plt.figure()
+#        output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False)
+#        #plt.show()
+#        plt.savefig(os.path.join(result_novoapi_dir, word + '.png'))
@@ -29,6 +29,7 @@ config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
 repo_dir = r'C:\Users\Aki\source\repos'
 ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
 forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
+accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
 
 WSL_dir = r'C:\OneDrive\WSL'
 fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
@@ -6,6 +6,8 @@ import json
 
 from novoapi.backend import session
 
+import os
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
 import defaultfiles as default
 
 
@@ -36,10 +38,29 @@ def load_phonset():
         phoneset_novo70.append(novo70)
         translation_key_ipa2novo70[ipa] = novo70
         translation_key_novo702ipa[novo70] = ipa
 
+    # As per Nederlandse phoneset_aki.xlsx received from David
+    # [ɔː] oh / ohr # from ipa->novo70, only oh is used.
+    # [ɪː] ih / ihr # from ipa->novo70, only ih is used.
+    # [iː] iy
+    # [œː] uh
+    # [ɛː] eh
+    # [w] wv in IPA written as ʋ.
+    extra_ipa = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'ʋ']
+    extra_novo70 = ['oh', 'ih', 'iy', 'uh', 'eh', 'wv']
+    for ipa, novo70 in zip(extra_ipa, extra_novo70):
+        phoneset_ipa.append(ipa)
+        phoneset_novo70.append(novo70)
+        translation_key_ipa2novo70[ipa] = novo70
+        translation_key_novo702ipa[novo70] = ipa
+
+    translation_key_novo702ipa['ohr'] = 'ɔː'
+    translation_key_novo702ipa['ihr'] = 'ɪː'
+
     phoneset_ipa = np.unique(phoneset_ipa)
     phoneset_novo70 = np.unique(phoneset_novo70)
 
-    return
+    return phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa
 
 
 def multi_character_tokenize(line, multi_character_tokens):
@@ -170,4 +191,9 @@ def result2pronunciation(result, word):
     phones = result_[0]['phones']
     pronunciation_novo70 = [phone['label'] for phone in phones]
     pronunciation_ipa = [novo702ipa(phone) for phone in pronunciation_novo70]
     return pronunciation_ipa, pronunciation_novo70, llh
+
+
+if __name__ == 'main':
+    pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']
+    grammar = make_grammar('reus', pronunciation_ipa)
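With this diff applied, load_phonset() returns the four phoneset objects instead of nothing. A minimal sketch of a caller, assuming novoapi_functions is importable and using only the mappings shown in the diff above:

import novoapi_functions

phoneset_ipa, phoneset_novo70, ipa2novo70, novo702ipa = novoapi_functions.load_phonset()

# 'ohr' and 'ihr' are mapped only in the novo70 -> IPA direction (see the diff above).
print(novo702ipa['ohr'])  # ɔː
print(ipa2novo70['ɔː'])   # oh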
File diff suppressed because it is too large
@@ -28,7 +28,6 @@ def print_info_tier(output, title, begin, end, label):
     print >> output, '\t\t\ttext = "%s"' % label
 
 
-#def print_tier(output, title, begin, end, segs, (format, formatter)):
 def print_tier(output, title, begin, end, segs, format, formatter):
     print >> output, '\titem [%d]:' % 0
     print >> output, '\t\tclass = "IntervalTier"'
@@ -70,8 +69,11 @@ def seg2tg(fname, segments):
 
     nr_tiers = 3
     print_header(output, begin, end, nr_tiers)
-    print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
-    print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
-    print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
+    #print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
+    #print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
+    #print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
+    print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
+    print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
+    print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
 
     output.close()
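The print_tier signature change above is forced by Python 3, which removed tuple parameter unpacking from function signatures (PEP 3113). A minimal, self-contained sketch of the rewrite pattern, with toy names rather than novoapi code:

# Python 2 only:
#   def apply_fmt(value, (fmt, formatter)):
#       return fmt % formatter(value)
# Python 3: pass the tuple's parts as separate parameters.
def apply_fmt(value, fmt, formatter):
    return fmt % formatter(value)

print(apply_fmt(0.1234, '%.3f', lambda x: x))  # prints 0.123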
@@ -266,7 +266,7 @@ def test(data=None):
         print("{0} validated not OK {1}".format(data, e.message))
     else:
         #print data, "validated OK"
-        print("{} validated OK".format(data))
+        print("{0} validated OK".format(data))
 
 
 if __name__ == "__main__":
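The '{}' to '{0}' change does not alter behavior on Python 3; explicit field indices also run on Python 2.6, where bare '{}' auto-numbering raises a ValueError:

print("{0} validated OK".format("data"))  # Python 2.6+ and 3.x
print("{} validated OK".format("data"))   # Python 2.7+ / 3.1+ only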
@@ -188,7 +188,8 @@ class Recognizer(object):
         nbytes_sent = 0
         start = time.time()
         for j in range(0, len(buf), buffer_size):
-            audio_packet = str(buf[j:j + buffer_size])
+            #audio_packet = str(buf[j:j + buffer_size])
+            audio_packet = buf[j:j + buffer_size]
             nbytes_sent += len(audio_packet)
             self.conn.send_binary(audio_packet)
         self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
novoapi_for_python3x/readme (new file, 64 lines)
@@ -0,0 +1,64 @@
+novoapi ( https://bitbucket.org/novolanguage/python-novo-api ) is written in Python 2.7.
+To install it on Python 3.x, the following points should be modified:
+- basestring --> str
+- print xxx --> print({}.format(xxx))
+- import xxx --> from . import xxx
+- except Exception, e --> except Exception as e
+- remove tuples from the input arguments of a function
+Concretely...
+
+=== novoapi\backend\__init__.py ===
+#import session
+from . import session
+
+=== novoapi\backend\session.py ===
+#except Exception, e:
+except Exception as e:
+
+#print self.last_message
+print(self.last_message)
+
+=== novoapi\asr\__init__.py ===
+#import segments
+#import spraaklab
+from . import segments
+from . import spraaklab
+
+=== novoapi\asr\segments\praat.py ===
+#print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
+#print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
+#print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
+print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
+print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
+print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
+
+=== novoapi\asr\spraaklab\__init__.py ===
+#import schema
+from . import schema
+
+=== novoapi\asr\spraaklab\schema.py ===
+#if isinstance(object, basestring):
+if isinstance(object, str):
+
+except jsonschema.ValidationError as e:
+    #print data, "validated not OK", e.message
+    print("{0} validated not OK {1}".format(data, e.message))
+else:
+    #print data, "validated OK"
+    print("{0} validated OK".format(data))
+
+Then, to make it work correctly, a few more modifications are needed.
+When a wav file is read using the wave module, the output (named buf) is a string of bytes on Python 2.7, while buf is a bytes object on Python 3.6.
+Therefore...
+
+=== novoapi\backend\session.py ===
+#audio_packet = str(buf[j:j + buffer_size])
+audio_packet = buf[j:j + buffer_size]
+
+Also, because of this difference, Segment.__repr__ (novoapi\asr\segments\segments.py) does not work.
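A minimal sketch of the str-versus-bytes difference described above, assuming a wav file such as reus1008-reus.wav is present:

import wave

w = wave.open("reus1008-reus.wav", "r")
buf = w.readframes(w.getnframes())
w.close()

print(type(buf))     # <class 'bytes'> on Python 3.6; str on Python 2.7
chunk = buf[0:4096]  # slicing bytes yields bytes, so no str() wrapper is needed
print(type(chunk))   # <class 'bytes'>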
reus-test/reus-test.py (new file, 119 lines)
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+import os
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+
+import argparse
+import json
+
+from novoapi.backend import session
+
+p = argparse.ArgumentParser()
+p.add_argument("--user", default='martijn.wieling')
+p.add_argument("--password", default='xxxxx')
+args = p.parse_args()
+
+rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True)
+
+grammar = {
+    "type": "confusion_network",
+    "version": "1.0",
+    "data": {
+        "kind": "sequence",
+        "elements": [
+            {
+                "kind": "word",
+                "pronunciation": [
+                    {"phones": ["r", "eu0", "s"], "id": 0},
+                    {"phones": ["m", "a0", "n"], "id": 1},
+                    {"phones": ["m", "a0", "n", "t", "s", "y", "ax"], "id": 2}
+                ],
+                "label": "reus"
+            }
+        ]
+    },
+    "return_objects": ["grammar"],
+    "phoneset": "novo70"
+}
+
+res = rec.setgrammar(grammar)
+#print "Set grammar result", res
+
+
+## === novoapi/backend/session.py ===
+#import wave
+#import time
+#from novoapi.backend.session import rpcid, segmentation
+
+#wavf = "reus1008-reus.wav"
+#w = wave.open(wavf, 'r')
+#nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
+#buf = w.readframes(nframes)
+#w.close()
+
+#buffer_size = 4096
+#nbytes_sent = 0
+#start = time.time()
+#for j in range(0, len(buf), buffer_size):
+#    audio_packet = buf[j:j + buffer_size]
+#    nbytes_sent += len(audio_packet)
+#    rec.conn.send_binary(audio_packet)
+#rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
+#print(rpcid.next())
+#rec.last_message = rec.conn.recv()
+#message = json.loads(rec.last_message)
+#result = session.segmentation(message["result"]["words"])
+#result.export()
+## ====================================
+
+
+def result2pronunciation(result, word):
+    #result_ = res.export()[1]
+    result_ = [result[i] for i in range(len(result)) if result[i]['label'] == word]
+    llh = result_[0]['llh']
+    phones = result_[0]['phones']
+    pronunciation = [phone['label'] for phone in phones]
+    return pronunciation, llh
+
+
+res = rec.recognize_wav("reus1008-reus.wav")
+#print "\n\n\nThe pronounced word in reus1008-reus.wav is: REUS\n\n"
+#print "Recognition result:", json.dumps(res.export(), indent=4)
+result2pronunciation(res.export(), 'reus')
+
+#print "\n\n\nThe pronounced word in reus1167-man.wav is: MAN\n\n"
+res2 = rec.recognize_wav("reus1167-man.wav")
+#print "Recognition result:", json.dumps(res2.export(), indent=4)
+result2pronunciation(res2.export(), 'reus')
+
+#print "\n\n\nThe pronounced word in reus3768-mantsje.wav is: MANTSJE\n\n"
+res3 = rec.recognize_wav("reus3768-mantsje.wav")
+#print "Recognition result:", json.dumps(res3.export(), indent=4)
+result2pronunciation(res3.export(), 'reus')
BIN  reus-test/reus1008-reus.wav (new file)
Binary file not shown.
BIN  reus-test/reus1167-man.wav (new file)
Binary file not shown.
BIN  reus-test/reus3768-mantsje.wav (new file)
Binary file not shown.