The bug regarding novoapi on Python 3.6 is fixed. The details can be found in novoapi_for_python3x/readme.txt

This commit is contained in:
yemaozi88 2019-01-20 13:47:29 +01:00
parent 8efb091715
commit de5c9cecb9
24 changed files with 252 additions and 14789 deletions

Binary file not shown.

13152
HCompV.scp

File diff suppressed because it is too large Load Diff

View File

@ -10,7 +10,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py ..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py ..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
..\toolbox\evaluation.py = ..\toolbox\evaluation.py ..\toolbox\evaluation.py = ..\toolbox\evaluation.py
..\forced_alignment\forced_alignment\forced_alignment.pyproj = ..\forced_alignment\forced_alignment\forced_alignment.pyproj
..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py ..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py ..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py ..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
@ -18,6 +17,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py ..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py ..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py ..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py ..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py ..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
EndProjectSection EndProjectSection

View File

@ -4,7 +4,8 @@
<SchemaVersion>2.0</SchemaVersion> <SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid> <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
<ProjectHome>.</ProjectHome> <ProjectHome>.</ProjectHome>
<StartupFile>check_novoapi.py</StartupFile> <StartupFile>
</StartupFile>
<SearchPath> <SearchPath>
</SearchPath> </SearchPath>
<WorkingDirectory>.</WorkingDirectory> <WorkingDirectory>.</WorkingDirectory>

View File

@ -3,6 +3,9 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import sys import sys
import csv import csv
from collections import Counter
import random
import shutil
import numpy as np import numpy as np
import pandas as pd import pandas as pd
@ -12,17 +15,18 @@ from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score from sklearn.metrics import accuracy_score
import novoapi import novoapi
from forced_alignment import pyhtk, convert_phone_set
import acoustic_model_functions as am_func
import convert_xsampa2ipa
import defaultfiles as default import defaultfiles as default
sys.path.append(default.forced_alignment_module_dir)
from forced_alignment import pyhtk, convert_phone_set
#import acoustic_model_functions as am_func
import convert_xsampa2ipa
import novoapi_functions import novoapi_functions
sys.path.append(default.accent_classification_dir) sys.path.append(default.accent_classification_dir)
import output_confusion_matrix import output_confusion_matrix
## procedure ## procedure
forced_alignment_novo70 = True forced_alignment_novo70 = True
balance_sample_numbers = False
## ===== load novo phoneset ===== ## ===== load novo phoneset =====
@ -149,11 +153,12 @@ for word in word_list:
## ===== forced alignment ===== ## ===== forced alignment =====
reus_dir = r'C:\OneDrive\Desktop\Reus'
if forced_alignment_novo70: if forced_alignment_novo70:
Results = pd.DataFrame(index=[], Results = pd.DataFrame(index=[],
columns=['filename', 'word', 'ipa', 'result_ipa', 'result_novo70', 'llh']) columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
for word in word_list: #for word in word_list:
#for word in ['Oor']: for word in ['Reus']:
# pronunciation variants top 3 # pronunciation variants top 3
df_per_word_ = df_per_word[df_per_word['word']==word] df_per_word_ = df_per_word[df_per_word['word']==word]
df_per_word_ = df_per_word_.sort_values('frequency', ascending=False) df_per_word_ = df_per_word_.sort_values('frequency', ascending=False)
@ -179,23 +184,40 @@ if forced_alignment_novo70:
# samples in which all pronunciations are written in novo70. # samples in which all pronunciations are written in novo70.
samples = df_.query("ipa in @pronunciation_ipa") samples = df_.query("ipa in @pronunciation_ipa")
results = pd.DataFrame(index=[],
columns=['filename', 'word', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
#j = 0 ## ===== balance sample numbers =====
if balance_sample_numbers:
c = Counter(samples['ipa'])
sample_num_list = [c[key] for key in c.keys()]
sample_num = np.min(sample_num_list)
samples_balanced = pd.DataFrame(index=[], columns=list(samples.keys()))
for key in c.keys():
samples_ = samples[samples['ipa'] == key]
samples_balanced = samples_balanced.append(samples_.sample(sample_num), ignore_index = True)
samples = samples_balanced
results = pd.DataFrame(index=[],
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
for i in range(0, len(samples)): for i in range(0, len(samples)):
sample = samples.iloc[i] sample = samples.iloc[i]
wav_file = os.path.join(default.stimmen_wav_dir, sample['filename']) filename = sample['filename']
wav_file = os.path.join(default.stimmen_wav_dir, filename)
if os.path.exists(wav_file): if os.path.exists(wav_file):
#j += 1 # for Martijn
#print('{0} - {1}'.format(word, i)) #shutil.copy(wav_file, os.path.join(reus_dir, filename))
pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa] pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_) result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word) result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
result_ = pd.Series([ result_ = pd.Series([
sample['filename'], sample['filename'],
sample['ipa'],
sample['word'], sample['word'],
sample['xsampa'],
sample['ipa'],
' '.join(result_ipa), ' '.join(result_ipa),
' '.join(result_novo70), ' '.join(result_novo70),
llh llh
@ -203,7 +225,7 @@ if forced_alignment_novo70:
results = results.append(result_, ignore_index = True) results = results.append(result_, ignore_index = True)
print('{0}/{1}: answer {2} - prediction {3}'.format( print('{0}/{1}: answer {2} - prediction {3}'.format(
i+1, len(samples), result_['ipa'], result_['result_ipa'])) i+1, len(samples), result_['ipa'], result_['result_ipa']))
results.to_excel(os.path.join(reus_dir, 'results.xlsx'), encoding="utf-8")
if len(results) > 0: if len(results) > 0:
Results = Results.append(results, ignore_index = True) Results = Results.append(results, ignore_index = True)
Results.to_excel(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8") Results.to_excel(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
@ -213,19 +235,18 @@ else:
## ===== analysis ===== ## ===== analysis =====
result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi') #result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi')
for word in word_list: #for word in word_list:
if not word == 'Oog': # if not word == 'Oog':
#word = 'Reus'
Results_ = Results[Results['word'] == word] # Results_ = Results[Results['word'] == word]
y_true = list(Results_['ipa']) # y_true = list(Results_['ipa'])
y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])] # y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])]
y_pred = [ipa.replace('ː', ':') for ipa in y_pred_] # y_pred = [ipa.replace('ː', ':') for ipa in y_pred_]
pronunciation_variants = list(set(y_true)) # pronunciation_variants = list(set(y_true))
cm = confusion_matrix(y_true, y_pred, labels=pronunciation_variants) # cm = confusion_matrix(y_true, y_pred, labels=pronunciation_variants)
plt.figure() # plt.figure()
output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False) # output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False)
#plt.show() # #plt.show()
plt.savefig(os.path.join(result_novoapi_dir, word + '.png')) # plt.savefig(os.path.join(result_novoapi_dir, word + '.png'))

View File

@ -6,6 +6,8 @@ import json
from novoapi.backend import session from novoapi.backend import session
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import defaultfiles as default import defaultfiles as default
@ -190,3 +192,8 @@ def result2pronunciation(result, word):
pronunciation_novo70 = [phone['label'] for phone in phones] pronunciation_novo70 = [phone['label'] for phone in phones]
pronunciation_ipa = [novo702ipa(phone) for phone in pronunciation_novo70] pronunciation_ipa = [novo702ipa(phone) for phone in pronunciation_novo70]
return pronunciation_ipa, pronunciation_novo70, llh return pronunciation_ipa, pronunciation_novo70, llh
if __name__ == '__main__':
    # Manual smoke test, run only when this module is executed as a script.
    # The guard must compare against '__main__'; the previous value 'main'
    # never matches, so this block was dead code.
    pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']
    grammar = make_grammar('reus', pronunciation_ipa)

File diff suppressed because it is too large Load Diff

View File

@ -28,7 +28,6 @@ def print_info_tier(output, title, begin, end, label):
print >> output, '\t\t\ttext = "%s"' % label print >> output, '\t\t\ttext = "%s"' % label
#def print_tier(output, title, begin, end, segs, (format, formatter)):
def print_tier(output, title, begin, end, segs, format, formatter): def print_tier(output, title, begin, end, segs, format, formatter):
print >> output, '\titem [%d]:' % 0 print >> output, '\titem [%d]:' % 0
print >> output, '\t\tclass = "IntervalTier"' print >> output, '\t\tclass = "IntervalTier"'
@ -70,8 +69,11 @@ def seg2tg(fname, segments):
nr_tiers = 3 nr_tiers = 3
print_header(output, begin, end, nr_tiers) print_header(output, begin, end, nr_tiers)
print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x)) #print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x)) #print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x)) #print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
output.close() output.close()

View File

@ -266,7 +266,7 @@ def test(data=None):
print("{0} validated not OK {1}".format(data, e.message)) print("{0} validated not OK {1}".format(data, e.message))
else: else:
#print data, "validated OK" #print data, "validated OK"
print("{} validated OK".format(data)) print("{0} validated OK".format(data))
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -188,7 +188,8 @@ class Recognizer(object):
nbytes_sent = 0 nbytes_sent = 0
start = time.time() start = time.time()
for j in range(0, len(buf), buffer_size): for j in range(0, len(buf), buffer_size):
audio_packet = str(buf[j:j + buffer_size]) #audio_packet = str(buf[j:j + buffer_size])
audio_packet = buf[j:j + buffer_size]
nbytes_sent += len(audio_packet) nbytes_sent += len(audio_packet)
self.conn.send_binary(audio_packet) self.conn.send_binary(audio_packet)
self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()})) self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))

View File

@ -0,0 +1,64 @@
novoapi( https://bitbucket.org/novolanguage/python-novo-api ) is written in Python 2.7.
To install it on Python 3.x the following points should be modified.
- basestring --> str
- print xxx --> print("{}".format(xxx))
- import xxx --> from . import xxx
- except Exception, e --> except Exception as e
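- remove tuple parameters from function signatures, e.g. def f(a, (b, c)) --> def f(a, b, c), and pass the values as separate arguments (tuple parameter unpacking no longer exists in Python 3).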
Concretely...
=== novoapi\backend\__init__.py
#import session
from . import session
=== novoapi\backend\session.py
#except Exception, e:
except Exception as e:
#print self.last_message
print(self.last_message)
=== novoapi\asr\__init__.py
#import segments
#import spraaklab
from . import segments
from . import spraaklab
=== novoapi\asr\segments\praat.py
#print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
#print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
#print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
=== novoapi\asr\spraaklab\__init__.py ===
#import schema
from . import schema
=== novoapi\asr\spraaklab\schema.py ===
#if isinstance(object, basestring):
if isinstance(object, str):
except jsonschema.ValidationError as e:
#print data, "validated not OK", e.message
print("{0} validated not OK {1}".format(data, e.message))
else:
#print data, "validated OK"
print("{0} validated OK".format(data))
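Then, to make it work correctly, a few more modifications are needed.
When the wav file is read using the wave module, the output (named buf) is a str (a string of bytes) on Python 2.7, while it is a bytes object on Python 3.6. On Python 3, calling str() on a bytes object returns its printable representation ("b'...'") instead of the raw audio data, so the conversion has to be removed.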
Therefore...
=== novoapi\backend\session.py
#audio_packet = str(buf[j:j + buffer_size])
audio_packet = buf[j:j + buffer_size]
Also, because of this difference, Segment.__repr__ (novoapi\asr\segments\segments.py) does not work.

119
reus-test/reus-test.py Normal file
View File

@ -0,0 +1,119 @@
#!/usr/bin/env python
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import argparse
import json
from novoapi.backend import session
# Account credentials can be overridden on the command line.
# NOTE(review): 'xxxxx' looks like a placeholder - pass the real password with --password.
p = argparse.ArgumentParser()
p.add_argument("--user", default='martijn.wieling')
p.add_argument("--password", default='xxxxx')
args = p.parse_args()
# Create the recognizer object used for every request below.
# NOTE(review): presumably this opens the connection to the Novo backend, and
# keepopen=True keeps it alive across the three recognitions - confirm in novoapi.
rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True)
# Grammar with a single word labelled "reus" and three alternative
# pronunciations written in the novo70 phone set. Each variant gets its
# position in the list as "id" (0, 1, 2).
grammar = {
    "type": "confusion_network",
    "version": "1.0",
    "data": {
        "kind": "sequence",
        "elements": [
            {
                "kind": "word",
                "pronunciation": [
                    {"phones": variant, "id": index}
                    for index, variant in enumerate([
                        ["r", "eu0", "s"],
                        ["m", "a0", "n"],
                        ["m", "a0", "n", "t", "s", "y", "ax"],
                    ])
                ],
                "label": "reus",
            },
        ],
    },
    "return_objects": ["grammar"],
    "phoneset": "novo70",
}
# Send the grammar to the recognizer. The response is stored in `res`, which
# is reassigned by the first recognize_wav() call further down.
res = rec.setgrammar(grammar)
#print "Set grammar result", res
## === novoapi/backend/session.py ===
#import wave
#import time
#from novoapi.backend.session import rpcid, segmentation
#wavf = "reus1008-reus.wav"
#w = wave.open(wavf, 'r')
#nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
#buf = w.readframes(nframes)
#w.close()
#buffer_size = 4096
#nbytes_sent = 0
#start = time.time()
#for j in range(0, len(buf), buffer_size):
# audio_packet = buf[j:j + buffer_size]
# nbytes_sent += len(audio_packet)
# rec.conn.send_binary(audio_packet)
#rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
#print(rpcid.next())
#rec.last_message = rec.conn.recv()
#message = json.loads(rec.last_message)
#result = session.segmentation(message["result"]["words"])
#result.export()
## ====================================
def result2pronunciation(result, word):
    """Return the recognized phone labels and log-likelihood for *word*.

    Args:
        result: iterable of dicts, each providing the keys 'label', 'llh'
            and 'phones'; 'phones' is an iterable of dicts with a 'label'
            key.
        word: label of the entry to extract.

    Returns:
        A tuple ``(pronunciation, llh)``: the list of phone labels of the
        first entry whose 'label' equals *word*, and that entry's 'llh'.

    Raises:
        IndexError: if no entry in *result* is labelled *word*.
    """
    for entry in result:
        if entry['label'] == word:
            pronunciation = [phone['label'] for phone in entry['phones']]
            return pronunciation, entry['llh']
    # Same exception type as indexing an empty match list, but with a
    # message that says what was actually missing.
    raise IndexError('no recognition result labelled {0!r}'.format(word))
# Recognize the three test recordings with the grammar set above. The grammar
# contains a single word labelled "reus" whose variants are the three
# pronunciations, so 'reus' is the label looked up in every result.
# NOTE(review): the return values of result2pronunciation() are discarded, so
# running this as a script shows nothing; they are only visible in an
# interactive session.
res = rec.recognize_wav("reus1008-reus.wav")
#print "\n\n\nThe pronounced word in reus1008-reus.wav is: REUS\n\n"
#print "Recognition result:", json.dumps(res.export(), indent=4)
result2pronunciation(res.export(), 'reus')
#print "\n\n\nThe pronounced word in reus1167-man.wav is: MAN\n\n"
res2 = rec.recognize_wav("reus1167-man.wav")
#print "Recognition result:", json.dumps(res2.export(), indent=4)
result2pronunciation(res2.export(), 'reus')
#print "\n\n\nThe pronounced word in reus3768-mantsje.wav is: MANTSJE\n\n"
res3 = rec.recognize_wav("reus3768-mantsje.wav")
#print "Recognition result:", json.dumps(res3.export(), indent=4)
result2pronunciation(res3.export(), 'reus')

BIN
reus-test/reus1008-reus.wav Normal file

Binary file not shown.

BIN
reus-test/reus1167-man.wav Normal file

Binary file not shown.

Binary file not shown.