120 lines
3.0 KiB
Python
120 lines
3.0 KiB
Python
|
#!/usr/bin/env python
|
||
|
import os
|
||
|
# NOTE(review): hard-coded, machine-specific working directory. The .wav files
# recognised further down are given as bare file names, so they are presumably
# expected to live in this folder -- confirm before running elsewhere.
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||
|
|
||
|
import argparse
|
||
|
import json
|
||
|
|
||
|
from novoapi.backend import session
|
||
|
|
||
|
# Command-line options: the account used to open the recognizer session.
# Both options have defaults, so the script also runs without arguments.
p = argparse.ArgumentParser()
p.add_argument("--user", default="martijn.wieling")
p.add_argument("--password", default="xxxxx")
args = p.parse_args()

# Recognizer session shared by every call below; it is created with
# keepopen=True and the credentials parsed above.
rec = session.Recognizer(
    grammar_version="1.0",
    lang="nl",
    snodeid=101,
    user=args.user,
    password=args.password,
    keepopen=True,
)
|
||
|
|
||
|
# Grammar passed to the recognizer: a sequence containing a single word
# element labelled "reus" that lists three alternative phone sequences.
# Each alternative gets its position in the tuple below as its "id".
grammar = {
    "type": "confusion_network",
    "version": "1.0",
    "data": {
        "kind": "sequence",
        "elements": [
            {
                "kind": "word",
                "pronunciation": [
                    {"phones": list(phones), "id": index}
                    for index, phones in enumerate((
                        ("r", "eu0", "s"),
                        ("m", "a0", "n"),
                        ("m", "a0", "n", "t", "s", "y", "ax"),
                    ))
                ],
                "label": "reus",
            },
        ],
    },
    "return_objects": ["grammar"],
    "phoneset": "novo70",
}
|
||
|
|
||
|
# Send the grammar to the recognizer. `res` keeps whatever setgrammar returns
# only until it is rebound by the first recognize_wav call further down.
res = rec.setgrammar(grammar)
|
||
|
#print "Set grammar result", res
|
||
|
|
||
|
|
||
|
## === novoapi/backend/session.py ===
|
||
|
#import wave
|
||
|
#import time
|
||
|
#from novoapi.backend.session import rpcid, segmentation
|
||
|
|
||
|
#wavf = "reus1008-reus.wav"
|
||
|
#w = wave.open(wavf, 'r')
|
||
|
#nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
|
||
|
#buf = w.readframes(nframes)
|
||
|
#w.close()
|
||
|
|
||
|
#buffer_size = 4096
|
||
|
#nbytes_sent = 0
|
||
|
#start = time.time()
|
||
|
#for j in range(0, len(buf), buffer_size):
|
||
|
# audio_packet = buf[j:j + buffer_size]
|
||
|
# nbytes_sent += len(audio_packet)
|
||
|
# rec.conn.send_binary(audio_packet)
|
||
|
#rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
|
||
|
#print(rpcid.next())
|
||
|
#rec.last_message = rec.conn.recv()
|
||
|
#message = json.loads(rec.last_message)
|
||
|
#result = session.segmentation(message["result"]["words"])
|
||
|
#result.export()
|
||
|
## ====================================
|
||
|
|
||
|
def result2pronunciation(result, word):
    """Return the phone labels and the 'llh' value recognised for *word*.

    Args:
        result: Iterable of word entries. Each entry is a mapping with at
            least the keys 'label', 'llh' and 'phones', where 'phones' is
            an iterable of mappings that each have a 'label' key.
        word: Label of the entry to look up.

    Returns:
        A ``(pronunciation, llh)`` tuple: the list of phone labels of the
        first entry whose 'label' equals *word*, and that entry's 'llh'
        value.

    Raises:
        IndexError: If no entry in *result* is labelled *word*.
    """
    # Only the first matching entry is ever used, so stop at the first hit
    # instead of indexing through the whole sequence and collecting all
    # matches.
    match = next((entry for entry in result if entry['label'] == word), None)
    if match is None:
        # Same exception type that indexing an empty match list would give,
        # but with a message that names the missing word.
        raise IndexError(
            "no entry labelled %r in recognition result" % (word,))

    llh = match['llh']
    pronunciation = [phone['label'] for phone in match['phones']]
    return pronunciation, llh
|
||
|
|
||
|
|
||
|
# Recognise three recordings and extract the pronunciation chosen for the
# grammar word 'reus' from each. The (pronunciation, llh) tuple returned by
# result2pronunciation is not stored or printed here.
# To inspect a complete result, dump its export() with json.dumps(..., indent=4).

# reus1008-reus.wav: the pronounced word is REUS.
res = rec.recognize_wav("reus1008-reus.wav")
result2pronunciation(res.export(), 'reus')

# reus1167-man.wav: the pronounced word is MAN.
res2 = rec.recognize_wav("reus1167-man.wav")
result2pronunciation(res2.export(), 'reus')

# reus3768-mantsje.wav: the pronounced word is MANTSJE.
res3 = rec.recognize_wav("reus3768-mantsje.wav")
result2pronunciation(res3.export(), 'reus')
|