#!/usr/bin/env python
"""Recognise three sample recordings against a one-word novoapi grammar.

The script logs in to the recognizer, installs a confusion-network grammar
containing a single word labelled "reus" with three candidate pronunciations,
and then runs each wav file through the recognizer, extracting the phone
sequence chosen for that word.
"""
import os

# NOTE(review): the working directory is switched *before* the novoapi import
# and before the wav files are opened by relative name; presumably both are
# resolved relative to this directory -- confirm before reordering.
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import argparse
import json

from novoapi.backend import session

p = argparse.ArgumentParser()
p.add_argument("--user", default='martijn.wieling')
p.add_argument("--password", default='xxxxx')
args = p.parse_args()

rec = session.Recognizer(
    grammar_version="1.0",
    lang="nl",
    snodeid=101,
    user=args.user,
    password=args.password,
    keepopen=True,
)

# One word ("reus") with three alternative phone sequences; the "id" of each
# alternative distinguishes which pronunciation was chosen.
grammar = {
    "type": "confusion_network",
    "version": "1.0",
    "data": {
        "kind": "sequence",
        "elements": [
            {
                "kind": "word",
                "pronunciation": [
                    {
                        "phones": ["r", "eu0", "s"],
                        "id": 0
                    },
                    {
                        "phones": ["m", "a0", "n"],
                        "id": 1
                    },
                    {
                        "phones": ["m", "a0", "n", "t", "s", "y", "ax"],
                        "id": 2
                    }
                ],
                "label": "reus"
            }
        ]
    },
    "return_objects": ["grammar"],
    "phoneset": "novo70"
}

res = rec.setgrammar(grammar)
#print "Set grammar result", res


## === novoapi/backend/session.py ===
#import wave
#import time
#from novoapi.backend.session import rpcid, segmentation

#wavf = "reus1008-reus.wav"
#w = wave.open(wavf, 'r')
#nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
#buf = w.readframes(nframes)
#w.close()

#buffer_size = 4096
#nbytes_sent = 0
#start = time.time()
#for j in range(0, len(buf), buffer_size):
#    audio_packet = buf[j:j + buffer_size]
#    nbytes_sent += len(audio_packet)
#    rec.conn.send_binary(audio_packet)
#rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
#print(rpcid.next())
#rec.last_message = rec.conn.recv()
#message = json.loads(rec.last_message)
#result = session.segmentation(message["result"]["words"])
#result.export()
## ====================================


def result2pronunciation(result, word):
    """Return the phone labels and score of the first entry labelled *word*.

    Args:
        result: iterable of dicts, each with a 'label' key; the matching
            entry must also provide 'llh' and 'phones', where 'phones' is an
            iterable of dicts that each have a 'label' key.
        word: label of the entry to look up.

    Returns:
        A ``(pronunciation, llh)`` tuple: the list of phone labels of the
        first matching entry and that entry's 'llh' value (presumably a
        log-likelihood -- confirm against the novoapi documentation).

    Raises:
        IndexError: if no entry in *result* is labelled *word*.
    """
    for entry in result:
        if entry['label'] == word:
            llh = entry['llh']
            pronunciation = [phone['label'] for phone in entry['phones']]
            return pronunciation, llh
    # Same exception type as indexing an empty match list, but with a message
    # that says what was actually missing.
    raise IndexError("no entry labelled %r in recognition result" % (word,))


# NOTE(review): the return values of result2pronunciation() below are not
# stored or printed, so they are only visible in an interactive session.
res = rec.recognize_wav("reus1008-reus.wav")
#print "\n\n\nThe pronounced word in reus1008-reus.wav is: REUS\n\n"
#print "Recognition result:", json.dumps(res.export(), indent=4)
result2pronunciation(res.export(), 'reus')

#print "\n\n\nThe pronounced word in reus1167-man.wav is: MAN\n\n"
res2 = rec.recognize_wav("reus1167-man.wav")
#print "Recognition result:", json.dumps(res2.export(), indent=4)
result2pronunciation(res2.export(), 'reus')

#print "\n\n\nThe pronounced word in reus3768-mantsje.wav is: MANTSJE\n\n"
res3 = rec.recognize_wav("reus3768-mantsje.wav")
#print "Recognition result:", json.dumps(res3.export(), indent=4)
result2pronunciation(res3.export(), 'reus')