#
# forced alignment using novo-api.
#
# *** IMPORTANT ***
# This file should be treated as confidential.
# This file should not be copied or uploaded to public sites.
#
# NOTES:
# The usage of novo api: https://bitbucket.org/novolanguage/python-novo-api
# I couldn't make it work as I described in the mail to Martijn Bartelds on 2018/12/03.
# As per the advice from him, I modified testgrammer.py and made it a function.
#
# In order to run on Python 3.6, the following points are changed in novo-api.
# (1) backend/__init__.py
# - #import session
#   from . import session
# (2) backend/session.py
# - #except Exception, e:
#   except Exception as e:
# - #print self.last_message
#   print(self.last_message)
# (3) asr/segment/praat.py
# - def print_tier(output, title, begin, end, segs, (format, formatter))
#   def print_tier(output, title, begin, end, segs, format, formatter):
# (4) asr/spraaklab/__init.py
# - #import session
#   from . import session
# (5) asr/spraaklab/schema.py
# - #print data, "validated not OK", e.message
#   print("{0} validated not OK {1}".format(data, e.message))
# - #print data, "validated OK"
#   print("{} validated OK".format(data))
# - #if isinstance(object, basestring):
#   if isinstance(object, str)
#
# Aki Kunikoshi
# 428968@gmail.com
#

import argparse
import json
import os

from novoapi.backend import session

# username / password cannot be passed as arguments...
# Default recording to align; can be overridden with --wav.
wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'

# Grammar sent to the recognizer: a sequence of three words ("one", "two",
# "three"), each listing one or two candidate pronunciations as phone lists.
grammar = {
    "type": "confusion_network",
    "version": "1.0",
    "data": {
        "kind": "sequence",
        "elements": [
            {
                "kind": "word",
                "pronunciation": [
                    {"phones": ["wv", "a1", "n"], "id": 0},
                    {"phones": ["wv", "uh1", "n"], "id": 1},
                ],
                "label": "one",
            },
            {
                "kind": "word",
                "pronunciation": [
                    {"phones": ["t", "uw1"], "id": 0},
                ],
                "label": "two",
            },
            {
                "kind": "word",
                "pronunciation": [
                    {"phones": ["t", "r", "iy1"], "id": 0},
                    {"phones": ["s", "r", "iy1"], "id": 1},
                ],
                "label": "three",
            },
        ],
    },
    "return_objects": ["grammar"],
    "phoneset": "novo70",
}


def parse_args(argv=None):
    """Parse the command-line options of this script.

    Credentials are never stored in the source. They are taken from
    ``--user`` / ``--password`` or, when those are omitted, from the
    ``NOVO_USER`` / ``NOVO_PASSWORD`` environment variables.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with ``user``, ``password``,
        ``wav`` and ``verbose`` attributes.

    Raises:
        SystemExit: If the arguments are invalid or no credentials are
            available (reported through ``ArgumentParser.error``).
    """
    p = argparse.ArgumentParser()
    p.add_argument("--user", default=os.environ.get("NOVO_USER"),
                   help="account name (default: $NOVO_USER)")
    p.add_argument("--password", default=os.environ.get("NOVO_PASSWORD"),
                   help="account password (default: $NOVO_PASSWORD)")
    p.add_argument("--wav", default=wav_file,
                   help="wav file to recognize (default: %(default)s)")
    p.add_argument("--verbose", action="store_true",
                   help="print the set-grammar and recognition results")
    args = p.parse_args(argv)

    # Fail early with a clear message instead of opening a session
    # without credentials.
    if not args.user or not args.password:
        p.error("credentials are required: pass --user/--password or set "
                "NOVO_USER/NOVO_PASSWORD")
    return args


if __name__ == "__main__":
    # Kept at module scope (rather than inside a function) so that `rec`
    # and `res` remain inspectable after the script has run.
    args = parse_args()

    # NOTE: the original left a `modeldir=modeldir` argument commented out
    # in this call.
    rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101,
                             user=args.user, password=args.password,
                             keepopen=True)

    res = rec.setgrammar(grammar)
    if args.verbose:
        print("Set grammar result", res)

    res = rec.recognize_wav(args.wav)
    if args.verbose:
        # Assumes the result object provides export(), as the original
        # (disabled) print statement did -- TODO confirm.
        print("Recognition result:", json.dumps(res.export(), indent=4))