acoustic_model/acoustic_model/novoapi_forced_alignment.py

118 lines
2.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#
# forced alignment using novo-api.
#
# *** IMPORTANT ***
# This file should be treated as confidential.
# This file should not be copied or uploaded to public sites.
#
# NOTES:
# The usage of novo api: https://bitbucket.org/novolanguage/python-novo-api
# I couldn't make it work as I described in the mail to Martijn Bartelds on
# 2018/12/03.
# As per the advice from him, I modified testgrammer.py and made it a function.
#
# In order to run on Python 3.6, the following points are changed in novo-api.
# (1) backend/__init__.py
# - #import session
# from . import session
# (2) backend/session.py
# - #except Exception, e:
# except Exception as e:
# - #print self.last_message
# print(self.last_message)
# (3) asr/segment/praat.py
# - def print_tier(output, title, begin, end, segs, (format, formatter))
# def print_tier(output, title, begin, end, segs, format, formatter):
# (4) asr/spraaklab/__init__.py
# - #import session
# from . import session
# (5) asr/spraaklab/schema.py
# - #print data, "validated not OK", e.message
# print("{0} validated not OK {1}".format(data, e.message))
# - #print data, "validated OK"
# print("{} validated OK".format(data))
# - #if isinstance(object, basestring):
# if isinstance(object, str):
#
# Aki Kunikoshi
# 428968@gmail.com
#
import argparse
import json
import os

from novoapi.backend import session

import novoapi_functions
# Credentials for the novo-api service.
#
# They must never be hard-coded in the source tree. They are taken from the
# command line (--user / --password) or, when those are omitted, from the
# NOVO_USER / NOVO_PASSWORD environment variables. The environment fallback
# also covers setups where passing command-line arguments is not practical.
# NOTE(review): any credentials that were previously committed as defaults in
# this file should be considered exposed and rotated.
p = argparse.ArgumentParser()
p.add_argument("--user", default=os.environ.get("NOVO_USER"))
p.add_argument("--password", default=os.environ.get("NOVO_PASSWORD"))
args = p.parse_args()
if not args.user or not args.password:
    # Fail early with a clear message instead of opening a session with
    # missing credentials.
    p.error(
        "novo-api credentials are required: pass --user/--password "
        "or set the NOVO_USER/NOVO_PASSWORD environment variables"
    )

# Audio file that is sent to the recognizer further down in this script.
wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'

# Open the recognizer session with the supplied credentials.
# NOTE: a `modeldir=modeldir` argument was left disabled by the original author.
rec = session.Recognizer(
    grammar_version="1.0",
    lang="nl",
    snodeid=101,
    user=args.user,
    password=args.password,
    keepopen=True,
)
# Words of the test utterance, in order, each with its alternative phone
# sequences. The position of a sequence within its word becomes its "id".
_WORD_VARIANTS = (
    ("one", (("wv", "a1", "n"), ("wv", "uh1", "n"))),
    ("two", (("t", "uw1"),)),
    ("three", (("t", "r", "iy1"), ("s", "r", "iy1"))),
)

# Grammar of type "confusion_network": a sequence of word elements, each
# carrying its label and the list of pronunciation variants from the table
# above, expressed in the "novo70" phone set.
grammar = {
    "type": "confusion_network",
    "version": "1.0",
    "data": {
        "kind": "sequence",
        "elements": [
            {
                "kind": "word",
                "pronunciation": [
                    {"phones": list(phones), "id": variant_id}
                    for variant_id, phones in enumerate(variants)
                ],
                "label": label,
            }
            for label, variants in _WORD_VARIANTS
        ],
    },
    "return_objects": ["grammar"],
    "phoneset": "novo70",
}
# Hand the grammar dict to the recognizer session.
res = rec.setgrammar(grammar)
# Debug aid (Python 3 form): print("Set grammar result", res)
#res = rec.recognize_wav("test/onetwothree.wav")
# Call recognize_wav on wav_file; this overwrites the setgrammar result held in `res`.
res = rec.recognize_wav(wav_file)
# Debug aid (Python 3 form, assumes the result offers export() as in the
# original note): print("Recognition result:", json.dumps(res.export(), indent=4))
# A word and the list of its candidate pronunciations
# (presumably IPA strings, going by the variable name).
word = 'pauw'
pronunciation_ipa = ['pau', 'pɑu']
# NOTE(review): this rebinds `grammar`, replacing the dict that was passed to
# setgrammar earlier; make_grammar lives in novoapi_functions and is not
# visible here, so confirm what it returns before relying on it.
grammar = novoapi_functions.make_grammar(word, pronunciation_ipa)