From e5cf182a182bfc280228d272c3f3f76fe1c67a77 Mon Sep 17 00:00:00 2001
From: yemaozi88 <428968@gmail.com>
Date: Sun, 30 Dec 2018 23:47:55 +0100
Subject: [PATCH] Add novoapi ported to Python 3.x.

---
 .vs/acoustic_model/v15/.suo                   | Bin 71168 -> 74752 bytes
 .../__pycache__/defaultfiles.cpython-36.pyc   | Bin 995 -> 995 bytes
 acoustic_model/acoustic_model.pyproj          |   5 +-
 acoustic_model/check_novoapi.py               |  18 +-
 acoustic_model/defaultfiles.py                |   5 +-
 .../{performance_check.py => htk_vs_kaldi.py} |   0
 novoapi/__init__.py                           |   5 +
 novoapi/asr/__init__.py                       |   6 +
 novoapi/asr/segments/__init__.py              |   4 +
 novoapi/asr/segments/praat.py                 |  77 +++++
 novoapi/asr/segments/segments.py              |  99 +++++++
 novoapi/asr/spraaklab/__init__.py             |   4 +
 novoapi/asr/spraaklab/schema.py               | 273 ++++++++++++++++++
 novoapi/backend/__init__.py                   |   4 +
 novoapi/backend/session.py                    | 254 ++++++++++++++++
 novoapi/utils/json/__init__.py                |  25 ++
 16 files changed, 769 insertions(+), 10 deletions(-)
 rename acoustic_model/{performance_check.py => htk_vs_kaldi.py} (100%)
 create mode 100644 novoapi/__init__.py
 create mode 100644 novoapi/asr/__init__.py
 create mode 100644 novoapi/asr/segments/__init__.py
 create mode 100644 novoapi/asr/segments/praat.py
 create mode 100644 novoapi/asr/segments/segments.py
 create mode 100644 novoapi/asr/spraaklab/__init__.py
 create mode 100644 novoapi/asr/spraaklab/schema.py
 create mode 100644 novoapi/backend/__init__.py
 create mode 100644 novoapi/backend/session.py
 create mode 100644 novoapi/utils/json/__init__.py

diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo
index 586e8a92e941a6cac42630f927f60b4774ef3d41..e9ddb3028de24f584b466c6f213232c9135e52a2 100644
Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ
diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc
Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ
diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj
index 7a2f4b5..27625c9 100644
--- a/acoustic_model/acoustic_model.pyproj
+++ b/acoustic_model/acoustic_model.pyproj
@@ -4,7 +4,7 @@
   <SchemaVersion>2.0</SchemaVersion>
   <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
   <ProjectHome>.</ProjectHome>
-  <StartupFile>performance_check.py</StartupFile>
+  <StartupFile>check_novoapi.py</StartupFile>
   <SearchPath>.</SearchPath>
@@ -25,6 +25,7 @@
       <SubType>Code</SubType>
     </Compile>
+    <Compile Include="check_novoapi.py"><SubType>Code</SubType></Compile>
@@ -34,7 +35,7 @@
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="performance_check.py">
+    <Compile Include="htk_vs_kaldi.py">
       <SubType>Code</SubType>
diff --git a/acoustic_model/check_novoapi.py b/acoustic_model/check_novoapi.py
index 35da212..93ec540 100644
--- a/acoustic_model/check_novoapi.py
+++ b/acoustic_model/check_novoapi.py
@@ -20,13 +20,8 @@ from forced_alignment import pyhtk
 import novoapi
 
-## ======================= convert phones ======================
-mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
-stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
-
-
-## novo phoneset
+## ======================= novo phoneset ======================
 translation_key = dict()
 #phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx)
@@ -54,3 +49,14 @@ with open(default.cmu69_phoneset, "rt", encoding="utf-8") as fin:
 phoneset_ipa = np.unique(phoneset_ipa)
 phoneset_novo70 = np.unique(phoneset_novo70)
+
+## ======================= convert phones ======================
+mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
+
+stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
+df = pd.read_excel(stimmen_transcription_, 'check')
+#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
+#    #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_)
+#    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
+#    if not ipa_converted == ipa:
+#        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
\ No newline at end of file
diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py
index 4f98e6c..726f23a 100644
--- a/acoustic_model/defaultfiles.py
+++ b/acoustic_model/defaultfiles.py
@@ -40,5 +40,6 @@ stimmen_transcription_xlsx = os.path.join(experiments_dir, 'stimmen', 'data', 'F
 stimmen_data_dir = os.path.join(experiments_dir, 'stimmen', 'data')
 phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
 
-novo_api_dir = os.path.join(WSL_dir, 'python-novo-api')
-cmu69_phoneset = os.path.join(novo_api_dir, 'novoapi', 'asr', 'phoneset', 'en', 'cmu69.phoneset')
\ No newline at end of file
+novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi')
+#novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi'
+cmu69_phoneset = os.path.join(novo_api_dir, 'asr', 'phoneset', 'en', 'cmu69.phoneset')
\ No newline at end of file
diff --git a/acoustic_model/performance_check.py b/acoustic_model/htk_vs_kaldi.py
similarity index 100%
rename from acoustic_model/performance_check.py
rename to acoustic_model/htk_vs_kaldi.py
diff --git a/novoapi/__init__.py b/novoapi/__init__.py
new file mode 100644
index 0000000..9ff2f76
--- /dev/null
+++ b/novoapi/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python
+
+__version__ = "0.2"
+
+from . import backend
diff --git a/novoapi/asr/__init__.py b/novoapi/asr/__init__.py
new file mode 100644
index 0000000..2832e82
--- /dev/null
+++ b/novoapi/asr/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+#import segments
+#import spraaklab
+from . import segments
+from . import spraaklab
\ No newline at end of file
diff --git a/novoapi/asr/segments/__init__.py b/novoapi/asr/segments/__init__.py
new file mode 100644
index 0000000..737e432
--- /dev/null
+++ b/novoapi/asr/segments/__init__.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+
+from .segments import Segmentation
+from .praat import seg2tg
diff --git a/novoapi/asr/segments/praat.py b/novoapi/asr/segments/praat.py
new file mode 100644
index 0000000..fbc9e4c
--- /dev/null
+++ b/novoapi/asr/segments/praat.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
+
+import codecs
+
+def print_header(output, begin, end, nr_tiers):
+    print('File type = "ooTextFile"', file=output)
+    print('Object class = "TextGrid"', file=output)
+    print('', file=output)
+    print('xmin = %s' % begin, file=output)
+    print('xmax = %s' % end, file=output)
+    print('tiers? <exists>', file=output)
+    print('size = %d' % nr_tiers, file=output)
+    print('item []:', file=output)
+
+
+def print_info_tier(output, title, begin, end, label):
+    print('\titem [%d]:' % 0, file=output)
+    print('\t\tclass = "IntervalTier"', file=output)
+    print('\t\tname = "%s"' % title, file=output)
+    print('\t\txmin = %s' % begin, file=output)
+    print('\t\txmax = %s' % end, file=output)
+    print('\t\tintervals: size = %d' % 1, file=output)
+
+    print('\t\tintervals [1]:', file=output)
+    print('\t\t\txmin = %s' % begin, file=output)
+    print('\t\t\txmax = %s' % end, file=output)
+    print('\t\t\ttext = "%s"' % label, file=output)
+
+
+#def print_tier(output, title, begin, end, segs, (format, formatter)):
+def print_tier(output, title, begin, end, segs, format, formatter):
+    print('\titem [%d]:' % 0, file=output)
+    print('\t\tclass = "IntervalTier"', file=output)
+    print('\t\tname = "%s"' % title, file=output)
+    print('\t\txmin = %s' % begin, file=output)
+    print('\t\txmax = %s' % end, file=output)
+    print('\t\tintervals: size = %d' % len(segs), file=output)
+
+    count = 1
+    for seg in segs:
+        #print(seg)
+        print('\t\tintervals [%d]:' % count, file=output)
+        print('\t\t\txmin = %s' % repr(int(seg['begin']) / 100.0), file=output)
+        print('\t\t\txmax = %s' % repr(int(seg['end']) / 100.0), file=output)
+        string = '\t\t\ttext = "' + format + '"'
+        print(string % formatter(seg['label']), file=output)
+        count += 1
+
+
+def seg2tg(fname, segments):
+    if not segments:
+        return
+    output = codecs.open(fname, "w", encoding="utf-8")
+
+    confidences = []
+    word_labels = []
+    phones = []
+
+    for s in segments:
+        conf = s.llh if hasattr(s, "llh") else s.score
+        confidences.append({'begin': s.begin, 'end': s.end, 'label': conf})
+        word_labels.append({'begin': s.begin, 'end': s.end, 'label': s.label})
+        for p in s.phones:
+            phones.append({'begin': p.begin, 'end': p.end, 'label': p.label})
+
+    begin = repr(int(segments[0].begin) / 100.0)
+    end = repr(int(segments[-1].end) / 100.0)
+
+    nr_tiers = 3
+    print_header(output, begin, end, nr_tiers)
+    print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
+    print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
+    print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
+
+    output.close()
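Note on the file above: a quick illustration of the TextGrid helpers, as a sketch rather than part of the patch. The file name and label are made up; `print_header` and `print_info_tier` are the module-level functions defined above.

    import codecs
    from novoapi.asr.segments.praat import print_header, print_info_tier

    # Write a one-tier TextGrid with a single labeled interval.
    with codecs.open("demo.TextGrid", "w", encoding="utf-8") as out:
        print_header(out, 0.0, 1.0, nr_tiers=1)
        print_info_tier(out, "comment", 0.0, 1.0, "one interval covering the whole file")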
diff --git a/novoapi/asr/segments/segments.py b/novoapi/asr/segments/segments.py
new file mode 100644
index 0000000..ee5dbcc
--- /dev/null
+++ b/novoapi/asr/segments/segments.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
+
+## These classes can be initialized with dictionaries, as returned by the Python spraaklab recognition system.
+
+class Segment(object):
+    def __init__(self, segment):
+        self.begin = segment["begin"]
+        self.end = segment["end"]
+        self.begintime = segment.get("beginTime", self.begin / 100.0)
+        self.endtime = segment.get("endTime", self.end / 100.0)
+        self.label = segment["label"]
+        self.score = segment["score"]
+        if "llh" in segment:
+            self.llh = segment["llh"]
+        if "phones" in segment:
+            self.type = "word"
+            self.phones = Segmentation(segment["phones"], ["sil"])
+            if hasattr(self.phones[0], "llh"):
+                self.minllh = min([s.llh for s in self.phones])    ## the current word llh for error detection
+        else:
+            self.type = "phone"
+
+    def __repr__(self):
+        res = "%8.3f -- %8.3f score %8.3f " % (self.begintime, self.endtime, self.score)
+        if hasattr(self, "llh"):
+            res += "llh %8.3f " % self.llh
+        res += self.label
+        return res
+
+    def export(self):
+        r = {"begin": self.begin, "end": self.end, "label": self.label, "score": self.score, "type": self.type}
+        if hasattr(self, "llh"):
+            r["llh"] = self.llh
+        if hasattr(self, "phones"):
+            r["phones"] = self.phones.export()
+        return r
+
+class Segmentation(object):
+    def __init__(self, segments, sils=["<s>", "</s>", "!sil"]):
+        """Create a segmentation from a spraaklab recognition structure.
+        segments: an array of words (or phones), represented by a dict with
+        "begin", "end", "label", "score", and "llh" keys. Words can also have
+        "phones", which is another array of segments."""
+        self.segments = [Segment(s) for s in segments]
+        if self.segments:
+            self.type = self.segments[0].type
+        else:
+            self.type = None
+        self.sils = sils
+        self.orig = segments    ## in case we want to have access to the original recognition structure
+
+    def __getitem__(self, item):
+        return self.segments[item]
+
+    def __repr__(self):
+        ns = len(self.segments)
+        res = "Segmentation with %d %s%s" % (ns, self.type, "" if ns == 1 else "s")
+        for seg in self.segments:
+            res += "\n  " + repr(seg)
+        return res
+
+    def __len__(self):
+        return len(self.segments)
+
+    def score(self, skip=None):
+        if not skip:
+            skip = self.sils
+        s = 0.0
+        for seg in self.segments:
+            if seg.label not in skip:
+                s += seg.score
+        return s
+
+    def llhs(self, skip=None):
+        if not skip:
+            skip = self.sils
+        return [seg.llh for seg in self.segments if hasattr(seg, "llh") and seg.label not in skip]
+
+    def llh(self, skip=None):
+        return sum(self.llhs(skip))
+
+    def minllh(self, skip=None):
+        llhs = self.llhs(skip)
+        if llhs:
+            return min(llhs)
+        else:
+            return None
+
+    def labels(self, skip=None):
+        if not skip:
+            skip = self.sils
+        return [seg.label for seg in self.segments if seg.label not in skip]
+
+    def sentence(self, skip=None):
+        return " ".join(self.labels(skip))
+
+    def export(self):
+        return [seg.export() for seg in self.segments]
\ No newline at end of file
diff --git a/novoapi/asr/spraaklab/__init__.py b/novoapi/asr/spraaklab/__init__.py
new file mode 100644
index 0000000..2c5f2fd
--- /dev/null
+++ b/novoapi/asr/spraaklab/__init__.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+
+#import schema
+from . import schema
\ No newline at end of file
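The Segment/Segmentation pair above is easiest to see with a hand-written recognition dict; the values here are hypothetical, not taken from the patch.

    from novoapi.asr.segments import Segmentation

    words = [{"begin": 0, "end": 50, "label": "hello", "score": 0.93,
              "phones": [{"begin": 0, "end": 20, "label": "h", "score": 0.95},
                         {"begin": 20, "end": 50, "label": "ello", "score": 0.91}]}]
    seg = Segmentation(words)
    print(seg.sentence())            # "hello"
    print(seg.score())               # 0.93 (silence labels are skipped)
    print(seg[0].phones[1].label)    # "ello"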
diff --git a/novoapi/asr/spraaklab/schema.py b/novoapi/asr/spraaklab/schema.py
new file mode 100644
index 0000000..8efc49f
--- /dev/null
+++ b/novoapi/asr/spraaklab/schema.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python
+## (c) 2017 NovoLanguage, author: David A. van Leeuwen
+
+## The purpose of this module is to define the grammar structure in a JSON schema, so that it can be validated,
+## (de)serialized, and perhaps even automatically converted to a Python class structure.
+
+import json
+import jsonschema
+
+grammar_schema_v10 = {
+    "$schema": "http://json-schema.org/schema#",
+    "title": "NovoLanguage grammar",
+    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
+    "$ref": "#/definitions/group",
+    "definitions": {
+        "phones": {
+            "type": "array",
+            "items": {
+                "type": "string"
+            },
+            "minItems": 1
+        },
+        "pronunciation": {
+            "type": "object",
+            "properties": {
+                "phones": {
+                    "$ref": "#/definitions/phones"
+                },
+                "syllables": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/syllable"
+                    },
+                    "minItems": 1
+                },
+                "id": {
+                    "type": "integer",
+                    "description": "ID to distinguish this pronunciation from other variants"
+                },
+                "meta": {
+                    "type": "object"
+                }
+            },
+            "required": ["phones"]
+        },
+        "syllable": {
+            "type": "object",
+            "properties": {
+                "begin": {
+                    "type": "integer",
+                    "minimum": 0
+                },
+                "end": {
+                    "type": "integer",
+                    "minimum": 0
+                },
+                "stress": {
+                    "type": "integer",
+                    "minimum": 0
+                },
+                "tone": {
+                    "type": "integer",
+                    "minimum": 0
+                }
+            },
+            "required": ["begin", "end"]
+        },
+        "word": {
+            "type": "object",
+            "properties": {
+                "kind": {
+                    "type": "string",
+                    "enum": ["word"]
+                },
+                "label": {
+                    "type": "string"
+                },
+                "pronunciation": {
+                    "anyOf": [
+                        {
+                            "$ref": "#/definitions/pronunciation"
+                        },
+                        {
+                            "type": "array",
+                            "items": {
+                                "anyOf": [
+                                    {
+                                        "$ref": "#/definitions/pronunciation"
+                                    },
+                                    {
+                                        "$ref": "#/definitions/phones"
+                                    }
+                                ]
+                            },
+                            "minItems": 1
+                        },
+                        {
+                            "$ref": "#/definitions/phones"
+                        }
+                    ]
+                },
+                "syllables": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/syllable"
+                    }
+                },
+                "graphemes": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "id": {
+                    "type": "integer",
+                    "description": "ID to distinguish this word from other words (with possibly the same label)"
+                },
+                "meta": {
+                    "type": "object"
+                }
+            },
+            "required": ["label"]
+        },
+        "element": {
+            "title": "element",
+            "oneOf": [
+                {
+                    "$ref": "#/definitions/word"
+                },
+                {
+                    "$ref": "#/definitions/group"
+                },
+                {
+                    "type": ["string", "null"]
+                }
+            ]
+        },
+        "group": {
+            "title": "element group",
+            "type": "object",
+            "properties": {
+                "kind": {
+                    "type": "string",
+                    "enum": ["sequence", "alternatives", "order"]
+                },
+                "elements": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/element"
+                    },
+                    "minItems": 1
+                },
+                "meta": {
+                    "type": "object"
+                }
+            },
+            "required": ["kind", "elements"]
+        }
+    }
+}
+
+grammar_schema_v01 = {
+    "$schema": "http://json-schema.org/schema#",
+    "title": "NovoLanguage grammar v0.1",
+    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
+    "type": "object",
+    "properties": {
+        "type": {
+            "type": "string",
+            "enum": ["multiple_choice", "word_order"]
+        },
+        "parts": {
+            "type": "array",
+            "minItems": 1,
+            "maxItems": 5,
+            "items": {
+                "type": ["string", "array"],
+                "items": {
+                    "type": ["string"]
+                }
+            }
+        }
+    }
+}
+
+grammar_rpc_schema = {
+    "$schema": "http://json-schema.org/schema#",
+    "title": "NovoLanguage RPC grammar",
+    "type": "object",
+    "properties": {
+        "type": {
+            "type": "string",
+            "enum": ["confusion_network"]
+        },
+        "version": {
+            "type": "string",
+            "default": "v0.1"
+        },
+        "data": {
+            "type": "object"
+        },
+        "return_dict": {
+            "type": "boolean"
+        },
+        "return_objects": {
+            "type": "array",
+            "items": {
+                "type": "string",
+                "enum": ["dict", "grammar"]
+            }
+        },
+        "phoneset": {
+            "type": "string",
+            "enum": ["cmu69", "novo70", "mdbg115"]
+        },
+        "parallel_silence": {
+            "type": "boolean"
+        }
+    },
+    "required": ["type", "data"]
+}
["type", "data"] +} + +def validate(object, schema=grammar_schema_v10): + #if isinstance(object, basestring): + if isinstance(object, str): + object = json.loads(object) + if not isinstance(object, dict): + raise TypeError("Expected dict or json string") + try: + jsonschema.validate(object, schema) + except jsonschema.ValidationError: + return False + except Exception: + raise + else: + return True + +def validate_rpc_grammar(message): + """validate an rpc grammar message""" + if not validate(message, grammar_rpc_schema): + raise ValueError("Not a valid RPC grammar") + version = message.get("version", "0.1") + data = message["data"] + if version == "0.1": + if not validate(data, grammar_schema_v01): + raise ValueError("Not a valid grammar v0.1") + elif version == "1.0": + if not validate(data, grammar_schema_v10): + raise ValueError("Not a valid grammar v1.0") + else: + raise ValueError("Unsupported schema version") + + +## test +def test(data=None): + if not data: + data = {"kind": "sequence", "elements": [ + {"kind": "alternatives", "elements": ["a plain string", "an alternative string"]}, + {"kind": "word", "label": "a word", "pronunciation": {"phones": ["ah", "w", "er", "d"]}}, + {"kind": "order", "elements": [{"kind": "word", "label": "another word", "visible": False}, "last word"]}]} + try: + jsonschema.validate(data, schema) + except jsonschema.ValidationError as e: + #print data, "validated not OK", e.message + print("{0} validated not OK {1}".format(data, e.message)) + else: + #print data, "validated OK" + print("{} validated OK".format(data)) + + +if __name__ == "__main__": + test() diff --git a/novoapi/backend/__init__.py b/novoapi/backend/__init__.py new file mode 100644 index 0000000..c52d472 --- /dev/null +++ b/novoapi/backend/__init__.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python + +#import session +from . import session \ No newline at end of file diff --git a/novoapi/backend/session.py b/novoapi/backend/session.py new file mode 100644 index 0000000..b08a096 --- /dev/null +++ b/novoapi/backend/session.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python +# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen + +## Recognition interface for actual backend. Adapted from player.asr.debug. + +import json +import sys +import wave +import requests +import websocket +import logging +import collections + +import time + +from .. 
diff --git a/novoapi/backend/__init__.py b/novoapi/backend/__init__.py
new file mode 100644
index 0000000..c52d472
--- /dev/null
+++ b/novoapi/backend/__init__.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+
+#import session
+from . import session
\ No newline at end of file
diff --git a/novoapi/backend/session.py b/novoapi/backend/session.py
new file mode 100644
index 0000000..b08a096
--- /dev/null
+++ b/novoapi/backend/session.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python
+# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
+
+## Recognition interface for the actual backend. Adapted from player.asr.debug.
+
+import json
+import sys
+import wave
+import requests
+import websocket
+import logging
+import collections
+
+import time
+
+from .. import asr
+
+logger = logging.getLogger(__name__)
+
+## turn off annoying warnings
+requests.packages.urllib3.disable_warnings()
+logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(logging.WARN)
+
+buffer_size = 4096
+gm = "gm.novolanguage.com"    ## dev
+protocol = "https"
+port = 443
+apiversion = 0
+
+sessions = collections.Counter()
+
+def segmentation(result):
+    """Convert a raw backend recognition result to a novoapi.asr.segments Segmentation."""
+    for w in result:
+        w["score"] = w["confidence"]["prob"]
+        w["llh"] = w["confidence"]["llr"]
+        w["label"] = w["label"]["raw"]
+        w["begin"] /= 10
+        w["end"] /= 10
+        for p in w["phones"]:
+            p["score"] = p["confidence"]["prob"]
+            p["llh"] = p["confidence"]["llr"]
+            p["begin"] /= 10
+            p["end"] /= 10
+    return asr.segments.Segmentation(result)
+
+class rpcid:
+    id = 0
+    @staticmethod
+    def next():
+        rpcid.id += 1
+        return rpcid.id
+
+class Recognizer(object):
+    def __init__(self, lang="en", gm=gm, grammar_version="0.1", user=None, password=None, snodeid=None, keepopen=False):
+        self.lang = lang
+        self.keepopen = keepopen
+        self.api_url = "%s://%s:%d/v%d" % (protocol, gm, port, apiversion)
+        self.verify = False
+        self.headers = {"Content-Type": "application/json"}
+        self.login_user(user, password)
+        data = {"l2": lang, "local": False, "skipupload": True}
+        if snodeid:
+            data["snodeid"] = snodeid
+        self.conn = None
+        self.init_session(data)
+        self.grammar_version = grammar_version
+        self.last_message = None
+
+    def login_user(self, username, password):
+        # obtain authentication token of user
+        logger.info('obtain auth token at %s', self.api_url)
+        data = {
+            'username': username,
+            'password': password
+        }
+        try:
+            r = requests.post(self.api_url + '/publishers/1/login', headers=self.headers, data=json.dumps(data), verify=self.verify)
+        except Exception as e:
+            logger.error("Cannot post request to GM API for user login: %s", e)
+            sys.exit(-1)
+        assert r.ok, r.reason
+        result = r.json()
+        if "errors" in result["response"]:
+            logger.info("Error in logging in: %s", result["response"]["errors"])
+            sys.exit(-1)
+
+        user_auth_token = result['response']['user']['authentication_token']
+        logger.info("User auth token is: %s", user_auth_token)
+
+        # set auth token in header
+        self.headers['Authentication-Token'] = user_auth_token
+
+    def init_session(self, data, direct=False, use_ip=False):
+        logger.info('Request new session: %s', data)
+        r = requests.post(self.api_url + '/sessions', headers=self.headers, data=json.dumps(data), verify=self.verify)
+        if not r.ok:
+            logger.error("New session request failed: %s", r.text)
+            return
+
+        status_url = r.headers.get("location")
+        if status_url:
+            ## we got a redirect
+            status = {}
+            while True:
+                logger.debug("Checking %s", status_url)
+                s = requests.get(status_url, verify=self.verify)
+                if not s.ok:
+                    logger.error('Checking Failed: %s', s.text)
+                    return
+
+                status = s.json()
+                if status['status'] == 'PENDING':
+                    logger.debug("Status: %s", status['status'])
+                    time.sleep(1)
+                else:
+                    break
+            session = status['result'][0]    ## [1] is another status code...
+            if "error" in session:
+                logger.error("Error in getting a snode: %s", session["error"])
+                raise Exception
+        else:
+            session = r.json()
+
+        try:
+            logger.info("Session: %r", session)
+            if direct:
+                snode_ip = session["snode"]["ip"]
+                proxy_url = snode_ip
+                snode_port = session["port"]
+                ws_url = "%s://%s:%d/" % ("ws", snode_ip, snode_port)
+            else:
+                field = "ip" if use_ip else "hostname"
+                proxy_url = session['snode']['datacentre']['proxy'][field]
+                ws_url = 'wss://' + proxy_url + '/' + session['uuid']
+            logger.info("Connecting to websocket: %s", ws_url)
+            conn = websocket.create_connection(ws_url, sslopt={"check_hostname": self.verify})
+            logger.info("Connected.")
+        #except Exception, e:
+        except Exception as e:
+            logger.error("Unable to connect to websocket: %s", e)
+            raise e
+
+        self.session_id = session['id']
+        self.proxy_url = proxy_url
+        self.conn = conn
+        self.session = session
+        sessions[session["uuid"]] += 1
+
+    def setgrammar(self, grammar):    ## backend grammar object: {"data": {...}, "type": "confusion_network"}
+        data = {"jsonrpc": "2.0",
+                'type': 'jsonrpc',
+                'method': 'set_grammar',
+                'params': grammar,
+                "id": rpcid.next()}
+        asr.spraaklab.schema.validate_rpc_grammar(grammar)
+        self.conn.send(json.dumps(data))
+        result = json.loads(self.conn.recv())
+        if result.get("error"):
+            logger.error("Exercise validation error: %s", result)
+        return result
+
+    def set_alternatives_grammar(self, *args, **kwargs):
+        if "version" not in kwargs:
+            kwargs["version"] = self.grammar_version
+        return self.setgrammar(alternatives_grammar(*args, **kwargs))
+
+    def recognize_wav(self, wavf):
+        w = wave.open(wavf, 'r')
+        nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
+        if nchannels > 1:
+            logging.error("Please use .wav with only 1 channel, found %d channels in %s", nchannels, wavf)
+            return
+        if sampwidth != 2:
+            logging.error("Please use .wav with 2-byte PCM data, found %d bytes in %s", sampwidth, wavf)
+            return
+        if framerate != 16000.0:
+            logging.error("Please use .wav sampled at 16000 Hz, found %1.0f in %s", framerate, wavf)
+            return
+        if comptype != 'NONE':
+            logging.error("Please use .wav with uncompressed data, found %s in %s", compname, wavf)
+            return
+        buf = w.readframes(nframes)
+        w.close()
+        return self.recognize_data(buf)
+
+    def recognize_data(self, buf):
+        nbytes_sent = 0
+        start = time.time()
+        for j in range(0, len(buf), buffer_size):
+            audio_packet = buf[j:j + buffer_size]    ## send raw bytes; wrapping in str() would mangle them
+            nbytes_sent += len(audio_packet)
+            self.conn.send_binary(audio_packet)
+        self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
+        logger.info("Waiting for recognition result...")
+        self.last_message = self.conn.recv()    ## keep result for the interested applications
+        message = json.loads(self.last_message)
+        dur = time.time() - start
+        logger.info("Recognition took %5.3f seconds", dur)
+        if "error" in message:
+            raise RuntimeError("Error from recognition backend: %r" % message.get("error"))
+        return segmentation(message["result"]["words"])
+
+    def recognize_url(self, url):
+        start = time.time()
+        data = json.dumps({"jsonrpc": "2.0", "method": "send_audio", "id": rpcid.next(), "params": {"type": "url", "data": url, "details": ["word", "utterance"]}})
+        self.conn.send(data)
+        logger.info("Waiting for recognition result...")
+        self.last_message = self.conn.recv()    ## keep result for the interested applications
+        #print self.last_message
+        print(self.last_message)
+        message = json.loads(self.last_message)
+        dur = time.time() - start
+        logger.info("Recognition took %5.3f seconds", dur)
+        if "error" in message:
+            raise RuntimeError("Error from recognition backend: %r" % message.get("error"))
+        return segmentation(message["result"]["words"])
+
+    def __del__(self):
+        sessions[self.session["uuid"]] -= 1
+        if self.conn and sessions[self.session["uuid"]] <= 0:
+            self.conn.close()
+            url = self.api_url + '/sessions/%d' % self.session_id
+            if self.keepopen:
+                logger.info("Keeping session open...")
+            else:
+                logger.info("Closing session: %s", url)
+                r = requests.delete(url, headers=self.headers, verify=self.verify)
+                assert r.ok, r.reason
+
+def alternatives_grammar(parts, version="0.1", ret=None):
+    """Make a grammar of alternatives, as array(sequence)-of-array(alternatives)-of-strings"""
+    r = {"type": "confusion_network", "version": version}
+    if version == "0.1":
+        r["data"] = {"type": "multiple_choice", "parts": parts}
+        if isinstance(ret, list) and "dict" in ret:
+            r["return_dict"] = True
+    elif version == "1.0":
+        seqels = []
+        for part in parts:
+            altels = []
+            for alt in part:
+                words = alt.split(" ")
+                if len(words) > 1:
+                    alt = {"kind": "sequence", "elements": words}
+                altels.append(alt)
+            seqels.append({"kind": "alternatives", "elements": altels})
+        r["data"] = {"kind": "sequence", "elements": seqels}
+        if isinstance(ret, list):
+            r["return_objects"] = ret
+    else:
+        raise ValueError("Unsupported version: %s" % version)
+    asr.spraaklab.schema.validate_rpc_grammar(r)
+    return r
diff --git a/novoapi/utils/json/__init__.py b/novoapi/utils/json/__init__.py
new file mode 100644
index 0000000..75d0b5f
--- /dev/null
+++ b/novoapi/utils/json/__init__.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+## from https://stackoverflow.com/questions/1447287/format-floats-with-standard-json-module
+class PrettyFloat(float):
+    def __repr__(self):
+        return '%.15g' % self
+
+def pretty_floats(obj):
+    if isinstance(obj, float):
+        return PrettyFloat(obj)
+    elif isinstance(obj, dict):
+        return dict((k, pretty_floats(v)) for k, v in obj.items())
+    elif isinstance(obj, (list, tuple)):
+        return [pretty_floats(o) for o in obj]    ## a list, not map(): json cannot serialize a map object in Python 3
+    return obj
+
+def rounded_floats(obj, ndigits=15):
+    if isinstance(obj, float):
+        return PrettyFloat(round(obj, ndigits))
+    elif isinstance(obj, dict):
+        return dict((k, rounded_floats(v, ndigits)) for k, v in obj.items())
+    elif isinstance(obj, (list, tuple)):
+        return [rounded_floats(o, ndigits) for o in obj]
+    return obj
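Taken together, a typical round trip with the ported package would look roughly like this. This is a sketch, not part of the patch: the credentials, file names, and prompt words are placeholders, and a reachable NovoLanguage gateway is assumed.

    from novoapi.backend.session import Recognizer
    from novoapi.asr.segments import seg2tg

    # Logging in and opening a session happen inside the constructor.
    rec = Recognizer(lang="en", user="demo@example.com", password="secret")

    # A two-slot multiple-choice grammar in the v1.0 format; multi-word
    # alternatives such as "thank you" become nested sequence elements.
    rec.set_alternatives_grammar([["yes", "no"], ["thank you", "thanks"]], version="1.0")

    # recognize_wav() expects mono, 2-byte PCM, 16 kHz audio (see the checks above).
    segmentation = rec.recognize_wav("answer.wav")
    print(segmentation.sentence())
    seg2tg("answer.TextGrid", segmentation.segments)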