#!/usr/bin/env python
## (c) 2017 NovoLanguage, author: David A. van Leeuwen

## The purpose of this module is to define the grammar structure in a json schema,
## so that it can be validated, (de)serialized, and perhaps even automatically
## converted to a Python class structure.

import json
import jsonschema

## Schema for grammar version 1.0: the root is a "group" whose elements are
## words, nested groups, plain strings or null.
grammar_schema_v10 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    "$ref": "#/definitions/group",
    "definitions": {
        "phones": {
            "type": "array",
            "items": {"type": "string"},
            "minItems": 1
        },
        "pronunciation": {
            "type": "object",
            "properties": {
                "phones": {"$ref": "#/definitions/phones"},
                "syllables": {
                    "type": "array",
                    "items": {"$ref": "#/definitions/syllable"},
                    "minItems": 1
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this pronunciation from other variants"
                },
                "meta": {"type": "object"}
            },
            "required": ["phones"]
        },
        "syllable": {
            "type": "object",
            "properties": {
                "begin": {"type": "integer", "minimum": 0},
                "end": {"type": "integer", "minimum": 0},
                "stress": {"type": "integer", "minimum": 0},
                "tone": {"type": "integer", "minimum": 0}
            },
            "required": ["begin", "end"]
        },
        "word": {
            "type": "object",
            "properties": {
                "kind": {"type": "string", "enum": ["word"]},
                "label": {"type": "string"},
                ## a single pronunciation object, a list of variants
                ## (objects or bare phone lists), or one bare phone list
                "pronunciation": {
                    "anyOf": [
                        {"$ref": "#/definitions/pronunciation"},
                        {
                            "type": "array",
                            "items": {
                                "anyOf": [
                                    {"$ref": "#/definitions/pronunciation"},
                                    {"$ref": "#/definitions/phones"}
                                ]
                            },
                            "minItems": 1
                        },
                        {"$ref": "#/definitions/phones"}
                    ]
                },
                "syllables": {
                    "type": "array",
                    "items": {"$ref": "#/definitions/syllable"}
                },
                "graphemes": {
                    "type": "array",
                    "items": {"type": "string"}
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this word from other words (with possibly the same label)"
                },
                "meta": {"type": "object"}
            },
            "required": ["label"]
        },
        "element": {
            "title": "element",
            "oneOf": [
                {"$ref": "#/definitions/word"},
                {"$ref": "#/definitions/group"},
                {"type": ["string", "null"]}
            ]
        },
        "group": {
            "title": "element group",
            "type": "object",
            "properties": {
                "kind": {
                    "type": "string",
                    "enum": ["sequence", "alternatives", "order"]
                },
                "elements": {
                    "type": "array",
                    "items": {"$ref": "#/definitions/element"},
                    "minItems": 1,
                },
                "meta": {"type": "object"}
            },
            "required": ["kind", "elements"]
        }
    }
}

## Schema for the older grammar version 0.1: a typed list of 1..5 parts, each
## part being a string or a list of strings.
grammar_schema_v01 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar v0.1",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["multiple_choice", "word_order"]
        },
        "parts": {
            "type": "array",
            "minItems": 1,
            "maxItems": 5,
            "items": {
                "type": ["string", "array"],
                "items": {"type": ["string"]}
            }
        }
    }
}

## Schema for the RPC envelope that carries a grammar in its "data" member.
grammar_rpc_schema = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage RPC grammar",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["confusion_network"]
        },
        "version": {
            "type": "string",
            "default": "v0.1"
        },
        "data": {"type": "object"},
        "return_dict": {"type": "boolean"},
        "return_objects": {
            "type": "array",
            "items": {
                "type": "string",
                "enum": ["dict", "grammar"]
            }
        },
        "phoneset": {
            "type": "string",
            "enum": ["cmu69", "novo70", "mdbg115"]
        },
        "parallel_silence": {"type": "boolean"}
    },
    "required": ["type", "data"]
}

## Grammar schema to apply to the "data" member, keyed by normalised version.
_GRAMMAR_SCHEMAS_BY_VERSION = {
    "0.1": grammar_schema_v01,
    "1.0": grammar_schema_v10,
}


def validate(object, schema=grammar_schema_v10):
    """Check a grammar (dict or JSON string) against a JSON schema.

    Args:
        object: a dict, or a string holding JSON that decodes to a dict.
            (The name shadows the builtin but is kept for keyword callers.)
        schema: the JSON schema to validate against; defaults to the
            version 1.0 grammar schema.

    Returns:
        True if the object validates, False if jsonschema reports a
        ValidationError.

    Raises:
        TypeError: if the (decoded) object is not a dict.
        Any other exception (JSON decoding errors, errors in the schema
        itself) propagates unchanged.
    """
    ## python 2 used basestring here
    if isinstance(object, str):
        object = json.loads(object)
    if not isinstance(object, dict):
        raise TypeError("Expected dict or json string")
    try:
        jsonschema.validate(object, schema)
    except jsonschema.ValidationError:
        return False
    return True


def validate_rpc_grammar(message):
    """Validate an RPC grammar message and the grammar it carries.

    The envelope is checked against grammar_rpc_schema, then message["data"]
    is checked against the grammar schema selected by message["version"]
    ("0.1" when absent).

    Args:
        message: the RPC message as a dict, or as a JSON string.

    Raises:
        ValueError: if the envelope or the grammar is invalid, or the
            version is not supported.
        TypeError: if the (decoded) message is not a dict.
    """
    ## decode once here: validate() only decodes its own local copy, and the
    ## lookups below need a dict
    if isinstance(message, str):
        message = json.loads(message)
    if not validate(message, grammar_rpc_schema):
        raise ValueError("Not a valid RPC grammar")
    version = message.get("version", "0.1")
    ## the rpc schema documents the default as "v0.1", so accept an optional
    ## leading "v" as well as the bare number
    if version.startswith("v"):
        version = version[1:]
    schema = _GRAMMAR_SCHEMAS_BY_VERSION.get(version)
    if schema is None:
        raise ValueError("Unsupported schema version")
    if not validate(message["data"], schema):
        raise ValueError("Not a valid grammar v{0}".format(version))


## test
def test(data=None, schema=grammar_schema_v10):
    """Validate data against schema and print the outcome.

    Args:
        data: the grammar to check; when empty or None a built-in example
            grammar is used.
        schema: the schema to validate against; defaults to the version 1.0
            grammar schema.
    """
    if not data:
        data = {
            "kind": "sequence",
            "elements": [
                {
                    "kind": "alternatives",
                    "elements": ["a plain string", "an alternative string"]
                },
                {
                    "kind": "word",
                    "label": "a word",
                    "pronunciation": {"phones": ["ah", "w", "er", "d"]}
                },
                {
                    "kind": "order",
                    "elements": [
                        {"kind": "word", "label": "another word", "visible": False},
                        "last word"
                    ]
                }
            ]
        }
    try:
        jsonschema.validate(data, schema)
    except jsonschema.ValidationError as e:
        print("{0} validated not OK {1}".format(data, e.message))
    else:
        print("{0} validated OK".format(data))


if __name__ == "__main__":
    test()