274 lines
8.1 KiB
Python
274 lines
8.1 KiB
Python
#!/usr/bin/env python
|
|
## (c) 2017 NovoLanguage, author: David A. van Leeuwen
|
|
|
|
## The purpose of this module is to define the grammar structure in a JSON schema, so that it can be validated,
|
|
## (de)serialized, and perhaps even automatically converted to a Python class structure.
|
|
|
|
import json
|
|
import jsonschema
|
|
|
|
# JSON schema for the v1.0 grammar format.
#
# The root of a grammar is a "group" (see the "$ref" below); groups contain
# elements, and an element is a word, a nested group, a plain string or null.
grammar_schema_v10 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    # The top-level document must itself be a group.
    "$ref": "#/definitions/group",
    "definitions": {
        # A non-empty list of phone symbols (strings).
        "phones": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "minItems": 1
        },
        # One pronunciation variant: the phones are mandatory, the rest optional.
        "pronunciation": {
            "type": "object",
            "properties": {
                "phones": {
                    "$ref": "#/definitions/phones"
                },
                "syllables": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/syllable"
                    },
                    "minItems": 1
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this pronunciation from other variants"
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["phones"]
        },
        # A syllable span; all fields are non-negative integers.
        # NOTE(review): "begin"/"end" presumably index into the phones list -- confirm.
        "syllable": {
            "type": "object",
            "properties": {
                "begin": {
                    "type": "integer",
                    "minimum": 0
                },
                "end": {
                    "type": "integer",
                    "minimum": 0
                },
                "stress": {
                    "type": "integer",
                    "minimum": 0
                },
                "tone": {
                    "type": "integer",
                    "minimum": 0
                }
            },
            "required": ["begin", "end"]
        },
        # A single word; only "label" is required.
        "word": {
            "type": "object",
            "properties": {
                "kind": {
                    "type": "string",
                    "enum": ["word"]
                },
                "label": {
                    "type": "string"
                },
                # Accepted shapes: one pronunciation object, a non-empty list
                # of pronunciation objects and/or phone lists, or a bare
                # phone list.
                "pronunciation": {
                    "anyOf": [
                        {
                            "$ref": "#/definitions/pronunciation"
                        },
                        {
                            "type": "array",
                            "items": {
                                "anyOf": [
                                    {
                                        "$ref": "#/definitions/pronunciation"
                                    },
                                    {
                                        "$ref": "#/definitions/phones"
                                    }
                                ]
                            },
                            "minItems": 1
                        },
                        {
                            "$ref": "#/definitions/phones"
                        }
                    ]
                },
                "syllables": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/syllable"
                    }
                },
                "graphemes": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this word from other words (with possibly the same label)"
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["label"]
        },
        # Anything that may appear inside a group: exactly one of a word,
        # a nested group, or a plain string / null.
        "element": {
            "title": "element",
            "oneOf": [
                {
                    "$ref": "#/definitions/word"
                },
                {
                    "$ref": "#/definitions/group"
                },
                {
                    "type": ["string", "null"]
                }
            ]
        },
        # A non-empty collection of elements combined according to "kind".
        "group": {
            "title": "element group",
            "type": "object",
            "properties": {
                "kind": {
                    "type": "string",
                    "enum": ["sequence", "alternatives", "order"]
                },
                "elements": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/element"
                    },
                    "minItems": 1,
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["kind", "elements"]
        }
    }
}
|
|
|
|
# JSON schema for the legacy v0.1 grammar format: an object with a "type"
# and a list of one to five "parts".
grammar_schema_v01 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar v0.1",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["multiple_choice", "word_order"]
        },
        "parts": {
            "type": "array",
            "minItems": 1,
            "maxItems": 5,
            # Each part is either a single string or a list of strings.
            "items": {
                "type": ["string", "array"],
                "items": {
                    "type": ["string"]
                }
            }
        }
    }
}
|
|
|
|
# JSON schema for the RPC envelope that carries a grammar in its "data"
# field.  Only "type" and "data" are required; the grammar inside "data" is
# checked separately against the schema selected by "version".
grammar_rpc_schema = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage RPC grammar",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["confusion_network"]
        },
        "version": {
            "type": "string",
            "default": "v0.1"
        },
        "data": {
            "type": "object"
        },
        "return_dict": {
            "type": "boolean"
        },
        "return_objects": {
            "type": "array",
            "items": {
                "type": "string",
                "enum": ["dict", "grammar"]
            }
        },
        "phoneset": {
            "type": "string",
            "enum": ["cmu69", "novo70", "mdbg115"]
        },
        "parallel_silence": {
            "type": "boolean"
        }
    },
    "required": ["type", "data"]
}
|
|
|
|
def validate(object, schema=grammar_schema_v10):
    """Check a grammar against a JSON schema.

    Args:
        object: the grammar, either as a dict or as a string of JSON text
            that decodes to a dict.  (The parameter name shadows the builtin
            but is kept so existing keyword callers keep working.)
        schema: the JSON schema to validate against; defaults to
            ``grammar_schema_v10``.

    Returns:
        True when the grammar satisfies the schema, False when jsonschema
        reports a ``ValidationError``.

    Raises:
        TypeError: if the (decoded) input is not a dict.
        ValueError: if a string input is not valid JSON (raised by
            ``json.loads``).

    Any other exception raised by jsonschema (i.e. anything that is not a
    ``ValidationError``) is not caught here and propagates to the caller.
    """
    if isinstance(object, str):
        object = json.loads(object)
    if not isinstance(object, dict):
        raise TypeError("Expected dict or json string")
    try:
        jsonschema.validate(object, schema)
    except jsonschema.ValidationError:
        return False
    return True
|
|
|
|
def validate_rpc_grammar(message):
    """Validate an RPC grammar message and the grammar it carries.

    The message is first checked against ``grammar_rpc_schema``; its "data"
    member is then checked against the grammar schema selected by the
    message's "version" (taken to be "0.1" when absent).

    Args:
        message: the RPC message, as a dict or as a string of JSON text.

    Returns:
        None.  The function signals failure only by raising.

    Raises:
        ValueError: if the message is not a valid RPC grammar, if the
            version is not supported, or if "data" does not satisfy the
            grammar schema for that version.
        TypeError: if the (decoded) message is not a dict.
    """
    # validate() would decode JSON text only for its own use; decode here so
    # that the key lookups below operate on the dict rather than on a string.
    if isinstance(message, str):
        message = json.loads(message)
    if not validate(message, grammar_rpc_schema):
        raise ValueError("Not a valid RPC grammar")

    version = message.get("version", "0.1")
    # grammar_rpc_schema spells its default version "v0.1", so accept an
    # optional leading "v" as well as the bare number.
    if version.startswith("v"):
        version = version[1:]

    schemas = {"0.1": grammar_schema_v01, "1.0": grammar_schema_v10}
    if version not in schemas:
        raise ValueError("Unsupported schema version")
    if not validate(message["data"], schemas[version]):
        raise ValueError("Not a valid grammar v" + version)
|
|
|
|
|
|
## test
|
|
def test(data=None):
    """Validate a grammar against ``grammar_schema_v10`` and print the outcome.

    Args:
        data: the grammar dict to check.  When omitted (or otherwise falsy)
            a built-in sample grammar is used, which mixes plain strings,
            word objects and nested groups.

    The result is reported on stdout; nothing is returned.
    """
    if not data:
        data = {"kind": "sequence", "elements": [
            {"kind": "alternatives", "elements": ["a plain string", "an alternative string"]},
            {"kind": "word", "label": "a word", "pronunciation": {"phones": ["ah", "w", "er", "d"]}},
            {"kind": "order", "elements": [{"kind": "word", "label": "another word", "visible": False}, "last word"]}]}
    try:
        # The original referenced an undefined name `schema` here.
        jsonschema.validate(data, grammar_schema_v10)
    except jsonschema.ValidationError as e:
        print("{0} validated not OK {1}".format(data, e.message))
    else:
        print("{0} validated OK".format(data))
|
|
|
|
|
|
# Run the self-test on the built-in sample grammar when executed as a script.
if __name__ == "__main__":
    test()
|