Added the script 'forced_alignment_novo.py', which runs novo_api in a Python 3.6 environment.

2018-12-26 23:49:28 +01:00
parent 0777735979
commit b87a81eb9d
30 changed files with 1258 additions and 32 deletions
--- a/novoapi_for_python3x/asr/spraaklab/init.py
+++ b/novoapi_for_python3x/asr/spraaklab/init.py
@ -0,0 +1,4 @@
+#!/usr/bin/env python
+
+#import schema
+from . import schema
--- a/novoapi_for_python3x/asr/spraaklab/pycache/init.cpython-36.pyc
+++ b/novoapi_for_python3x/asr/spraaklab/pycache/init.cpython-36.pyc
--- a/novoapi_for_python3x/asr/spraaklab/pycache/schema.cpython-36.pyc
+++ b/novoapi_for_python3x/asr/spraaklab/pycache/schema.cpython-36.pyc
--- a/novoapi_for_python3x/asr/spraaklab/schema.py
+++ b/novoapi_for_python3x/asr/spraaklab/schema.py
@ -0,0 +1,273 @@
+#!/usr/bin/env python
+## (c) 2017 NovoLanguage, author: David A. van Leeuwen
+
+## The purpose of this module is to define the grammar structure as a JSON schema, so that it can be
+## validated, (de)serialized, and perhaps even automatically converted to a Python class structure.
+
+import json
+import jsonschema
+
+## JSON schema for the v1.0 grammar format.
+## The document root must be a "group" (see the top-level "$ref"); groups contain
+## elements, and an element is a word, a nested group, or a plain string / null.
+grammar_schema_v10 = {
+    "$schema": "http://json-schema.org/schema#",
+    "title": "NovoLanguage grammar",
+    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
+    "$ref": "#/definitions/group",
+    "definitions": {
+        ## phones: a non-empty array of strings
+        "phones": {
+            "type": "array",
+            "items": {
+                "type": "string"
+            },
+            "minItems": 1
+        },
+        ## pronunciation: object that must carry "phones"; "syllables", "id" and "meta" are optional
+        "pronunciation": {
+            "type": "object",
+            "properties": {
+                "phones": {
+                    "$ref": "#/definitions/phones"
+                },
+                "syllables": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/syllable"
+                    },
+                    "minItems": 1
+                },
+                "id": {
+                    "type": "integer",
+                    "description": "ID to distinguish this pronunciation from other variants"
+                },
+                "meta": {
+                    "type": "object"
+                }
+            },
+            "required": ["phones"]
+        },
+        ## syllable: "begin" and "end" are required non-negative integers;
+        ## "stress" and "tone" are optional non-negative integers
+        "syllable": {
+            "type": "object",
+            "properties": {
+                "begin": {
+                    "type": "integer",
+                    "minimum": 0
+                },
+                "end": {
+                    "type": "integer",
+                    "minimum": 0
+                },
+                "stress": {
+                    "type": "integer",
+                    "minimum": 0
+                },
+                "tone": {
+                    "type": "integer",
+                    "minimum": 0
+                }
+            },
+            "required": ["begin", "end"]
+        },
+        ## word: only "label" is required; if "kind" is present it must be "word"
+        "word": {
+            "type": "object",
+            "properties": {
+                "kind": {
+                    "type": "string",
+                    "enum": ["word"]
+                },
+                "label": {
+                    "type": "string"
+                },
+                ## "pronunciation" accepts three shapes: a single pronunciation object,
+                ## a non-empty array of pronunciation objects and/or phone lists,
+                ## or a bare phone list
+                "pronunciation": {
+                    "anyOf": [
+                        {
+                            "$ref": "#/definitions/pronunciation"
+                        },
+                        {
+                            "type": "array",
+                            "items": {
+                                "anyOf": [
+                                    {
+                                        "$ref": "#/definitions/pronunciation"
+                                    },
+                                    {
+                                        "$ref": "#/definitions/phones"
+                                    }
+                                ]
+                            },
+                            "minItems": 1
+                        },
+                        {
+                            "$ref": "#/definitions/phones"
+                        }
+
+                    ]
+                },
+                "syllables": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/syllable"
+                    }
+                },
+                "graphemes": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "id": {
+                    "type": "integer",
+                    "description": "ID to distinguish this word from other words (with possibly the same label)"
+                },
+                "meta": {
+                    "type": "object"
+                }
+            },
+            "required": ["label"]
+        },
+        ## element: must match exactly one ("oneOf") of word, group, or string/null
+        "element": {
+            "title": "element",
+            "oneOf": [
+                {
+                    "$ref": "#/definitions/word"
+                },
+                {
+                    "$ref": "#/definitions/group"
+                },
+                {
+                    "type": ["string", "null"]
+                }
+            ]
+        },
+        ## group: requires "kind" (sequence / alternatives / order) and a non-empty
+        ## "elements" array; elements may themselves be groups, so the schema is recursive
+        "group": {
+            "title": "element group",
+            "type": "object",
+            "properties": {
+                "kind": {
+                    "type": "string",
+                    "enum": ["sequence", "alternatives", "order"]
+                },
+                "elements": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/element"
+                    },
+                    "minItems": 1,
+                },
+                "meta": {
+                    "type": "object"
+                }
+            },
+            "required": ["kind", "elements"]
+        }
+    }
+}
+
+## JSON schema for the older v0.1 grammar format: an object with an optional
+## "type" (multiple_choice / word_order) and optional "parts".
+## No "required" list is given, so neither property is mandatory.
+grammar_schema_v01 = {
+    "$schema": "http://json-schema.org/schema#",
+    "title": "NovoLanguage grammar v0.1",
+    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
+    "type": "object",
+    "properties": {
+        "type": {
+            "type": "string",
+            "enum": ["multiple_choice", "word_order"]
+        },
+        ## "parts": between 1 and 5 entries, each either a string or an array of strings
+        "parts": {
+            "type": "array",
+            "minItems": 1,
+            "maxItems": 5,
+            "items": {
+                "type": ["string", "array"],
+                ## the nested "items" constraint only applies when the part is an array
+                "items": {
+                    "type": ["string"]
+                }
+            }
+        }
+    }
+}
+
+## JSON schema for the RPC envelope that carries a grammar.
+## "type" and "data" are required; "data" is only checked to be an object here
+## and is validated against a grammar schema separately by validate_rpc_grammar().
+grammar_rpc_schema = {
+    "$schema": "http://json-schema.org/schema#",
+    "title": "NovoLanguage RPC grammar",
+    "type": "object",
+    "properties": {
+        "type": {
+            "type": "string",
+            "enum": ["confusion_network"]
+        },
+        ## NOTE(review): the "default" below is "v0.1", while validate_rpc_grammar()
+        ## falls back to "0.1" when the key is absent -- confirm which spelling clients send.
+        "version": {
+            "type": "string",
+            "default": "v0.1"
+        },
+        "data": {
+            "type": "object"
+        },
+        "return_dict": {
+            "type": "boolean"
+        },
+        "return_objects": {
+            "type": "array",
+            "items": {
+                "type": "string",
+                "enum": ["dict", "grammar"]
+            }
+        },
+        "phoneset": {
+            "type": "string",
+            "enum": ["cmu69", "novo70", "mdbg115"]
+        },
+        "parallel_silence": {
+            "type": "boolean"
+        }
+    },
+    "required": ["type", "data"]
+}
+
+def validate(object, schema=grammar_schema_v10):
+    """Report whether a grammar conforms to a JSON schema.
+
+    `object` may be a dict or a JSON string; a string is decoded with
+    json.loads() first.  `schema` defaults to the v1.0 grammar schema.
+
+    Returns True when the decoded object validates and False when
+    jsonschema raises ValidationError.  Raises TypeError when the
+    (decoded) object is not a dict; any other exception, such as a JSON
+    decoding error, propagates unchanged.
+    """
+    ## Python 3: `str` takes the place of the Python 2 `basestring` check.
+    candidate = json.loads(object) if isinstance(object, str) else object
+    if not isinstance(candidate, dict):
+        raise TypeError("Expected dict or json string")
+    try:
+        jsonschema.validate(candidate, schema)
+    except jsonschema.ValidationError:
+        return False
+    return True
+
+def validate_rpc_grammar(message):
+    """Validate an RPC grammar message and the grammar it carries.
+
+    `message` may be a dict or a JSON string.  The envelope is checked
+    against grammar_rpc_schema, then its "data" member is checked against
+    the grammar schema selected by "version" ("0.1" when absent).  A
+    leading "v" in the version (e.g. "v0.1", the spelling used as the
+    default in grammar_rpc_schema) is accepted and ignored.
+
+    Returns None on success.  Raises ValueError when the envelope or the
+    grammar is invalid or the version is unsupported, and TypeError when
+    the (decoded) message is not a dict.
+    """
+    ## validate() accepts JSON text, so decode here as well; otherwise the
+    ## .get() / [] lookups below would be applied to a str.
+    if isinstance(message, str):
+        message = json.loads(message)
+    if not validate(message, grammar_rpc_schema):
+        raise ValueError("Not a valid RPC grammar")
+    ## The envelope schema guarantees "version" is a string when present.
+    version = message.get("version", "0.1")
+    if version.startswith("v"):
+        version = version[1:]
+    data = message["data"]
+    if version == "0.1":
+        if not validate(data, grammar_schema_v01):
+            raise ValueError("Not a valid grammar v0.1")
+    elif version == "1.0":
+        if not validate(data, grammar_schema_v10):
+            raise ValueError("Not a valid grammar v1.0")
+    else:
+        raise ValueError("Unsupported schema version")
+
+
+## test
+def test(data=None):
+    """Validate `data` against the v1.0 grammar schema and print the result.
+
+    When `data` is omitted (or falsy) a built-in sample grammar is used.
+    Nothing is returned; a line reporting "validated OK" or
+    "validated not OK" plus the validation message is printed.
+    """
+    if not data:
+        data = {"kind": "sequence", "elements": [
+            {"kind": "alternatives", "elements": ["a plain string", "an alternative string"]},
+            {"kind": "word", "label": "a word", "pronunciation": {"phones": ["ah", "w", "er", "d"]}},
+            {"kind": "order", "elements": [{"kind": "word", "label": "another word", "visible": False}, "last word"]}]}
+    try:
+        ## Use the module's v1.0 schema; there is no module-level name `schema`,
+        ## so referring to it here would raise NameError.
+        jsonschema.validate(data, grammar_schema_v10)
+    except jsonschema.ValidationError as e:
+        print("{0} validated not OK {1}".format(data, e.message))
+    else:
+        print("{} validated OK".format(data))
+
+
+if __name__ == "__main__":
+    test()