30 changed files with 1257 additions and 31 deletions
@@ -0,0 +1,261 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.

# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates

# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs

# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/

# Visual Studio 2015 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/

# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*

# NUNIT
*.VisualState.xml
TestResult.xml

# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c

# DNX
project.lock.json
project.fragment.lock.json
artifacts/

*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc

# Chutzpah Test files
_Chutzpah*

# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb

# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap

# TFS 2012 Local Workspace
$tf/

# Guidance Automation Toolkit
*.gpState

# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user

# JustCode is a .NET coding add-in
.JustCode

# TeamCity is a build add-in
_TeamCity*

# DotCover is a Code Coverage Tool
*.dotCover

# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*

# MightyMoose
*.mm.*
AutoTest.Net/

# Web workbench (sass)
.sass-cache/

# Installshield output folder
[Ee]xpress/

# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html

# Click-Once directory
publish/

# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# TODO: Comment the next line if you want to checkin your web deploy settings
# but database connection strings (with potential passwords) will be unencrypted
#*.pubxml
*.publishproj

# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/

# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/packages/*
# except build/, which is used as an MSBuild target.
!**/packages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/packages/repositories.config
# NuGet v3's project.json files produce more ignorable files
*.nuget.props
*.nuget.targets

# Microsoft Azure Build Output
csx/
*.build.csdef

# Microsoft Azure Emulator
ecf/
rcf/

# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt

# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/

# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
node_modules/
orleans.codegen.cs

# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/

# RIA/Silverlight projects
Generated_Code/

# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm

# SQL Server files
*.mdf
*.ldf

# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings

# Microsoft Fakes
FakesAssemblies/

# GhostDoc plugin setting file
*.GhostDoc.xml

# Node.js Tools for Visual Studio
.ntvs_analysis.dat

# Visual Studio 6 build log
*.plg

# Visual Studio 6 workspace options file
*.opt

# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions

# Paket dependency manager
.paket/paket.exe
paket-files/

# FAKE - F# Make
.fake/

# JetBrains Rider
.idea/
*.sln.iml

# CodeRush
.cr/

# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,38 @@
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import sys
import csv
#import subprocess
#from collections import Counter
#import re

import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#from sklearn.metrics import confusion_matrix

import acoustic_model_functions as am_func
import convert_xsampa2ipa
import defaultfiles as default

from forced_alignment import pyhtk

import novoapi

## ======================= convert phones ======================
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)

stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)

phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx)
df = pd.read_excel(phonelist_novo70_, 'list')

translation_key = dict()
for ipa, novo70 in zip(df['IPA_simple'], df['novo70_simple']):
    if not pd.isnull(ipa):
        print('{0}:{1}'.format(ipa, novo70))
        translation_key[ipa] = novo70

#df = pd.read_excel(stimmen_transcription, 'check')
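The loop above builds translation_key as an IPA-to-novo70 lookup. A minimal sketch of how it could be applied, assuming each IPA symbol in a transcription is a single character that appears as a key (diacritics or length marks would break this assumption):

def ipa2novo70(ipa_word, key=translation_key):
    ## hypothetical helper, not part of the diff: drops symbols missing from the key
    return [key[c] for c in ipa_word if c in key]

print(ipa2novo70('man'))  ## e.g. ['m', 'a', 'n'], provided those symbols are in the key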
@@ -0,0 +1,133 @@
#
# forced alignment using novo-api.
#
# *** IMPORTANT ***
# This file should be treated as confidential.
# This file should not be copied or uploaded to public sites.
#
# NOTES:
# Usage of the novo api: https://bitbucket.org/novolanguage/python-novo-api
# I couldn't make it work as I described in the mail to Martijn Bartelds on 2018/12/03.
# Following his advice, I modified testgrammer.py and made it a function.
#
# To run on Python 3.6, the following points were changed in novo-api.
# (1) backend/__init__.py
#     - #import session
#       from . import session
# (2) backend/session.py
#     - #except Exception, e:
#       except Exception as e:
#     - #print self.last_message
#       print(self.last_message)
# (3) asr/segments/praat.py
#     - def print_tier(output, title, begin, end, segs, (format, formatter))
#       def print_tier(output, title, begin, end, segs, format, formatter):
# (4) asr/spraaklab/__init__.py
#     - #import session
#       from . import session
# (5) asr/spraaklab/schema.py
#     - #print data, "validated not OK", e.message
#       print("{0} validated not OK {1}".format(data, e.message))
#     - #print data, "validated OK"
#       print("{} validated OK".format(data))
#     - #if isinstance(object, basestring):
#       if isinstance(object, str):
#
# Aki Kunikoshi
# 428968@gmail.com
#

import argparse
import json

from novoapi.backend import session

# username / password cannot be passed as arguments...
p = argparse.ArgumentParser()
#p.add_argument("--user", default=None)
#p.add_argument("--password", default=None)
p.add_argument("--user", default='martijn.wieling')
p.add_argument("--password", default='fa0Thaic')
args = p.parse_args()

wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'

rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)

grammar = {
    "type": "confusion_network",
    "version": "1.0",
    "data": {
        "kind": "sequence",
        "elements": [
            {
                "kind": "word",
                "pronunciation": [
                    {
                        "phones": ["wv", "a1", "n"],
                        "id": 0
                    },
                    {
                        "phones": ["wv", "uh1", "n"],
                        "id": 1
                    }
                ],
                "label": "one"
            },
            {
                "kind": "word",
                "pronunciation": [
                    {
                        "phones": ["t", "uw1"],
                        "id": 0
                    }
                ],
                "label": "two"
            },
            {
                "kind": "word",
                "pronunciation": [
                    {
                        "phones": ["t", "r", "iy1"],
                        "id": 0
                    },
                    {
                        "phones": ["s", "r", "iy1"],
                        "id": 1
                    }
                ],
                "label": "three"
            }
        ]
    },
    "return_objects": [
        "grammar"
    ],
    "phoneset": "novo70"
}

res = rec.setgrammar(grammar)
#print "Set grammar result", res

#res = rec.recognize_wav("test/onetwothree.wav")
res = rec.recognize_wav(wav_file)
#print "Recognition result:", json.dumps(res.export(), indent=4)
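Under Python 3, the commented-out Python 2 print above could become the following sketch (it assumes, per segments.py later in this diff, that recognize_wav returns a Segmentation with an export() method):

## Python 3 version of the commented print above; illustrative, not part of the diff.
print("Recognition result:", json.dumps(res.export(), indent=4))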
@@ -0,0 +1,5 @@
#!/usr/bin/env python

__version__ = "0.2"

from . import backend  ## was "import backend"; changed for Python 3, matching the other __init__ files
Binary file not shown.
@@ -0,0 +1,6 @@
#!/usr/bin/env python

#import segments
#import spraaklab
from . import segments
from . import spraaklab
Binary file not shown.
@@ -0,0 +1,4 @@
#!/usr/bin/env python

from .segments import Segmentation
from .praat import seg2tg
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,77 @@
#!/usr/bin/env python
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen

import codecs


def print_header(output, begin, end, nr_tiers):
    # Python 2 "print >> output, ..." rewritten as print(..., file=output) so
    # the module also imports under Python 3, in line with the porting notes
    # in the forced-alignment script above.
    print('File type = "ooTextFile"', file=output)
    print('Object class = "TextGrid"', file=output)
    print('', file=output)
    print('xmin = %s' % begin, file=output)
    print('xmax = %s' % end, file=output)
    print('tiers? <exists>', file=output)
    print('size = %d' % nr_tiers, file=output)
    print('item []:', file=output)


def print_info_tier(output, title, begin, end, label):
    print('\titem [%d]:' % 0, file=output)
    print('\t\tclass = "IntervalTier"', file=output)
    print('\t\tname = "%s"' % title, file=output)
    print('\t\txmin = %s' % begin, file=output)
    print('\t\txmax = %s' % end, file=output)
    print('\t\tintervals: size = %d' % 1, file=output)

    print('\t\tintervals [1]:', file=output)
    print('\t\t\txmin = %s' % begin, file=output)
    print('\t\t\txmax = %s' % end, file=output)
    print('\t\t\ttext = "%s"' % label, file=output)


#def print_tier(output, title, begin, end, segs, (format, formatter)):
def print_tier(output, title, begin, end, segs, format, formatter):
    print('\titem [%d]:' % 0, file=output)
    print('\t\tclass = "IntervalTier"', file=output)
    print('\t\tname = "%s"' % title, file=output)
    print('\t\txmin = %s' % begin, file=output)
    print('\t\txmax = %s' % end, file=output)
    print('\t\tintervals: size = %d' % len(segs), file=output)

    count = 1
    for seg in segs:
        #print seg
        print('\t\tintervals [%d]:' % count, file=output)
        print('\t\t\txmin = %s' % repr(int(seg['begin']) / 100.0), file=output)
        print('\t\t\txmax = %s' % repr(int(seg['end']) / 100.0), file=output)
        string = '\t\t\ttext = "' + format + '"'
        print(string % formatter(seg['label']), file=output)
        count += 1


def seg2tg(fname, segments):
    if not segments:
        return
    output = codecs.open(fname, "w", encoding="utf-8")

    confidences = []
    word_labels = []
    phones = []

    for s in segments:
        conf = s.llh if hasattr(s, "llh") else s.score
        confidences.append({'begin': s.begin, 'end': s.end, 'label': conf})
        word_labels.append({'begin': s.begin, 'end': s.end, 'label': s.label})
        for p in s.phones:
            phones.append({'begin': p.begin, 'end': p.end, 'label': p.label})

    begin = repr(int(segments[0].begin) / 100.0)
    end = repr(int(segments[-1].end) / 100.0)

    nr_tiers = 3
    print_header(output, begin, end, nr_tiers)
    # print_tier now takes format and formatter as separate arguments (see the
    # changed signature above), so the former (format, formatter) tuples are
    # unpacked at the call sites.
    print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
    print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
    print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)

    output.close()
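A small usage sketch for seg2tg (not part of the diff; the file name and timing values are invented, with times in the centisecond units Segment expects), using the Segmentation class from segments.py below:

## Illustrative only: write a TextGrid for one word with three phones.
from novoapi.asr.segments import Segmentation, seg2tg

words = [{"begin": 0, "end": 120, "label": "one", "score": 0.93,
          "phones": [{"begin": 0, "end": 40, "label": "wv", "score": 0.90},
                     {"begin": 40, "end": 80, "label": "a1", "score": 0.95},
                     {"begin": 80, "end": 120, "label": "n", "score": 0.94}]}]
seg2tg("one.TextGrid", Segmentation(words))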
@@ -0,0 +1,99 @@
#!/usr/bin/env python
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen

## These classes can be initialized with dictionaries, as they are returned by the python spraaklab recognition system.

class Segment(object):
    def __init__(self, segment):
        self.begin = segment["begin"]
        self.end = segment["end"]
        self.begintime = segment.get("beginTime", self.begin / 100.0)
        self.endtime = segment.get("endTime", self.end / 100.0)
        self.label = segment["label"]
        self.score = segment["score"]
        if "llh" in segment:
            self.llh = segment["llh"]
        if "phones" in segment:
            self.type = "word"
            self.phones = Segmentation(segment["phones"], ["sil"])
            if hasattr(self.phones[0], "llh"):
                self.minllh = min([s.llh for s in self.phones]) ## the current word llh for error detection
        else:
            self.type = "phone"

    def __repr__(self):
        res = "%8.3f -- %8.3f score %8.3f " % (self.begintime, self.endtime, self.score)
        if hasattr(self, "llh"):
            res += "llh %8.3f " % self.llh
        res += self.label  ## .encode("utf8") dropped: under Python 3, __repr__ must return str, not bytes
        return res

    def export(self):
        r = {"begin": self.begin, "end": self.end, "label": self.label, "score": self.score, "type": self.type}
        if hasattr(self, "llh"):
            r["llh"] = self.llh
        if hasattr(self, "phones"):
            r["phones"] = self.phones.export()
        return r

class Segmentation(object):
    def __init__(self, segments, sils=["<s>", "</s>", "!sil"]):
        """Create a segmentation from a spraaklab recognition structure.
        segments: an array of words (or phones), represented by a dict with
        "begin", "end", "label", "score", and "llh" keys. Words can also have
        "phones" which is another array of segments."""
        self.segments = [Segment(s) for s in segments]
        if self.segments:
            self.type = self.segments[0].type
        else:
            self.type = None
        self.sils = sils
        self.orig = segments ## in case we want to have access to the original recognition structure

    def __getitem__(self, item):
        return self.segments[item]

    def __repr__(self):
        ns = len(self.segments)
        res = "Segmentation with %d %s%s" % (ns, self.type, "" if ns == 1 else "s")
        for seg in self.segments:
            res += "\n " + repr(seg)
        return res

    def __len__(self):
        return len(self.segments)

    def score(self, skip=None):
        if not skip:
            skip = self.sils
        s = 0.0
        for seg in self.segments:
            if seg.label not in skip:
                s += seg.score
        return s

    def llhs(self, skip=None):
        if not skip:
            skip = self.sils
        return [seg.llh for seg in self.segments if hasattr(seg, "llh") and seg.label not in skip]

    def llh(self, skip=None):
        return sum(self.llhs(skip))

    def minllh(self, skip=None):
        llhs = self.llhs(skip)
        if llhs:
            return min(llhs)
        else:
            return None

    def labels(self, skip=None):
        if not skip:
            skip = self.sils
        return [seg.label for seg in self.segments if seg.label not in skip]

    def sentence(self, skip=None):
        return " ".join(self.labels(skip))

    def export(self):
        return [seg.export() for seg in self.segments]
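A short sketch of the query helpers above (labels, scores, and llh values invented for illustration):

## Illustrative only: a three-phone segmentation with one silence.
phones = [{"begin": 0, "end": 30, "label": "sil", "score": 0.00, "llh": -1.2},
          {"begin": 30, "end": 60, "label": "t", "score": 0.50, "llh": -0.3},
          {"begin": 60, "end": 90, "label": "uw1", "score": 0.25, "llh": -0.1}]
seg = Segmentation(phones, sils=["sil"])
print(seg.sentence())  # "t uw1" -- silence is skipped
print(seg.score())     # 0.75
print(seg.minllh())    # -0.3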
@@ -0,0 +1,4 @@
#!/usr/bin/env python

#import schema
from . import schema
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,273 @@
#!/usr/bin/env python
## (c) 2017 NovoLanguage, author: David A. van Leeuwen

## The purpose of this is to define the grammar structure in a json schema, so that it can be validated,
## (de)serialized, and perhaps even automatically converted to a Python class structure.

import json
import jsonschema

grammar_schema_v10 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    "$ref": "#/definitions/group",
    "definitions": {
        "phones": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "minItems": 1
        },
        "pronunciation": {
            "type": "object",
            "properties": {
                "phones": {
                    "$ref": "#/definitions/phones"
                },
                "syllables": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/syllable"
                    },
                    "minItems": 1
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this pronunciation from other variants"
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["phones"]
        },
        "syllable": {
            "type": "object",
            "properties": {
                "begin": {
                    "type": "integer",
                    "minimum": 0
                },
                "end": {
                    "type": "integer",
                    "minimum": 0
                },
                "stress": {
                    "type": "integer",
                    "minimum": 0
                },
                "tone": {
                    "type": "integer",
                    "minimum": 0
                }
            },
            "required": ["begin", "end"]
        },
        "word": {
            "type": "object",
            "properties": {
                "kind": {
                    "type": "string",
                    "enum": ["word"]
                },
                "label": {
                    "type": "string"
                },
                "pronunciation": {
                    "anyOf": [
                        {
                            "$ref": "#/definitions/pronunciation"
                        },
                        {
                            "type": "array",
                            "items": {
                                "anyOf": [
                                    {
                                        "$ref": "#/definitions/pronunciation"
                                    },
                                    {
                                        "$ref": "#/definitions/phones"
                                    }
                                ]
                            },
                            "minItems": 1
                        },
                        {
                            "$ref": "#/definitions/phones"
                        }
                    ]
                },
                "syllables": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/syllable"
                    }
                },
                "graphemes": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this word from other words (with possibly the same label)"
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["label"]
        },
        "element": {
            "title": "element",
            "oneOf": [
                {
                    "$ref": "#/definitions/word"
                },
                {
                    "$ref": "#/definitions/group"
                },
                {
                    "type": ["string", "null"]
                }
            ]
        },
        "group": {
            "title": "element group",
            "type": "object",
            "properties": {
                "kind": {
                    "type": "string",
                    "enum": ["sequence", "alternatives", "order"]
                },
                "elements": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/element"
                    },
                    "minItems": 1
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["kind", "elements"]
        }
    }
}

grammar_schema_v01 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar v0.1",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["multiple_choice", "word_order"]
        },
        "parts": {
            "type": "array",
            "minItems": 1,
            "maxItems": 5,
            "items": {
                "type": ["string", "array"],
                "items": {
                    "type": ["string"]
                }
            }
        }
    }
}

grammar_rpc_schema = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage RPC grammar",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["confusion_network"]
        },
        "version": {
            "type": "string",
            "default": "v0.1"
        },
        "data": {
            "type": "object"
        },
        "return_dict": {
            "type": "boolean"
        },
        "return_objects": {
            "type": "array",
            "items": {
                "type": "string",
                "enum": ["dict", "grammar"]
            }
        },
        "phoneset": {
            "type": "string",
            "enum": ["cmu69", "novo70", "mdbg115"]
        },
        "parallel_silence": {
            "type": "boolean"
        }
    },
    "required": ["type", "data"]
}

def validate(object, schema=grammar_schema_v10):
    #if isinstance(object, basestring):
    if isinstance(object, str):
        object = json.loads(object)
    if not isinstance(object, dict):
        raise TypeError("Expected dict or json string")
    try:
        jsonschema.validate(object, schema)
    except jsonschema.ValidationError:
        return False
    except Exception:
        raise
    else:
        return True

def validate_rpc_grammar(message):
    """Validate an rpc grammar message."""
    if not validate(message, grammar_rpc_schema):
        raise ValueError("Not a valid RPC grammar")
    version = message.get("version", "0.1")
    data = message["data"]
    if version == "0.1":
        if not validate(data, grammar_schema_v01):
            raise ValueError("Not a valid grammar v0.1")
    elif version == "1.0":
        if not validate(data, grammar_schema_v10):
            raise ValueError("Not a valid grammar v1.0")
    else:
        raise ValueError("Unsupported schema version")


## test
def test(data=None):
    if not data:
        data = {"kind": "sequence", "elements": [
            {"kind": "alternatives", "elements": ["a plain string", "an alternative string"]},
            {"kind": "word", "label": "a word", "pronunciation": {"phones": ["ah", "w", "er", "d"]}},
            {"kind": "order", "elements": [{"kind": "word", "label": "another word", "visible": False}, "last word"]}]}
    try:
        jsonschema.validate(data, grammar_schema_v10)  ## was "schema", an undefined name here
    except jsonschema.ValidationError as e:
        #print data, "validated not OK", e.message
        print("{0} validated not OK {1}".format(data, e.message))
    else:
        #print data, "validated OK"
        print("{} validated OK".format(data))


if __name__ == "__main__":
    test()
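A quick sketch of validate_rpc_grammar on a minimal message of the shape used by the forced-alignment script earlier in this diff (the word content is invented):

## Illustrative only: a minimal v1.0 confusion-network message.
msg = {"type": "confusion_network",
       "version": "1.0",
       "data": {"kind": "sequence",
                "elements": [{"kind": "word", "label": "one"}]}}
validate_rpc_grammar(msg)  ## passes silently; raises ValueError when invalid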
@@ -0,0 +1,4 @@
#!/usr/bin/env python

#import session
from . import session
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,254 @@
#!/usr/bin/env python
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen

## Recognition interface for actual backend. Adapted from player.asr.debug.

import json
import sys
import wave
import requests
import websocket
import logging
import collections

import time

from .. import asr

logger = logging.getLogger(__name__)

## turn off annoying warnings
requests.packages.urllib3.disable_warnings()
logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(logging.WARN)

buffer_size = 4096
gm = "gm.novolanguage.com" ## dev
protocol = "https"
port = 443
apiversion = 0

sessions = collections.Counter()

def segmentation(result):
    """Convert a raw backend recognition result to a novo.asr.segments Segmentation."""
    for w in result:
        w["score"] = w["confidence"]["prob"]
        w["llh"] = w["confidence"]["llr"]
        w["label"] = w["label"]["raw"]
        w["begin"] /= 10
        w["end"] /= 10
        for p in w["phones"]:
            p["score"] = p["confidence"]["prob"]
            p["llh"] = p["confidence"]["llr"]
            p["begin"] /= 10
            p["end"] /= 10
    return asr.segments.Segmentation(result)
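For reference, a hypothetical raw backend result of the shape segmentation() expects. Field names are taken from the function body; the values are invented, and the time unit is a guess (apparently milliseconds, since they are divided by 10 into the centisecond units Segment assumes):

## Illustrative example input for segmentation(); not part of the diff.
raw = [{"label": {"raw": "one"},
        "confidence": {"prob": 0.93, "llr": -0.2},
        "begin": 0, "end": 4200,
        "phones": [{"label": "wv",
                    "confidence": {"prob": 0.90, "llr": -0.1},
                    "begin": 0, "end": 1400}]}]
seg = segmentation(raw)  ## a one-word Segmentation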
class rpcid:
    id = 0
    @staticmethod
    def next():
        rpcid.id += 1
        return rpcid.id

class Recognizer(object):
    def __init__(self, lang="en", gm=gm, grammar_version="0.1", user=None, password=None, snodeid=None, keepopen=False):
        self.lang = lang
        self.keepopen = keepopen
        self.api_url = "%s://%s:%d/v%d" % (protocol, gm, port, apiversion)
        self.verify = False
        self.headers = {"Content-Type": "application/json"}
        self.login_user(user, password)
        data = {"l2": lang, "local": False, "skipupload": True}
        if snodeid:
            data["snodeid"] = snodeid
        self.conn = None
        self.init_session(data)
        self.grammar_version = grammar_version
        self.last_message = None

    def login_user(self, username, password):
        # obtain authentication token of user
        logger.info('obtain auth token at %s', self.api_url)
        data = {
            'username': username,
            'password': password
        }
        try:
            r = requests.post(self.api_url + '/publishers/1/login', headers=self.headers, data=json.dumps(data), verify=self.verify)
        except Exception as e:
            # Python 3: plain exceptions have no .message attribute
            logger.error("Cannot post request to GM API for user login: %s", e)
            sys.exit(-1)
        assert r.ok, r.reason
        result = r.json()
        if "errors" in result["response"]:
            logger.info("Error in logging in: %s", result["response"]["errors"])
            sys.exit(-1)

        user_auth_token = result['response']['user']['authentication_token']
        logger.info("User auth token is: %s", user_auth_token)

        # set auth token in header
        self.headers['Authentication-Token'] = user_auth_token

    def init_session(self, data, direct=False, use_ip=False):
        logger.info('Request new session: %s', data)
        r = requests.post(self.api_url + '/sessions', headers=self.headers, data=json.dumps(data), verify=self.verify)
        if not r.ok:
            logger.error("New session request failed: %s", r.text)
            return

        status_url = r.headers.get("location")
        if status_url:
            ## we got a redirect
            status = {}
            while True:
                logger.debug("Checking %s", status_url)
                s = requests.get(status_url, verify=self.verify)
                if not s.ok:
                    logger.error('Checking failed: %s', s.text)
                    return

                status = s.json()
                if status['status'] == 'PENDING':
                    logger.debug("Status: %s", status['status'])
                    time.sleep(1)
                else:
                    break
            session = status['result'][0] ## [1] is another status code...
            if "error" in session:
                logger.error("Error in getting a snode: %s", session["error"])
                raise Exception
        else:
            session = r.json()

        try:
            logger.info("Session: %r", session)
            if direct:
                snode_ip = session["snode"]["ip"]
                proxy_url = snode_ip
                snode_port = session["port"]
                ws_url = "%s://%s:%d/" % ("ws", snode_ip, snode_port)
            else:
                field = "ip" if use_ip else "hostname"
                proxy_url = session['snode']['datacentre']['proxy'][field]
                ws_url = 'wss://' + proxy_url + '/' + session['uuid']
            logger.info("Connecting to websocket: %s", ws_url)
            conn = websocket.create_connection(ws_url, sslopt={"check_hostname": self.verify})
            logger.info("Connected.")
        #except Exception, e:
        except Exception as e:
            logger.error("Unable to connect to websocket: %s", e)
            raise e

        self.session_id = session['id']
        self.proxy_url = proxy_url
        self.conn = conn
        self.session = session
        sessions[session["uuid"]] += 1

    def setgrammar(self, grammar):  ## backend grammar object: {"data": {...}, "type": "confusion_network"}
        data = {"jsonrpc": "2.0",
                'type': 'jsonrpc',
                'method': 'set_grammar',
                'params': grammar,
                "id": rpcid.next()}
        asr.spraaklab.schema.validate_rpc_grammar(grammar)
        self.conn.send(json.dumps(data))
        result = json.loads(self.conn.recv())
        if result.get("error"):
            logger.error("Exercise validation error: %s", result)
        return result

    def set_alternatives_grammar(self, *args, **kwargs):
        if "version" not in kwargs:
            kwargs["version"] = self.grammar_version
        return self.setgrammar(alternatives_grammar(*args, **kwargs))

    def recognize_wav(self, wavf):
        w = wave.open(wavf, 'r')
        nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
        if nchannels > 1:
            logging.error("Please use .wav with only 1 channel, found %d channels in %s", nchannels, wavf)
            return
        if sampwidth != 2:
            logging.error("Please use .wav with 2-byte PCM data, found %d bytes in %s", sampwidth, wavf)
            return
        if framerate != 16000.0:
            logging.error("Please use .wav sampled at 16000 Hz, found %1.0f in %s", framerate, wavf)
            return
        if comptype != 'NONE':
            logging.error("Please use .wav with uncompressed data, found %s in %s", compname, wavf)
            return
        buf = w.readframes(nframes)
        w.close()
        return self.recognize_data(buf)

    def recognize_data(self, buf):
        nbytes_sent = 0
        start = time.time()
        for j in range(0, len(buf), buffer_size):
            # send the raw bytes slice; wrapping it in str() would send the
            # textual repr of a bytes object under Python 3
            audio_packet = buf[j:j + buffer_size]
            nbytes_sent += len(audio_packet)
            self.conn.send_binary(audio_packet)
        self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))