diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3c4efe2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,261 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. + +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ + +# Visual Studio 2015 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# DNX +project.lock.json +project.fragment.lock.json +artifacts/ + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# TODO: Comment the next line if you want to checkin your web deploy settings +# but database connection strings (with potential passwords) will be unencrypted +#*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/packages/* +# except build/, which is used as an MSBuild target. 
+!**/packages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/packages/repositories.config +# NuGet v3's project.json files produces more ignoreable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +node_modules/ +orleans.codegen.cs + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# JetBrains Rider +.idea/ +*.sln.iml + +# CodeRush +.cr/ + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc \ No newline at end of file diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index a6c86bc..b0dbc23 100644 Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ diff --git a/acoustic_model.sln b/acoustic_model.sln index 37a1335..5c7f4e7 100644 --- a/acoustic_model.sln +++ b/acoustic_model.sln @@ -18,8 +18,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution ..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py ..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py ..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py - ..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py ..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py + ..\..\..\..\..\OneDrive\WSL\python-novo-api\test\testgrammar.py = ..\..\..\..\..\OneDrive\WSL\python-novo-api\test\testgrammar.py EndProjectSection EndProject Global diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc index 4fac91d..1057f0f 100644 Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj index 7a2f4b5..a2b8e35 100644 --- a/acoustic_model/acoustic_model.pyproj +++ 
b/acoustic_model/acoustic_model.pyproj @@ -4,7 +4,7 @@ 2.0 4d8c8573-32f0-4a62-9e62-3ce5cc680390 . - performance_check.py + check_novoapi.py . @@ -25,6 +25,9 @@ Code + + Code + Code @@ -34,7 +37,10 @@ Code - + + Code + + Code diff --git a/acoustic_model/check_novoapi.py b/acoustic_model/check_novoapi.py new file mode 100644 index 0000000..40defe6 --- /dev/null +++ b/acoustic_model/check_novoapi.py @@ -0,0 +1,38 @@ +import os +os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') + +import sys +import csv +#import subprocess +#from collections import Counter +#import re + +import numpy as np +import pandas as pd +#import matplotlib.pyplot as plt +#from sklearn.metrics import confusion_matrix + +import acoustic_model_functions as am_func +import convert_xsampa2ipa +import defaultfiles as default + +from forced_alignment import pyhtk + +import novoapi + +## ======================= convert phones ====================== +mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) + +stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx) + +phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx) +df = pd.read_excel(phonelist_novo70_, 'list') + +translation_key = dict() +for ipa, novo70 in zip(df['IPA_simple'], df['novo70_simple']): + if not pd.isnull(ipa): + print('{0}:{1}'.format(ipa, novo70)) + translation_key[ipa] = novo70 + +#df = pd.read_excel(stimmen_transcription, 'check') + diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py index 74d9a9b..9f4d4fa 100644 --- a/acoustic_model/defaultfiles.py +++ b/acoustic_model/defaultfiles.py @@ -27,10 +27,14 @@ config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite') #AcousticModel = config['pyHTK']['AcousticModel'] repo_dir = r'C:\Users\Aki\source\repos' -ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter') +ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter') forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment') -fame_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus' -experiments_dir = r'c:\OneDrive\Research\rug\experiments' +fame_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus' + +experiments_dir = r'c:\OneDrive\Research\rug\experiments' +stimmen_transcription_xlsx = os.path.join(experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx') +stimmen_data_dir = os.path.join(experiments_dir, 'stimmen', 'data') +phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') +phonelist_novo70_xlsx = os.path.join(experiments_dir, 'Nederlandse phonesets_aki.xlsx') -phonelist = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') \ No newline at end of file diff --git a/acoustic_model/fa_test.py b/acoustic_model/fa_test.py index 3a1bb08..1907949 100644 --- a/acoustic_model/fa_test.py +++ b/acoustic_model/fa_test.py @@ -2,15 +2,52 @@ import os import sys os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') +import numpy as np + import defaultfiles as default sys.path.append(os.path.join(default.repo_dir, 'forced_alignment')) -from forced_alignment import forced_alignment +from forced_alignment import forced_alignment, lexicon, convert_phone_set -wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav' -forced_alignment( - wav_file, - #'Australiƫ' - 'BUFFETCOUPON COULISSEN DOUANE' - ) +#wav_file = 
r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav'
+#forced_alignment(
+#    wav_file,
+#    'Australië'
+#    #'BUFFETCOUPON COULISSEN DOUANE'
+#    )
+# according to: http://lands.let.ru.nl/cgn/doc_Dutch/topics/version_1.0/annot/phonetics/fon_prot.pdf
+phone_list_cgn = ['p', 'b', 't', 'd', 'k', 'g',                  # plosives
+                  'f', 'v', 's', 'z', 'S', 'Z', 'x', 'G', 'h',   # fricatives
+                  'N', 'm', 'n', 'J', 'l', 'r', 'w', 'j',        # sonorants
+                  'I', 'E', 'A', 'O', 'Y',                       # short vowels
+                  'i', 'y', 'e', '2', 'a', 'o', 'u',             # long vowels
+                  '@',                                           # schwa
+                  'E+', 'Y+', 'A+',                              # diphthongs
+                  'E:', 'Y:', 'O:',                              # loan vowels
+                  'E~', 'A~', 'O~', 'Y~'                         # nasal vowels
+                  ]
+
+# load words from the lexicon.
+lexicon_file = r'C:\cygwin64\home\Aki\acoustic_model\material\barbara\2010_2510_lexicon_pronvars_HTK.txt'
+with open(lexicon_file, 'r') as f:
+    lines = f.readlines()
+
+words = []
+for line in lines:
+    line_split = line.split()
+    if len(line_split) > 0:
+        word = line_split[0]
+        word = word.replace('+s', '')  # strings are immutable; the result must be assigned back
+        # split compounds on '-' and collect the individual parts.
+        words.extend(word.split('-'))
+words = list(np.unique(words))
+
+pronunciations = lexicon._grapheme_to_phoneme(words)
+htks = []
+phone_list = set()
+for word in pronunciations.keys():
+    ipa = pronunciations[word]
+    htk = convert_phone_set.split_ipa(ipa)
+    htks.append(htk)
+    phone_list = phone_list | set(htk)
\ No newline at end of file
diff --git a/acoustic_model/forced_alignment_novo.py b/acoustic_model/forced_alignment_novo.py
new file mode 100644
index 0000000..dc83dfc
--- /dev/null
+++ b/acoustic_model/forced_alignment_novo.py
@@ -0,0 +1,133 @@
+#
+# forced alignment using novo-api.
+#
+# *** IMPORTANT ***
+# This file should be treated as confidential.
+# This file should not be copied or uploaded to public sites.
+#
+# NOTES:
+# Usage of the novo api: https://bitbucket.org/novolanguage/python-novo-api
+# I could not make it work, as described in my mail to Martijn Bartelds on 2018/12/03.
+# Following his advice, I modified testgrammar.py and turned it into a function.
+#
+# To run on Python 3.6, the following changes were made to novo-api:
+# (1) backend/__init__.py
+#     - #import session
+#       from . import session
+# (2) backend/session.py
+#     - #except Exception, e:
+#       except Exception as e:
+#     - #print self.last_message
+#       print(self.last_message)
+# (3) asr/segments/praat.py
+#     - def print_tier(output, title, begin, end, segs, (format, formatter))
+#       def print_tier(output, title, begin, end, segs, format, formatter):
+# (4) asr/spraaklab/__init__.py
+#     - #import schema
+#       from . import schema
+# (5) asr/spraaklab/schema.py
+#     - #print data, "validated not OK", e.message
+#       print("{0} validated not OK {1}".format(data, e.message))
+#     - #print data, "validated OK"
+#       print("{} validated OK".format(data))
+#     - #if isinstance(object, basestring):
+#       if isinstance(object, str):
+#
+# Aki Kunikoshi
+# 428968@gmail.com
+#

+import argparse
+import json
+
+from novoapi.backend import session
+
+# username / password cannot be passed as arguments...
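+# A possible alternative (a sketch, not part of the original script): read the
+# defaults from environment variables so that real credentials do not have to
+# live in the repository. The variable names NOVO_USER / NOVO_PASSWORD are an
+# assumption, not an official novo-api convention.
+#
+#   import os
+#   p = argparse.ArgumentParser()
+#   p.add_argument("--user", default=os.environ.get('NOVO_USER'))
+#   p.add_argument("--password", default=os.environ.get('NOVO_PASSWORD'))
+#   args = p.parse_args()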
+p = argparse.ArgumentParser() +#p.add_argument("--user", default=None) +#p.add_argument("--password", default=None) +p.add_argument("--user", default='martijn.wieling') +p.add_argument("--password", default='fa0Thaic') +args = p.parse_args() + +wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav' + +rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir) + +grammar = { + "type": "confusion_network", + "version": "1.0", + "data": { + "kind": "sequence", + "elements": [ + { + "kind": "word", + "pronunciation": [ + { + "phones": [ + "wv", + "a1", + "n" + ], + "id": 0 + }, + { + "phones": [ + "wv", + "uh1", + "n" + ], + "id": 1 + } + ], + "label": "one" + }, + { + "kind": "word", + "pronunciation": [ + { + "phones": [ + "t", + "uw1" + ], + "id": 0 + } + ], + "label": "two" + }, + { + "kind": "word", + "pronunciation": [ + { + "phones": [ + "t", + "r", + "iy1" + ], + "id": 0 + }, + { + "phones": [ + "s", + "r", + "iy1" + ], + "id": 1 + } + ], + "label": "three" + } + ] + }, + "return_objects": [ + "grammar" + ], + "phoneset": "novo70" +} + +res = rec.setgrammar(grammar) +#print "Set grammar result", res + +#res = rec.recognize_wav("test/onetwothree.wav") +res = rec.recognize_wav(wav_file) +#print "Recognition result:", json.dumps(res.export(), indent=4) diff --git a/acoustic_model/performance_check.py b/acoustic_model/htk_vs_kaldi.py similarity index 95% rename from acoustic_model/performance_check.py rename to acoustic_model/htk_vs_kaldi.py index f4dd82a..3ec6a2c 100644 --- a/acoustic_model/performance_check.py +++ b/acoustic_model/htk_vs_kaldi.py @@ -3,7 +3,7 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') import sys import csv -import subprocess +#import subprocess from collections import Counter import re @@ -20,8 +20,6 @@ from forced_alignment import pyhtk ## ======================= user define ======================= -excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx') -data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data') wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k @@ -48,8 +46,6 @@ load_forced_alignment_kaldi = 1 eval_forced_alignment_kaldi = 1 - - ## ======================= add paths ======================= sys.path.append(os.path.join(default.repo_dir, 'forced_alignment')) from forced_alignment import convert_phone_set @@ -62,15 +58,15 @@ from evaluation import plot_confusion_matrix ## ======================= convert phones ====================== mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) -xls = pd.ExcelFile(excel_file) +xls = pd.ExcelFile(default.stimmen_transcription_xlsx) ## check conversion -#df = pd.read_excel(xls, 'frequency') +#df = pd.read_excel(xls, 'check') #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): -# #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_) -# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) -# if not ipa_converted == ipa: -# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) +# if xsampa is not '/': +# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) +# if not ipa_converted == ipa: +# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) ## check phones included in FAME! 
@@ -160,7 +156,7 @@ if do_forced_alignment_htk:
             htk_dict_file = os.path.join(htk_dict_dir, word + '.dic')
             pyhtk.doHVite(wav_file, label_file, htk_dict_file, fa_file, default.config_hvite,
-                          default.phonelist, acoustic_model)
+                          default.phonelist_friesian_txt, acoustic_model)
             os.remove(label_file)
 
             prediction = am_func.read_fileFA(fa_file)
@@ -231,7 +227,7 @@ if make_kaldi_data_files:
 
 ## ======================= make lexicon txt which is used by Kaldi =======================
 if make_kaldi_lexicon_txt:
-    option_num = 6
+    option_num = 7
 
     # remove previous file.
     if os.path.exists(lexicon_txt):
@@ -281,10 +277,10 @@ if load_forced_alignment_kaldi:
     phones_txt = os.path.join(default.kaldi_dir, 'data', 'lang', 'phones.txt')
     merged_alignment_txt = os.path.join(default.kaldi_dir, 'exp', 'tri1_alignme', 'merged_alignment.txt')
 
-    #filenames = np.load(data_dir + '\\filenames.npy')
-    #words = np.load(data_dir + '\\words.npy')
-    #pronunciations = np.load(data_dir + '\\pronunciations_ipa.npy')
-    #pronvar_list_all = np.load(data_dir + '\\pronvar_list_all.npy')
+    #filenames = np.load(stimmen_data_dir + '\\filenames.npy')
+    #words = np.load(stimmen_data_dir + '\\words.npy')
+    #pronunciations = np.load(stimmen_data_dir + '\\pronunciations_ipa.npy')
+    #pronvar_list_all = np.load(stimmen_data_dir + '\\pronvar_list_all.npy')
     #word_list = np.unique(words)
 
     # load the mapping between phones and ids.
@@ -369,7 +365,7 @@ if eval_forced_alignment_htk:
         if compare_hmm_num:
             f_result.write("{},".format(hmm_num_str))
 
-        #match = np.load(data_dir + '\\match_hmm' + hmm_num_str + '.npy')
+        #match = np.load(stimmen_data_dir + '\\match_hmm' + hmm_num_str + '.npy')
         #prediction = np.load(os.path.join(result_dir, 'htk', 'predictions_hmm' + hmm_num_str + '.npy'))
         #prediction = pd.Series(prediction, index=df.index, name='prediction')
         #result = pd.concat([df, prediction], axis=1)
diff --git a/novoapi_for_python3x/__init__.py b/novoapi_for_python3x/__init__.py
new file mode 100644
index 0000000..9ff2f76
--- /dev/null
+++ b/novoapi_for_python3x/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+__version__ = "0.2"
+
+#import backend
+from . import backend
diff --git a/novoapi_for_python3x/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..b5e000d
Binary files /dev/null and b/novoapi_for_python3x/__pycache__/__init__.cpython-36.pyc differ
diff --git a/novoapi_for_python3x/asr/__init__.py b/novoapi_for_python3x/asr/__init__.py
new file mode 100644
index 0000000..2832e82
--- /dev/null
+++ b/novoapi_for_python3x/asr/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+#import segments
+#import spraaklab
+from . import segments
+from . import spraaklab
\ No newline at end of file
diff --git a/novoapi_for_python3x/asr/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/asr/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..608781a
Binary files /dev/null and b/novoapi_for_python3x/asr/__pycache__/__init__.cpython-36.pyc differ
diff --git a/novoapi_for_python3x/asr/segments/__init__.py b/novoapi_for_python3x/asr/segments/__init__.py
new file mode 100644
index 0000000..737e432
--- /dev/null
+++ b/novoapi_for_python3x/asr/segments/__init__.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+
+from .segments import Segmentation
+from .praat import seg2tg
diff --git a/novoapi_for_python3x/asr/segments/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/asr/segments/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..6e69b7e
Binary files /dev/null and b/novoapi_for_python3x/asr/segments/__pycache__/__init__.cpython-36.pyc differ
diff --git a/novoapi_for_python3x/asr/segments/__pycache__/praat.cpython-36.pyc b/novoapi_for_python3x/asr/segments/__pycache__/praat.cpython-36.pyc
new file mode 100644
index 0000000..7235caa
Binary files /dev/null and b/novoapi_for_python3x/asr/segments/__pycache__/praat.cpython-36.pyc differ
diff --git a/novoapi_for_python3x/asr/segments/__pycache__/segments.cpython-36.pyc b/novoapi_for_python3x/asr/segments/__pycache__/segments.cpython-36.pyc
new file mode 100644
index 0000000..eab7f26
Binary files /dev/null and b/novoapi_for_python3x/asr/segments/__pycache__/segments.cpython-36.pyc differ
diff --git a/novoapi_for_python3x/asr/segments/praat.py b/novoapi_for_python3x/asr/segments/praat.py
new file mode 100644
index 0000000..fbc9e4c
--- /dev/null
+++ b/novoapi_for_python3x/asr/segments/praat.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
+
+import codecs
+
+# The Python 2 "print >> output, ..." statements are syntax errors in 3.6;
+# they are rewritten below with the print() function.
+def print_header(output, begin, end, nr_tiers):
+    print('File type = "ooTextFile"', file=output)
+    print('Object class = "TextGrid"', file=output)
+    print('', file=output)
+    print('xmin = %s' % begin, file=output)
+    print('xmax = %s' % end, file=output)
+    print('tiers? <exists>', file=output)
+    print('size = %d' % nr_tiers, file=output)
+    print('item []:', file=output)
+
+
+def print_info_tier(output, title, begin, end, label):
+    print('\titem [%d]:' % 0, file=output)
+    print('\t\tclass = "IntervalTier"', file=output)
+    print('\t\tname = "%s"' % title, file=output)
+    print('\t\txmin = %s' % begin, file=output)
+    print('\t\txmax = %s' % end, file=output)
+    print('\t\tintervals: size = %d' % 1, file=output)
+
+    print('\t\tintervals [1]:', file=output)
+    print('\t\t\txmin = %s' % begin, file=output)
+    print('\t\t\txmax = %s' % end, file=output)
+    print('\t\t\ttext = "%s"' % label, file=output)
+
+
+#def print_tier(output, title, begin, end, segs, (format, formatter)):
+def print_tier(output, title, begin, end, segs, format, formatter):
+    print('\titem [%d]:' % 0, file=output)
+    print('\t\tclass = "IntervalTier"', file=output)
+    print('\t\tname = "%s"' % title, file=output)
+    print('\t\txmin = %s' % begin, file=output)
+    print('\t\txmax = %s' % end, file=output)
+    print('\t\tintervals: size = %d' % len(segs), file=output)
+
+    count = 1
+    for seg in segs:
+        #print(seg)
+        print('\t\tintervals [%d]:' % count, file=output)
+        print('\t\t\txmin = %s' % repr(int(seg['begin']) / 100.0), file=output)
+        print('\t\t\txmax = %s' % repr(int(seg['end']) / 100.0), file=output)
+        string = '\t\t\ttext = "' + format + '"'
+        print(string % formatter(seg['label']), file=output)
+        count += 1
+
+
+def seg2tg(fname, segments):
+    if not segments:
+        return
+    output = codecs.open(fname, "w", encoding="utf-8")
+
+    confidences = []
+    word_labels = []
+    phones = []
+
+    for s in segments:
+        conf = s.llh if hasattr(s, "llh") else s.score
+        confidences.append({'begin': s.begin, 'end': s.end, 'label': conf})
+        word_labels.append({'begin': s.begin, 'end': s.end, 'label': s.label})
+        for p in s.phones:
+            phones.append({'begin': p.begin, 'end': p.end, 'label': p.label})
+
+    begin = repr(int(segments[0].begin) / 100.0)
+    end = repr(int(segments[-1].end) / 100.0)
+
+    nr_tiers = 3
+    print_header(output, begin, end, nr_tiers)
+    # print_tier now takes format and formatter as separate arguments (Python 3
+    # dropped tuple parameters), so the calls pass them unpacked.
+    print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
+    print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
+    print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
+
+    output.close()
diff --git a/novoapi_for_python3x/asr/segments/segments.py b/novoapi_for_python3x/asr/segments/segments.py
new file mode 100644
index 0000000..ee5dbcc
--- /dev/null
+++ b/novoapi_for_python3x/asr/segments/segments.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
+
+## These classes can be initialized with dictionaries, as they are returned by the python spraaklab recognition system.
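+## A sketch of the expected input shape, inferred from the attribute access in
+## the classes below (the key names match the code; the values are made-up
+## illustrations, not real recognition output):
+##
+##   Segmentation([{"begin": 0, "end": 50, "label": "one", "score": 0.95,
+##                  "llh": -1.2,
+##                  "phones": [{"begin": 0, "end": 20, "label": "wv",
+##                              "score": 0.90}, ...]}])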
+
+class Segment(object):
+    def __init__(self, segment):
+        self.begin = segment["begin"]
+        self.end = segment["end"]
+        self.begintime = segment.get("beginTime", self.begin / 100.0)
+        self.endtime = segment.get("endTime", self.end / 100.0)
+        self.label = segment["label"]
+        self.score = segment["score"]
+        if "llh" in segment:
+            self.llh = segment["llh"]
+        if "phones" in segment:
+            self.type = "word"
+            self.phones = Segmentation(segment["phones"], ["sil"])
+            if hasattr(self.phones[0], "llh"):
+                self.minllh = min([s.llh for s in self.phones])  ## the current word llh for error detection
+        else:
+            self.type = "phone"
+
+    def __repr__(self):
+        res = "%8.3f -- %8.3f score %8.3f " % (self.begintime, self.endtime, self.score)
+        if hasattr(self, "llh"):
+            res += "llh %8.3f " % self.llh
+        res += self.label  # str in Python 3; the old .encode("utf8") would concatenate bytes to str and raise TypeError
+        return res
+
+    def export(self):
+        r = {"begin": self.begin, "end": self.end, "label": self.label, "score": self.score, "type": self.type}
+        if hasattr(self, "llh"):
+            r["llh"] = self.llh
+        if hasattr(self, "phones"):
+            r["phones"] = self.phones.export()
+        return r
+
+class Segmentation(object):
+    def __init__(self, segments, sils=["", "", "!sil"]):
+        """Create a segmentation from a spraaklab recognition structure.
+        segments: an array of words (or phones), represented by a dict with
+        "begin", "end", "label", "score", and "llh" keys.  Words can also have
+        "phones" which is another array of segments."""
+        self.segments = [Segment(s) for s in segments]
+        if self.segments:
+            self.type = self.segments[0].type
+        else:
+            self.type = None
+        self.sils = sils
+        self.orig = segments   ## in case we want to have access to the original recognition structure
+
+    def __getitem__(self, item):
+        return self.segments[item]
+
+    def __repr__(self):
+        ns = len(self.segments)
+        res = "Segmentation with %d %s%s" % (ns, self.type, "" if ns == 1 else "s")
+        for seg in self.segments:
+            res += "\n  " + repr(seg)
+        return res
+
+    def __len__(self):
+        return len(self.segments)
+
+    def score(self, skip=None):
+        if not skip:
+            skip = self.sils
+        s = 0.0
+        for seg in self.segments:
+            if seg.label not in skip:
+                s += seg.score
+        return s
+
+    def llhs(self, skip=None):
+        if not skip:
+            skip = self.sils
+        return [seg.llh for seg in self.segments if hasattr(seg, "llh") and seg.label not in skip]
+
+    def llh(self, skip=None):
+        return sum(self.llhs(skip))
+
+    def minllh(self, skip=None):
+        llhs = self.llhs(skip)
+        if llhs:
+            return min(llhs)
+        else:
+            return None
+
+    def labels(self, skip=None):
+        if not skip:
+            skip = self.sils
+        return [seg.label for seg in self.segments if seg.label not in skip]
+
+    def sentence(self, skip=None):
+        return " ".join(self.labels(skip))
+
+    def export(self):
+        return [seg.export() for seg in self.segments]
\ No newline at end of file
diff --git a/novoapi_for_python3x/asr/spraaklab/__init__.py b/novoapi_for_python3x/asr/spraaklab/__init__.py
new file mode 100644
index 0000000..2c5f2fd
--- /dev/null
+++ b/novoapi_for_python3x/asr/spraaklab/__init__.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+
+#import schema
+from . 
import schema \ No newline at end of file diff --git a/novoapi_for_python3x/asr/spraaklab/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/asr/spraaklab/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..5a6f6ad Binary files /dev/null and b/novoapi_for_python3x/asr/spraaklab/__pycache__/__init__.cpython-36.pyc differ diff --git a/novoapi_for_python3x/asr/spraaklab/__pycache__/schema.cpython-36.pyc b/novoapi_for_python3x/asr/spraaklab/__pycache__/schema.cpython-36.pyc new file mode 100644 index 0000000..aebdbf5 Binary files /dev/null and b/novoapi_for_python3x/asr/spraaklab/__pycache__/schema.cpython-36.pyc differ diff --git a/novoapi_for_python3x/asr/spraaklab/schema.py b/novoapi_for_python3x/asr/spraaklab/schema.py new file mode 100644 index 0000000..8efc49f --- /dev/null +++ b/novoapi_for_python3x/asr/spraaklab/schema.py @@ -0,0 +1,273 @@ +#!/usr/bin/env python +## (c) 2017 NovoLanguage, author: David A. van Leeuwen + +## The purpose of this to define the grammar structure in a json schema, so that it can be validated, +## (de)serialized, and perhaps even automatically converted to a Python class structure. + +import json +import jsonschema + +grammar_schema_v10 = { + "$schema": "http://json-schema.org/schema#", + "title": "NovoLanguage grammar", + "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition", + "$ref": "#/definitions/group", + "definitions": { + "phones": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "pronunciation": { + "type": "object", + "properties": { + "phones": { + "$ref": "#/definitions/phones" + }, + "syllables": { + "type": "array", + "items": { + "$ref": "#/definitions/syllable" + }, + "minItems": 1 + }, + "id": { + "type": "integer", + "description": "ID to distinguish this pronunciation from other variants" + }, + "meta": { + "type": "object" + } + }, + "required": ["phones"] + }, + "syllable": { + "type": "object", + "properties": { + "begin": { + "type": "integer", + "minimum": 0 + }, + "end": { + "type": "integer", + "minimum": 0 + }, + "stress": { + "type": "integer", + "minimum": 0 + }, + "tone": { + "type": "integer", + "minimum": 0 + } + }, + "required": ["begin", "end"] + }, + "word": { + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": ["word"] + }, + "label": { + "type": "string" + }, + "pronunciation": { + "anyOf": [ + { + "$ref": "#/definitions/pronunciation" + }, + { + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/pronunciation" + }, + { + "$ref": "#/definitions/phones" + } + ] + }, + "minItems": 1 + }, + { + "$ref": "#/definitions/phones" + } + + ] + }, + "syllables": { + "type": "array", + "items": { + "$ref": "#/definitions/syllable" + } + }, + "graphemes": { + "type": "array", + "items": { + "type": "string" + } + }, + "id": { + "type": "integer", + "description": "ID to distinguish this word from other words (with possibly the same label)" + }, + "meta": { + "type": "object" + } + }, + "required": ["label"] + }, + "element": { + "title": "element", + "oneOf": [ + { + "$ref": "#/definitions/word" + }, + { + "$ref": "#/definitions/group" + }, + { + "type": ["string", "null"] + } + ] + }, + "group": { + "title": "element group", + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": ["sequence", "alternatives", "order"] + }, + "elements": { + "type": "array", + "items": { + "$ref": "#/definitions/element" + }, + "minItems": 1, + }, + "meta": { + "type": "object" + } + }, 
+ "required": ["kind", "elements"] + } + } +} + +grammar_schema_v01 = { + "$schema": "http://json-schema.org/schema#", + "title": "NovoLanguage grammar v0.1", + "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["multiple_choice", "word_order"] + }, + "parts": { + "type": "array", + "minItems": 1, + "maxItems": 5, + "items": { + "type": ["string", "array"], + "items": { + "type": ["string"] + } + } + } + } +} + +grammar_rpc_schema = { + "$schema": "http://json-schema.org/schema#", + "title": "NovoLanguage RPC grammar", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["confusion_network"] + }, + "version": { + "type": "string", + "default": "v0.1" + }, + "data": { + "type": "object" + }, + "return_dict": { + "type": "boolean" + }, + "return_objects": { + "type": "array", + "items": { + "type": "string", + "enum": ["dict", "grammar"] + } + }, + "phoneset": { + "type": "string", + "enum": ["cmu69", "novo70", "mdbg115"] + }, + "parallel_silence": { + "type": "boolean" + } + }, + "required": ["type", "data"] +} + +def validate(object, schema=grammar_schema_v10): + #if isinstance(object, basestring): + if isinstance(object, str): + object = json.loads(object) + if not isinstance(object, dict): + raise TypeError("Expected dict or json string") + try: + jsonschema.validate(object, schema) + except jsonschema.ValidationError: + return False + except Exception: + raise + else: + return True + +def validate_rpc_grammar(message): + """validate an rpc grammar message""" + if not validate(message, grammar_rpc_schema): + raise ValueError("Not a valid RPC grammar") + version = message.get("version", "0.1") + data = message["data"] + if version == "0.1": + if not validate(data, grammar_schema_v01): + raise ValueError("Not a valid grammar v0.1") + elif version == "1.0": + if not validate(data, grammar_schema_v10): + raise ValueError("Not a valid grammar v1.0") + else: + raise ValueError("Unsupported schema version") + + +## test +def test(data=None): + if not data: + data = {"kind": "sequence", "elements": [ + {"kind": "alternatives", "elements": ["a plain string", "an alternative string"]}, + {"kind": "word", "label": "a word", "pronunciation": {"phones": ["ah", "w", "er", "d"]}}, + {"kind": "order", "elements": [{"kind": "word", "label": "another word", "visible": False}, "last word"]}]} + try: + jsonschema.validate(data, schema) + except jsonschema.ValidationError as e: + #print data, "validated not OK", e.message + print("{0} validated not OK {1}".format(data, e.message)) + else: + #print data, "validated OK" + print("{} validated OK".format(data)) + + +if __name__ == "__main__": + test() diff --git a/novoapi_for_python3x/backend/__init__.py b/novoapi_for_python3x/backend/__init__.py new file mode 100644 index 0000000..c52d472 --- /dev/null +++ b/novoapi_for_python3x/backend/__init__.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python + +#import session +from . 
\ No newline at end of file
diff --git a/novoapi_for_python3x/backend/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/backend/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..109cfba
Binary files /dev/null and b/novoapi_for_python3x/backend/__pycache__/__init__.cpython-36.pyc differ
diff --git a/novoapi_for_python3x/backend/__pycache__/session.cpython-36.pyc b/novoapi_for_python3x/backend/__pycache__/session.cpython-36.pyc
new file mode 100644
index 0000000..856150c
Binary files /dev/null and b/novoapi_for_python3x/backend/__pycache__/session.cpython-36.pyc differ
diff --git a/novoapi_for_python3x/backend/session.py b/novoapi_for_python3x/backend/session.py
new file mode 100644
index 0000000..b08a096
--- /dev/null
+++ b/novoapi_for_python3x/backend/session.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python
+# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
+
+## Recognition interface for actual backend. Adapted from player.asr.debug.
+
+import json
+import sys
+import wave
+import requests
+import websocket
+import logging
+import collections
+
+import time
+
+from .. import asr
+
+logger = logging.getLogger(__name__)
+
+## turn off annoying warnings
+requests.packages.urllib3.disable_warnings()
+logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(logging.WARN)
+
+buffer_size = 4096
+gm = "gm.novolanguage.com" ## dev
+protocol = "https"
+port = 443
+apiversion = 0
+
+sessions = collections.Counter()
+
+def segmentation(result):
+    """converts a raw backend recognition result to a segment of novo.asr.segments class Segmentation"""
+    for w in result:
+        w["score"] = w["confidence"]["prob"]
+        w["llh"] = w["confidence"]["llr"]
+        w["label"] = w["label"]["raw"]
+        w["begin"] /= 10
+        w["end"] /= 10
+        for p in w["phones"]:
+            p["score"] = p["confidence"]["prob"]
+            p["llh"] = p["confidence"]["llr"]
+            p["begin"] /= 10
+            p["end"] /= 10
+    return asr.segments.Segmentation(result)
+
+class rpcid:
+    id = 0
+    @staticmethod
+    def next():
+        rpcid.id += 1
+        return rpcid.id
+
+class Recognizer(object):
+    def __init__(self, lang="en", gm=gm, grammar_version="0.1", user=None, password=None, snodeid=None, keepopen=False):
+        self.lang = lang
+        self.keepopen = keepopen
+        self.api_url = "%s://%s:%d/v%d" % (protocol, gm, port, apiversion)
+        self.verify = False
+        self.headers = {"Content-Type": "application/json"}
+        self.login_user(user, password)
+        data = {"l2": lang, "local": False, "skipupload": True}
+        if snodeid:
+            data["snodeid"] = snodeid
+        self.conn = None
+        self.init_session(data)
+        self.grammar_version = grammar_version
+        self.last_message = None
+
+    def login_user(self, username, password):
+        # obtain authentication token of user
+        logger.info('obtain auth token at %s', self.api_url)
+        data = {
+            'username': username,
+            'password': password
+        }
+        try:
+            r = requests.post(self.api_url + '/publishers/1/login', headers=self.headers, data=json.dumps(data), verify=self.verify)
+        except Exception as e:
+            logger.error("Cannot post request to GM API for user login: %s", e)  # Python 3 exceptions have no .message
+            sys.exit(-1)
+        assert r.ok, r.reason
+        result = r.json()
+        if "errors" in result["response"]:
+            logger.info("Error in logging in: %s", result["response"]["errors"])
+            sys.exit(-1)
+
+        user_auth_token = result['response']['user']['authentication_token']
+        logger.info("User auth token is: %s", user_auth_token)
+
+        # set auth token in header
+        self.headers['Authentication-Token'] = user_auth_token
+
+    def init_session(self, data, direct=False, use_ip=False):
+        logger.info('Request new session: %s', data)
+        r = requests.post(self.api_url + '/sessions', headers=self.headers, data=json.dumps(data), verify=self.verify)
+        if not r.ok:
+            logger.error("New session request failed: %s", r.text)
+            return
+
+        status_url = r.headers.get("location")
+        if status_url:
+            ## we got a redirect
+            status = {}
+            while True:
+                logger.debug("Checking %s", status_url)
+                s = requests.get(status_url, verify=self.verify)
+                if not s.ok:
+                    logger.error('Checking Failed: %s', s.text)
+                    return
+
+                status = s.json()
+                if status['status'] == 'PENDING':
+                    logger.debug("Status: %s", status['status'])
+                    time.sleep(1)
+                else:
+                    break
+            session = status['result'][0]   ## [1] is another status code...
+            if "error" in session:
+                logger.error("Error in getting a snode: %s", session["error"])
+                raise Exception
+        else:
+            session = r.json()
+
+        try:
+            logger.info("Session: %r", session)
+            if direct:
+                snode_ip = session["snode"]["ip"]
+                proxy_url = snode_ip
+                snode_port = session["port"]
+                ws_url = "%s://%s:%d/" % ("ws", snode_ip, snode_port)
+            else:
+                field = "ip" if use_ip else "hostname"
+                proxy_url = session['snode']['datacentre']['proxy'][field]
+                ws_url = 'wss://' + proxy_url + '/' + session['uuid']
+            logger.info("Connecting to websocket: %s", ws_url)
+            conn = websocket.create_connection(ws_url, sslopt={"check_hostname": self.verify})
+            logger.info("Connected.")
+        #except Exception, e:
+        except Exception as e:
+            logger.error("Unable to connect to websocket: %s", e)  # Python 3 exceptions have no .message
+            raise e
+
+        self.session_id = session['id']
+        self.proxy_url = proxy_url
+        self.conn = conn
+        self.session = session
+        sessions[session["uuid"]] += 1
+
+    def setgrammar(self, grammar):   ## backend grammar object: {"data": {...}, "type": "confusion_network"}
+        data = {"jsonrpc": "2.0",
+                'type': 'jsonrpc',
+                'method': 'set_grammar',
+                'params': grammar,
+                "id": rpcid.next()}
+        asr.spraaklab.schema.validate_rpc_grammar(grammar)
+        self.conn.send(json.dumps(data))
+        result = json.loads(self.conn.recv())
+        if result.get("error"):
+            logger.error("Exercise validation error: %s", result)
+        return result
+
+    def set_alternatives_grammar(self, *args, **kwargs):
+        if "version" not in kwargs:
+            kwargs["version"] = self.grammar_version
+        return self.setgrammar(alternatives_grammar(*args, **kwargs))
+
+    def recognize_wav(self, wavf):
+        w = wave.open(wavf, 'r')
+        nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
+        if nchannels > 1:
+            logging.error("Please use .wav with only 1 channel, found %d channels in %s", nchannels, wavf)
+            return
+        if (sampwidth != 2):
+            logging.error("Please use .wav with 2-byte PCM data, found %d bytes in %s", sampwidth, wavf)
+            return
+        if (framerate != 16000.0):
+            logging.error("Please use .wav sampled at 16000 Hz, found %1.0f in %s", framerate, wavf)
+            return
+        if (comptype != 'NONE'):
+            logging.error("Please use .wav with uncompressed data, found %s in %s", compname, wavf)
+            return
+        buf = w.readframes(nframes)
+        w.close()
+        return self.recognize_data(buf)
+
+    def recognize_data(self, buf):
+        nbytes_sent = 0
+        start = time.time()
+        for j in range(0, len(buf), buffer_size):
+            audio_packet = buf[j:j + buffer_size]  # send the raw bytes; str() would send the textual repr of the bytes in Python 3
+            nbytes_sent += len(audio_packet)
+            self.conn.send_binary(audio_packet)
+        self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
+        logger.info("Waiting for recognition result...")
+        self.last_message = self.conn.recv()   ## keep result for the interested applications
+        message = json.loads(self.last_message)
+        dur = time.time() - start
+        logger.info("Recognition took %5.3f seconds", dur)
+        if "error" in message:
+            raise RuntimeError("Error from recognition backend: %r" % message.get("error"))
+        return segmentation(message["result"]["words"])
+
+    def recognize_url(self, url):
+        start = time.time()
+        data = json.dumps({"jsonrpc": "2.0", "method": "send_audio", "id": rpcid.next(), "params": {"type": "url", "data": url, "details": ["word", "utterance"]}})
+        self.conn.send(data)
+        logger.info("Waiting for recognition result...")
+        self.last_message = self.conn.recv()   ## keep result for the interested applications
+        #print self.last_message
+        print(self.last_message)
+        message = json.loads(self.last_message)
+        dur = time.time() - start
+        logger.info("Recognition took %5.3f seconds", dur)
+        if "error" in message:
+            raise RuntimeError("Error from recognition backend: %r" % message.get("error"))
+        return segmentation(message["result"]["words"])
+
+    def __del__(self):
+        sessions[self.session["uuid"]] -= 1
+        if self.conn and sessions[self.session["uuid"]] <= 0:
+            self.conn.close()
+            url = self.api_url + '/sessions/%d' % self.session_id
+            if self.keepopen:
+                logger.info("Keeping session open...")
+            else:
+                logger.info("Closing session: %s", url)
+                r = requests.delete(url, headers=self.headers, verify=self.verify)
+                assert r.ok, r.reason
+
+def alternatives_grammar(parts, version="0.1", ret=None):
+    """Make a grammar of alternatives, as array(sequence)-of-array(alternatives)-of-strings"""
+    r = {"type": "confusion_network", "version": version}
+    if version == "0.1":
+        r["data"] = {"type": "multiple_choice", "parts": parts}
+        if isinstance(ret, list) and "dict" in ret:
+            r["return_dict"] = True
+    elif version == "1.0":
+        seqels = []
+        for part in parts:
+            altels = []
+            for alt in part:
+                words = alt.split(" ")
+                if len(words) > 1:
+                    alt = {"kind": "sequence", "elements": words}
+                altels.append(alt)
+            seqels.append({"kind": "alternatives", "elements": altels})
+        r["data"] = {"kind": "sequence", "elements": seqels}
+        if isinstance(ret, list):
+            r["return_objects"] = ret
+    else:
+        raise ValueError("Unsupported version: %s" % version)
+    asr.spraaklab.schema.validate_rpc_grammar(r)
+    return r
diff --git a/novoapi_for_python3x/utils/json/__init__.py b/novoapi_for_python3x/utils/json/__init__.py
new file mode 100644
index 0000000..75d0b5f
--- /dev/null
+++ b/novoapi_for_python3x/utils/json/__init__.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+## from https://stackoverflow.com/questions/1447287/format-floats-with-standard-json-module
+class PrettyFloat(float):
+    def __repr__(self):
+        return '%.15g' % self
+
+def pretty_floats(obj):
+    if isinstance(obj, float):
+        return PrettyFloat(obj)
+    elif isinstance(obj, dict):
+        return dict((k, pretty_floats(v)) for k, v in obj.items())
+    elif isinstance(obj, (list, tuple)):
+        return [pretty_floats(o) for o in obj]  # a list, not map(): a lazy map object is not JSON-serializable in Python 3
+    return obj
+
+def rounded_floats(obj, ndigits=15):
+    if isinstance(obj, float):
+        return PrettyFloat(round(obj, ndigits))
+    elif isinstance(obj, dict):
+        return dict((k, rounded_floats(v, ndigits)) for k, v in obj.items())
+    elif isinstance(obj, (list, tuple)):
+        return [rounded_floats(o, ndigits) for o in obj]
+    return obj
+
diff --git a/novoapi_for_python3x/utils/json/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/utils/json/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..e2c786a
Binary files /dev/null and b/novoapi_for_python3x/utils/json/__pycache__/__init__.cpython-36.pyc differ
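
For reference, a minimal end-to-end sketch of how the ported modules above fit together, mirroring the one-word grammar style used in forced_alignment_novo.py. The import path novoapi_for_python3x, the credential placeholders, and the wav filename are assumptions for illustration; a reachable NovoLanguage backend and a 16 kHz mono 16-bit PCM wav are required by recognize_wav.

    import json
    from novoapi_for_python3x.backend import session
    from novoapi_for_python3x.asr.segments import seg2tg

    # placeholder credentials; real ones must come from NovoLanguage
    rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101,
                             user="<user>", password="<password>", keepopen=True)

    # one-word confusion network in the novo70 phoneset, following the
    # grammar_schema_v10 structure validated in asr/spraaklab/schema.py
    grammar = {
        "type": "confusion_network",
        "version": "1.0",
        "data": {
            "kind": "sequence",
            "elements": [
                {"kind": "word", "label": "one",
                 "pronunciation": [{"phones": ["wv", "a1", "n"], "id": 0}]},
            ],
        },
        "return_objects": ["grammar"],
        "phoneset": "novo70",
    }
    rec.setgrammar(grammar)

    segmentation = rec.recognize_wav("onetwothree.wav")   # 16 kHz, mono, 16-bit PCM
    print(json.dumps(segmentation.export(), indent=4))    # word/phone segments as dicts
    seg2tg("onetwothree.TextGrid", segmentation)          # write a Praat TextGrid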