Compare commits
No commits in common. "3500a8cdf03da6c2c940e187b566dbd399a6037e" and "0777735979c245a70a93fee8be2506c45df60f99" have entirely different histories.
3500a8cdf0
...
0777735979
264
.gitignore
vendored
264
.gitignore
vendored
@ -1,264 +0,0 @@
|
|||||||
## Ignore Visual Studio temporary files, build results, and
|
|
||||||
## files generated by popular Visual Studio add-ons.
|
|
||||||
|
|
||||||
## important ##
|
|
||||||
.acoustic_model/forced_alignment_novo.py
|
|
||||||
|
|
||||||
# User-specific files
|
|
||||||
*.suo
|
|
||||||
*.user
|
|
||||||
*.userosscache
|
|
||||||
*.sln.docstates
|
|
||||||
|
|
||||||
# User-specific files (MonoDevelop/Xamarin Studio)
|
|
||||||
*.userprefs
|
|
||||||
|
|
||||||
# Build results
|
|
||||||
[Dd]ebug/
|
|
||||||
[Dd]ebugPublic/
|
|
||||||
[Rr]elease/
|
|
||||||
[Rr]eleases/
|
|
||||||
x64/
|
|
||||||
x86/
|
|
||||||
bld/
|
|
||||||
[Bb]in/
|
|
||||||
[Oo]bj/
|
|
||||||
[Ll]og/
|
|
||||||
|
|
||||||
# Visual Studio 2015 cache/options directory
|
|
||||||
.vs/
|
|
||||||
# Uncomment if you have tasks that create the project's static files in wwwroot
|
|
||||||
#wwwroot/
|
|
||||||
|
|
||||||
# MSTest test Results
|
|
||||||
[Tt]est[Rr]esult*/
|
|
||||||
[Bb]uild[Ll]og.*
|
|
||||||
|
|
||||||
# NUNIT
|
|
||||||
*.VisualState.xml
|
|
||||||
TestResult.xml
|
|
||||||
|
|
||||||
# Build Results of an ATL Project
|
|
||||||
[Dd]ebugPS/
|
|
||||||
[Rr]eleasePS/
|
|
||||||
dlldata.c
|
|
||||||
|
|
||||||
# DNX
|
|
||||||
project.lock.json
|
|
||||||
project.fragment.lock.json
|
|
||||||
artifacts/
|
|
||||||
|
|
||||||
*_i.c
|
|
||||||
*_p.c
|
|
||||||
*_i.h
|
|
||||||
*.ilk
|
|
||||||
*.meta
|
|
||||||
*.obj
|
|
||||||
*.pch
|
|
||||||
*.pdb
|
|
||||||
*.pgc
|
|
||||||
*.pgd
|
|
||||||
*.rsp
|
|
||||||
*.sbr
|
|
||||||
*.tlb
|
|
||||||
*.tli
|
|
||||||
*.tlh
|
|
||||||
*.tmp
|
|
||||||
*.tmp_proj
|
|
||||||
*.log
|
|
||||||
*.vspscc
|
|
||||||
*.vssscc
|
|
||||||
.builds
|
|
||||||
*.pidb
|
|
||||||
*.svclog
|
|
||||||
*.scc
|
|
||||||
|
|
||||||
# Chutzpah Test files
|
|
||||||
_Chutzpah*
|
|
||||||
|
|
||||||
# Visual C++ cache files
|
|
||||||
ipch/
|
|
||||||
*.aps
|
|
||||||
*.ncb
|
|
||||||
*.opendb
|
|
||||||
*.opensdf
|
|
||||||
*.sdf
|
|
||||||
*.cachefile
|
|
||||||
*.VC.db
|
|
||||||
*.VC.VC.opendb
|
|
||||||
|
|
||||||
# Visual Studio profiler
|
|
||||||
*.psess
|
|
||||||
*.vsp
|
|
||||||
*.vspx
|
|
||||||
*.sap
|
|
||||||
|
|
||||||
# TFS 2012 Local Workspace
|
|
||||||
$tf/
|
|
||||||
|
|
||||||
# Guidance Automation Toolkit
|
|
||||||
*.gpState
|
|
||||||
|
|
||||||
# ReSharper is a .NET coding add-in
|
|
||||||
_ReSharper*/
|
|
||||||
*.[Rr]e[Ss]harper
|
|
||||||
*.DotSettings.user
|
|
||||||
|
|
||||||
# JustCode is a .NET coding add-in
|
|
||||||
.JustCode
|
|
||||||
|
|
||||||
# TeamCity is a build add-in
|
|
||||||
_TeamCity*
|
|
||||||
|
|
||||||
# DotCover is a Code Coverage Tool
|
|
||||||
*.dotCover
|
|
||||||
|
|
||||||
# NCrunch
|
|
||||||
_NCrunch_*
|
|
||||||
.*crunch*.local.xml
|
|
||||||
nCrunchTemp_*
|
|
||||||
|
|
||||||
# MightyMoose
|
|
||||||
*.mm.*
|
|
||||||
AutoTest.Net/
|
|
||||||
|
|
||||||
# Web workbench (sass)
|
|
||||||
.sass-cache/
|
|
||||||
|
|
||||||
# Installshield output folder
|
|
||||||
[Ee]xpress/
|
|
||||||
|
|
||||||
# DocProject is a documentation generator add-in
|
|
||||||
DocProject/buildhelp/
|
|
||||||
DocProject/Help/*.HxT
|
|
||||||
DocProject/Help/*.HxC
|
|
||||||
DocProject/Help/*.hhc
|
|
||||||
DocProject/Help/*.hhk
|
|
||||||
DocProject/Help/*.hhp
|
|
||||||
DocProject/Help/Html2
|
|
||||||
DocProject/Help/html
|
|
||||||
|
|
||||||
# Click-Once directory
|
|
||||||
publish/
|
|
||||||
|
|
||||||
# Publish Web Output
|
|
||||||
*.[Pp]ublish.xml
|
|
||||||
*.azurePubxml
|
|
||||||
# TODO: Comment the next line if you want to checkin your web deploy settings
|
|
||||||
# but database connection strings (with potential passwords) will be unencrypted
|
|
||||||
#*.pubxml
|
|
||||||
*.publishproj
|
|
||||||
|
|
||||||
# Microsoft Azure Web App publish settings. Comment the next line if you want to
|
|
||||||
# checkin your Azure Web App publish settings, but sensitive information contained
|
|
||||||
# in these scripts will be unencrypted
|
|
||||||
PublishScripts/
|
|
||||||
|
|
||||||
# NuGet Packages
|
|
||||||
*.nupkg
|
|
||||||
# The packages folder can be ignored because of Package Restore
|
|
||||||
**/packages/*
|
|
||||||
# except build/, which is used as an MSBuild target.
|
|
||||||
!**/packages/build/
|
|
||||||
# Uncomment if necessary however generally it will be regenerated when needed
|
|
||||||
#!**/packages/repositories.config
|
|
||||||
# NuGet v3's project.json files produce more ignorable files
|
|
||||||
*.nuget.props
|
|
||||||
*.nuget.targets
|
|
||||||
|
|
||||||
# Microsoft Azure Build Output
|
|
||||||
csx/
|
|
||||||
*.build.csdef
|
|
||||||
|
|
||||||
# Microsoft Azure Emulator
|
|
||||||
ecf/
|
|
||||||
rcf/
|
|
||||||
|
|
||||||
# Windows Store app package directories and files
|
|
||||||
AppPackages/
|
|
||||||
BundleArtifacts/
|
|
||||||
Package.StoreAssociation.xml
|
|
||||||
_pkginfo.txt
|
|
||||||
|
|
||||||
# Visual Studio cache files
|
|
||||||
# files ending in .cache can be ignored
|
|
||||||
*.[Cc]ache
|
|
||||||
# but keep track of directories ending in .cache
|
|
||||||
!*.[Cc]ache/
|
|
||||||
|
|
||||||
# Others
|
|
||||||
ClientBin/
|
|
||||||
~$*
|
|
||||||
*~
|
|
||||||
*.dbmdl
|
|
||||||
*.dbproj.schemaview
|
|
||||||
*.jfm
|
|
||||||
*.pfx
|
|
||||||
*.publishsettings
|
|
||||||
node_modules/
|
|
||||||
orleans.codegen.cs
|
|
||||||
|
|
||||||
# Since there are multiple workflows, uncomment next line to ignore bower_components
|
|
||||||
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
|
|
||||||
#bower_components/
|
|
||||||
|
|
||||||
# RIA/Silverlight projects
|
|
||||||
Generated_Code/
|
|
||||||
|
|
||||||
# Backup & report files from converting an old project file
|
|
||||||
# to a newer Visual Studio version. Backup files are not needed,
|
|
||||||
# because we have git ;-)
|
|
||||||
_UpgradeReport_Files/
|
|
||||||
Backup*/
|
|
||||||
UpgradeLog*.XML
|
|
||||||
UpgradeLog*.htm
|
|
||||||
|
|
||||||
# SQL Server files
|
|
||||||
*.mdf
|
|
||||||
*.ldf
|
|
||||||
|
|
||||||
# Business Intelligence projects
|
|
||||||
*.rdl.data
|
|
||||||
*.bim.layout
|
|
||||||
*.bim_*.settings
|
|
||||||
|
|
||||||
# Microsoft Fakes
|
|
||||||
FakesAssemblies/
|
|
||||||
|
|
||||||
# GhostDoc plugin setting file
|
|
||||||
*.GhostDoc.xml
|
|
||||||
|
|
||||||
# Node.js Tools for Visual Studio
|
|
||||||
.ntvs_analysis.dat
|
|
||||||
|
|
||||||
# Visual Studio 6 build log
|
|
||||||
*.plg
|
|
||||||
|
|
||||||
# Visual Studio 6 workspace options file
|
|
||||||
*.opt
|
|
||||||
|
|
||||||
# Visual Studio LightSwitch build output
|
|
||||||
**/*.HTMLClient/GeneratedArtifacts
|
|
||||||
**/*.DesktopClient/GeneratedArtifacts
|
|
||||||
**/*.DesktopClient/ModelManifest.xml
|
|
||||||
**/*.Server/GeneratedArtifacts
|
|
||||||
**/*.Server/ModelManifest.xml
|
|
||||||
_Pvt_Extensions
|
|
||||||
|
|
||||||
# Paket dependency manager
|
|
||||||
.paket/paket.exe
|
|
||||||
paket-files/
|
|
||||||
|
|
||||||
# FAKE - F# Make
|
|
||||||
.fake/
|
|
||||||
|
|
||||||
# JetBrains Rider
|
|
||||||
.idea/
|
|
||||||
*.sln.iml
|
|
||||||
|
|
||||||
# CodeRush
|
|
||||||
.cr/
|
|
||||||
|
|
||||||
# Python Tools for Visual Studio (PTVS)
|
|
||||||
__pycache__/
|
|
||||||
*.pyc
|
|
Binary file not shown.
@ -18,8 +18,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
|
|||||||
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
|
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
|
||||||
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
|
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
|
||||||
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
|
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
|
||||||
|
..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
|
||||||
..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
|
..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
|
||||||
..\..\..\..\..\OneDrive\WSL\python-novo-api\test\testgrammar.py = ..\..\..\..\..\OneDrive\WSL\python-novo-api\test\testgrammar.py
|
|
||||||
EndProjectSection
|
EndProjectSection
|
||||||
EndProject
|
EndProject
|
||||||
Global
|
Global
|
||||||
|
Binary file not shown.
@ -4,7 +4,7 @@
|
|||||||
<SchemaVersion>2.0</SchemaVersion>
|
<SchemaVersion>2.0</SchemaVersion>
|
||||||
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
||||||
<ProjectHome>.</ProjectHome>
|
<ProjectHome>.</ProjectHome>
|
||||||
<StartupFile>check_novoapi.py</StartupFile>
|
<StartupFile>performance_check.py</StartupFile>
|
||||||
<SearchPath>
|
<SearchPath>
|
||||||
</SearchPath>
|
</SearchPath>
|
||||||
<WorkingDirectory>.</WorkingDirectory>
|
<WorkingDirectory>.</WorkingDirectory>
|
||||||
@ -25,9 +25,6 @@
|
|||||||
<Compile Include="acoustic_model_functions.py">
|
<Compile Include="acoustic_model_functions.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
<Compile Include="check_novoapi.py">
|
|
||||||
<SubType>Code</SubType>
|
|
||||||
</Compile>
|
|
||||||
<Compile Include="convert_xsampa2ipa.py">
|
<Compile Include="convert_xsampa2ipa.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
@ -37,10 +34,7 @@
|
|||||||
<Compile Include="fa_test.py">
|
<Compile Include="fa_test.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
<Compile Include="forced_alignment_novo.py">
|
<Compile Include="performance_check.py">
|
||||||
<SubType>Code</SubType>
|
|
||||||
</Compile>
|
|
||||||
<Compile Include="htk_vs_kaldi.py">
|
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
@ -1,40 +0,0 @@
|
|||||||
import os
|
|
||||||
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import csv
|
|
||||||
#import subprocess
|
|
||||||
#from collections import Counter
|
|
||||||
#import re
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
#import matplotlib.pyplot as plt
|
|
||||||
#from sklearn.metrics import confusion_matrix
|
|
||||||
|
|
||||||
import acoustic_model_functions as am_func
|
|
||||||
import convert_xsampa2ipa
|
|
||||||
import defaultfiles as default
|
|
||||||
|
|
||||||
from forced_alignment import pyhtk
|
|
||||||
|
|
||||||
import novoapi
|
|
||||||
|
|
||||||
## ======================= convert phones ======================
|
|
||||||
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
|
|
||||||
|
|
||||||
stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
|
|
||||||
|
|
||||||
phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx)
|
|
||||||
df = pd.read_excel(phonelist_novo70_, 'list')
|
|
||||||
|
|
||||||
|
|
||||||
## novo phoneset
|
|
||||||
#translation_key = dict()
|
|
||||||
## the *_simple columns include only the entries which have a single phone in them.
|
|
||||||
#for ipa, novo70 in zip(df['IPA_simple'], df['novo70_simple']):
|
|
||||||
# if not pd.isnull(ipa):
|
|
||||||
# print('{0}:{1}'.format(ipa, novo70))
|
|
||||||
# translation_key[ipa] = novo70
|
|
||||||
#phonelist_novo70 = np.unique(list(df['novo70_simple']))
|
|
||||||
|
|
@ -3,7 +3,7 @@ import os
|
|||||||
#default_hvite_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'htk', 'config.HVite')
|
#default_hvite_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'htk', 'config.HVite')
|
||||||
|
|
||||||
cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
|
cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
|
||||||
|
kaldi_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5'
|
||||||
#config_hcopy = os.path.join(cygwin_dir, 'config', 'config.HCopy')
|
#config_hcopy = os.path.join(cygwin_dir, 'config', 'config.HCopy')
|
||||||
#config_train = os.path.join(cygwin_dir, 'config', 'config.train')
|
#config_train = os.path.join(cygwin_dir, 'config', 'config.train')
|
||||||
config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
|
config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
|
||||||
@ -30,16 +30,7 @@ repo_dir = r'C:\Users\Aki\source\repos'
|
|||||||
ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
|
ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
|
||||||
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
|
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
|
||||||
|
|
||||||
WSL_dir = r'C:\OneDrive\WSL'
|
fame_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus'
|
||||||
fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
|
|
||||||
fame_s5_dir = os.path.join(fame_dir, 's5')
|
|
||||||
fame_corpus_dir = os.path.join(fame_dir, 'corpus')
|
|
||||||
|
|
||||||
experiments_dir = r'c:\OneDrive\Research\rug\experiments'
|
experiments_dir = r'c:\OneDrive\Research\rug\experiments'
|
||||||
stimmen_transcription_xlsx = os.path.join(experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
|
|
||||||
stimmen_data_dir = os.path.join(experiments_dir, 'stimmen', 'data')
|
|
||||||
phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
|
|
||||||
|
|
||||||
novo_api_dir = os.path.join(WSL_dir, 'python-novo-api')
|
|
||||||
cmu69_phoneset = os.path.join(novo_api_dir, 'novoapi', 'asr', 'phoneset', 'en', 'cmu69.phoneset')
|
|
||||||
|
|
||||||
|
phonelist = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
|
@ -2,52 +2,15 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
import defaultfiles as default
|
import defaultfiles as default
|
||||||
|
|
||||||
sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
|
sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
|
||||||
from forced_alignment import forced_alignment, lexicon, convert_phone_set
|
from forced_alignment import forced_alignment
|
||||||
|
|
||||||
#wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav'
|
wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav'
|
||||||
#forced_alignment(
|
forced_alignment(
|
||||||
# wav_file,
|
wav_file,
|
||||||
# 'Australië'
|
#'Australië'
|
||||||
# #'BUFFETCOUPON COULISSEN DOUANE'
|
'BUFFETCOUPON COULISSEN DOUANE'
|
||||||
# )
|
)
|
||||||
|
|
||||||
# according to: http://lands.let.ru.nl/cgn/doc_Dutch/topics/version_1.0/annot/phonetics/fon_prot.pdf
|
|
||||||
phone_list_cgn = ['p', 'd', 't', 'd', 'k', 'g', # plosives
|
|
||||||
'f', 'v', 's', 'z', 'S', 'Z', 'x', 'G', 'h', # fricatives
|
|
||||||
'N', 'm', 'n', 'J', 'l', 'r', 'w', 'j', # sonorant
|
|
||||||
'I', 'E', 'A', 'O', 'Y', # short vowels
|
|
||||||
'i', 'y', 'e', '2', 'a', 'o', 'u', # long vowels
|
|
||||||
'@', # schwa
|
|
||||||
'E+', 'Y+', 'A+', # Diftongen
|
|
||||||
'E:', 'Y:', 'O:', # Leenvocalen
|
|
||||||
'E~', 'A~', 'O~', 'Y~' # Nasale vocalen
|
|
||||||
]
|
|
||||||
|
|
||||||
# load word in the lexicon.
|
|
||||||
lexicon_file = r'C:\cygwin64\home\Aki\acoustic_model\material\barbara\2010_2510_lexicon_pronvars_HTK.txt'
|
|
||||||
with open(lexicon_file, 'r') as f:
|
|
||||||
lines = f.readlines()
|
|
||||||
|
|
||||||
words = []
|
|
||||||
for line in lines:
|
|
||||||
line_split = line.split()
|
|
||||||
if len(line_split) > 0:
|
|
||||||
word = line_split[0]
|
|
||||||
word.replace('+s', '')
|
|
||||||
word = word.split('-')
|
|
||||||
words.append(word)
|
|
||||||
words = list(np.unique(words))
|
|
||||||
|
|
||||||
pronunciations = lexicon._grapheme_to_phoneme(words)
|
|
||||||
htks = []
|
|
||||||
phone_list = set()
|
|
||||||
for word in pronunciations.keys():
|
|
||||||
ipa = pronunciations[word]
|
|
||||||
htk = convert_phone_set.split_ipa(ipa)
|
|
||||||
htks.append(htk)
|
|
||||||
phone_list = phone_list | set(htk)
|
|
@ -1,133 +0,0 @@
|
|||||||
#
|
|
||||||
# forced alignment using novo-api.
|
|
||||||
#
|
|
||||||
# *** IMPORTANT ***
|
|
||||||
# This file should be treated as confidential.
|
|
||||||
# This file should not be copied or uploaded to public sites.
|
|
||||||
#
|
|
||||||
# NOTES:
|
|
||||||
# The usage of novo api: https://bitbucket.org/novolanguage/python-novo-api
|
|
||||||
# I couldn't make it work as I described in the mail to Martijn Bartelds on 2018/12/03.
|
|
||||||
# As per his advice, I modified testgrammar.py and made it a function.
|
|
||||||
#
|
|
||||||
# In order to run on Python 3.6, the following points are changed in novo-api.
|
|
||||||
# (1) backend/__init__.py
|
|
||||||
# - #import session
|
|
||||||
# from . import session
|
|
||||||
# (2) backend/session.py
|
|
||||||
# - #except Exception, e:
|
|
||||||
# except Exception as e:
|
|
||||||
# - #print self.last_message
|
|
||||||
# print(self.last_message)
|
|
||||||
# (3) asr/segment/praat.py
|
|
||||||
# - def print_tier(output, title, begin, end, segs, (format, formatter))
|
|
||||||
# def print_tier(output, title, begin, end, segs, format, formatter):
|
|
||||||
# (4) asr/spraaklab/__init__.py
|
|
||||||
# - #import session
|
|
||||||
# from . import session
|
|
||||||
# (5) asr/spraaklab/schema.py
|
|
||||||
# - #print data, "validated not OK", e.message
|
|
||||||
# print("{0} validated not OK {1}".format(data, e.message))
|
|
||||||
# - #print data, "validated OK"
|
|
||||||
# print("{} validated OK".format(data))
|
|
||||||
# - #if isinstance(object, basestring):
|
|
||||||
# if isinstance(object, str):
|
|
||||||
#
|
|
||||||
# Aki Kunikoshi
|
|
||||||
# 428968@gmail.com
|
|
||||||
#
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
|
|
||||||
from novoapi.backend import session
|
|
||||||
|
|
||||||
# username / password cannot be passed as arguments...
|
|
||||||
p = argparse.ArgumentParser()
|
|
||||||
#p.add_argument("--user", default=None)
|
|
||||||
#p.add_argument("--password", default=None)
|
|
||||||
p.add_argument("--user", default='martijn.wieling')
|
|
||||||
p.add_argument("--password", default='fa0Thaic')
|
|
||||||
args = p.parse_args()
|
|
||||||
|
|
||||||
wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'
|
|
||||||
|
|
||||||
rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)
|
|
||||||
|
|
||||||
grammar = {
|
|
||||||
"type": "confusion_network",
|
|
||||||
"version": "1.0",
|
|
||||||
"data": {
|
|
||||||
"kind": "sequence",
|
|
||||||
"elements": [
|
|
||||||
{
|
|
||||||
"kind": "word",
|
|
||||||
"pronunciation": [
|
|
||||||
{
|
|
||||||
"phones": [
|
|
||||||
"wv",
|
|
||||||
"a1",
|
|
||||||
"n"
|
|
||||||
],
|
|
||||||
"id": 0
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"phones": [
|
|
||||||
"wv",
|
|
||||||
"uh1",
|
|
||||||
"n"
|
|
||||||
],
|
|
||||||
"id": 1
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"label": "one"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"kind": "word",
|
|
||||||
"pronunciation": [
|
|
||||||
{
|
|
||||||
"phones": [
|
|
||||||
"t",
|
|
||||||
"uw1"
|
|
||||||
],
|
|
||||||
"id": 0
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"label": "two"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"kind": "word",
|
|
||||||
"pronunciation": [
|
|
||||||
{
|
|
||||||
"phones": [
|
|
||||||
"t",
|
|
||||||
"r",
|
|
||||||
"iy1"
|
|
||||||
],
|
|
||||||
"id": 0
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"phones": [
|
|
||||||
"s",
|
|
||||||
"r",
|
|
||||||
"iy1"
|
|
||||||
],
|
|
||||||
"id": 1
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"label": "three"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"return_objects": [
|
|
||||||
"grammar"
|
|
||||||
],
|
|
||||||
"phoneset": "novo70"
|
|
||||||
}
|
|
||||||
|
|
||||||
res = rec.setgrammar(grammar)
|
|
||||||
#print "Set grammar result", res
|
|
||||||
|
|
||||||
#res = rec.recognize_wav("test/onetwothree.wav")
|
|
||||||
res = rec.recognize_wav(wav_file)
|
|
||||||
#print "Recognition result:", json.dumps(res.export(), indent=4)
|
|
@ -3,7 +3,7 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import csv
|
import csv
|
||||||
#import subprocess
|
import subprocess
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@ -20,6 +20,8 @@ from forced_alignment import pyhtk
|
|||||||
|
|
||||||
|
|
||||||
## ======================= user define =======================
|
## ======================= user define =======================
|
||||||
|
excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
|
||||||
|
data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data')
|
||||||
|
|
||||||
wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k
|
wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k
|
||||||
|
|
||||||
@ -28,12 +30,12 @@ htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short
|
|||||||
fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k')
|
fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k')
|
||||||
result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result')
|
result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result')
|
||||||
|
|
||||||
kaldi_data_dir = os.path.join(default.fame_s5_dir, 'data', 'alignme')
|
kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme')
|
||||||
kaldi_dict_dir = os.path.join(default.fame_s5_dir, 'data', 'local', 'dict')
|
kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict')
|
||||||
lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt')
|
lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt')
|
||||||
|
|
||||||
#lex_asr = os.path.join(default.fame_corpus_dir, 'lexicon', 'lex.asr')
|
#lex_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
|
||||||
#lex_asr_htk = os.path.join(default.fame_corpus_dir, 'lexicon', 'lex.asr_htk')
|
#lex_asr_htk = os.path.join(default.fame_dir, 'lexicon', 'lex.asr_htk')
|
||||||
|
|
||||||
|
|
||||||
# procedure
|
# procedure
|
||||||
@ -46,6 +48,8 @@ load_forced_alignment_kaldi = 1
|
|||||||
eval_forced_alignment_kaldi = 1
|
eval_forced_alignment_kaldi = 1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## ======================= add paths =======================
|
## ======================= add paths =======================
|
||||||
sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
|
sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
|
||||||
from forced_alignment import convert_phone_set
|
from forced_alignment import convert_phone_set
|
||||||
@ -58,12 +62,12 @@ from evaluation import plot_confusion_matrix
|
|||||||
## ======================= convert phones ======================
|
## ======================= convert phones ======================
|
||||||
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
|
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
|
||||||
|
|
||||||
xls = pd.ExcelFile(default.stimmen_transcription_xlsx)
|
xls = pd.ExcelFile(excel_file)
|
||||||
|
|
||||||
## check conversion
|
## check conversion
|
||||||
#df = pd.read_excel(xls, 'check')
|
#df = pd.read_excel(xls, 'frequency')
|
||||||
#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
|
#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
|
||||||
# if xsampa is not '/':
|
# #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_)
|
||||||
# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
|
# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
|
||||||
# if not ipa_converted == ipa:
|
# if not ipa_converted == ipa:
|
||||||
# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
|
# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
|
||||||
@ -156,7 +160,7 @@ if do_forced_alignment_htk:
|
|||||||
htk_dict_file = os.path.join(htk_dict_dir, word + '.dic')
|
htk_dict_file = os.path.join(htk_dict_dir, word + '.dic')
|
||||||
|
|
||||||
pyhtk.doHVite(wav_file, label_file, htk_dict_file, fa_file, default.config_hvite,
|
pyhtk.doHVite(wav_file, label_file, htk_dict_file, fa_file, default.config_hvite,
|
||||||
default.phonelist_friesian_txt, acoustic_model)
|
default.phonelist, acoustic_model)
|
||||||
os.remove(label_file)
|
os.remove(label_file)
|
||||||
|
|
||||||
prediction = am_func.read_fileFA(fa_file)
|
prediction = am_func.read_fileFA(fa_file)
|
||||||
@ -227,7 +231,7 @@ if make_kaldi_data_files:
|
|||||||
|
|
||||||
## ======================= make lexicon txt which is used by Kaldi =======================
|
## ======================= make lexicon txt which is used by Kaldi =======================
|
||||||
if make_kaldi_lexicon_txt:
|
if make_kaldi_lexicon_txt:
|
||||||
option_num = 7
|
option_num = 6
|
||||||
|
|
||||||
# remove previous file.
|
# remove previous file.
|
||||||
if os.path.exists(lexicon_txt):
|
if os.path.exists(lexicon_txt):
|
||||||
@ -274,13 +278,13 @@ if make_kaldi_lexicon_txt:
|
|||||||
|
|
||||||
## ======================= load kaldi forced alignment result =======================
|
## ======================= load kaldi forced alignment result =======================
|
||||||
if load_forced_alignment_kaldi:
|
if load_forced_alignment_kaldi:
|
||||||
phones_txt = os.path.join(default.fame_s5_dir, 'data', 'lang', 'phones.txt')
|
phones_txt = os.path.join(default.kaldi_dir, 'data', 'lang', 'phones.txt')
|
||||||
merged_alignment_txt = os.path.join(default.fame_s5_dir, 'exp', 'tri1_alignme', 'merged_alignment.txt')
|
merged_alignment_txt = os.path.join(default.kaldi_dir, 'exp', 'tri1_alignme', 'merged_alignment.txt')
|
||||||
|
|
||||||
#filenames = np.load(stimmen_data_dir + '\\filenames.npy')
|
#filenames = np.load(data_dir + '\\filenames.npy')
|
||||||
#words = np.load(stimmen_data_dir + '\\words.npy')
|
#words = np.load(data_dir + '\\words.npy')
|
||||||
#pronunciations = np.load(stimmen_data_dir + '\\pronunciations_ipa.npy')
|
#pronunciations = np.load(data_dir + '\\pronunciations_ipa.npy')
|
||||||
#pronvar_list_all = np.load(stimmen_data_dir + '\\pronvar_list_all.npy')
|
#pronvar_list_all = np.load(data_dir + '\\pronvar_list_all.npy')
|
||||||
#word_list = np.unique(words)
|
#word_list = np.unique(words)
|
||||||
|
|
||||||
# load the mapping between phones and ids.
|
# load the mapping between phones and ids.
|
||||||
@ -365,7 +369,7 @@ if eval_forced_alignment_htk:
|
|||||||
if compare_hmm_num:
|
if compare_hmm_num:
|
||||||
f_result.write("{},".format(hmm_num_str))
|
f_result.write("{},".format(hmm_num_str))
|
||||||
|
|
||||||
#match = np.load(stimmen_data_dir + '\\match_hmm' + hmm_num_str + '.npy')
|
#match = np.load(data_dir + '\\match_hmm' + hmm_num_str + '.npy')
|
||||||
#prediction = np.load(os.path.join(result_dir, 'htk', 'predictions_hmm' + hmm_num_str + '.npy'))
|
#prediction = np.load(os.path.join(result_dir, 'htk', 'predictions_hmm' + hmm_num_str + '.npy'))
|
||||||
#prediction = pd.Series(prediction, index=df.index, name='prediction')
|
#prediction = pd.Series(prediction, index=df.index, name='prediction')
|
||||||
#result = pd.concat([df, prediction], axis=1)
|
#result = pd.concat([df, prediction], axis=1)
|
@ -1,5 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
__version__ = "0.2"
|
|
||||||
|
|
||||||
import backend
|
|
Binary file not shown.
@ -1,6 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
#import segments
|
|
||||||
#import spraaklab
|
|
||||||
from . import segments
|
|
||||||
from . import spraaklab
|
|
Binary file not shown.
@ -1,4 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
from .segments import Segmentation
|
|
||||||
from .praat import seg2tg
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,77 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
|
|
||||||
|
|
||||||
import codecs
|
|
||||||
|
|
||||||
def print_header(output, begin, end, nr_tiers):
|
|
||||||
print >> output, 'File type = "ooTextFile"'
|
|
||||||
print >> output, 'Object class = "TextGrid"'
|
|
||||||
print >> output, ''
|
|
||||||
print >> output, 'xmin = %s' % begin
|
|
||||||
print >> output, 'xmax = %s' % end
|
|
||||||
print >> output, 'tiers? <exists>'
|
|
||||||
print >> output, 'size = %d' % nr_tiers
|
|
||||||
print >> output, 'item []:'
|
|
||||||
|
|
||||||
|
|
||||||
def print_info_tier(output, title, begin, end, label):
|
|
||||||
print >> output, '\titem [%d]:' % 0
|
|
||||||
print >> output, '\t\tclass = "IntervalTier"'
|
|
||||||
print >> output, '\t\tname = "%s"' % title
|
|
||||||
print >> output, '\t\txmin = %s' % begin
|
|
||||||
print >> output, '\t\txmax = %s' % end
|
|
||||||
print >> output, '\t\tintervals: size = %d' % 1
|
|
||||||
|
|
||||||
print >> output, '\t\tintervals [1]:'
|
|
||||||
print >> output, '\t\t\txmin = %s' % begin
|
|
||||||
print >> output, '\t\t\txmax = %s' % end
|
|
||||||
print >> output, '\t\t\ttext = "%s"' % label
|
|
||||||
|
|
||||||
|
|
||||||
#def print_tier(output, title, begin, end, segs, (format, formatter)):
|
|
||||||
def print_tier(output, title, begin, end, segs, format, formatter):
|
|
||||||
print >> output, '\titem [%d]:' % 0
|
|
||||||
print >> output, '\t\tclass = "IntervalTier"'
|
|
||||||
print >> output, '\t\tname = "%s"' % title
|
|
||||||
print >> output, '\t\txmin = %s' % begin
|
|
||||||
print >> output, '\t\txmax = %s' % end
|
|
||||||
print >> output, '\t\tintervals: size = %d' % len(segs)
|
|
||||||
|
|
||||||
count = 1
|
|
||||||
for seg in segs:
|
|
||||||
#print seg
|
|
||||||
print >> output, '\t\tintervals [%d]:' % count
|
|
||||||
print >> output, '\t\t\txmin = %s' % repr(int(seg['begin']) / 100.0)
|
|
||||||
print >> output, '\t\t\txmax = %s' % repr(int(seg['end']) / 100.0)
|
|
||||||
string = '\t\t\ttext = "' + format + '"'
|
|
||||||
print >> output, string % formatter(seg['label'])
|
|
||||||
count += 1
|
|
||||||
|
|
||||||
|
|
||||||
def seg2tg(fname, segments):
|
|
||||||
if not segments:
|
|
||||||
return
|
|
||||||
output = codecs.open(fname, "w", encoding="utf-8")
|
|
||||||
|
|
||||||
confidences = []
|
|
||||||
word_labels = []
|
|
||||||
phones = []
|
|
||||||
|
|
||||||
for s in segments:
|
|
||||||
conf = s.llh if hasattr(s, "llh") else s.score
|
|
||||||
confidences.append({'begin': s.begin, 'end': s.end, 'label': conf})
|
|
||||||
word_labels.append({'begin': s.begin, 'end': s.end, 'label': s.label})
|
|
||||||
for p in s.phones:
|
|
||||||
phones.append({'begin': p.begin, 'end': p.end, 'label': p.label})
|
|
||||||
|
|
||||||
|
|
||||||
begin = repr(int(segments[0].begin) / 100.0)
|
|
||||||
end = repr(int(segments[-1].end) / 100.0)
|
|
||||||
|
|
||||||
nr_tiers = 3
|
|
||||||
print_header(output, begin, end, nr_tiers)
|
|
||||||
print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
|
|
||||||
print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
|
|
||||||
print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
|
|
||||||
|
|
||||||
output.close()
|
|
@ -1,99 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
|
|
||||||
|
|
||||||
## These classes can be initialized with dictionaries, as they are returned by the python spraaklab recognition system.
|
|
||||||
|
|
||||||
class Segment(object):
    """One recognised word or phone, built from a recognition-result dict.

    Attributes set from the dict: ``begin``, ``end``, ``label``, ``score``,
    and ``llh`` when present. ``begintime`` / ``endtime`` come from the
    ``"beginTime"`` / ``"endTime"`` keys, defaulting to ``begin / 100.0`` and
    ``end / 100.0``. A dict with a ``"phones"`` key yields a segment of
    ``type`` "word" with a nested ``phones`` Segmentation; otherwise the
    ``type`` is "phone".
    """

    def __init__(self, segment):
        """Initialise from *segment*, a dict with at least the keys
        "begin", "end", "label" and "score"."""
        self.begin = segment["begin"]
        self.end = segment["end"]
        self.begintime = segment.get("beginTime", self.begin / 100.0)
        self.endtime = segment.get("endTime", self.end / 100.0)
        self.label = segment["label"]
        self.score = segment["score"]
        if "llh" in segment:
            self.llh = segment["llh"]
        if "phones" in segment:
            self.type = "word"
            self.phones = Segmentation(segment["phones"], ["sil"])
            # Guard against an empty phone list before peeking at phones[0].
            if len(self.phones) and hasattr(self.phones[0], "llh"):
                self.minllh = min([s.llh for s in self.phones])  ## the current word llh for error detection
        else:
            self.type = "phone"

    def __repr__(self):
        res = "%8.3f -- %8.3f score %8.3f " % (self.begintime, self.endtime, self.score)
        if hasattr(self, "llh"):
            res += "llh %8.3f " % self.llh
        label = self.label
        # Only encode when the label is not already the native str type
        # (Python 2 unicode); on Python 3 str + bytes would raise TypeError.
        if not isinstance(label, str):
            label = label.encode("utf8")
        res += label
        return res

    def export(self):
        """Return the segment as a plain dict, including nested phones."""
        r = {"begin": self.begin, "end": self.end, "label": self.label, "score": self.score, "type": self.type}
        if hasattr(self, "llh"):
            r["llh"] = self.llh
        if hasattr(self, "phones"):
            r["phones"] = self.phones.export()
        return r
|
|
||||||
|
|
||||||
class Segmentation(object):
    """An ordered collection of Segment objects with aggregate helpers.

    Most query methods take an optional *skip* collection of labels to leave
    out; when *skip* is empty or None the instance's ``sils`` list is used.
    """

    def __init__(self, segments, sils=None):
        """Create a segmentation from a spraaklab recognition structure.

        segments: an array of words (or phones), represented by a dict with
        "begin", "end", "label", "score", and "llh" keys. Words can also have
        "phones" which is another array of segments.
        sils: labels treated as silence; defaults to a fresh
        ["<s>", "</s>", "!sil"] list per instance."""
        self.segments = [Segment(s) for s in segments]
        self.type = self.segments[0].type if self.segments else None
        # A new list per instance: a shared mutable default would leak edits
        # of one instance's sils into every other instance.
        self.sils = ["<s>", "</s>", "!sil"] if sils is None else sils
        self.orig = segments  ## in case we want to have access to the original recognition structure

    def _skip_labels(self, skip):
        """Return *skip*, or the instance's silence labels when it is empty."""
        return skip if skip else self.sils

    def __getitem__(self, item):
        return self.segments[item]

    def __repr__(self):
        ns = len(self.segments)
        # An empty segmentation has type None; print a neutral noun instead.
        kind = self.type if self.type is not None else "segment"
        res = "Segmentation with %d %s%s" % (ns, kind, "" if ns == 1 else "s")
        for seg in self.segments:
            res += "\n " + repr(seg)
        return res

    def __len__(self):
        return len(self.segments)

    def score(self, skip=None):
        """Sum of the scores of all segments whose label is not skipped."""
        skip = self._skip_labels(skip)
        s = 0.0
        for seg in self.segments:
            if seg.label not in skip:
                s += seg.score
        return s

    def llhs(self, skip=None):
        """List of ``llh`` values of non-skipped segments that have one."""
        skip = self._skip_labels(skip)
        return [seg.llh for seg in self.segments if hasattr(seg, "llh") and seg.label not in skip]

    def llh(self, skip=None):
        """Sum of the values returned by llhs()."""
        return sum(self.llhs(skip))

    def minllh(self, skip=None):
        """Smallest value returned by llhs(), or None when there is none."""
        llhs = self.llhs(skip)
        return min(llhs) if llhs else None

    def labels(self, skip=None):
        """Labels of all non-skipped segments, in order."""
        skip = self._skip_labels(skip)
        return [seg.label for seg in self.segments if seg.label not in skip]

    def sentence(self, skip=None):
        """The non-skipped labels joined by single spaces."""
        return " ".join(self.labels(skip))

    def export(self):
        """List of each segment's export() dict."""
        return [seg.export() for seg in self.segments]
|
|
@ -1,4 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
#import schema
|
|
||||||
from . import schema
|
|
Binary file not shown.
Binary file not shown.
@ -1,273 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
## (c) 2017 NovoLanguage, author: David A. van Leeuwen
|
|
||||||
|
|
||||||
## The purpose of this is to define the grammar structure in a json schema, so that it can be validated,
|
|
||||||
## (de)serialized, and perhaps even automatically converted to a Python class structure.
|
|
||||||
|
|
||||||
import json
|
|
||||||
import jsonschema
|
|
||||||
|
|
||||||
# JSON schema for the grammar structure; this is the schema that
# validate_rpc_grammar() applies when the message version is "1.0".
# The top-level "$ref" points at the "group" definition below.
grammar_schema_v10 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    "$ref": "#/definitions/group",
    "definitions": {
        # A non-empty array of strings.
        "phones": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "minItems": 1
        },
        # An object that must carry "phones"; syllables, id and meta are optional.
        "pronunciation": {
            "type": "object",
            "properties": {
                "phones": {
                    "$ref": "#/definitions/phones"
                },
                "syllables": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/syllable"
                    },
                    "minItems": 1
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this pronunciation from other variants"
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["phones"]
        },
        # Non-negative integer fields; "begin" and "end" are required.
        # NOTE(review): presumably begin/end index into the phones array -- confirm.
        "syllable": {
            "type": "object",
            "properties": {
                "begin": {
                    "type": "integer",
                    "minimum": 0
                },
                "end": {
                    "type": "integer",
                    "minimum": 0
                },
                "stress": {
                    "type": "integer",
                    "minimum": 0
                },
                "tone": {
                    "type": "integer",
                    "minimum": 0
                }
            },
            "required": ["begin", "end"]
        },
        # An object with a required "label".
        "word": {
            "type": "object",
            "properties": {
                "kind": {
                    "type": "string",
                    "enum": ["word"]
                },
                "label": {
                    "type": "string"
                },
                # Accepts a single pronunciation object, a non-empty array of
                # pronunciation objects and/or phone arrays, or a bare phone array.
                "pronunciation": {
                    "anyOf": [
                        {
                            "$ref": "#/definitions/pronunciation"
                        },
                        {
                            "type": "array",
                            "items": {
                                "anyOf": [
                                    {
                                        "$ref": "#/definitions/pronunciation"
                                    },
                                    {
                                        "$ref": "#/definitions/phones"
                                    }
                                ]
                            },
                            "minItems": 1
                        },
                        {
                            "$ref": "#/definitions/phones"
                        }

                    ]
                },
                "syllables": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/syllable"
                    }
                },
                "graphemes": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this word from other words (with possibly the same label)"
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["label"]
        },
        # Exactly one of: a word object, a nested group, or a string/null.
        "element": {
            "title": "element",
            "oneOf": [
                {
                    "$ref": "#/definitions/word"
                },
                {
                    "$ref": "#/definitions/group"
                },
                {
                    "type": ["string", "null"]
                }
            ]
        },
        # A "sequence", "alternatives" or "order" node holding a non-empty
        # array of elements; both "kind" and "elements" are required.
        "group": {
            "title": "element group",
            "type": "object",
            "properties": {
                "kind": {
                    "type": "string",
                    "enum": ["sequence", "alternatives", "order"]
                },
                "elements": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/element"
                    },
                    "minItems": 1,
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["kind", "elements"]
        }
    }
}
|
|
||||||
|
|
||||||
# JSON schema that validate_rpc_grammar() applies when the message version
# is "0.1": an object with a "type" and 1 to 5 "parts".
grammar_schema_v01 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar v0.1",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["multiple_choice", "word_order"]
        },
        "parts": {
            "type": "array",
            "minItems": 1,
            "maxItems": 5,
            # Each part is either a single string or an array of strings.
            "items": {
                "type": ["string", "array"],
                "items": {
                    "type": ["string"]
                }
            }
        }
    }
}
|
|
||||||
|
|
||||||
# JSON schema for the envelope of an RPC grammar message: a required "type"
# and "data" payload plus optional settings. The "data" payload is validated
# separately against the versioned grammar schema by validate_rpc_grammar().
grammar_rpc_schema = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage RPC grammar",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["confusion_network"]
        },
        "version": {
            "type": "string",
            # Must match the version strings validate_rpc_grammar() compares
            # against ("0.1" / "1.0"); the former "v0.1" matched neither.
            "default": "0.1"
        },
        "data": {
            "type": "object"
        },
        "return_dict": {
            "type": "boolean"
        },
        "return_objects": {
            "type": "array",
            "items": {
                "type": "string",
                "enum": ["dict", "grammar"]
            }
        },
        "phoneset": {
            "type": "string",
            "enum": ["cmu69", "novo70", "mdbg115"]
        },
        "parallel_silence": {
            "type": "boolean"
        }
    },
    "required": ["type", "data"]
}
|
|
||||||
|
|
||||||
def validate(object, schema=grammar_schema_v10):
    """Report whether *object* conforms to *schema*.

    object: a dict, or a JSON string that is decoded first.
    schema: the JSON schema to check against (the v1.0 grammar by default).

    Returns True when validation succeeds and False when jsonschema raises
    ValidationError; any other exception propagates unchanged.
    Raises TypeError when the (decoded) object is not a dict.
    """
    payload = json.loads(object) if isinstance(object, str) else object
    if not isinstance(payload, dict):
        raise TypeError("Expected dict or json string")
    try:
        jsonschema.validate(payload, schema)
    except jsonschema.ValidationError:
        return False
    return True
|
|
||||||
|
|
||||||
def validate_rpc_grammar(message):
    """Validate an rpc grammar message.

    message: a dict, or a JSON string that is decoded first, holding the RPC
        envelope ("type", "data", optional "version", ...).

    The envelope is checked against grammar_rpc_schema and its "data" against
    the grammar schema selected by "version" ("0.1" when absent).

    Raises ValueError when the envelope or the grammar is invalid or the
    version is unsupported, and TypeError when the message is not a dict.
    """
    # validate() decodes JSON strings internally, but the fields are read from
    # the message below, so decode once up front.
    if isinstance(message, str):
        message = json.loads(message)
    if not validate(message, grammar_rpc_schema):
        raise ValueError("Not a valid RPC grammar")
    version = message.get("version", "0.1")
    data = message["data"]
    if version == "0.1":
        if not validate(data, grammar_schema_v01):
            raise ValueError("Not a valid grammar v0.1")
    elif version == "1.0":
        if not validate(data, grammar_schema_v10):
            raise ValueError("Not a valid grammar v1.0")
    else:
        raise ValueError("Unsupported schema version")
|
|
||||||
|
|
||||||
|
|
||||||
## test
|
|
||||||
def test(data=None):
    """Validate *data* against grammar_schema_v10 and print the outcome.

    data: grammar dict to check; a built-in sample grammar is used when it is
        omitted or empty.
    """
    if not data:
        data = {"kind": "sequence", "elements": [
            {"kind": "alternatives", "elements": ["a plain string", "an alternative string"]},
            {"kind": "word", "label": "a word", "pronunciation": {"phones": ["ah", "w", "er", "d"]}},
            {"kind": "order", "elements": [{"kind": "word", "label": "another word", "visible": False}, "last word"]}]}
    try:
        # The previous code passed the undefined name `schema` here.
        jsonschema.validate(data, grammar_schema_v10)
    except jsonschema.ValidationError as e:
        print("{0} validated not OK {1}".format(data, e.message))
    else:
        print("{} validated OK".format(data))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
test()
|
|
@ -1,4 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
#import session
|
|
||||||
from . import session
|
|
Binary file not shown.
Binary file not shown.
@ -1,254 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
|
|
||||||
|
|
||||||
## Recognition interface for actual backend. Adapted from player.asr.debug.
|
|
||||||
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
import wave
|
|
||||||
import requests
|
|
||||||
import websocket
|
|
||||||
import logging
|
|
||||||
import collections
|
|
||||||
|
|
||||||
import time
|
|
||||||
|
|
||||||
from .. import asr
|
|
||||||
|
|
||||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

## turn off annoying warnings
requests.packages.urllib3.disable_warnings()
logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(logging.WARN)

# Slice length used by Recognizer.recognize_data when sending audio packets.
buffer_size = 4096
# The four values below are combined into Recognizer.api_url as
# "<protocol>://<gm>:<port>/v<apiversion>".
gm = "gm.novolanguage.com" ## dev
protocol = "https"
port = 443
apiversion = 0

# Per-session-uuid counter: incremented in Recognizer.init_session and
# decremented in Recognizer.__del__.
sessions = collections.Counter()
|
|
||||||
|
|
||||||
def segmentation(result):
    """Convert a raw backend recognition result to an asr.segments Segmentation.

    result: list of word dicts as returned by the backend. Each dict is
        modified IN PLACE: "score" and "llh" are copied from
        confidence["prob"] / confidence["llr"], "label" is replaced by
        label["raw"], and "begin" / "end" are divided by 10. The same is done
        for the score, llh and times of every entry of the word's "phones",
        when that key is present.

    Because the input is rewritten, calling this twice on the same list fails
    (the label is no longer a dict the second time).
    """
    for w in result:
        w["score"] = w["confidence"]["prob"]
        w["llh"] = w["confidence"]["llr"]
        w["label"] = w["label"]["raw"]
        # NOTE(review): on Python 3 this is true division and yields floats;
        # under Python 2 integer inputs were floor-divided -- confirm intent.
        w["begin"] /= 10
        w["end"] /= 10
        # Tolerate results without phone-level detail instead of raising KeyError.
        for p in w.get("phones", ()):
            p["score"] = p["confidence"]["prob"]
            p["llh"] = p["confidence"]["llr"]
            p["begin"] /= 10
            p["end"] /= 10
    return asr.segments.Segmentation(result)
|
|
||||||
|
|
||||||
class rpcid:
    """Process-wide counter handing out increasing JSON-RPC request ids."""

    # Last id handed out; 0 means none has been issued yet.
    id = 0

    @staticmethod
    def next():
        """Advance the shared counter by one and return the new value."""
        issued = rpcid.id + 1
        rpcid.id = issued
        return issued
|
|
||||||
|
|
||||||
class Recognizer(object):
    """Client for one recognition session on the backend.

    Construction logs in over the HTTP API, requests a session and opens a
    websocket to it. Grammars are set with setgrammar() or
    set_alternatives_grammar(); audio is recognised with recognize_wav(),
    recognize_data() or recognize_url(), each of which returns the result of
    segmentation() for the recognised words.

    NOTE(review): TLS verification is disabled (``self.verify = False`` is
    passed to every request and as the websocket ``check_hostname`` option).
    That is kept as-is here, but should be confirmed as intentional.
    """

    def __init__(self, lang="en", gm=gm, grammar_version="0.1", user=None, password=None, snodeid=None, keepopen=False):
        """Log in as *user* and open a recognition session.

        lang: sent as "l2" in the session request.
        gm: host name used to build the API url.
        grammar_version: default version for set_alternatives_grammar().
        user, password: login credentials.
        snodeid: optional value passed through in the session request.
        keepopen: when true, the session is not deleted on destruction.
        """
        # Defined before any network call so __del__ is safe when login or
        # session setup aborts early.
        self.conn = None
        self.session = None
        self.session_id = None
        self.lang = lang
        self.keepopen = keepopen
        self.api_url = "%s://%s:%d/v%d" % (protocol, gm, port, apiversion)
        self.verify = False
        self.headers = {"Content-Type": "application/json"}
        self.login_user(user, password)
        data = {"l2": lang, "local": False, "skipupload": True}
        if snodeid:
            data["snodeid"] = snodeid
        self.init_session(data)
        self.grammar_version = grammar_version
        self.last_message = None

    def login_user(self, username, password):
        """Obtain an authentication token and store it in self.headers.

        Exits the process (sys.exit(-1)) when the request cannot be posted or
        the response reports errors; asserts that the HTTP status is ok.
        """
        # obtain authentication token of user
        logger.info('obtain auth token at %s', self.api_url)
        data = {
            'username': username,
            'password': password
        }
        try:
            r = requests.post(self.api_url + '/publishers/1/login', headers=self.headers, data=json.dumps(data), verify=self.verify)
        except Exception as e:
            # Exceptions have no .message attribute on Python 3; log the exception itself.
            logger.error("Cannot post request to GM API for user login: %s", e)
            sys.exit(-1)
        assert r.ok, r.reason
        result = r.json()
        if "errors" in result["response"]:
            logger.info("Error in logging in: %s", result["response"]["errors"])
            sys.exit(-1)

        user_auth_token = result['response']['user']['authentication_token']
        # The token is a credential, so its value is deliberately not logged.
        logger.info("User auth token obtained")

        # set auth token in header
        self.headers['Authentication-Token'] = user_auth_token

    def init_session(self, data, direct=False, use_ip=False):
        """Request a new session and connect a websocket to it.

        data: session request body (a JSON-serialisable dict).
        direct: connect with "ws" straight to the snode ip and session port
            instead of "wss" via the datacentre proxy.
        use_ip: for proxy connections, use the proxy's "ip" field rather
            than its "hostname".

        Returns None without connecting when the session request or a status
        poll fails; raises when the backend reports an error or the websocket
        cannot be opened.
        """
        logger.info('Request new session: %s', data)
        r = requests.post(self.api_url + '/sessions', headers=self.headers, data=json.dumps(data), verify=self.verify)
        if not r.ok:
            logger.error("New session request failed: %s", r.text)
            return

        status_url = r.headers.get("location")
        if status_url:
            ## we got a redirect
            status = {}
            # Poll once per second until the status is no longer PENDING.
            while True:
                logger.debug("Checking %s", status_url)
                s = requests.get(status_url, verify=self.verify)
                if not s.ok:
                    logger.error('Checking Failed: %s', s.text)
                    return

                status = s.json()
                if status['status'] == 'PENDING':
                    logger.debug("Status: %s", status['status'])
                    time.sleep(1)
                else:
                    break
            session = status['result'][0]  ## [1] is another status code...
            if "error" in session:
                logger.error("Error in getting a snode: %s", session["error"])
                raise Exception("Error in getting a snode: %s" % session["error"])
        else:
            session = r.json()

        try:
            logger.info("Session: %r", session)
            if direct:
                snode_ip = session["snode"]["ip"]
                proxy_url = snode_ip
                snode_port = session["port"]
                ws_url = "%s://%s:%d/" % ("ws", snode_ip, snode_port)
            else:
                field = "ip" if use_ip else "hostname"
                proxy_url = session['snode']['datacentre']['proxy'][field]
                ws_url = 'wss://' + proxy_url + '/' + session['uuid']
            logger.info("Connecting to websocket: %s", ws_url)
            conn = websocket.create_connection(ws_url, sslopt={"check_hostname": self.verify})
            logger.info("Connected.")
        except Exception as e:
            logger.error("Unable to connect to websocket: %s", e)
            raise

        self.session_id = session['id']
        self.proxy_url = proxy_url
        self.conn = conn
        self.session = session
        sessions[session["uuid"]] += 1

    def setgrammar(self, grammar):  ## backend grammar object: {"data": {...}, "type": "confusion_network"}
        """Validate *grammar*, send it as a "set_grammar" call and return the decoded reply."""
        data = {"jsonrpc": "2.0",
                'type': 'jsonrpc',
                'method': 'set_grammar',
                'params': grammar,
                "id": rpcid.next()}
        asr.spraaklab.schema.validate_rpc_grammar(grammar)
        self.conn.send(json.dumps(data))
        result = json.loads(self.conn.recv())
        if result.get("error"):
            logger.error("Exercise validation error: %s", result)
        return result

    def set_alternatives_grammar(self, *args, **kwargs):
        """Build a grammar with alternatives_grammar() and set it.

        Arguments are passed through; "version" defaults to the instance's
        grammar_version.
        """
        kwargs.setdefault("version", self.grammar_version)
        return self.setgrammar(alternatives_grammar(*args, **kwargs))

    def recognize_wav(self, wavf):
        """Recognise the audio in the wave file *wavf*.

        The file must be mono, 2 bytes per sample, 16000 Hz and uncompressed;
        otherwise an error is logged and None is returned.
        """
        w = wave.open(wavf, 'r')
        try:
            nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
            if nchannels > 1:
                logger.error("Please use .wav with only 1 channel, found %d channels in %s", nchannels, wavf)
                return
            if sampwidth != 2:
                logger.error("Please use .wav with 2-byte PCM data, found %d bytes in %s", sampwidth, wavf)
                return
            if framerate != 16000.0:
                logger.error("Please use .wav sampled at 16000 Hz, found %1.0f in %s", framerate, wavf)
                return
            if comptype != 'NONE':
                logger.error("Please use .wav with uncompressed data, found %s in %s", compname, wavf)
                return
            buf = w.readframes(nframes)
        finally:
            # Close the file on the early-return paths as well.
            w.close()
        return self.recognize_data(buf)

    def recognize_data(self, buf):
        """Send the raw audio bytes *buf* in buffer_size slices and return the segmentation."""
        nbytes_sent = 0
        start = time.time()
        for j in range(0, len(buf), buffer_size):
            # bytes(), not str(): on Python 3 str(b"...") yields the textual
            # repr "b'...'" and would corrupt the audio.
            audio_packet = bytes(buf[j:j + buffer_size])
            nbytes_sent += len(audio_packet)
            self.conn.send_binary(audio_packet)
        self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
        return self._receive_result(start)

    def recognize_url(self, url):
        """Ask the backend to fetch and recognise the audio at *url*; return the segmentation."""
        start = time.time()
        data = json.dumps({"jsonrpc": "2.0", "method": "send_audio", "id": rpcid.next(), "params": {"type": "url", "data": url, "details": ["word", "utterance"]}})
        self.conn.send(data)
        return self._receive_result(start)

    def _receive_result(self, start):
        """Wait for one reply, keep it in last_message and turn it into a segmentation.

        start: time.time() value taken when the request began, used only to
            log the duration.
        Raises RuntimeError when the reply contains an "error" member.
        """
        logger.info("Waiting for recognition result...")
        self.last_message = self.conn.recv()  ## keep result for the interested applications
        logger.debug("Raw recognition result: %s", self.last_message)
        message = json.loads(self.last_message)
        dur = time.time() - start
        logger.info("Recognition took %5.3f seconds", dur)
        if "error" in message:
            raise RuntimeError("Error from recognition backend: %r" % message.get("error"))
        return segmentation(message["result"]["words"])

    def __del__(self):
        # Nothing to release when session setup never completed.
        session = getattr(self, "session", None)
        if not session:
            return
        uuid = session["uuid"]
        sessions[uuid] -= 1
        conn = getattr(self, "conn", None)
        if conn and sessions[uuid] <= 0:
            conn.close()
            url = self.api_url + '/sessions/%d' % self.session_id
            if self.keepopen:
                logger.info("Keeping session open...")
            else:
                logger.info("Closing session: %s", url)
                r = requests.delete(url, headers=self.headers, verify=self.verify)
                # An exception raised in __del__ cannot reach any caller, so
                # report the failure instead of asserting.
                if not r.ok:
                    logger.warning("Closing session failed: %s", r.reason)
|
|
||||||
|
|
||||||
def alternatives_grammar(parts, version="0.1", ret=None):
    """Make a grammar of alternatives, as array(sequence)-of-array(alternatives)-of-strings.

    parts: sequence of parts; each part is a list of alternative strings. For
        version "1.0" a part given as a single string is treated as a
        one-alternative list.
    version: "0.1" or "1.0"; selects the layout of the "data" member.
    ret: optional list. For "0.1", containing "dict" sets "return_dict"; for
        "1.0" the list is stored as "return_objects".

    Returns the RPC grammar dict after validating it with
    validate_rpc_grammar. Raises ValueError for an unsupported version.
    """
    r = {"type": "confusion_network", "version": version}
    if version == "0.1":
        r["data"] = {"type": "multiple_choice", "parts": parts}
        if isinstance(ret, list) and "dict" in ret:
            r["return_dict"] = True
    elif version == "1.0":
        seqels = []
        for part in parts:
            # Iterating a bare string would yield one alternative per character.
            alternatives = [part] if isinstance(part, str) else part
            altels = []
            for alt in alternatives:
                words = alt.split()
                # A multi-word alternative becomes a nested sequence of its words.
                if len(words) > 1:
                    alt = {"kind": "sequence", "elements": words}
                altels.append(alt)
            seqels.append({"kind": "alternatives", "elements": altels})
        r["data"] = {"kind": "sequence", "elements": seqels}
        if isinstance(ret, list):
            r["return_objects"] = ret
    else:
        raise ValueError("Unsupported version: %s" % version)
    asr.spraaklab.schema.validate_rpc_grammar(r)
    return r
|
|
@ -1,25 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
|
|
||||||
## from https://stackoverflow.com/questions/1447287/format-floats-with-standard-json-module
|
|
||||||
class PrettyFloat(float):
    """A float whose repr() is limited to 15 significant digits."""

    def __repr__(self):
        # %g formatting with 15 significant digits; trailing zeros are dropped.
        shown = '%.15g' % (self,)
        return shown
|
|
||||||
|
|
||||||
def pretty_floats(obj):
    """Return a copy of *obj* with every float wrapped in PrettyFloat.

    Dicts are rebuilt with converted values, lists and tuples become lists of
    converted items, and any other object is returned unchanged.
    """
    if isinstance(obj, float):
        return PrettyFloat(obj)
    elif isinstance(obj, dict):
        return dict((k, pretty_floats(v)) for k, v in obj.items())
    elif isinstance(obj, (list, tuple)):
        # A real list: on Python 3 map() would return a lazy iterator.
        return [pretty_floats(item) for item in obj]
    return obj
|
|
||||||
|
|
||||||
def rounded_floats(obj, ndigits=15):
    """Return a copy of *obj* with every float rounded and wrapped in PrettyFloat.

    obj: a float, dict, list, tuple or any other object.
    ndigits: number of decimals passed to round() for each float.

    Dicts are rebuilt with converted values, lists and tuples become lists of
    converted items, and any other object is returned unchanged.
    """
    if isinstance(obj, float):
        return PrettyFloat(round(obj, ndigits))
    elif isinstance(obj, dict):
        return dict((k, rounded_floats(v, ndigits)) for k, v in obj.items())
    elif isinstance(obj, (list, tuple)):
        # A real list: on Python 3 map() would return a lazy iterator.
        return [rounded_floats(item, ndigits) for item in obj]
    return obj
|
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user