Compare commits
	
		
			2 Commits
		
	
	
		
			0777735979
			...
			3500a8cdf0
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 3500a8cdf0 | ||
|  | b87a81eb9d | 
							
								
								
									
										264
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										264
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,264 @@ | ||||
| ## Ignore Visual Studio temporary files, build results, and | ||||
| ## files generated by popular Visual Studio add-ons. | ||||
|  | ||||
| ## important ## | ||||
| .acoustic_model/forced_alignment_novo.py | ||||
|  | ||||
| # User-specific files | ||||
| *.suo | ||||
| *.user | ||||
| *.userosscache | ||||
| *.sln.docstates | ||||
|  | ||||
| # User-specific files (MonoDevelop/Xamarin Studio) | ||||
| *.userprefs | ||||
|  | ||||
| # Build results | ||||
| [Dd]ebug/ | ||||
| [Dd]ebugPublic/ | ||||
| [Rr]elease/ | ||||
| [Rr]eleases/ | ||||
| x64/ | ||||
| x86/ | ||||
| bld/ | ||||
| [Bb]in/ | ||||
| [Oo]bj/ | ||||
| [Ll]og/ | ||||
|  | ||||
| # Visual Studio 2015 cache/options directory | ||||
| .vs/ | ||||
| # Uncomment if you have tasks that create the project's static files in wwwroot | ||||
| #wwwroot/ | ||||
|  | ||||
| # MSTest test Results | ||||
| [Tt]est[Rr]esult*/ | ||||
| [Bb]uild[Ll]og.* | ||||
|  | ||||
| # NUNIT | ||||
| *.VisualState.xml | ||||
| TestResult.xml | ||||
|  | ||||
| # Build Results of an ATL Project | ||||
| [Dd]ebugPS/ | ||||
| [Rr]eleasePS/ | ||||
| dlldata.c | ||||
|  | ||||
| # DNX | ||||
| project.lock.json | ||||
| project.fragment.lock.json | ||||
| artifacts/ | ||||
|  | ||||
| *_i.c | ||||
| *_p.c | ||||
| *_i.h | ||||
| *.ilk | ||||
| *.meta | ||||
| *.obj | ||||
| *.pch | ||||
| *.pdb | ||||
| *.pgc | ||||
| *.pgd | ||||
| *.rsp | ||||
| *.sbr | ||||
| *.tlb | ||||
| *.tli | ||||
| *.tlh | ||||
| *.tmp | ||||
| *.tmp_proj | ||||
| *.log | ||||
| *.vspscc | ||||
| *.vssscc | ||||
| .builds | ||||
| *.pidb | ||||
| *.svclog | ||||
| *.scc | ||||
|  | ||||
| # Chutzpah Test files | ||||
| _Chutzpah* | ||||
|  | ||||
| # Visual C++ cache files | ||||
| ipch/ | ||||
| *.aps | ||||
| *.ncb | ||||
| *.opendb | ||||
| *.opensdf | ||||
| *.sdf | ||||
| *.cachefile | ||||
| *.VC.db | ||||
| *.VC.VC.opendb | ||||
|  | ||||
| # Visual Studio profiler | ||||
| *.psess | ||||
| *.vsp | ||||
| *.vspx | ||||
| *.sap | ||||
|  | ||||
| # TFS 2012 Local Workspace | ||||
| $tf/ | ||||
|  | ||||
| # Guidance Automation Toolkit | ||||
| *.gpState | ||||
|  | ||||
| # ReSharper is a .NET coding add-in | ||||
| _ReSharper*/ | ||||
| *.[Rr]e[Ss]harper | ||||
| *.DotSettings.user | ||||
|  | ||||
| # JustCode is a .NET coding add-in | ||||
| .JustCode | ||||
|  | ||||
| # TeamCity is a build add-in | ||||
| _TeamCity* | ||||
|  | ||||
| # DotCover is a Code Coverage Tool | ||||
| *.dotCover | ||||
|  | ||||
| # NCrunch | ||||
| _NCrunch_* | ||||
| .*crunch*.local.xml | ||||
| nCrunchTemp_* | ||||
|  | ||||
| # MightyMoose | ||||
| *.mm.* | ||||
| AutoTest.Net/ | ||||
|  | ||||
| # Web workbench (sass) | ||||
| .sass-cache/ | ||||
|  | ||||
| # Installshield output folder | ||||
| [Ee]xpress/ | ||||
|  | ||||
| # DocProject is a documentation generator add-in | ||||
| DocProject/buildhelp/ | ||||
| DocProject/Help/*.HxT | ||||
| DocProject/Help/*.HxC | ||||
| DocProject/Help/*.hhc | ||||
| DocProject/Help/*.hhk | ||||
| DocProject/Help/*.hhp | ||||
| DocProject/Help/Html2 | ||||
| DocProject/Help/html | ||||
|  | ||||
| # Click-Once directory | ||||
| publish/ | ||||
|  | ||||
| # Publish Web Output | ||||
| *.[Pp]ublish.xml | ||||
| *.azurePubxml | ||||
| # TODO: Comment the next line if you want to checkin your web deploy settings | ||||
| # but database connection strings (with potential passwords) will be unencrypted | ||||
| #*.pubxml | ||||
| *.publishproj | ||||
|  | ||||
| # Microsoft Azure Web App publish settings. Comment the next line if you want to | ||||
| # checkin your Azure Web App publish settings, but sensitive information contained | ||||
| # in these scripts will be unencrypted | ||||
| PublishScripts/ | ||||
|  | ||||
| # NuGet Packages | ||||
| *.nupkg | ||||
| # The packages folder can be ignored because of Package Restore | ||||
| **/packages/* | ||||
| # except build/, which is used as an MSBuild target. | ||||
| !**/packages/build/ | ||||
| # Uncomment if necessary however generally it will be regenerated when needed | ||||
| #!**/packages/repositories.config | ||||
| # NuGet v3's project.json files produces more ignoreable files | ||||
| *.nuget.props | ||||
| *.nuget.targets | ||||
|  | ||||
| # Microsoft Azure Build Output | ||||
| csx/ | ||||
| *.build.csdef | ||||
|  | ||||
| # Microsoft Azure Emulator | ||||
| ecf/ | ||||
| rcf/ | ||||
|  | ||||
| # Windows Store app package directories and files | ||||
| AppPackages/ | ||||
| BundleArtifacts/ | ||||
| Package.StoreAssociation.xml | ||||
| _pkginfo.txt | ||||
|  | ||||
| # Visual Studio cache files | ||||
| # files ending in .cache can be ignored | ||||
| *.[Cc]ache | ||||
| # but keep track of directories ending in .cache | ||||
| !*.[Cc]ache/ | ||||
|  | ||||
| # Others | ||||
| ClientBin/ | ||||
| ~$* | ||||
| *~ | ||||
| *.dbmdl | ||||
| *.dbproj.schemaview | ||||
| *.jfm | ||||
| *.pfx | ||||
| *.publishsettings | ||||
| node_modules/ | ||||
| orleans.codegen.cs | ||||
|  | ||||
| # Since there are multiple workflows, uncomment next line to ignore bower_components | ||||
| # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) | ||||
| #bower_components/ | ||||
|  | ||||
| # RIA/Silverlight projects | ||||
| Generated_Code/ | ||||
|  | ||||
| # Backup & report files from converting an old project file | ||||
| # to a newer Visual Studio version. Backup files are not needed, | ||||
| # because we have git ;-) | ||||
| _UpgradeReport_Files/ | ||||
| Backup*/ | ||||
| UpgradeLog*.XML | ||||
| UpgradeLog*.htm | ||||
|  | ||||
| # SQL Server files | ||||
| *.mdf | ||||
| *.ldf | ||||
|  | ||||
| # Business Intelligence projects | ||||
| *.rdl.data | ||||
| *.bim.layout | ||||
| *.bim_*.settings | ||||
|  | ||||
| # Microsoft Fakes | ||||
| FakesAssemblies/ | ||||
|  | ||||
| # GhostDoc plugin setting file | ||||
| *.GhostDoc.xml | ||||
|  | ||||
| # Node.js Tools for Visual Studio | ||||
| .ntvs_analysis.dat | ||||
|  | ||||
| # Visual Studio 6 build log | ||||
| *.plg | ||||
|  | ||||
| # Visual Studio 6 workspace options file | ||||
| *.opt | ||||
|  | ||||
| # Visual Studio LightSwitch build output | ||||
| **/*.HTMLClient/GeneratedArtifacts | ||||
| **/*.DesktopClient/GeneratedArtifacts | ||||
| **/*.DesktopClient/ModelManifest.xml | ||||
| **/*.Server/GeneratedArtifacts | ||||
| **/*.Server/ModelManifest.xml | ||||
| _Pvt_Extensions | ||||
|  | ||||
| # Paket dependency manager | ||||
| .paket/paket.exe | ||||
| paket-files/ | ||||
|  | ||||
| # FAKE - F# Make | ||||
| .fake/ | ||||
|  | ||||
| # JetBrains Rider | ||||
| .idea/ | ||||
| *.sln.iml | ||||
|  | ||||
| # CodeRush | ||||
| .cr/ | ||||
|  | ||||
| # Python Tools for Visual Studio (PTVS) | ||||
| __pycache__/ | ||||
| *.pyc | ||||
										
											Binary file not shown.
										
									
								
							| @@ -18,8 +18,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution | ||||
| 		..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py | ||||
| 		..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py | ||||
| 		..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py | ||||
| 		..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py | ||||
| 		..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py | ||||
| 		..\..\..\..\..\OneDrive\WSL\python-novo-api\test\testgrammar.py = ..\..\..\..\..\OneDrive\WSL\python-novo-api\test\testgrammar.py | ||||
| 	EndProjectSection | ||||
| EndProject | ||||
| Global | ||||
|   | ||||
										
											Binary file not shown.
										
									
								
							| @@ -4,7 +4,7 @@ | ||||
|     <SchemaVersion>2.0</SchemaVersion> | ||||
|     <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid> | ||||
|     <ProjectHome>.</ProjectHome> | ||||
|     <StartupFile>performance_check.py</StartupFile> | ||||
|     <StartupFile>check_novoapi.py</StartupFile> | ||||
|     <SearchPath> | ||||
|     </SearchPath> | ||||
|     <WorkingDirectory>.</WorkingDirectory> | ||||
| @@ -25,6 +25,9 @@ | ||||
|     <Compile Include="acoustic_model_functions.py"> | ||||
|       <SubType>Code</SubType> | ||||
|     </Compile> | ||||
|     <Compile Include="check_novoapi.py"> | ||||
|       <SubType>Code</SubType> | ||||
|     </Compile> | ||||
|     <Compile Include="convert_xsampa2ipa.py"> | ||||
|       <SubType>Code</SubType> | ||||
|     </Compile> | ||||
| @@ -34,7 +37,10 @@ | ||||
|     <Compile Include="fa_test.py"> | ||||
|       <SubType>Code</SubType> | ||||
|     </Compile> | ||||
|     <Compile Include="performance_check.py"> | ||||
|     <Compile Include="forced_alignment_novo.py"> | ||||
|       <SubType>Code</SubType> | ||||
|     </Compile> | ||||
|     <Compile Include="htk_vs_kaldi.py"> | ||||
|       <SubType>Code</SubType> | ||||
|     </Compile> | ||||
|   </ItemGroup> | ||||
|   | ||||
							
								
								
									
										40
									
								
								acoustic_model/check_novoapi.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								acoustic_model/check_novoapi.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | ||||
| import os | ||||
| os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') | ||||
|  | ||||
| import sys | ||||
| import csv | ||||
| #import subprocess | ||||
| #from collections import Counter | ||||
| #import re | ||||
|  | ||||
| import numpy as np | ||||
| import pandas as pd | ||||
| #import matplotlib.pyplot as plt | ||||
| #from sklearn.metrics import confusion_matrix | ||||
|  | ||||
| import acoustic_model_functions as am_func | ||||
| import convert_xsampa2ipa | ||||
| import defaultfiles as default | ||||
|  | ||||
| from forced_alignment import pyhtk | ||||
|  | ||||
| import novoapi  | ||||
|  | ||||
| ## ======================= convert phones ====================== | ||||
| mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) | ||||
|  | ||||
| stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx) | ||||
|  | ||||
| phonelist_novo70_      = pd.ExcelFile(default.phonelist_novo70_xlsx) | ||||
| df = pd.read_excel(phonelist_novo70_, 'list') | ||||
|  | ||||
|  | ||||
| ## novo phoneset | ||||
| #translation_key = dict() | ||||
| ## the *_simple columns contain only the entries that consist of a single phone. | ||||
| #for ipa, novo70 in zip(df['IPA_simple'], df['novo70_simple']): | ||||
| #    if not pd.isnull(ipa): | ||||
| #        print('{0}:{1}'.format(ipa, novo70)) | ||||
| #        translation_key[ipa] = novo70 | ||||
| #phonelist_novo70 = np.unique(list(df['novo70_simple'])) | ||||
|  | ||||
| @@ -3,7 +3,7 @@ import os | ||||
| #default_hvite_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'htk', 'config.HVite') | ||||
|  | ||||
| cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model' | ||||
| kaldi_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5' | ||||
|  | ||||
| #config_hcopy = os.path.join(cygwin_dir, 'config', 'config.HCopy') | ||||
| #config_train = os.path.join(cygwin_dir, 'config', 'config.train') | ||||
| config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite') | ||||
| @@ -30,7 +30,16 @@ repo_dir = r'C:\Users\Aki\source\repos' | ||||
| ipa_xsampa_converter_dir    = os.path.join(repo_dir, 'ipa-xsama-converter') | ||||
| forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment') | ||||
|  | ||||
| fame_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus' | ||||
| experiments_dir = r'c:\OneDrive\Research\rug\experiments' | ||||
| WSL_dir   = r'C:\OneDrive\WSL' | ||||
| fame_dir        = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') | ||||
| fame_s5_dir     = os.path.join(fame_dir, 's5') | ||||
| fame_corpus_dir = os.path.join(fame_dir, 'corpus') | ||||
|  | ||||
| experiments_dir = r'c:\OneDrive\Research\rug\experiments' | ||||
| stimmen_transcription_xlsx = os.path.join(experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx') | ||||
| stimmen_data_dir           = os.path.join(experiments_dir, 'stimmen', 'data') | ||||
| phonelist_friesian_txt     = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') | ||||
|  | ||||
| novo_api_dir = os.path.join(WSL_dir, 'python-novo-api') | ||||
| cmu69_phoneset = os.path.join(novo_api_dir, 'novoapi', 'asr', 'phoneset', 'en', 'cmu69.phoneset') | ||||
|  | ||||
| phonelist = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') | ||||
| @@ -2,15 +2,52 @@ import os | ||||
| import sys | ||||
| os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| import defaultfiles as default | ||||
|  | ||||
| sys.path.append(os.path.join(default.repo_dir, 'forced_alignment')) | ||||
| from forced_alignment import forced_alignment | ||||
| from forced_alignment import forced_alignment, lexicon, convert_phone_set | ||||
|  | ||||
| wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav' | ||||
| forced_alignment( | ||||
|     wav_file, | ||||
|     #'Australië' | ||||
|     'BUFFETCOUPON COULISSEN DOUANE' | ||||
|     ) | ||||
| #wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav' | ||||
| #forced_alignment( | ||||
| #    wav_file, | ||||
| #    'Australië' | ||||
| #    #'BUFFETCOUPON COULISSEN DOUANE' | ||||
| #    ) | ||||
|  | ||||
# CGN phone inventory, according to:
# http://lands.let.ru.nl/cgn/doc_Dutch/topics/version_1.0/annot/phonetics/fon_prot.pdf
# The plosive row previously listed 'd' twice and had no 'b'.
phone_list_cgn = ['p', 'b', 't', 'd', 'k', 'g',  # plosives
                  'f', 'v', 's', 'z', 'S', 'Z', 'x', 'G', 'h',  # fricatives
                  'N', 'm', 'n', 'J', 'l', 'r', 'w', 'j',  # sonorants
                  'I', 'E', 'A', 'O', 'Y',  # short vowels
                  'i', 'y', 'e', '2', 'a', 'o', 'u',  # long vowels
                  '@',  # schwa
                  'E+', 'Y+', 'A+',  # diphthongs
                  'E:', 'Y:', 'O:',  # loan vowels
                  'E~', 'A~', 'O~', 'Y~',  # nasal vowels
                  ]
|  | ||||
# Load the words from the lexicon: the first whitespace-separated field of
# every non-empty line is the word entry.
lexicon_file = r'C:\cygwin64\home\Aki\acoustic_model\material\barbara\2010_2510_lexicon_pronvars_HTK.txt'
with open(lexicon_file, 'r') as f:
    lines = f.readlines()

words = []
for line in lines:
    line_split = line.split()
    if len(line_split) > 0:
        word = line_split[0]
        # str.replace returns a new string; the result used to be discarded,
        # so the '+s' marker was never actually removed.
        word = word.replace('+s', '')
        word = word.split('-')
        # NOTE(review): this appends a list of parts per entry, so `words` is a
        # list of lists; if the parts themselves are wanted, this presumably
        # should be `words.extend(word)` -- confirm before changing.
        words.append(word)
words = list(np.unique(words))
|  | ||||
| pronunciations = lexicon._grapheme_to_phoneme(words) | ||||
| htks = [] | ||||
| phone_list = set() | ||||
| for word in pronunciations.keys(): | ||||
|     ipa = pronunciations[word] | ||||
|     htk = convert_phone_set.split_ipa(ipa) | ||||
|     htks.append(htk) | ||||
|     phone_list = phone_list | set(htk) | ||||
							
								
								
									
										133
									
								
								acoustic_model/forced_alignment_novo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								acoustic_model/forced_alignment_novo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | ||||
| # | ||||
| # forced alignment using novo-api. | ||||
| # | ||||
| # *** IMPORTANT *** | ||||
| # This file should be treated as confidential. | ||||
| # This file should not be copied or uploaded to public sites. | ||||
| #   | ||||
| # NOTES: | ||||
| # The usage of novo api: https://bitbucket.org/novolanguage/python-novo-api | ||||
| # I couldn't make it work as I described in the mail to Martijn Bartelds on 2018/12/03. | ||||
| # Following his advice, I modified testgrammar.py and turned it into a function. | ||||
| # | ||||
| # In order to run on Python 3.6, the following points are changed in novo-api. | ||||
| # (1) backend/__init__.py | ||||
| # - #import session | ||||
| #   from . import session | ||||
| # (2) backend/session.py | ||||
| # - #except Exception, e: | ||||
| #   except Exception as e: | ||||
| # - #print self.last_message | ||||
| #   print(self.last_message) | ||||
| # (3) asr/segment/praat.py | ||||
| # - def print_tier(output, title, begin, end, segs, (format, formatter)) | ||||
| #   def print_tier(output, title, begin, end, segs, format, formatter): | ||||
| # (4) asr/spraaklab/__init__.py | ||||
| # - #import session | ||||
| #   from . import session | ||||
| # (5) asr/spraaklab/schema.py | ||||
| # - #print data, "validated not OK", e.message | ||||
| #   print("{0} validated not OK {1}".format(data, e.message)) | ||||
| # - #print data, "validated OK" | ||||
| #   print("{} validated OK".format(data)) | ||||
| # - #if isinstance(object, basestring): | ||||
| #	if isinstance(object, str) | ||||
| # | ||||
| # Aki Kunikoshi | ||||
| # 428968@gmail.com | ||||
| # | ||||
|  | ||||
| import argparse | ||||
| import json | ||||
|  | ||||
| from novoapi.backend import session | ||||
|  | ||||
| # username / password cannot be passed as arguments... | ||||
| p = argparse.ArgumentParser() | ||||
| #p.add_argument("--user", default=None) | ||||
| #p.add_argument("--password", default=None) | ||||
| p.add_argument("--user", default='martijn.wieling') | ||||
| p.add_argument("--password", default='fa0Thaic') | ||||
| args = p.parse_args() | ||||
|  | ||||
| wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav' | ||||
|  | ||||
| rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir) | ||||
|  | ||||
| grammar = { | ||||
|   "type": "confusion_network", | ||||
|   "version": "1.0", | ||||
|   "data": { | ||||
|     "kind": "sequence", | ||||
|     "elements": [ | ||||
|       { | ||||
|         "kind": "word", | ||||
|         "pronunciation": [ | ||||
|           { | ||||
|             "phones": [ | ||||
|               "wv", | ||||
|               "a1", | ||||
|               "n" | ||||
|             ], | ||||
|             "id": 0 | ||||
|           }, | ||||
|           { | ||||
|             "phones": [ | ||||
|               "wv", | ||||
|               "uh1", | ||||
|               "n" | ||||
|             ], | ||||
|             "id": 1 | ||||
|           } | ||||
|         ], | ||||
|         "label": "one" | ||||
|       }, | ||||
|       { | ||||
|         "kind": "word", | ||||
|         "pronunciation": [ | ||||
|           { | ||||
|             "phones": [ | ||||
|               "t", | ||||
|               "uw1" | ||||
|             ], | ||||
|             "id": 0 | ||||
|           } | ||||
|         ], | ||||
|         "label": "two" | ||||
|       }, | ||||
|       { | ||||
|         "kind": "word", | ||||
|         "pronunciation": [ | ||||
|           { | ||||
|             "phones": [ | ||||
|               "t", | ||||
|               "r", | ||||
|               "iy1" | ||||
|             ], | ||||
|             "id": 0 | ||||
|           }, | ||||
|           { | ||||
|             "phones": [ | ||||
|               "s", | ||||
|               "r", | ||||
|               "iy1" | ||||
|             ], | ||||
|             "id": 1 | ||||
|           } | ||||
|         ], | ||||
|         "label": "three" | ||||
|       } | ||||
|     ] | ||||
|   }, | ||||
|   "return_objects": [ | ||||
|     "grammar" | ||||
|   ], | ||||
|   "phoneset": "novo70" | ||||
| } | ||||
|  | ||||
| res = rec.setgrammar(grammar) | ||||
| #print "Set grammar result", res | ||||
|  | ||||
| #res = rec.recognize_wav("test/onetwothree.wav") | ||||
| res = rec.recognize_wav(wav_file) | ||||
| #print "Recognition result:", json.dumps(res.export(), indent=4) | ||||
| @@ -3,7 +3,7 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') | ||||
| 
 | ||||
| import sys | ||||
| import csv | ||||
| import subprocess | ||||
| #import subprocess | ||||
| from collections import Counter | ||||
| import re | ||||
| 
 | ||||
| @@ -20,8 +20,6 @@ from forced_alignment import pyhtk | ||||
| 
 | ||||
| 
 | ||||
| ## ======================= user define ======================= | ||||
| excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx') | ||||
| data_dir   = os.path.join(default.experiments_dir, 'stimmen', 'data') | ||||
| 
 | ||||
| wav_dir	= r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k | ||||
| 
 | ||||
| @@ -30,12 +28,12 @@ htk_dict_dir       = os.path.join(default.experiments_dir, 'stimmen', 'dic_short | ||||
| fa_dir             = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k') | ||||
| result_dir         = os.path.join(default.experiments_dir, 'stimmen', 'result') | ||||
| 
 | ||||
| kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme')  | ||||
| kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict') | ||||
| kaldi_data_dir = os.path.join(default.fame_s5_dir, 'data', 'alignme')  | ||||
| kaldi_dict_dir = os.path.join(default.fame_s5_dir, 'data', 'local', 'dict') | ||||
| lexicon_txt    = os.path.join(kaldi_dict_dir, 'lexicon.txt') | ||||
| 
 | ||||
| #lex_asr	 = os.path.join(default.fame_dir, 'lexicon', 'lex.asr') | ||||
| #lex_asr_htk = os.path.join(default.fame_dir, 'lexicon', 'lex.asr_htk') | ||||
| #lex_asr	 = os.path.join(default.fame_corpus_dir, 'lexicon', 'lex.asr') | ||||
| #lex_asr_htk = os.path.join(default.fame_corpus_dir, 'lexicon', 'lex.asr_htk') | ||||
| 
 | ||||
| 
 | ||||
| # procedure | ||||
| @@ -48,8 +46,6 @@ load_forced_alignment_kaldi = 1 | ||||
| eval_forced_alignment_kaldi = 1 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| ## ======================= add paths ======================= | ||||
| sys.path.append(os.path.join(default.repo_dir, 'forced_alignment')) | ||||
| from forced_alignment import convert_phone_set | ||||
| @@ -62,12 +58,12 @@ from evaluation import plot_confusion_matrix | ||||
| ## ======================= convert phones ====================== | ||||
| mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) | ||||
| 
 | ||||
| xls = pd.ExcelFile(excel_file) | ||||
| xls = pd.ExcelFile(default.stimmen_transcription_xlsx) | ||||
| 
 | ||||
| ## check conversion | ||||
| #df = pd.read_excel(xls, 'frequency') | ||||
| #df = pd.read_excel(xls, 'check') | ||||
| #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): | ||||
| #    #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_) | ||||
| #    if xsampa is not '/': | ||||
| #        ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) | ||||
| #        if not ipa_converted == ipa: | ||||
| #            print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) | ||||
| @@ -160,7 +156,7 @@ if do_forced_alignment_htk: | ||||
|                     htk_dict_file = os.path.join(htk_dict_dir, word + '.dic') | ||||
| 
 | ||||
|                     pyhtk.doHVite(wav_file, label_file, htk_dict_file, fa_file, default.config_hvite,  | ||||
|                                 default.phonelist, acoustic_model) | ||||
|                                 default.phonelist_friesian_txt, acoustic_model) | ||||
|                     os.remove(label_file) | ||||
| 
 | ||||
|                     prediction = am_func.read_fileFA(fa_file) | ||||
| @@ -231,7 +227,7 @@ if make_kaldi_data_files: | ||||
| 
 | ||||
| ## ======================= make lexicon txt which is used by Kaldi ======================= | ||||
| if make_kaldi_lexicon_txt: | ||||
|     option_num = 6 | ||||
|     option_num = 7 | ||||
| 
 | ||||
|     # remove previous file. | ||||
|     if os.path.exists(lexicon_txt): | ||||
| @@ -278,13 +274,13 @@ if make_kaldi_lexicon_txt: | ||||
| 
 | ||||
| ## ======================= load kaldi forced alignment result ======================= | ||||
| if load_forced_alignment_kaldi: | ||||
|     phones_txt = os.path.join(default.kaldi_dir, 'data', 'lang', 'phones.txt') | ||||
|     merged_alignment_txt = os.path.join(default.kaldi_dir, 'exp', 'tri1_alignme', 'merged_alignment.txt') | ||||
|     phones_txt = os.path.join(default.fame_s5_dir, 'data', 'lang', 'phones.txt') | ||||
|     merged_alignment_txt = os.path.join(default.fame_s5_dir, 'exp', 'tri1_alignme', 'merged_alignment.txt') | ||||
|      | ||||
|     #filenames	   = np.load(data_dir + '\\filenames.npy') | ||||
|     #words		   = np.load(data_dir + '\\words.npy') | ||||
|     #pronunciations = np.load(data_dir + '\\pronunciations_ipa.npy') | ||||
|     #pronvar_list_all = np.load(data_dir + '\\pronvar_list_all.npy') | ||||
|     #filenames	   = np.load(stimmen_data_dir + '\\filenames.npy') | ||||
|     #words		   = np.load(stimmen_data_dir + '\\words.npy') | ||||
|     #pronunciations = np.load(stimmen_data_dir + '\\pronunciations_ipa.npy') | ||||
|     #pronvar_list_all = np.load(stimmen_data_dir + '\\pronvar_list_all.npy') | ||||
|     #word_list = np.unique(words)     | ||||
| 
 | ||||
|     # load the mapping between phones and ids. | ||||
| @@ -369,7 +365,7 @@ if eval_forced_alignment_htk: | ||||
|         if compare_hmm_num: | ||||
|             f_result.write("{},".format(hmm_num_str)) | ||||
| 
 | ||||
|         #match = np.load(data_dir + '\\match_hmm' + hmm_num_str + '.npy') | ||||
|         #match = np.load(stimmen_data_dir + '\\match_hmm' + hmm_num_str + '.npy') | ||||
|         #prediction = np.load(os.path.join(result_dir, 'htk', 'predictions_hmm' + hmm_num_str + '.npy')) | ||||
|         #prediction = pd.Series(prediction, index=df.index, name='prediction') | ||||
|         #result = pd.concat([df, prediction], axis=1) | ||||
							
								
								
									
										5
									
								
								novoapi_for_python3x/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								novoapi_for_python3x/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
#!/usr/bin/env python

__version__ = "0.2"

# Python 3 has no implicit relative imports: a bare `import backend` looks for
# a top-level module and fails inside this package. Use the explicit relative
# form, as the sub-packages' __init__ files already do.
from . import backend
							
								
								
									
										
											BIN
										
									
								
								novoapi_for_python3x/__pycache__/__init__.cpython-36.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								novoapi_for_python3x/__pycache__/__init__.cpython-36.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										6
									
								
								novoapi_for_python3x/asr/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								novoapi_for_python3x/asr/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| #import segments | ||||
| #import spraaklab | ||||
| from . import segments | ||||
| from . import spraaklab | ||||
							
								
								
									
										
											BIN
										
									
								
								novoapi_for_python3x/asr/__pycache__/__init__.cpython-36.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								novoapi_for_python3x/asr/__pycache__/__init__.cpython-36.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										4
									
								
								novoapi_for_python3x/asr/segments/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								novoapi_for_python3x/asr/segments/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| from .segments import Segmentation | ||||
| from .praat import seg2tg | ||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										77
									
								
								novoapi_for_python3x/asr/segments/praat.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								novoapi_for_python3x/asr/segments/praat.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,77 @@ | ||||
| #!/usr/bin/env python | ||||
| # (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen | ||||
|  | ||||
| import codecs | ||||
|  | ||||
def print_header(output, begin, end, nr_tiers):
    """Write the TextGrid file header to *output*.

    Args:
        output: writable text stream; each header line is written to it
            followed by a newline.
        begin: start time, interpolated with ``%s`` into the ``xmin`` line.
        end: end time, interpolated with ``%s`` into the ``xmax`` line.
        nr_tiers: number of tiers, interpolated with ``%d`` into ``size``.
    """
    # The Python 2 form ``print >> output, ...`` still parses in Python 3 but
    # raises TypeError at run time, so use the print() function instead.
    header_lines = (
        'File type = "ooTextFile"',
        'Object class = "TextGrid"',
        '',
        'xmin = %s' % begin,
        'xmax = %s' % end,
        'tiers? <exists>',
        'size = %d' % nr_tiers,
        'item []:',
    )
    for line in header_lines:
        print(line, file=output)
|  | ||||
|  | ||||
def print_info_tier(output, title, begin, end, label):
    """Write an interval tier holding a single interval to *output*.

    The one interval spans ``begin``..``end`` and carries ``label`` as text.

    Args:
        output: writable text stream.
        title: tier name.
        begin: start time, written via ``%s`` for both the tier and interval.
        end: end time, written via ``%s`` for both the tier and interval.
        label: text of the single interval.
    """
    # ``print >> output`` is Python 2 only (TypeError at run time on Python 3).
    print('\titem [%d]:' % 0, file=output)
    print('\t\tclass = "IntervalTier"', file=output)
    print('\t\tname = "%s"' % title, file=output)
    print('\t\txmin = %s' % begin, file=output)
    print('\t\txmax = %s' % end, file=output)
    print('\t\tintervals: size = %d' % 1, file=output)

    print('\t\tintervals [1]:', file=output)
    print('\t\t\txmin = %s' % begin, file=output)
    print('\t\t\txmax = %s' % end, file=output)
    print('\t\t\ttext = "%s"' % label, file=output)
|  | ||||
|  | ||||
def print_tier(output, title, begin, end, segs, format, formatter=None):
    """Write one interval tier, with one interval per segment, to *output*.

    Args:
        output: writable text stream.
        title: tier name.
        begin: tier start time, written via ``%s``.
        end: tier end time, written via ``%s``.
        segs: sized iterable of mappings with ``'begin'``, ``'end'`` and
            ``'label'`` keys. ``begin``/``end`` are converted with ``int()``
            and divided by 100.0 before being written.
        format: %-style format for the interval text. For compatibility with
            the original Python 2 signature, a ``(format, formatter)`` pair is
            also accepted here when ``formatter`` is omitted.
        formatter: callable applied to each label before it is formatted;
            when omitted and ``format`` is a plain string, labels are used
            unchanged.
    """
    if formatter is None:
        if isinstance(format, (tuple, list)):
            # Old calling convention: both values packed in one argument.
            format, formatter = format
        else:
            def formatter(label):
                return label

    # ``print >> output`` is Python 2 only (TypeError at run time on Python 3).
    print('\titem [%d]:' % 0, file=output)
    print('\t\tclass = "IntervalTier"', file=output)
    print('\t\tname = "%s"' % title, file=output)
    print('\t\txmin = %s' % begin, file=output)
    print('\t\txmax = %s' % end, file=output)
    print('\t\tintervals: size = %d' % len(segs), file=output)

    text_template = '\t\t\ttext = "' + format + '"'
    for count, seg in enumerate(segs, start=1):
        print('\t\tintervals [%d]:' % count, file=output)
        print('\t\t\txmin = %s' % repr(int(seg['begin']) / 100.0), file=output)
        print('\t\t\txmax = %s' % repr(int(seg['end']) / 100.0), file=output)
        print(text_template % formatter(seg['label']), file=output)
|  | ||||
|  | ||||
def seg2tg(fname, segments):
    """Write *segments* to *fname* as a three-tier Praat TextGrid.

    The tiers are "confidence" (each segment's llh when present, else its
    score), "words" (segment labels) and "phones" (labels of every
    segment's phones).  Nothing is written when *segments* is empty.
    """
    if not segments:
        return

    confidences = []
    word_labels = []
    phones = []
    for s in segments:
        conf = s.llh if hasattr(s, "llh") else s.score
        confidences.append({'begin': s.begin, 'end': s.end, 'label': conf})
        word_labels.append({'begin': s.begin, 'end': s.end, 'label': s.label})
        for p in s.phones:
            phones.append({'begin': p.begin, 'end': p.end, 'label': p.label})

    begin = repr(int(segments[0].begin) / 100.0)
    end = repr(int(segments[-1].end) / 100.0)

    def identity(x):
        return x

    nr_tiers = 3
    # Open only once the data is collected, and close even if writing fails.
    with codecs.open(fname, "w", encoding="utf-8") as output:
        print_header(output, begin, end, nr_tiers)
        # print_tier takes the format and the formatter as separate arguments
        print_tier(output, "confidence", begin, end, confidences, '%.3f', identity)
        print_tier(output, "words", begin, end, word_labels, '%s', identity)
        print_tier(output, "phones", begin, end, phones, '%s', identity)
							
								
								
									
										99
									
								
								novoapi_for_python3x/asr/segments/segments.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								novoapi_for_python3x/asr/segments/segments.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,99 @@ | ||||
| #!/usr/bin/env python | ||||
| # (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen | ||||
|  | ||||
| ## These classes can be initialized with dictionaries, as they are returned by the python spraaklab recognition system. | ||||
|  | ||||
class Segment(object):
    """A single recognized word or phone.

    Built from a dict with "begin", "end", "label" and "score" keys and
    optional "beginTime", "endTime", "llh" and "phones" keys.  A dict that
    has "phones" becomes a segment of type "word" whose phones are wrapped
    in a Segmentation; any other dict becomes a segment of type "phone".
    """

    def __init__(self, segment):
        self.begin = segment["begin"]
        self.end = segment["end"]
        # fall back to begin/end divided by 100 when no explicit times are given
        self.begintime = segment.get("beginTime", self.begin / 100.0)
        self.endtime = segment.get("endTime", self.end / 100.0)
        self.label = segment["label"]
        self.score = segment["score"]
        if "llh" in segment:
            self.llh = segment["llh"]
        if "phones" in segment:
            self.type = "word"
            self.phones = Segmentation(segment["phones"], ["sil"])
            # guard against an empty phone list before peeking at the first phone
            if len(self.phones) and hasattr(self.phones[0], "llh"):
                ## the current word llh for error detection
                self.minllh = min(s.llh for s in self.phones)
        else:
            self.type = "phone"

    def __repr__(self):
        res = "%8.3f -- %8.3f score %8.3f " % (self.begintime, self.endtime, self.score)
        if hasattr(self, "llh"):
            res += "llh %8.3f " % self.llh
        # On Python 3, str + bytes raises TypeError, so the label is appended
        # as text; a bytes label is decoded first.
        label = self.label
        if isinstance(label, bytes):
            label = label.decode("utf8")
        res += label
        return res

    def export(self):
        """Return the segment as a plain dict (phones exported recursively)."""
        r = {"begin": self.begin, "end": self.end, "label": self.label, "score": self.score, "type": self.type}
        if hasattr(self, "llh"):
            r["llh"] = self.llh
        if hasattr(self, "phones"):
            r["phones"] = self.phones.export()
        return r
|  | ||||
class Segmentation(object):
    """An ordered collection of Segment objects with aggregate helpers."""

    def __init__(self, segments, sils=None):
        """Create a segmentation from a spraaklab recognition structure.

        segments: an array of words (or phones), represented by a dict with
        "begin", "end", "label", "score", and "llh" keys.  Words can also have
        "phones" which is another array of segments.
        sils: labels skipped by default in the aggregate methods; defaults
        to ["<s>", "</s>", "!sil"].
        """
        self.segments = [Segment(s) for s in segments]
        if self.segments:
            self.type = self.segments[0].type
        else:
            self.type = None
        # A fresh list per instance: a shared mutable default would leak
        # modifications of one instance's ``sils`` into all others.
        self.sils = ["<s>", "</s>", "!sil"] if sils is None else sils
        self.orig = segments ## in case we want to have access to the original recognition structure

    def __getitem__(self, item):
        return self.segments[item]

    def __repr__(self):
        ns = len(self.segments)
        res = "Segmentation with %d %s%s" % (ns, self.type, "" if ns == 1 else "s")
        for seg in self.segments:
            res += "\n " + repr(seg)
        return res

    def __len__(self):
        return len(self.segments)

    def score(self, skip=None):
        """Sum the scores of all segments whose label is not in *skip*.

        An empty or missing *skip* falls back to ``self.sils``.
        """
        if not skip:
            skip = self.sils
        s = 0.0
        for seg in self.segments:
            if seg.label not in skip:
                s += seg.score
        return s

    def llhs(self, skip=None):
        """List the llh of every non-skipped segment that has one."""
        if not skip:
            skip = self.sils
        return [seg.llh for seg in self.segments if hasattr(seg, "llh") and seg.label not in skip]

    def llh(self, skip=None):
        """Sum of llhs()."""
        return sum(self.llhs(skip))

    def minllh(self, skip=None):
        """Smallest value of llhs(), or None when there is none."""
        llhs = self.llhs(skip)
        if llhs:
            return min(llhs)
        else:
            return None

    def labels(self, skip=None):
        """Labels of all non-skipped segments, in order."""
        if not skip:
            skip = self.sils
        return [seg.label for seg in self.segments if seg.label not in skip]

    def sentence(self, skip=None):
        """The non-skipped labels joined by single spaces."""
        return " ".join(self.labels(skip))

    def export(self):
        """Export every segment as a plain dict."""
        return [seg.export() for seg in self.segments]
							
								
								
									
										4
									
								
								novoapi_for_python3x/asr/spraaklab/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								novoapi_for_python3x/asr/spraaklab/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| #import schema | ||||
| from . import schema | ||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										273
									
								
								novoapi_for_python3x/asr/spraaklab/schema.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										273
									
								
								novoapi_for_python3x/asr/spraaklab/schema.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,273 @@ | ||||
| #!/usr/bin/env python | ||||
| ## (c) 2017 NovoLanguage, author: David A. van Leeuwen | ||||
|  | ||||
| ## The purpose of this to define the grammar structure in a json schema, so that it can be validated, | ||||
| ## (de)serialized, and perhaps even automatically converted to a Python class structure. | ||||
|  | ||||
| import json | ||||
| import jsonschema | ||||
|  | ||||
# JSON schema for grammars; used by validate_rpc_grammar() for messages
# whose version is "1.0".  The root of a grammar must be a "group".
grammar_schema_v10 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    "$ref": "#/definitions/group",
    "definitions": {
        # a non-empty list of strings
        "phones": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "minItems": 1
        },
        # one pronunciation: "phones" is required, the rest is optional
        "pronunciation": {
            "type": "object",
            "properties": {
                "phones": {
                    "$ref": "#/definitions/phones"
                },
                "syllables": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/syllable"
                    },
                    "minItems": 1
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this pronunciation from other variants"
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["phones"]
        },
        # a syllable: required non-negative integer "begin"/"end",
        # optional non-negative integer "stress" and "tone"
        "syllable": {
            "type": "object",
            "properties": {
                "begin": {
                    "type": "integer",
                    "minimum": 0
                },
                "end": {
                    "type": "integer",
                    "minimum": 0
                },
                "stress": {
                    "type": "integer",
                    "minimum": 0
                },
                "tone": {
                    "type": "integer",
                    "minimum": 0
                }
            },
            "required": ["begin", "end"]
        },
        # a word: only "label" is required
        "word": {
            "type": "object",
            "properties": {
                "kind": {
                    "type": "string",
                    "enum": ["word"]
                },
                "label": {
                    "type": "string"
                },
                # either a single pronunciation object, a non-empty list of
                # pronunciation objects and/or phone lists, or a bare phone list
                "pronunciation": {
                    "anyOf": [
                        {
                            "$ref": "#/definitions/pronunciation"
                        },
                        {
                            "type": "array",
                            "items": {
                                "anyOf": [
                                    {
                                        "$ref": "#/definitions/pronunciation"
                                    },
                                    {
                                        "$ref": "#/definitions/phones"
                                    }
                                ]
                            },
                            "minItems": 1
                        },
                        {
                            "$ref": "#/definitions/phones"
                        }

                    ]
                },
                "syllables": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/syllable"
                    }
                },
                "graphemes": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this word from other words (with possibly the same label)"
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["label"]
        },
        # an element is exactly one of: a word, a (nested) group,
        # or a plain string / null
        "element": {
            "title": "element",
            "oneOf": [
                {
                    "$ref": "#/definitions/word"
                },
                {
                    "$ref": "#/definitions/group"
                },
                {
                    "type": ["string", "null"]
                }
            ]
        },
        # a group: a "kind" plus a non-empty list of elements
        "group": {
            "title": "element group",
            "type": "object",
            "properties": {
                "kind": {
                    "type": "string",
                    "enum": ["sequence", "alternatives", "order"]
                },
                "elements": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/element"
                    },
                    "minItems": 1,
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["kind", "elements"]
        }
    }
}
|  | ||||
# JSON schema for grammars; used by validate_rpc_grammar() for messages
# whose version is "0.1".  Note that no property is marked as required.
grammar_schema_v01 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar v0.1",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["multiple_choice", "word_order"]
        },
        # 1 to 5 parts; each part is a string or an array of strings
        "parts": {
            "type": "array",
            "minItems": 1,
            "maxItems": 5,
            "items": {
                "type": ["string", "array"],
                "items": {
                    "type": ["string"]
                }
            }
        }
    }
}
|  | ||||
# JSON schema for the RPC message that wraps a grammar: "type" and "data"
# are required; "data" is validated separately against the grammar schema
# selected by "version" (see validate_rpc_grammar).
grammar_rpc_schema = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage RPC grammar",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["confusion_network"]
        },
        "version": {
            "type": "string",
            # must match the version strings validate_rpc_grammar() accepts
            # ("0.1" / "1.0"); the previous "v0.1" would have been rejected
            "default": "0.1"
        },
        "data": {
            "type": "object"
        },
        "return_dict": {
            "type": "boolean"
        },
        "return_objects": {
            "type": "array",
            "items": {
                "type": "string",
                "enum": ["dict", "grammar"]
            }
        },
        "phoneset": {
            "type": "string",
            "enum": ["cmu69", "novo70", "mdbg115"]
        },
        "parallel_silence": {
            "type": "boolean"
        }
    },
    "required": ["type", "data"]
}
|  | ||||
def validate(object, schema=grammar_schema_v10):
    """Check *object* against a JSON schema.

    object: a dict, or a JSON document as str/bytes/bytearray (decoded
        with json.loads first).
    schema: the JSON schema to validate against.

    Returns True when the object conforms and False when jsonschema
    reports a ValidationError; any other exception propagates.
    Raises TypeError if the (decoded) object is not a dict.
    """
    # Python 2's basestring covered byte strings as well; keep accepting them.
    if isinstance(object, (str, bytes, bytearray)):
        object = json.loads(object)
    if not isinstance(object, dict):
        raise TypeError("Expected dict or json string")
    try:
        jsonschema.validate(object, schema)
    except jsonschema.ValidationError:
        return False
    return True
|  | ||||
def validate_rpc_grammar(message):
    """Validate an rpc grammar message.

    message: the RPC grammar as a dict, or as a JSON string.

    The message is first checked against grammar_rpc_schema; its "data" is
    then checked against the grammar schema selected by "version" (taken
    as "0.1" when absent).  Returns None on success.
    Raises ValueError for an invalid message, invalid grammar data or an
    unsupported version.
    """
    # validate() accepts JSON text, but the lookups below need a dict;
    # decode up front so a string message does not fail on ``.get``.
    if isinstance(message, (str, bytes, bytearray)):
        message = json.loads(message)
    if not validate(message, grammar_rpc_schema):
        raise ValueError("Not a valid RPC grammar")
    version = message.get("version", "0.1")
    data = message["data"]
    if version == "0.1":
        if not validate(data, grammar_schema_v01):
            raise ValueError("Not a valid grammar v0.1")
    elif version == "1.0":
        if not validate(data, grammar_schema_v10):
            raise ValueError("Not a valid grammar v1.0")
    else:
        raise ValueError("Unsupported schema version")
|  | ||||
|  | ||||
| ## test | ||||
def test(data=None):
    """Validate *data* against grammar_schema_v10 and print the outcome.

    When *data* is empty or omitted, a built-in sample grammar is used.
    """
    if not data:
        data = {"kind": "sequence", "elements": [
            {"kind": "alternatives", "elements": ["a plain string", "an alternative string"]},
            {"kind": "word", "label": "a word", "pronunciation": {"phones": ["ah", "w", "er", "d"]}},
            {"kind": "order", "elements": [{"kind": "word", "label": "another word", "visible": False}, "last word"]}]}
    try:
        # ``schema`` was an undefined name here (NameError); the sample data
        # is a v1.0 grammar, so validate against the v1.0 schema.
        jsonschema.validate(data, grammar_schema_v10)
    except jsonschema.ValidationError as e:
        print("{0} validated not OK {1}".format(data, e.message))
    else:
        print("{} validated OK".format(data))
|  | ||||
# Run the schema self-test when this module is executed as a script.
if __name__ == "__main__":
    test()
							
								
								
									
										4
									
								
								novoapi_for_python3x/backend/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								novoapi_for_python3x/backend/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| #import session | ||||
| from . import session | ||||
							
								
								
									
										
											BIN
										
									
								
								novoapi_for_python3x/backend/__pycache__/__init__.cpython-36.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								novoapi_for_python3x/backend/__pycache__/__init__.cpython-36.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								novoapi_for_python3x/backend/__pycache__/session.cpython-36.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								novoapi_for_python3x/backend/__pycache__/session.cpython-36.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										254
									
								
								novoapi_for_python3x/backend/session.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										254
									
								
								novoapi_for_python3x/backend/session.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,254 @@ | ||||
| #!/usr/bin/env python | ||||
| # (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen | ||||
|  | ||||
| ## Recognition interface for actual backend.  Adapted from player.asr.debug. | ||||
|  | ||||
| import json | ||||
| import sys | ||||
| import wave | ||||
| import requests | ||||
| import websocket | ||||
| import logging | ||||
| import collections | ||||
|  | ||||
| import time | ||||
|  | ||||
| from .. import asr | ||||
|  | ||||
logger = logging.getLogger(__name__)

## turn off annoying warnings
requests.packages.urllib3.disable_warnings()
logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(logging.WARN)

# size of the slices in which Recognizer.recognize_data sends audio
buffer_size = 4096
# the values below are combined into Recognizer.api_url as
# "<protocol>://<gm>:<port>/v<apiversion>"
gm = "gm.novolanguage.com" ## dev
protocol = "https"
port = 443
apiversion = 0

# per session uuid: incremented in Recognizer.init_session and
# decremented in Recognizer.__del__
sessions = collections.Counter()
|  | ||||
def segmentation(result):
    """Turn a raw backend recognition result into an asr.segments.Segmentation.

    Every word dict in *result* (and every phone dict under its "phones")
    is rewritten in place: "score" and "llh" are copied out of
    "confidence", a word's "label" is replaced by its "raw" entry, and
    "begin"/"end" are divided by 10.
    """
    for word in result:
        word_conf = word["confidence"]
        word["score"] = word_conf["prob"]
        word["llh"] = word_conf["llr"]
        word["label"] = word["label"]["raw"]
        word["begin"] = word["begin"] / 10
        word["end"] = word["end"] / 10
        for phone in word["phones"]:
            phone_conf = phone["confidence"]
            phone["score"] = phone_conf["prob"]
            phone["llh"] = phone_conf["llr"]
            phone["begin"] = phone["begin"] / 10
            phone["end"] = phone["end"] / 10
    return asr.segments.Segmentation(result)
|  | ||||
class rpcid:
    """Class-level counter that hands out increasing request ids."""
    id = 0

    @staticmethod
    def next():
        """Increment the shared counter and return its new value."""
        fresh = rpcid.id + 1
        rpcid.id = fresh
        return fresh
|  | ||||
class Recognizer(object):
    """Client for one recognition session on the NovoLanguage backend.

    Construction logs the user in, requests a session and opens a websocket
    to it.  A grammar is set with setgrammar() / set_alternatives_grammar();
    audio is recognized with recognize_wav(), recognize_data() or
    recognize_url().
    """

    def __init__(self, lang="en", gm=gm, grammar_version="0.1", user=None, password=None, snodeid=None, keepopen=False):
        self.lang = lang
        self.keepopen = keepopen
        self.api_url = "%s://%s:%d/v%d" % (protocol, gm, port, apiversion)
        # NOTE(review): certificate verification is switched off for all requests
        self.verify = False
        self.headers = {"Content-Type": "application/json"}
        # set before any network call so that __del__ can always inspect it
        self.conn = None
        self.login_user(user, password)
        data = {"l2": lang, "local": False, "skipupload": True}
        if snodeid:
            data["snodeid"] = snodeid
        self.init_session(data)
        self.grammar_version = grammar_version
        self.last_message = None

    def login_user(self, username, password):
        """Log in and store the authentication token in self.headers.

        Exits the process (sys.exit(-1)) when the request cannot be posted
        or the response reports errors.
        """
        # obtain authentication token of user
        logger.info('obtain auth token at %s', self.api_url)
        data = {
            'username': username,
            'password': password
        }
        try:
            r = requests.post(self.api_url + '/publishers/1/login', headers=self.headers, data=json.dumps(data), verify=self.verify)
        except Exception as e:
            # Python 3 exceptions have no ``.message`` attribute
            logger.error("Cannot post request to GM API for user login: %s", e)
            sys.exit(-1)
        assert r.ok, r.reason
        result = r.json()
        if "errors" in result["response"]:
            logger.info("Error in logging in: %s", result["response"]["errors"])
            sys.exit(-1)

        user_auth_token = result['response']['user']['authentication_token']
        logger.info("User auth token is: %s", user_auth_token)

        # set auth token in header
        self.headers['Authentication-Token'] = user_auth_token

    def init_session(self, data, direct=False, use_ip=False):
        """Request a new session and connect a websocket to it.

        data: session request payload, posted as JSON.
        direct: connect straight to the snode ip/port via "ws" instead of
            going through the datacentre proxy via "wss".
        use_ip: with the proxy, use its "ip" field rather than "hostname".

        Returns without a connection (after logging an error) when the
        session request or a status check fails.  Raises when the backend
        reports an error for the session or the websocket cannot be opened.
        """
        logger.info('Request new session: %s', data)
        r = requests.post(self.api_url + '/sessions', headers=self.headers, data=json.dumps(data), verify=self.verify)
        if not r.ok:
            logger.error("New session request failed: %s", r.text)
            return

        status_url = r.headers.get("location")
        if status_url:
            ## we got a redirect
            status = {}
            # poll once per second for as long as the status is PENDING
            while True:
                logger.debug("Checking %s", status_url)
                s = requests.get(status_url, verify=self.verify)
                if not s.ok:
                    logger.error('Checking Failed: %s', s.text)
                    return

                status = s.json()
                if status['status'] == 'PENDING':
                    logger.debug("Status: %s", status['status'])
                    time.sleep(1)
                else:
                    break
            session = status['result'][0] ## [1] is another status code...
            if "error" in session:
                logger.error("Error in getting a snode: %s", session["error"])
                raise RuntimeError("Error in getting a snode: %s" % (session["error"],))
        else:
            session = r.json()

        try:
            logger.info("Session: %r", session)
            if direct:
                snode_ip = session["snode"]["ip"]
                proxy_url = snode_ip
                snode_port = session["port"]
                ws_url = "%s://%s:%d/" % ("ws", snode_ip, snode_port)
            else:
                field = "ip" if use_ip else "hostname"
                proxy_url = session['snode']['datacentre']['proxy'][field]
                ws_url = 'wss://' + proxy_url + '/' + session['uuid']
            logger.info("Connecting to websocket: %s", ws_url)
            conn = websocket.create_connection(ws_url, sslopt={"check_hostname": self.verify})
            logger.info("Connected.")
        except Exception as e:
            # Python 3 exceptions have no ``.message`` attribute
            logger.error("Unable to connect to websocket: %s", e)
            raise

        self.session_id = session['id']
        self.proxy_url = proxy_url
        self.conn = conn
        self.session = session
        sessions[session["uuid"]] += 1

    def setgrammar(self, grammar): ## backend grammar object: {"data": {...}, "type": "confusion_network"}
        """Validate *grammar*, send it as a set_grammar call and return the reply."""
        data = {"jsonrpc": "2.0",
                'type': 'jsonrpc',
                'method': 'set_grammar',
                'params': grammar,
                "id": rpcid.next()}
        asr.spraaklab.schema.validate_rpc_grammar(grammar)
        self.conn.send(json.dumps(data))
        result = json.loads(self.conn.recv())
        if result.get("error"):
            logger.error("Exercise validation error: %s", result)
        return result

    def set_alternatives_grammar(self, *args, **kwargs):
        """Build a grammar with alternatives_grammar() and set it.

        The recognizer's grammar_version is used unless "version" is given.
        """
        kwargs.setdefault("version", self.grammar_version)
        return self.setgrammar(alternatives_grammar(*args, **kwargs))

    def recognize_wav(self, wavf):
        """Recognize a wav file: 1 channel, 2-byte samples, 16000 Hz, uncompressed.

        Returns the result of recognize_data(), or None (after logging an
        error) when the file does not have the required format.
        """
        w = wave.open(wavf, 'r')
        try:  # make sure the file is closed on the early returns as well
            nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
            if nchannels > 1:
                logger.error("Please use .wav with only 1 channel, found %d channels in %s", nchannels, wavf)
                return
            if sampwidth != 2:
                logger.error("Please use .wav with 2-byte PCM data, found %d bytes in %s", sampwidth, wavf)
                return
            if framerate != 16000.0:
                logger.error("Please use .wav sampled at 16000 Hz, found %1.0f in %s", framerate, wavf)
                return
            if comptype != 'NONE':
                logger.error("Please use .wav with uncompressed data, found %s in %s", compname, wavf)
                return
            buf = w.readframes(nframes)
        finally:
            w.close()
        return self.recognize_data(buf)

    def recognize_data(self, buf):
        """Send the audio bytes in *buf* and return the recognized segmentation.

        Raises RuntimeError when the backend answers with an error.
        """
        start = time.time()
        for j in range(0, len(buf), buffer_size):
            # On Python 3, str(<bytes>) yields the "b'...'" repr text rather
            # than the audio itself, so the raw bytes are sent.
            self.conn.send_binary(bytes(buf[j:j + buffer_size]))
        self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
        logger.info("Waiting for recognition result...")
        self.last_message = self.conn.recv() ## keep result for the interested applications
        return self._parse_last_message(start)

    def recognize_url(self, url):
        """Ask the backend to recognize the audio at *url*.

        Raises RuntimeError when the backend answers with an error.
        """
        start = time.time()
        data = json.dumps({"jsonrpc": "2.0", "method": "send_audio", "id": rpcid.next(), "params": {"type": "url", "data": url, "details": ["word", "utterance"]}})
        self.conn.send(data)
        logger.info("Waiting for recognition result...")
        self.last_message = self.conn.recv() ## keep result for the interested applications
        print(self.last_message)
        return self._parse_last_message(start)

    def _parse_last_message(self, start):
        """Decode self.last_message, log the time since *start*, return the segmentation."""
        message = json.loads(self.last_message)
        dur = time.time() - start
        logger.info("Recognition took %5.3f seconds", dur)
        if "error" in message:
            raise RuntimeError("Error from recognition backend: %r" % message.get("error"))
        return segmentation(message["result"]["words"])

    def __del__(self):
        # init_session may have returned or raised before a session was
        # stored; there is nothing to release in that case.
        session = getattr(self, "session", None)
        if session is None:
            return
        conn = getattr(self, "conn", None)
        sessions[session["uuid"]] -= 1
        if conn and sessions[session["uuid"]] <= 0:
            conn.close()
            url = self.api_url + '/sessions/%d' % self.session_id
            if self.keepopen:
                logger.info("Keeping session open...")
            else:
                logger.info("Closing session: %s", url)
                r = requests.delete(url, headers=self.headers, verify=self.verify)
                # an exception cannot propagate out of a finalizer; log instead
                if not r.ok:
                    logger.error("Closing session failed: %s", r.reason)
|  | ||||
def alternatives_grammar(parts, version="0.1", ret=None):
    """Make a grammar of alternatives, as array(sequence)-of-array(alternatives)-of-strings

    parts: sequence of parts; each part is a list of alternative strings
        (a single string is treated as a part with one alternative).
    version: grammar version, "0.1" or "1.0".
    ret: optional list of objects to request back; for version "0.1" only
        "dict" is honoured (as return_dict), for "1.0" the list is passed
        on as return_objects.

    Returns the RPC grammar dict after validating it.
    Raises ValueError for an unsupported version or an invalid grammar.
    """
    r = {"type": "confusion_network", "version": version}
    if version == "0.1":
        r["data"] = {"type": "multiple_choice", "parts": parts}
        if isinstance(ret, list) and "dict" in ret:
            r["return_dict"] = True
    elif version == "1.0":
        seqels = []
        for part in parts:
            if isinstance(part, str):
                # a bare string would otherwise be iterated per character
                part = [part]
            altels = []
            for alt in part:
                # split on any whitespace run so that repeated spaces do not
                # produce empty "words"
                words = alt.split()
                if len(words) > 1:
                    alt = {"kind": "sequence", "elements": words}
                altels.append(alt)
            seqels.append({"kind": "alternatives", "elements": altels})
        r["data"] = {"kind": "sequence", "elements": seqels}
        if isinstance(ret, list):
            r["return_objects"] = ret
    else:
        raise ValueError("Unsupported version: %s" % version)
    asr.spraaklab.schema.validate_rpc_grammar(r)
    return r
							
								
								
									
										25
									
								
								novoapi_for_python3x/utils/json/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								novoapi_for_python3x/utils/json/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,25 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| ## from https://stackoverflow.com/questions/1447287/format-floats-with-standard-json-module | ||||
| class PrettyFloat(float): | ||||
|     def __repr__(self): | ||||
|         return '%.15g' % self | ||||
|  | ||||
| def pretty_floats(obj): | ||||
|     if isinstance(obj, float): | ||||
|         return PrettyFloat(obj) | ||||
|     elif isinstance(obj, dict): | ||||
|         return dict((k, pretty_floats(v)) for k, v in obj.items()) | ||||
|     elif isinstance(obj, (list, tuple)): | ||||
|         return map(pretty_floats, obj) | ||||
|     return obj | ||||
|  | ||||
| def rounded_floats(obj, ndigits=15): | ||||
|     if isinstance(obj, float): | ||||
|         return PrettyFloat(round(obj, ndigits)) | ||||
|     elif isinstance(obj, dict): | ||||
|         return dict((k, rounded_floats(v, ndigits)) for k, v in obj.items()) | ||||
|     elif isinstance(obj, (list, tuple)): | ||||
|         return map(lambda o: rounded_floats(o, ndigits), obj) | ||||
|     return obj | ||||
|  | ||||
										
											Binary file not shown.
										
									
								
							
		Reference in New Issue
	
	Block a user