Compare commits
	
		
			2 Commits
		
	
	
		
			de5c9cecb9
			...
			24ac56ac0e
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 24ac56ac0e | ||
|  | 82a8e2302f | 
										
											Binary file not shown.
										
									
								
							| @@ -16,6 +16,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution | |||||||
| 		..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py | 		..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py | ||||||
| 		..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py | 		..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py | ||||||
| 		..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py | 		..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py | ||||||
|  | 		reus-test\reus-test.py = reus-test\reus-test.py | ||||||
| 		..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py | 		..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py | ||||||
| 		..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py | 		..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py | ||||||
| 		..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py | 		..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py | ||||||
|   | |||||||
										
											Binary file not shown.
										
									
								
							| @@ -4,8 +4,7 @@ | |||||||
|     <SchemaVersion>2.0</SchemaVersion> |     <SchemaVersion>2.0</SchemaVersion> | ||||||
|     <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid> |     <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid> | ||||||
|     <ProjectHome>.</ProjectHome> |     <ProjectHome>.</ProjectHome> | ||||||
|     <StartupFile> |     <StartupFile>forced_aligner_comparison.py</StartupFile> | ||||||
|     </StartupFile> |  | ||||||
|     <SearchPath> |     <SearchPath> | ||||||
|     </SearchPath> |     </SearchPath> | ||||||
|     <WorkingDirectory>.</WorkingDirectory> |     <WorkingDirectory>.</WorkingDirectory> | ||||||
| @@ -36,6 +35,9 @@ | |||||||
|     <Compile Include="fa_test.py"> |     <Compile Include="fa_test.py"> | ||||||
|       <SubType>Code</SubType> |       <SubType>Code</SubType> | ||||||
|     </Compile> |     </Compile> | ||||||
|  |     <Compile Include="forced_aligner_comparison.py"> | ||||||
|  |       <SubType>Code</SubType> | ||||||
|  |     </Compile> | ||||||
|     <Compile Include="novoapi_forced_alignment.py"> |     <Compile Include="novoapi_forced_alignment.py"> | ||||||
|       <SubType>Code</SubType> |       <SubType>Code</SubType> | ||||||
|     </Compile> |     </Compile> | ||||||
|   | |||||||
| @@ -10,14 +10,13 @@ import shutil | |||||||
| import numpy as np | import numpy as np | ||||||
| import pandas as pd | import pandas as pd | ||||||
| import matplotlib.pyplot as plt | import matplotlib.pyplot as plt | ||||||
|  |  | ||||||
| from sklearn.metrics import confusion_matrix | from sklearn.metrics import confusion_matrix | ||||||
| from sklearn.metrics import accuracy_score | from sklearn.metrics import accuracy_score | ||||||
| import novoapi  | import novoapi  | ||||||
|  |  | ||||||
| import defaultfiles as default | import defaultfiles as default | ||||||
| sys.path.append(default.forced_alignment_module_dir) | sys.path.append(default.forced_alignment_module_dir) | ||||||
| from forced_alignment import pyhtk, convert_phone_set | from forced_alignment import convert_phone_set | ||||||
| #import acoustic_model_functions as am_func | #import acoustic_model_functions as am_func | ||||||
| import convert_xsampa2ipa | import convert_xsampa2ipa | ||||||
| import novoapi_functions | import novoapi_functions | ||||||
| @@ -47,10 +46,6 @@ david_suggestion = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'w'] | |||||||
| ## read pronunciation variants. | ## read pronunciation variants. | ||||||
| stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx) | stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx) | ||||||
| df = pd.read_excel(stimmen_transcription_, 'frequency') | df = pd.read_excel(stimmen_transcription_, 'frequency') | ||||||
| #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): |  | ||||||
| #    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) |  | ||||||
| #    if not ipa_converted == ipa: |  | ||||||
| #        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) |  | ||||||
| transcription_ipa = list(df['IPA']) | transcription_ipa = list(df['IPA']) | ||||||
|  |  | ||||||
| # transcription mistake? | # transcription mistake? | ||||||
| @@ -63,6 +58,7 @@ for ipa in transcription_ipa: | |||||||
| 	ipa = ipa.replace(':', 'ː') | 	ipa = ipa.replace(':', 'ː') | ||||||
| 	ipa = convert_phone_set.split_ipa(ipa) | 	ipa = convert_phone_set.split_ipa(ipa) | ||||||
|  |  | ||||||
|  | 	# list of phones not in novo70 phoneset. | ||||||
| 	not_in_novo70_ = [phone for phone in ipa  | 	not_in_novo70_ = [phone for phone in ipa  | ||||||
| 				   if not phone in phoneset_ipa and not phone in david_suggestion] | 				   if not phone in phoneset_ipa and not phone in david_suggestion] | ||||||
| 	not_in_novo70_ = [phone.replace('sp', '') for phone in not_in_novo70_] | 	not_in_novo70_ = [phone.replace('sp', '') for phone in not_in_novo70_] | ||||||
| @@ -106,6 +102,10 @@ df = pd.read_excel(stimmen_transcription_, 'original') | |||||||
|  |  | ||||||
| # mapping from ipa to xsampa | # mapping from ipa to xsampa | ||||||
| mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) | mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) | ||||||
|  | #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): | ||||||
|  | #    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) | ||||||
|  | #    if not ipa_converted == ipa: | ||||||
|  | #        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) | ||||||
|  |  | ||||||
| ipas     = [] | ipas     = [] | ||||||
| famehtks = [] | famehtks = [] | ||||||
| @@ -153,12 +153,12 @@ for word in word_list: | |||||||
|  |  | ||||||
|  |  | ||||||
| ## ===== forced alignment ===== | ## ===== forced alignment ===== | ||||||
| reus_dir = r'C:\OneDrive\Desktop\Reus' | rozen_dir = r'c:\Users\Aki\source\repos\acoustic_model\rozen-test' | ||||||
| if forced_alignment_novo70: | if forced_alignment_novo70: | ||||||
| 	Results = pd.DataFrame(index=[], | 	Results = pd.DataFrame(index=[], | ||||||
| 		columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh']) | 		columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh']) | ||||||
| 	#for word in word_list: | 	#for word in word_list: | ||||||
| 	for word in ['Reus']: | 	for word in ['Rozen']: | ||||||
| 		# pronunciation variants top 3 | 		# pronunciation variants top 3 | ||||||
| 		df_per_word_ = df_per_word[df_per_word['word']==word] | 		df_per_word_ = df_per_word[df_per_word['word']==word] | ||||||
| 		df_per_word_ = df_per_word_.sort_values('frequency', ascending=False) | 		df_per_word_ = df_per_word_.sort_values('frequency', ascending=False) | ||||||
| @@ -208,37 +208,35 @@ if forced_alignment_novo70: | |||||||
| 			wav_file = os.path.join(default.stimmen_wav_dir, filename) | 			wav_file = os.path.join(default.stimmen_wav_dir, filename) | ||||||
| 			if os.path.exists(wav_file): | 			if os.path.exists(wav_file): | ||||||
| 				# for Martijn | 				# for Martijn | ||||||
| 				#shutil.copy(wav_file, os.path.join(reus_dir, filename)) | 				shutil.copy(wav_file, os.path.join(rozen_dir, filename)) | ||||||
|  |  | ||||||
| 				pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa] | 		#		pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa] | ||||||
| 				result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_) | 		#		result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_) | ||||||
| 				result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word) | 		#		result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word) | ||||||
| 				result_ = pd.Series([ | 		#		result_ = pd.Series([ | ||||||
| 					sample['filename'], | 		#			sample['filename'], | ||||||
| 					sample['word'], | 		#			sample['word'], | ||||||
| 					sample['xsampa'], | 		#			sample['xsampa'], | ||||||
| 					sample['ipa'], | 		#			sample['ipa'], | ||||||
| 					' '.join(result_ipa), | 		#			' '.join(result_ipa), | ||||||
| 					' '.join(result_novo70), | 		#			' '.join(result_novo70), | ||||||
| 					llh | 		#			llh | ||||||
| 					], index=results.columns) | 		#			], index=results.columns) | ||||||
| 				results = results.append(result_, ignore_index = True) | 		#		results = results.append(result_, ignore_index = True) | ||||||
| 				print('{0}/{1}: answer {2} - prediction {3}'.format(  | 		#		print('{0}/{1}: answer {2} - prediction {3}'.format(  | ||||||
| 			 i+1, len(samples), result_['ipa'], result_['result_ipa'])) | 		#	 i+1, len(samples), result_['ipa'], result_['result_ipa'])) | ||||||
| 			results.to_excel(os.path.join(reus_dir, 'results.xlsx'), encoding="utf-8") | 		#	#results.to_excel(os.path.join(default.stimmen_dir, 'results.xlsx'), encoding="utf-8") | ||||||
| 		if len(results) > 0: | 		#if len(results) > 0: | ||||||
| 			Results = Results.append(results, ignore_index = True) | 		#	Results = Results.append(results, ignore_index = True) | ||||||
| 		Results.to_excel(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8") | 		#Results.to_excel(os.path.join(default.stimmen_result_novoapi_dir, 'Results.xlsx'), encoding="utf-8") | ||||||
| else: | else: | ||||||
| 	Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8") | 	Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_result_novoapi_dir, 'Results.xlsx'), encoding="utf-8") | ||||||
| 	Results = pd.read_excel(Results_xlsx, 'Sheet1') | 	Results = pd.read_excel(Results_xlsx, 'Sheet1') | ||||||
|  |  | ||||||
|  |  | ||||||
| ## ===== analysis ===== | ## ===== analysis ===== | ||||||
| #result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi') |  | ||||||
| #for word in word_list: | #for word in word_list: | ||||||
| #	if not word == 'Oog': | #	if not word == 'Oog': | ||||||
|  |  | ||||||
| #		Results_ = Results[Results['word'] == word] | #		Results_ = Results[Results['word'] == word] | ||||||
| #		y_true  = list(Results_['ipa']) | #		y_true  = list(Results_['ipa']) | ||||||
| #		y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])] | #		y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])] | ||||||
| @@ -249,4 +247,4 @@ else: | |||||||
| #		plt.figure() | #		plt.figure() | ||||||
| #		output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False) | #		output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False) | ||||||
| #		#plt.show() | #		#plt.show() | ||||||
| #		plt.savefig(os.path.join(result_novoapi_dir, word + '.png')) | #		plt.savefig(os.path.join(default.stimmen_result_novoapi_dir, word + '.png')) | ||||||
| @@ -31,6 +31,12 @@ ipa_xsampa_converter_dir    = os.path.join(repo_dir, 'ipa-xsama-converter') | |||||||
| forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment') | forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment') | ||||||
| accent_classification_dir   = os.path.join(repo_dir, 'accent_classification', 'accent_classification') | accent_classification_dir   = os.path.join(repo_dir, 'accent_classification', 'accent_classification') | ||||||
|  |  | ||||||
|  | htk_config_dir = r'c:\Users\Aki\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017' | ||||||
|  | config_hvite = os.path.join(htk_config_dir, 'config.HVite') | ||||||
|  | #acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo') | ||||||
|  | acoustic_model = r'c:\cygwin64\home\Aki\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo' | ||||||
|  | phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt') | ||||||
|  |  | ||||||
| WSL_dir   = r'C:\OneDrive\WSL' | WSL_dir   = r'C:\OneDrive\WSL' | ||||||
| fame_dir        = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') | fame_dir        = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') | ||||||
| fame_s5_dir     = os.path.join(fame_dir, 's5') | fame_s5_dir     = os.path.join(fame_dir, 's5') | ||||||
| @@ -43,6 +49,7 @@ stimmen_data_dir = os.path.join(stimmen_dir, 'data') | |||||||
| #stimmen_wav_dir  = os.path.join(stimmen_dir, 'wav') | #stimmen_wav_dir  = os.path.join(stimmen_dir, 'wav') | ||||||
| # 16 kHz | # 16 kHz | ||||||
| stimmen_wav_dir  = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' | stimmen_wav_dir  = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' | ||||||
|  | stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi') | ||||||
|  |  | ||||||
| stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx') | stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx') | ||||||
| phonelist_friesian_txt     = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') | phonelist_friesian_txt     = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') | ||||||
|   | |||||||
							
								
								
									
										42
									
								
								acoustic_model/forced_aligner_comparison.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								acoustic_model/forced_aligner_comparison.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | |||||||
|  | import os | ||||||
|  | os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') | ||||||
|  | import sys | ||||||
|  |  | ||||||
|  | import defaultfiles as default | ||||||
|  | sys.path.append(default.forced_alignment_module_dir) | ||||||
|  | from forced_alignment import pyhtk, convert_phone_set, scripts | ||||||
|  |  | ||||||
|  | reus_dir  = r'c:\Users\Aki\source\repos\acoustic_model\reus-test' | ||||||
|  | wav_dir   = reus_dir | ||||||
|  | wav_files = ['reus1008-reus.wav',  | ||||||
|  | 			 'reus1167-man.wav', | ||||||
|  | 			 'reus3768-mantsje.wav'] | ||||||
|  |  | ||||||
|  | word = 'reus' | ||||||
|  | pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə'] | ||||||
|  |  | ||||||
|  | for wav_file in wav_files: | ||||||
|  | 	file_lab = os.path.join(reus_dir, wav_file.replace('.wav', '.lab')) | ||||||
|  | 	file_dic = os.path.join(reus_dir, wav_file.replace('.wav', '.dic')) | ||||||
|  | 	file_txt = os.path.join(reus_dir, wav_file.replace('.wav', '.txt')) | ||||||
|  |  | ||||||
|  | 	# output htk dict file | ||||||
|  | 	with open(file_dic, 'w', encoding="utf-8") as f: | ||||||
|  | 		for ipa in pronunciation_ipa: | ||||||
|  | 			cgn = convert_phone_set.ipa2cgn([ipa.replace(':', 'ː')]) | ||||||
|  | 			barbara = convert_phone_set.cgn2barbara(cgn) | ||||||
|  | 			f.write(word.upper() + '\t' + barbara + '\n') | ||||||
|  |  | ||||||
|  | 	# output htk label file. | ||||||
|  | 	pyhtk._create_label_file(word, file_lab) | ||||||
|  |  | ||||||
|  | 	scripts.run_command([ | ||||||
|  | 					'HVite','-T', '1',  | ||||||
|  | 					'-a',  | ||||||
|  | 					'-C', default.config_hvite, | ||||||
|  | 					'-H', default.acoustic_model,  | ||||||
|  | 					'-m',  | ||||||
|  | 					'-i', file_txt,  | ||||||
|  | 					#'-S', script_file,  | ||||||
|  | 					file_dic, default.phonelist_txt, os.path.join(wav_dir, wav_file) | ||||||
|  | 				]) | ||||||
| @@ -7,7 +7,7 @@ import json | |||||||
| from novoapi.backend import session | from novoapi.backend import session | ||||||
|  |  | ||||||
| import os | import os | ||||||
| os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') | #os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') | ||||||
| import defaultfiles as default | import defaultfiles as default | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										
											BIN
										
									
								
								reus-test/check_novoapi.zip
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								reus-test/check_novoapi.zip
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										3
									
								
								reus-test/reus1008-reus.dic
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								reus-test/reus1008-reus.dic
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | |||||||
|  | REUS	r eu s | ||||||
|  | REUS	m ac n | ||||||
|  | REUS	m ac n t s j @ | ||||||
							
								
								
									
										1
									
								
								reus-test/reus1008-reus.lab
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								reus-test/reus1008-reus.lab
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | REUS | ||||||
							
								
								
									
										6
									
								
								reus-test/reus1008-reus.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								reus-test/reus1008-reus.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | |||||||
|  | #!MLF!# | ||||||
|  | "c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1008-reus.rec" | ||||||
|  | 0 9700000 r -12463.852539 REUS | ||||||
|  | 9700000 12800000 eu -3622.108887 | ||||||
|  | 12800000 26250001 s -17303.216797 | ||||||
|  | . | ||||||
							
								
								
									
										3
									
								
								reus-test/reus1167-man.dic
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								reus-test/reus1167-man.dic
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | |||||||
|  | REUS	r eu s | ||||||
|  | REUS	m ac n | ||||||
|  | REUS	m ac n t s j @ | ||||||
							
								
								
									
										1
									
								
								reus-test/reus1167-man.lab
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								reus-test/reus1167-man.lab
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | REUS | ||||||
							
								
								
									
										10
									
								
								reus-test/reus1167-man.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								reus-test/reus1167-man.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | |||||||
|  | #!MLF!# | ||||||
|  | "c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1167-man.rec" | ||||||
|  | 0 150000 m -230.057571 REUS | ||||||
|  | 150000 300000 ac -250.994858 | ||||||
|  | 300000 450000 n -202.377716 | ||||||
|  | 450000 4600000 t -5128.984375 | ||||||
|  | 4600000 5050000 s -711.338501 | ||||||
|  | 5050000 5450000 j -564.730591 | ||||||
|  | 5450000 16049999 @ -13249.787109 | ||||||
|  | . | ||||||
							
								
								
									
										3
									
								
								reus-test/reus3768-mantsje.dic
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								reus-test/reus3768-mantsje.dic
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | |||||||
|  | REUS	r eu s | ||||||
|  | REUS	m ac n | ||||||
|  | REUS	m ac n t s j @ | ||||||
							
								
								
									
										1
									
								
								reus-test/reus3768-mantsje.lab
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								reus-test/reus3768-mantsje.lab
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | REUS | ||||||
							
								
								
									
										10
									
								
								reus-test/reus3768-mantsje.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								reus-test/reus3768-mantsje.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | |||||||
|  | #!MLF!# | ||||||
|  | "c:/Users/Aki/source/repos/acoustic_model/reus-test/reus3768-mantsje.rec" | ||||||
|  | 0 150000 m -217.347229 REUS | ||||||
|  | 150000 1150000 ac -1266.293579 | ||||||
|  | 1150000 1650000 n -583.382568 | ||||||
|  | 1650000 11100000 t -11259.270508 | ||||||
|  | 11100000 11250000 s -247.939255 | ||||||
|  | 11250000 11550000 j -445.511444 | ||||||
|  | 11550000 24150000 @ -16769.048828 | ||||||
|  | . | ||||||
							
								
								
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_100_jko5r.wav
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_100_jko5r.wav
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_113_o9kzs.wav
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_113_o9kzs.wav
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_1296_zbve2.wav
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_1296_zbve2.wav
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_1709_kq9xr.wav
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_1709_kq9xr.wav
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_241_bahqi.wav
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_241_bahqi.wav
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_5502_q79fd.wav
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_5502_q79fd.wav
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_632_2m04y.wav
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_632_2m04y.wav
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_911_1zvda.wav
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								rozen-test/pg_rozen_911_1zvda.wav
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										119
									
								
								rozen-test/rozen-test.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								rozen-test/rozen-test.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,119 @@ | |||||||
|  | #!/usr/bin/env python | ||||||
|  | import os | ||||||
|  | os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') | ||||||
|  |  | ||||||
|  | import argparse | ||||||
|  | import json | ||||||
|  |  | ||||||
|  | from novoapi.backend import session | ||||||
|  |  | ||||||
|  | p = argparse.ArgumentParser() | ||||||
|  | p.add_argument("--user", default='martijn.wieling') | ||||||
|  | p.add_argument("--password", default='xxxxx') | ||||||
|  | args = p.parse_args() | ||||||
|  |  | ||||||
|  | rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) | ||||||
|  |  | ||||||
|  | grammar = { | ||||||
|  |   "type": "confusion_network", | ||||||
|  |   "version": "1.0", | ||||||
|  |   "data": { | ||||||
|  |     "kind": "sequence", | ||||||
|  |     "elements": [ | ||||||
|  |       { | ||||||
|  |         "kind": "word", | ||||||
|  |         "pronunciation": [ | ||||||
|  |           { | ||||||
|  |             "phones": [ | ||||||
|  |               "r", | ||||||
|  |               "eu0", | ||||||
|  |               "s" | ||||||
|  |             ], | ||||||
|  |             "id": 0 | ||||||
|  |           } | ||||||
|  |           , | ||||||
|  |           { | ||||||
|  |             "phones": [ | ||||||
|  |               "m", | ||||||
|  |               "a0", | ||||||
|  |               "n" | ||||||
|  |             ], | ||||||
|  |             "id": 1 | ||||||
|  |           } | ||||||
|  | 		   , | ||||||
|  |           { | ||||||
|  |             "phones": [ | ||||||
|  |               "m", | ||||||
|  |               "a0", | ||||||
|  |               "n", | ||||||
|  |               "t", | ||||||
|  |               "s", | ||||||
|  |               "y", | ||||||
|  |               "ax" | ||||||
|  |             ], | ||||||
|  |             "id": 2 | ||||||
|  |           } | ||||||
|  |         ], | ||||||
|  |         "label": "reus" | ||||||
|  |       } | ||||||
|  |     ] | ||||||
|  |   }, | ||||||
|  |   "return_objects": [ | ||||||
|  |     "grammar" | ||||||
|  |   ], | ||||||
|  |   "phoneset": "novo70" | ||||||
|  | } | ||||||
|  |  | ||||||
|  | res = rec.setgrammar(grammar) | ||||||
|  | #print "Set grammar result", res | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## === novoapi/backend/session.py === | ||||||
|  | #import wave | ||||||
|  | #import time | ||||||
|  | #from novoapi.backend.session import rpcid, segmentation | ||||||
|  |  | ||||||
|  | #wavf = "reus1008-reus.wav" | ||||||
|  | #w = wave.open(wavf, 'r') | ||||||
|  | #nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams() | ||||||
|  | #buf = w.readframes(nframes) | ||||||
|  | #w.close() | ||||||
|  |  | ||||||
|  | #buffer_size = 4096 | ||||||
|  | #nbytes_sent = 0 | ||||||
|  | #start = time.time() | ||||||
|  | #for j in range(0, len(buf), buffer_size): | ||||||
|  | #    audio_packet = buf[j:j + buffer_size] | ||||||
|  | #    nbytes_sent += len(audio_packet) | ||||||
|  | #    rec.conn.send_binary(audio_packet) | ||||||
|  | #rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()})) | ||||||
|  | #print(rpcid.next()) | ||||||
|  | #rec.last_message = rec.conn.recv()  | ||||||
|  | #message = json.loads(rec.last_message) | ||||||
|  | #result = session.segmentation(message["result"]["words"]) | ||||||
|  | #result.export() | ||||||
|  | ## ==================================== | ||||||
|  |  | ||||||
|  | def result2pronunciation(result, word): | ||||||
|  | 	#result_ = res.export()[1] | ||||||
|  | 	result_ = [result[i] for i in range(len(result)) if result[i]['label'] == word]  | ||||||
|  | 	llh = result_[0]['llh'] | ||||||
|  | 	phones = result_[0]['phones'] | ||||||
|  | 	pronunciation = [phone['label'] for phone in phones] | ||||||
|  | 	return pronunciation, llh | ||||||
|  |  | ||||||
|  |  | ||||||
|  | res = rec.recognize_wav("reus1008-reus.wav") | ||||||
|  | #print "\n\n\nThe pronounced word in reus1008-reus.wav is: REUS\n\n" | ||||||
|  | #print "Recognition result:", json.dumps(res.export(), indent=4) | ||||||
|  | result2pronunciation(res.export(), 'reus') | ||||||
|  |  | ||||||
|  | #print "\n\n\nThe pronounced word in reus1167-man.wav is: MAN\n\n" | ||||||
|  | res2 = rec.recognize_wav("reus1167-man.wav") | ||||||
|  | #print "Recognition result:", json.dumps(res2.export(), indent=4) | ||||||
|  | result2pronunciation(res2.export(), 'reus') | ||||||
|  |  | ||||||
|  | #print "\n\n\nThe pronounced word in reus3768-mantsje.wav is: MANTSJE\n\n" | ||||||
|  | res3 = rec.recognize_wav("reus3768-mantsje.wav") | ||||||
|  | #print "Recognition result:", json.dumps(res3.export(), indent=4) | ||||||
|  | result2pronunciation(res3.export(), 'reus') | ||||||
		Reference in New Issue
	
	Block a user