Compare commits: 7844a56281 ... 87abbbb95a (2 commits)

| Author | SHA1 | Date |
|---|---|---|
|  | 87abbbb95a |  |
|  | 813f013d7a |  |
										
Binary file not shown.

Binary file not shown.

Binary file not shown.
@@ -10,7 +10,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
 		..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
 		..\toolbox\evaluation.py = ..\toolbox\evaluation.py
-		..\toolbox\toolbox\file_handling.py = ..\toolbox\toolbox\file_handling.py
 		..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
 		..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
 		..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
@@ -23,7 +22,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
 	EndProjectSection
 EndProject
-Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "pyhtk", "..\pyhtk\pyhtk\pyhtk.pyproj", "{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}"
+Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "toolbox", "..\toolbox\toolbox.pyproj", "{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -33,8 +32,8 @@ Global
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 		{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Release|Any CPU.ActiveCfg = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
										
Binary file not shown.
@@ -23,12 +23,18 @@
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="check_novoapi.py" />
+    <Compile Include="convert_phone_set.py">
+      <SubType>Code</SubType>
+    </Compile>
     <Compile Include="convert_xsampa2ipa.py">
       <SubType>Code</SubType>
     </Compile>
     <Compile Include="defaultfiles.py">
       <SubType>Code</SubType>
     </Compile>
+    <Compile Include="fame_phoneset.py">
+      <SubType>Code</SubType>
+    </Compile>
     <Compile Include="fa_test.py">
       <SubType>Code</SubType>
     </Compile>
							
								
								
									
acoustic_model/convert_phone_set.py (new file, 29 lines)

@@ -0,0 +1,29 @@
|  | """Module to convert phonemes.""" | ||||||
|  |  | ||||||
|  | def multi_character_tokenize(line, multi_character_tokens): | ||||||
|  | 	"""Tries to match one of the tokens in multi_character_tokens at each position of line, starting at position 0, | ||||||
|  | 	if so tokenizes and eats that token. Otherwise tokenizes a single character""" | ||||||
|  | 	while line != '': | ||||||
|  | 		for token in multi_character_tokens: | ||||||
|  | 			if line.startswith(token) and len(token) > 0: | ||||||
|  | 				yield token | ||||||
|  | 				line = line[len(token):] | ||||||
|  | 				break | ||||||
|  | 		else: | ||||||
|  | 			yield line[:1] | ||||||
|  | 			line = line[1:] | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def split_word(word, multi_character_phones): | ||||||
|  | 	""" | ||||||
|  | 	split a line by given phoneset. | ||||||
|  | 	 | ||||||
|  | 	Args: | ||||||
|  | 		word (str): a word written in given phoneset. | ||||||
|  | 		multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_phoneset.py.  | ||||||
|  |  | ||||||
|  | 	Returns: | ||||||
|  | 		(word_seperated) (list): the word splitted in given phoneset.  | ||||||
|  |  | ||||||
|  | 	""" | ||||||
|  | 	return [phone for phone in multi_character_tokenize(word.strip(), multi_character_phones)] | ||||||
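A minimal sketch of how these two helpers behave, using a made-up three-phone inventory rather than the real FAME! one: multi_character_tokenize yields the first token that matches at the current position, so longer tokens must be listed first (which is exactly why fame_phoneset.py sorts its multi-character phones by descending length).

```python
from convert_phone_set import split_word

# Toy inventory for illustration only; sorted longest-first so that
# multi-character phones win over their single-character prefixes.
phones = sorted(['a:', 'i̯', 'ŋ'], key=len, reverse=True)

print(split_word('ba:ŋ', phones))  # -> ['b', 'a:', 'ŋ']
```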
@@ -4,7 +4,8 @@ import os

 #cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'

-htk_dir = r'C:\Aki\htk_fame'
+#htk_dir = r'C:\Aki\htk_fame'
+htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk'

 config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
 #config_train = os.path.join(cygwin_dir, 'config', 'config.train')
@@ -28,22 +29,21 @@ config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
 #filePhoneList = config['pyHTK']['filePhoneList']
 #AcousticModel = config['pyHTK']['AcousticModel']

-repo_dir = r'C:\Users\A.Kunikoshi\source\repos'
+repo_dir = r'C:\Users\Aki\source\repos'
 ipa_xsampa_converter_dir    = os.path.join(repo_dir, 'ipa-xsama-converter')
 forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
 accent_classification_dir   = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
-pyhtk_dir                   = os.path.join(repo_dir, 'pyhtk', 'pyhtk')
-toolbox_dir                 = os.path.join(repo_dir, 'toolbox', 'toolbox')
+toolbox_dir                 = os.path.join(repo_dir, 'toolbox')

-htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
-config_hvite = os.path.join(htk_config_dir, 'config.HVite')
+#htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
+#config_hvite = os.path.join(htk_config_dir, 'config.HVite')
 #acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
-acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
-phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
+#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
+#phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')

 WSL_dir   = r'C:\OneDrive\WSL'
 #fame_dir        = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
-fame_dir = r'f:\_corpus\fame'
+fame_dir = r'd:\_corpus\fame'

 fame_s5_dir     = os.path.join(fame_dir, 's5')
 fame_corpus_dir = os.path.join(fame_dir, 'corpus')
@@ -1,5 +1,5 @@
 import os
-os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

 import sys
 from collections import Counter
@@ -9,24 +9,8 @@ import numpy as np
 import pandas as pd

 import defaultfiles as default
-#sys.path.append(default.forced_alignment_module_dir)
-#from forced_alignment import convert_phone_set
-
-#def find_phone(lexicon_file, phone):
-#	""" Search where the phone is used in the lexicon. """
-#	with open(lexicon_file, "rt", encoding="utf-8") as fin:
-#		lines = fin.read()
-#		lines = lines.split('\n')
-
-#	extracted = []
-#	for line in lines:
-#		line = line.split('\t')
-#		if len(line) > 1:
-#			pronunciation = line[1]
-#			if phone in pronunciation:
-#				extracted.append(line)
-#	return extracted
+import fame_phoneset
+import convert_phone_set


 #def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
@@ -126,25 +110,6 @@ import defaultfiles as default

 #    return ipa

-def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
-	""" Make a script file for HCopy using the filelist in FAME! corpus. """
-
-	filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
-	with open(filelist_txt) as fin:
-		filelist = fin.read()
-		filelist = filelist.split('\n')
-
-	with open(hcopy_scp, 'w') as fout:
-		for filename_ in filelist:
-			filename = filename_.replace('.TextGrid', '')
-
-			if len(filename) > 3: # remove '.', '..' and ''
-				wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
-				mfc_file = os.path.join(feature_dir, filename + '.mfc')
-
-				fout.write(wav_file + '\t' + mfc_file + '\n')
-
-
 #def make_filelist(input_dir, output_txt):
 #	""" Make a list of files in the input_dir. """
 #	filenames = os.listdir(input_dir)
@@ -189,64 +154,147 @@ def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_s
 #                            f.write('{0}\t{1}\n'.format(WORD, key))


+def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
+	""" Make a script file for HCopy using the filelist in the FAME! corpus.
+
+	Args:
+		fame_dir (path): the directory of the FAME corpus.
+		dataset (str): 'devel', 'test' or 'train'.
+		feature_dir (path): the directory where the features will be stored.
+		hcopy_scp (path): the script file for HCopy to be made.
+
+	"""
+	filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
+	with open(filelist_txt) as fin:
+		filelist = fin.read()
+		filelist = filelist.split('\n')
+
+	with open(hcopy_scp, 'w') as fout:
+		for filename_ in filelist:
+			filename = filename_.replace('.TextGrid', '')
+
+			if len(filename) > 3: # remove '.', '..' and ''
+				wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
+				mfc_file = os.path.join(feature_dir, filename + '.mfc')
+
+				fout.write(wav_file + '\t' + mfc_file + '\n')
+
+
 def load_lexicon(lexicon_file):
+	""" Load a lexicon file as a DataFrame.
+
+	Args:
+		lexicon_file (path): lexicon in the format 'word' \t 'pronunciation'.
+
+	Returns:
+		lex (df): the lexicon as a DataFrame with the columns 'word' and 'pronunciation'.
+
+	"""
 	lex = pd.read_csv(lexicon_file, delimiter='\t', header=None, encoding="utf-8")
 	lex.rename(columns={0: 'word', 1: 'pronunciation'}, inplace=True)
 	return lex


-def get_phonelist(lexicon_asr):
-	""" Make a list of phones which appears in the lexicon. """
-
-	#with open(lexicon_file, "rt", encoding="utf-8") as fin:
-	#	lines = fin.read()
-	#	lines = lines.split('\n')
-	#	phonelist = set([])
-	#	for line in lines:
-	#		line = line.split('\t')
-	#		if len(line) > 1:
-	#			pronunciation = set(line[1].split())
-	#			phonelist = phonelist | pronunciation
-	lex = load_lexicon(lexicon_asr)
+def get_phoneset_from_lexicon(lexicon_file, phoneset='asr'):
+	""" Make a set of the phones which appear in the lexicon.
+
+	Args:
+		lexicon_file (path): lexicon in the format 'word' \t 'pronunciation'.
+		phoneset (str): the phoneset in which lexicon_file is written, 'asr' (default) or 'ipa'.
+
+	Returns:
+		(set): the phones included in lexicon_file.
+
+	"""
+	assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
+
+	lex = load_lexicon(lexicon_file)
+	if phoneset == 'asr':
 		return set(' '.join(lex['pronunciation']).split(' '))
+	elif phoneset == 'ipa':
+		join_pronunciations = ''.join(lex['pronunciation'])
+		return set(convert_phone_set.split_word(join_pronunciations, fame_phoneset.multi_character_phones_ipa))

-import time
-
-timer_start = time.time()
-
-#def get_translation_key():
-dir_tmp = r'c:\Users\A.Kunikoshi\source\repos\acoustic_model\_tmp'
-lexicon_ipa = r'f:\_corpus\FAME\lexicon\lex.ipa'
-lexicon_asr = r'f:\_corpus\FAME\lexicon\lex.asr'
-
-lex_ipa = load_lexicon(lexicon_ipa)
-lex_asr = load_lexicon(lexicon_asr)
-if 0:
-	phone_to_be_searched = get_phonelist(lexicon_asr)
+
+def extract_unknown_phones(ipa, known_phones):
+	""" Extract the phones of a pronunciation written in IPA that are not yet known.
+
+	Args:
+		ipa (str): a pronunciation written in IPA.
+		known_phones (list): the list of phones already known.
+
+	Returns:
+		(list): the phones not included in 'known_phones'.
+
+	"""
+	ipa_split = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
+	return [i for i in ipa_split if i not in known_phones]
+
+
+def get_translation_key(lexicon_file_ipa, lexicon_file_asr):
+	""" Get the correspondence between lexicon_file_ipa and lexicon_file_asr.
+
+	Args:
+		lexicon_file_ipa (path): lexicon in the format 'word' \t 'pronunciation (IPA)'.
+		lexicon_file_asr (path): lexicon in the format 'word' \t 'pronunciation (asr)';
+			each phone of 'pronunciation' should be delimited by ' '.
+
+	Returns:
+		translation_key (dict): the translation key from IPA to asr.
+		(list): the IPA phones which do not appear in lexicon_file_asr.
+
+	"""
+	lex_ipa = load_lexicon(lexicon_file_ipa)
+	lex_asr = load_lexicon(lexicon_file_asr)
+	phone_unknown = fame_phoneset.phoneset_ipa[:]
 	translation_key = dict()
-	for word in lex_asr['word']:
-		if np.sum(lex_asr['word'] == word) == 1 and np.sum(lex_ipa['word'] == word) == 1:
-			asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
+	for word in lex_ipa['word']:
+		if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
 			ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
+			asr = lex_asr[lex_asr['word'] == word].iat[0, 1]

+			ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
 			asr_list = asr.split(' ')
-			# if there are phones which is not in phone_to_be_searched
-			if len([True for i in asr_list if i in phone_to_be_searched]) > 0:
-				if(len(ipa) == len(asr_list)):
-					print("{0}: {1} --> {2}".format(word, ipa, asr))
-					for ipa_, asr_ in zip(ipa, asr_list):
-						if asr_ in phone_to_be_searched:
-							#if not translation_key[ipa_] == asr_:
-						translation_key[ipa_] = asr_
-							phone_to_be_searched.remove(asr_)
-
-	print("elapsed time: {}".format(time.time() - timer_start))
-
-	np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
-	np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched)
-else:
-	translation_key		 = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item()
-	phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).item()
+			# if there are phones which are not in phone_unknown
+			#if len([True for i in asr_list if i in phone_unknown]) > 0:
+			if(len(ipa_list) == len(asr_list)):
+				print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
+				for ipa_, asr_ in zip(ipa_list, asr_list):
+					if ipa_ in phone_unknown:
+						translation_key[ipa_] = asr_
+						phone_unknown.remove(ipa_)
+	return translation_key, list(phone_unknown)
+
+
+def find_phone(lexicon_file, phone, phoneset='ipa'):
+	""" Extract the rows of lexicon_file in which the phone is used.
+
+	Args:
+		lexicon_file (path): lexicon in the format 'word' \t 'pronunciation'.
+		phone (str): the phone to be searched.
+		phoneset (str): the phoneset in which lexicon_file is written, 'asr' or 'ipa' (default).
+
+	Returns:
+		extracted (df): the rows in which the phone is used.
+
+	ToDo:
+		* develop the case phoneset == 'asr'.
+
+	"""
+	assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
+
+	lex = load_lexicon(lexicon_file)
+
+	# to reduce the calculation time, only look at rows which include 'phone' at least once.
+	lex_ = lex[lex['pronunciation'].str.count(phone) > 0]
+
+	extracted = pd.DataFrame(index=[], columns=['word', 'pronunciation'])
+	for index, row in lex_.iterrows():
+		if phoneset == 'ipa':
+			pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_phoneset.multi_character_phones_ipa)
+		if phone in pronunciation:
+			extracted_ = pd.Series([row['word'], pronunciation], index=extracted.columns)
+			extracted  = extracted.append(extracted_, ignore_index=True)
+	return extracted
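A minimal sketch of how the new fame_functions helpers chain together, assuming the lexicon files live under the fame_dir configured in defaultfiles.py (the exact paths are illustrative):

```python
import os
import fame_functions
import defaultfiles as default

lexicon_ipa = os.path.join(default.fame_dir, 'lexicon', 'lex.ipa')
lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')

# Build the IPA -> asr phone mapping from words present in both lexica.
translation_key, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)

# Phones left unresolved can then be inspected by hand.
if phone_unknown:
    rows = fame_functions.find_phone(lexicon_ipa, phone_unknown[0], phoneset='ipa')
    print(rows.head())
```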
@@ -1,21 +1,21 @@
 import sys
 import os
-os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

 import tempfile
 #import configparser
 #import subprocess
 #from collections import Counter
+import time

 #import numpy as np
 #import pandas as pd

 import fame_functions
 import defaultfiles as default
-sys.path.append(default.pyhtk_dir)
-import pyhtk
 sys.path.append(default.toolbox_dir)
-import file_handling
+import file_handling as fh
+from htk import pyhtk


 ## ======================= user define =======================
@@ -28,8 +28,8 @@ import file_handling
 dataset_list = ['devel', 'test', 'train']

 # procedure
-extract_features  = 1
-#conv_lexicon	  = 0
+extract_features  = 0
+conv_lexicon	  = 1
 #check_lexicon	  = 0
 #make_mlf		  = 0
 #combine_files	  = 0
@@ -85,14 +85,12 @@ if not os.path.exists(tmp_dir):
 ## ======================= extract features =======================
 if extract_features:
 	for dataset in dataset_list:
-	#for dataset in ['test']:
 		print('==== {} ===='.format(dataset))

 		# a script file for HCopy
 		print(">>> making a script file for HCopy... \n")
 		hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
 		hcopy_scp.close()
-		#hcopy_scp = os.path.join(default.htk_dir, 'tmp', 'HCopy.scp')

 		# get a list of features (hcopy.scp) from the filelist in FAME! corpus
 		feature_dir_ = os.path.join(feature_dir, dataset)
@@ -102,32 +100,71 @@ if extract_features:
 		# extract features
 		print(">>> extracting features... \n")
 		fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)

-		#subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name
-		#subprocess.call(subprocessStr, shell=True)
 		pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)

 		# a script file for HCompV
 		print(">>> making a script file for HCompV... \n")

-## ======================= make a list of features =======================
-#if make_feature_list:
-#	print("==== make a list of features ====\n")

-#	for dataset in dataset_list:
-#		print(dataset)

-		#feature_dir = output_dir + '\\mfc\\' + dataset
 		hcompv_scp  = os.path.join(tmp_dir, dataset + '.scp')
-
-		#am_func.make_filelist(feature_dir, hcompv_scp)
-		file_handling.make_filelist(feature_dir_, hcompv_scp, '.mfc')
+		fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')


 ## ======================= convert lexicon from ipa to fame_htk =======================
 if conv_lexicon:
 	print('==== convert lexicon from ipa 2 fame ====\n')

+	#dir_out = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
+	lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
+	lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
+	lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
+
+	# get the correspondence between lex_ipa and lex_asr.
+	lex_asr  = fame_functions.load_lexicon(lexicon_asr)
+	lex_ipa  = fame_functions.load_lexicon(lexicon_ipa)
+	if 1:
+		timer_start = time.time()
+		translation_key, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)
+		print("elapsed time: {}".format(time.time() - timer_start))
+
+		np.save('translation_key_ipa2asr.npy', translation_key)
+		np.save('phone_unknown.npy', phone_unknown)
+	else:
+		translation_key = np.load('translation_key_ipa2asr.npy').item()
+		phone_unknown   = np.load('phone_unknown.npy')
+		phone_unknown   = list(phone_unknown)
+
+
+	## manually check the correspondence for the phone in phone_unknown.
+	#p = phone_unknown[0]
+	#lex_ipa_ = find_phone(lexicon_ipa, p, phoneset='ipa')
+
+	#for word in lex_ipa_['word']:
+	#	ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
+	#	if np.sum(lex_asr['word'] == word) > 0:
+	#		asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
+
+	#		ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
+	#		asr_list = asr.split(' ')
+	#		if p in ipa_list and (len(ipa_list) == len(asr_list)):
+	#			print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
+	#			for ipa_, asr_ in zip(ipa_list, asr_list):
+	#				if ipa_ in phone_unknown:
+	#					translation_key[ipa_] = asr_
+	#					phone_unknown.remove(ipa_)
+
+
+	## check if all the phones in lexicon_ipa are in fame_phoneset.py.
+	#timer_start = time.time()
+	#phoneset_lex = get_phoneset_from_lexicon(lexicon_ipa, phoneset='ipa')
+	#print("elapsed time: {}".format(time.time() - timer_start))
+
+	#phoneset_py = fame_phoneset.phoneset_ipa
+	#set(phoneset_lex) - set(phoneset_py)
+
+	##timer_start = time.time()
+	##extracted = find_phone(lexicon_ipa, 'ⁿ')
+	##print("elapsed time: {}".format(time.time() - timer_start))
+

 	# lex.asr is Kaldi compatible version of lex.ipa.
 	# to check...
 	#lexicon_ipa = pd.read_table(lex_ipa, names=['word', 'pronunciation'])
@@ -140,13 +177,13 @@ if conv_lexicon:
 	#		fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split)))

 	# convert each lexicon from ipa description to fame_htk phoneset.
-	am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
-	am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)
+	#am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
+	#am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)

 	# combine lexicon
 	# pronunciations which is not found in lex.asr are generated using G2P and listed in lex.oov.
 	# therefore there is no overlap between lex_asr and lex_oov.
-	am_func.combine_lexicon(lex_asr_htk, lex_oov_htk, lex_htk)
+	#am_func.combine_lexicon(lex_asr_htk, lex_oov_htk, lex_htk)


 ## ======================= check if all the phones are successfully converted =======================
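One detail of the conv_lexicon block worth noting: np.save stores a dict by wrapping it in a 0-d object array, which is why the load branch calls .item() to unwrap it. On newer NumPy releases (1.16.3 and later) np.load also needs allow_pickle=True for such arrays. A sketch of the round trip, with a toy dict standing in for translation_key:

```python
import numpy as np

translation_key = {'ɔ:': 'oh', 'ɛ:': 'eh'}  # toy stand-in
np.save('translation_key_ipa2asr.npy', translation_key)

# .item() unwraps the 0-d object array back into a dict;
# allow_pickle=True is required on NumPy >= 1.16.3.
restored = np.load('translation_key_ipa2asr.npy', allow_pickle=True).item()
assert restored == translation_key
```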
							
								
								
									
acoustic_model/fame_phoneset.py (new file, 107 lines)

@@ -0,0 +1,107 @@
|  | """ definition of the phones to be used. """ | ||||||
|  |  | ||||||
|  | ## phones in IPA. | ||||||
|  | phoneset_ipa = [ | ||||||
|  | 	# vowels | ||||||
|  | 	'i̯', | ||||||
|  | 	'i̯ⁿ', | ||||||
|  | 	'y', | ||||||
|  | 	'i', | ||||||
|  | 	'i.', | ||||||
|  | 	'iⁿ', | ||||||
|  | 	'i:', | ||||||
|  | 	'i:ⁿ', | ||||||
|  | 	'ɪ', | ||||||
|  | 	'ɪⁿ', | ||||||
|  | 	'ɪ.', | ||||||
|  | 	#'ɪ:', # not included in lex.ipa | ||||||
|  | 	'ɪ:ⁿ', | ||||||
|  | 	'e', | ||||||
|  | 	'e:', | ||||||
|  | 	'e:ⁿ', | ||||||
|  | 	'ə', | ||||||
|  | 	'əⁿ', | ||||||
|  | 	'ə:', | ||||||
|  | 	'ɛ', | ||||||
|  | 	'ɛ.', | ||||||
|  | 	'ɛⁿ', | ||||||
|  | 	'ɛ:', | ||||||
|  | 	'ɛ:ⁿ', | ||||||
|  | 	'a', | ||||||
|  | 	'aⁿ', | ||||||
|  | 	'a.', | ||||||
|  | 	'a:', | ||||||
|  | 	'a:ⁿ', | ||||||
|  | 	'ṷ', | ||||||
|  | 	'ṷ.', | ||||||
|  | 	'ṷⁿ', | ||||||
|  | 	#'ú', # only appears in word 'feeste'(út) and 'gaste'(út) which are 'f e: s t ə' and 'yn' in lex_asr.  | ||||||
|  | 	'u', | ||||||
|  | 	'uⁿ', | ||||||
|  | 	'u.', | ||||||
|  | 	'u:', | ||||||
|  | 	'u:ⁿ', | ||||||
|  | 	'ü', | ||||||
|  | 	'ü.', | ||||||
|  | 	'üⁿ', | ||||||
|  | 	'ü:', | ||||||
|  | 	'ü:ⁿ', | ||||||
|  | 	'o', | ||||||
|  | 	'oⁿ', | ||||||
|  | 	'o.', | ||||||
|  | 	'o:', | ||||||
|  | 	'o:ⁿ', | ||||||
|  | 	'ö', | ||||||
|  | 	'ö.', | ||||||
|  | 	'öⁿ', | ||||||
|  | 	'ö:', | ||||||
|  | 	'ö:ⁿ', | ||||||
|  | 	'ɔ', | ||||||
|  | 	'ɔ.', | ||||||
|  | 	'ɔⁿ', | ||||||
|  | 	'ɔ:', | ||||||
|  | 	'ɔ:ⁿ', | ||||||
|  | 	#'ɔ̈', # not included in lex.ipa  | ||||||
|  | 	'ɔ̈.', | ||||||
|  | 	'ɔ̈:', | ||||||
|  |  | ||||||
|  | 	# plosives | ||||||
|  | 	'p',  | ||||||
|  | 	'b',  | ||||||
|  | 	't', | ||||||
|  | 	'tⁿ', | ||||||
|  | 	'd',  | ||||||
|  | 	'k', | ||||||
|  | 	'g', | ||||||
|  | 	'ɡ', # = 'g' | ||||||
|  |  | ||||||
|  | 	# nasals | ||||||
|  | 	'm', | ||||||
|  | 	'n', | ||||||
|  | 	'ŋ', | ||||||
|  | 	 | ||||||
|  | 	# fricatives | ||||||
|  | 	'f', | ||||||
|  | 	'v', | ||||||
|  | 	's', | ||||||
|  | 	's:', | ||||||
|  | 	'z', | ||||||
|  | 	'zⁿ', | ||||||
|  | 	'x', | ||||||
|  | 	'h', | ||||||
|  |  | ||||||
|  | 	# tap and flip | ||||||
|  | 	'r', | ||||||
|  | 	'r.', # only appears in word 'mearpartijestelsel'(does not exist in lex_asr) and 'tenoarpartij'.    | ||||||
|  | 	'r:', # only appears in word 'mûsearflearmûs' and 'sjochdêr'. | ||||||
|  |  | ||||||
|  | 	# approximant | ||||||
|  | 	'j', | ||||||
|  | 	'j.', | ||||||
|  | 	'l' | ||||||
|  | 	] | ||||||
|  |  | ||||||
|  | ## the list of multi character phones.  | ||||||
|  | # for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter. | ||||||
|  | multi_character_phones_ipa = [i for i in phoneset_ipa if len(i) > 1] | ||||||
|  | multi_character_phones_ipa.sort(key=len, reverse=True) | ||||||
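The final reverse-length sort is what makes the longest-match rule in convert_phone_set.multi_character_tokenize work: a three-code-point phone such as 'i̯ⁿ' must be tried before its two-code-point prefix 'i̯'. A quick check with a synthetic input (the "word" below is made up purely to exercise that rule):

```python
import convert_phone_set
import fame_phoneset

# Longest-first ordering keeps the nasalized diphthong in one piece.
print(convert_phone_set.split_word('si̯ⁿ', fame_phoneset.multi_character_phones_ipa))
# expected: ['s', 'i̯ⁿ']
```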