Add the fame_asr phoneset, including a reduced version and an HTK-compatible version.
This commit is contained in:
		
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							| @@ -32,7 +32,9 @@ | |||||||
|     <Compile Include="defaultfiles.py"> |     <Compile Include="defaultfiles.py"> | ||||||
|       <SubType>Code</SubType> |       <SubType>Code</SubType> | ||||||
|     </Compile> |     </Compile> | ||||||
|     <Compile Include="fame_phoneset.py"> |     <Compile Include="fame_asr.py" /> | ||||||
|  |     <Compile Include="fame_ipa.py" /> | ||||||
|  |     <Compile Include="fame_test.py"> | ||||||
|       <SubType>Code</SubType> |       <SubType>Code</SubType> | ||||||
|     </Compile> |     </Compile> | ||||||
|     <Compile Include="fa_test.py"> |     <Compile Include="fa_test.py"> | ||||||
|   | |||||||
| @@ -20,7 +20,7 @@ def split_word(word, multi_character_phones): | |||||||
| 	 | 	 | ||||||
| 	Args: | 	Args: | ||||||
| 		word (str): a word written in given phoneset. | 		word (str): a word written in given phoneset. | ||||||
| 		multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_phoneset.py.  | 		multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_ipa.py.  | ||||||
|  |  | ||||||
| 	Returns: | 	Returns: | ||||||
| 		(word_seperated) (list): the word splitted in given phoneset.  | 		(word_seperated) (list): the word splitted in given phoneset.  | ||||||
|   | |||||||
							
								
								
									
										127
									
								
								acoustic_model/fame_asr.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										127
									
								
								acoustic_model/fame_asr.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,127 @@ | |||||||
|  | """ definition of the phones to be used. """ | ||||||
|  |  | ||||||
|  | # phones in {FAME}/lexicon/lex.asr | ||||||
|  | phoneset = [ | ||||||
|  | 	# vowels | ||||||
|  | 	'a', | ||||||
|  | 	'a:', | ||||||
|  | 	'e', | ||||||
|  | 	'e:', | ||||||
|  | 	'i', | ||||||
|  | 	'i:', | ||||||
|  | 	'i̯', | ||||||
|  | 	'o', | ||||||
|  | 	'o:', | ||||||
|  | 	'ö', | ||||||
|  | 	'ö:', | ||||||
|  | 	'u', | ||||||
|  | 	'u:', | ||||||
|  | 	'ü', | ||||||
|  | 	'ü:', | ||||||
|  | 	#'ú', # only appears in the words 'feeste'(út) and 'gaste'(út), which are 'f e: s t ə' and 'yn' in lex_asr. The pronunciation in Frisian may be a mistake, so this phone was removed. | ||||||
|  | 	'ṷ', | ||||||
|  | 	'y', | ||||||
|  | 	'ɔ', | ||||||
|  | 	'ɔ:', | ||||||
|  | 	'ɔ̈',  | ||||||
|  | 	'ɔ̈:', | ||||||
|  | 	'ə', | ||||||
|  | 	'ɛ', | ||||||
|  | 	'ɛ:', | ||||||
|  | 	'ɪ', | ||||||
|  | 	'ɪ:', | ||||||
|  |  | ||||||
|  | 	# plosives | ||||||
|  | 	'p',  | ||||||
|  | 	'b',  | ||||||
|  | 	't', | ||||||
|  | 	'd',  | ||||||
|  | 	'k', | ||||||
|  | 	'g', | ||||||
|  | 	'ɡ', # = 'g' | ||||||
|  |  | ||||||
|  | 	# nasals | ||||||
|  | 	'm', | ||||||
|  | 	'n', | ||||||
|  | 	'ŋ', | ||||||
|  | 	 | ||||||
|  | 	# fricatives | ||||||
|  | 	'f', | ||||||
|  | 	'v', | ||||||
|  | 	's', | ||||||
|  | 	's:', | ||||||
|  | 	'z', | ||||||
|  | 	'x', | ||||||
|  | 	'h', | ||||||
|  | 	 | ||||||
|  | 	# tap and flap | ||||||
|  | 	'r', | ||||||
|  | 	'r:', | ||||||
|  |  | ||||||
|  | 	# approximant | ||||||
|  | 	'j', | ||||||
|  | 	'l' | ||||||
|  | 	] | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## reduce the number of phones. | ||||||
|  | # phones which seldom occur are replaced with other, more common phones. | ||||||
|  | # replacements are based on the advice from Martijn Wieling. | ||||||
|  | reduction_key = { | ||||||
|  | 	'y':'i:', 'e':'e:', 'ə:':'ɛ:', 'r:':'r', 'ɡ':'g' | ||||||
|  | 	} | ||||||
|  | # phones excluded from the reduced phoneset. 'ú' is already commented out of phoneset above; it is listed just to be sure. | ||||||
|  | phones_to_be_removed = ['ú', 's:', 'ɔ̈:'] | ||||||
|  |  | ||||||
|  | phoneset_short = [reduction_key.get(i, i) for i in phoneset | ||||||
|  | 				  if not i in phones_to_be_removed] | ||||||
|  | phoneset_short = list(set(phoneset_short)) | ||||||
|  | phoneset_short.sort() | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## translation_key to htk format (ascii). | ||||||
|  | # phones which raise UnicodeEncodeError on phone.encode("ascii") | ||||||
|  | # are replaced with ASCII characters. | ||||||
|  | translation_key_asr2htk = { | ||||||
|  | 	'i̯': 'i_', | ||||||
|  | 	'ṷ': 'u_', | ||||||
|  |  | ||||||
|  | 	# by analogy with the transcription of German umlauts, 'e' is appended. | ||||||
|  | 	'ö': 'oe', 'ö:': 'oe:', | ||||||
|  | 	'ü': 'ue', 'ü:': 'ue:', | ||||||
|  |  | ||||||
|  | 	# by analogy with Chinese romanization, 'ng' is used. | ||||||
|  | 	'ŋ': 'ng', | ||||||
|  | 				 | ||||||
|  | 	# based on X-SAMPA. | ||||||
|  | 	'ɔ': 'O', 'ɔ:': 'O:', 'ɔ̈': 'Oe', | ||||||
|  | 	'ɛ': 'E', 'ɛ:': 'E:', | ||||||
|  | 	'ɪ': 'I', 'ɪ:': 'I:',  | ||||||
|  |  | ||||||
|  | 	# this is '@' in X-SAMPA, but that is inconvenient in HTK. | ||||||
|  | 	'ə': 'A' | ||||||
|  | 	} | ||||||
|  | phoneset_htk = [translation_key_asr2htk.get(i, i) for i in phoneset_short] | ||||||
|  |  | ||||||
|  | ## check | ||||||
|  | #for i in phoneset_short: | ||||||
|  | #	try: | ||||||
|  | #		print("{0} --> {1}".format(i, i.encode("ascii"))) | ||||||
|  | #	except UnicodeEncodeError: | ||||||
|  | #		print(">>> {}".format(i)) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## the list of multi character phones.  | ||||||
|  | # for example, the length of 'a:' is 2, but in the code it is treated as one phone. | ||||||
|  |  | ||||||
|  | # original. | ||||||
|  | multi_character_phones = [i for i in phoneset if len(i) > 1] | ||||||
|  | multi_character_phones.sort(key=len, reverse=True) | ||||||
|  |  | ||||||
|  | # reduced phoneset. | ||||||
|  | multi_character_phones_short = [i for i in phoneset_short if len(i) > 1] | ||||||
|  | multi_character_phones_short.sort(key=len, reverse=True) | ||||||
|  |  | ||||||
|  | # htk compatible. | ||||||
|  | multi_character_phones_htk = [i for i in phoneset_htk if len(i) > 1] | ||||||
|  | multi_character_phones_htk.sort(key=len, reverse=True) | ||||||
| @@ -1,5 +1,4 @@ | |||||||
| import os | import os | ||||||
| os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') |  | ||||||
|  |  | ||||||
| import sys | import sys | ||||||
| from collections import Counter | from collections import Counter | ||||||
| @@ -9,7 +8,7 @@ import numpy as np | |||||||
| import pandas as pd | import pandas as pd | ||||||
|  |  | ||||||
| import defaultfiles as default | import defaultfiles as default | ||||||
| import fame_phoneset | from phoneset import fame_ipa | ||||||
| import convert_phone_set | import convert_phone_set | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -110,14 +109,6 @@ import convert_phone_set | |||||||
|  |  | ||||||
| #    return ipa | #    return ipa | ||||||
|  |  | ||||||
| #def make_filelist(input_dir, output_txt): |  | ||||||
| #	""" Make a list of files in the input_dir. """ |  | ||||||
| #	filenames = os.listdir(input_dir) |  | ||||||
|  |  | ||||||
| #	with open(output_txt, 'w') as fout: |  | ||||||
| #		for filename in filenames: |  | ||||||
| #			fout.write(input_dir + '\\' + filename + '\n') |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #def make_htk_dict(word, pronvar_, fileDic, output_type): | #def make_htk_dict(word, pronvar_, fileDic, output_type): | ||||||
| #    """ | #    """ | ||||||
| @@ -179,10 +170,11 @@ def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_s | |||||||
|  |  | ||||||
| 				fout.write(wav_file + '\t' + mfc_file + '\n') | 				fout.write(wav_file + '\t' + mfc_file + '\n') | ||||||
|  |  | ||||||
|  | 	return | ||||||
|  |  | ||||||
|  |  | ||||||
| def load_lexicon(lexicon_file): | def load_lexicon(lexicon_file): | ||||||
| 	""" load lexicon file as Data Frame. | 	""" load lexicon file as data frame. | ||||||
|  |  | ||||||
| 	Args: | 	Args: | ||||||
| 		lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'. | 		lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'. | ||||||
| @@ -196,25 +188,27 @@ def load_lexicon(lexicon_file): | |||||||
| 	return lex | 	return lex | ||||||
|  |  | ||||||
|  |  | ||||||
| def get_phoneset_from_lexicon(lexicon_file, phoneset='asr'): | def get_phoneset_from_lexicon(lexicon_file, phoneset_name='asr'): | ||||||
| 	""" Make a list of phones which appears in the lexicon.  | 	""" Make a list of phones which appears in the lexicon.  | ||||||
| 	 | 	 | ||||||
| 	Args: | 	Args: | ||||||
| 		lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'. | 		lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'. | ||||||
| 		phoneset (str): the phoneset with which lexicon_file is written. 'asr'(default) or 'ipa'. | 		phoneset_name (str): the name of phoneset with which lexicon_file is written. 'asr'(default) or 'ipa'. | ||||||
|  |  | ||||||
| 	Returns: | 	Returns: | ||||||
| 		(list_of_phones) (set): the set of phones included in the lexicon_file. | 		(list_of_phones) (set): the set of phones included in the lexicon_file. | ||||||
|  |  | ||||||
| 	""" | 	""" | ||||||
| 	assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\'' | 	assert phoneset_name in ['asr', 'ipa'], 'phoneset_name should be \'asr\' or \'ipa\'' | ||||||
|  |  | ||||||
| 	lex = load_lexicon(lexicon_file) | 	lex = load_lexicon(lexicon_file) | ||||||
| 	if phoneset == 'asr': | 	if phoneset_name == 'asr': | ||||||
| 		return set(' '.join(lex['pronunciation']).split(' ')) | 		return set(' '.join(lex['pronunciation']).split(' ')) | ||||||
| 	elif phoneset == 'ipa': | 	elif phoneset_name == 'ipa': | ||||||
| 		join_pronunciations = ''.join(lex['pronunciation']) | 		join_pronunciations = ''.join(lex['pronunciation']) | ||||||
| 		return set(convert_phone_set.split_word(join_pronunciations, fame_phoneset.multi_character_phones_ipa)) | 		return set(convert_phone_set.split_word(join_pronunciations, fame_ipa.multi_character_phones)) | ||||||
|  |  | ||||||
|  | 	return | ||||||
|  |  | ||||||
|  |  | ||||||
| def extract_unknown_phones(ipa, known_phones): | def extract_unknown_phones(ipa, known_phones): | ||||||
| @@ -228,7 +222,7 @@ def extract_unknown_phones(ipa, known_phones): | |||||||
| 		(list_of_phones) (list): unknown phones not included in 'known_phones'. | 		(list_of_phones) (list): unknown phones not included in 'known_phones'. | ||||||
|  |  | ||||||
| 	""" | 	""" | ||||||
| 	ipa_split = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa) | 	ipa_split = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones) | ||||||
| 	return [i for i in ipa_split if not i in known_phones] | 	return [i for i in ipa_split if not i in known_phones] | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -247,14 +241,14 @@ def get_translation_key(lexicon_file_ipa, lexicon_file_asr): | |||||||
| 	""" | 	""" | ||||||
| 	lex_ipa = load_lexicon(lexicon_file_ipa) | 	lex_ipa = load_lexicon(lexicon_file_ipa) | ||||||
| 	lex_asr = load_lexicon(lexicon_file_asr) | 	lex_asr = load_lexicon(lexicon_file_asr) | ||||||
| 	phone_unknown = fame_phoneset.phoneset_ipa[:] | 	phone_unknown = fame_ipa.phoneset[:] | ||||||
| 	translation_key = dict() | 	translation_key = dict() | ||||||
| 	for word in lex_ipa['word']: | 	for word in lex_ipa['word']: | ||||||
| 		if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1: | 		if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1: | ||||||
| 			ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1] | 			ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1] | ||||||
| 			asr = lex_asr[lex_asr['word'] == word].iat[0, 1] | 			asr = lex_asr[lex_asr['word'] == word].iat[0, 1] | ||||||
| 	 | 	 | ||||||
| 			ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa) | 			ipa_list = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones) | ||||||
| 			asr_list = asr.split(' ') | 			asr_list = asr.split(' ') | ||||||
|  |  | ||||||
| 			# if there are phones which is not in phone_unknown | 			# if there are phones which is not in phone_unknown | ||||||
| @@ -268,13 +262,13 @@ def get_translation_key(lexicon_file_ipa, lexicon_file_asr): | |||||||
| 	return translation_key, list(phone_unknown) | 	return translation_key, list(phone_unknown) | ||||||
|  |  | ||||||
|  |  | ||||||
| def find_phone(lexicon_file, phone, phoneset='ipa'): | def find_phone(lexicon_file, phone, phoneset_name='ipa'): | ||||||
| 	""" extract rows where the phone is used in the lexicon_file.  | 	""" extract rows where the phone is used in the lexicon_file.  | ||||||
|  |  | ||||||
| 	Args: | 	Args: | ||||||
| 		lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'. | 		lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'. | ||||||
| 		phone (str): the phone to be searched. | 		phone (str): the phone to be searched. | ||||||
| 		phoneset (str): the phoneset with which lexicon_file is written. 'asr' or 'ipa'(default). | 		phoneset_name (str): the name of the phoneset with which lexicon_file is written. 'asr' or 'ipa'(default). | ||||||
|  |  | ||||||
| 	Returns: | 	Returns: | ||||||
| 		extracted (df): rows where the phone is used. | 		extracted (df): rows where the phone is used. | ||||||
| @@ -283,7 +277,7 @@ def find_phone(lexicon_file, phone, phoneset='ipa'): | |||||||
| 		* develop when the phonset == 'asr'. | 		* develop when the phonset == 'asr'. | ||||||
|  |  | ||||||
| 	""" | 	""" | ||||||
| 	assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\'' | 	assert phoneset_name in ['asr', 'ipa'], 'phoneset_name should be \'asr\' or \'ipa\'' | ||||||
| 	 | 	 | ||||||
| 	lex = load_lexicon(lexicon_file) | 	lex = load_lexicon(lexicon_file) | ||||||
| 	 | 	 | ||||||
| @@ -292,8 +286,8 @@ def find_phone(lexicon_file, phone, phoneset='ipa'): | |||||||
|  |  | ||||||
| 	extracted = pd.DataFrame(index=[], columns=['word', 'pronunciation']) | 	extracted = pd.DataFrame(index=[], columns=['word', 'pronunciation']) | ||||||
| 	for index, row in lex_.iterrows(): | 	for index, row in lex_.iterrows(): | ||||||
| 		if phoneset == 'ipa': | 		if phoneset_name == 'ipa': | ||||||
| 			pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_phoneset.multi_character_phones_ipa) | 			pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_ipa.multi_character_phones) | ||||||
| 		if phone in pronunciation: | 		if phone in pronunciation: | ||||||
| 			extracted_ = pd.Series([row['word'], pronunciation], index=extracted.columns) | 			extracted_ = pd.Series([row['word'], pronunciation], index=extracted.columns) | ||||||
| 			extracted  = extracted.append(extracted_, ignore_index=True) | 			extracted  = extracted.append(extracted_, ignore_index=True) | ||||||
|   | |||||||
| @@ -8,8 +8,8 @@ import tempfile | |||||||
| #from collections import Counter | #from collections import Counter | ||||||
| import time | import time | ||||||
|  |  | ||||||
| #import numpy as np | import numpy as np | ||||||
| #import pandas as pd | import pandas as pd | ||||||
|  |  | ||||||
| import fame_functions | import fame_functions | ||||||
| import defaultfiles as default | import defaultfiles as default | ||||||
| @@ -54,6 +54,10 @@ conv_lexicon	  = 1 | |||||||
| #mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl'] | #mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl'] | ||||||
| #FAME_dir	 = config['Settings']['FAME_dir'] | #FAME_dir	 = config['Settings']['FAME_dir'] | ||||||
|  |  | ||||||
|  | #lexicon_dir = os.path.join(default.fame_dir, 'lexicon')  | ||||||
|  | #lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa') | ||||||
|  | #lexicon_asr = os.path.join(lexicon_dir, 'lex.asr') | ||||||
|  |  | ||||||
| #lex_asr		= FAME_dir + '\\lexicon\\lex.asr' | #lex_asr		= FAME_dir + '\\lexicon\\lex.asr' | ||||||
| #lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk' | #lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk' | ||||||
| #lex_oov		= FAME_dir + '\\lexicon\\lex.oov' | #lex_oov		= FAME_dir + '\\lexicon\\lex.oov' | ||||||
| @@ -111,71 +115,6 @@ if extract_features: | |||||||
| ## ======================= convert lexicon from ipa to fame_htk ======================= | ## ======================= convert lexicon from ipa to fame_htk ======================= | ||||||
| if conv_lexicon: | if conv_lexicon: | ||||||
| 	print('==== convert lexicon from ipa 2 fame ====\n') | 	print('==== convert lexicon from ipa 2 fame ====\n') | ||||||
| 	 |  | ||||||
| 	#dir_out = r'c:\Users\Aki\source\repos\acoustic_model\_tmp' |  | ||||||
| 	lexicon_dir = os.path.join(default.fame_dir, 'lexicon')  |  | ||||||
| 	lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa') |  | ||||||
| 	lexicon_asr = os.path.join(lexicon_dir, 'lex.asr') |  | ||||||
|  |  | ||||||
| 	# get the correspondence between lex_ipa and lex_asr. |  | ||||||
| 	lex_asr  = fame_functions.load_lexicon(lexicon_asr) |  | ||||||
| 	lex_ipa  = fame_functions.load_lexicon(lexicon_ipa)		 |  | ||||||
| 	if 1: |  | ||||||
| 		timer_start = time.time() |  | ||||||
| 		translation_key, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr) |  | ||||||
| 		print("elapsed time: {}".format(time.time() - timer_start)) |  | ||||||
|  |  | ||||||
| 		np.save('translation_key_ipa2asr.npy', translation_key) |  | ||||||
| 		np.save('phone_unknown.npy', phone_unknown) |  | ||||||
| 	else: |  | ||||||
| 		translation_key = np.load('translation_key_ipa2asr.npy').item() |  | ||||||
| 		phone_unknown   = np.load('phone_unknown.npy') |  | ||||||
| 		phone_unknown   = list(phone_unknown) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| 	## manually check the correspondence for the phone in phone_unknown. |  | ||||||
| 	#p = phone_unknown[0] |  | ||||||
| 	#lex_ipa_ = find_phone(lexicon_ipa, p, phoneset='ipa') |  | ||||||
|  |  | ||||||
| 	#for word in lex_ipa_['word']: |  | ||||||
| 	#	ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1] |  | ||||||
| 	#	if np.sum(lex_asr['word'] == word) > 0: |  | ||||||
| 	#		asr = lex_asr[lex_asr['word'] == word].iat[0, 1] |  | ||||||
| 	 |  | ||||||
| 	#		ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa) |  | ||||||
| 	#		asr_list = asr.split(' ') |  | ||||||
| 	#		if p in ipa_list and (len(ipa_list) == len(asr_list)): |  | ||||||
| 	#			print("{0}: {1} --> {2}".format(word, ipa_list, asr_list)) |  | ||||||
| 	#			for ipa_, asr_ in zip(ipa_list, asr_list): |  | ||||||
| 	#				if ipa_ in phone_unknown: |  | ||||||
| 	#					translation_key[ipa_] = asr_ |  | ||||||
| 	#					phone_unknown.remove(ipa_) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| 	## check if all the phones in lexicon_ipa are in fame_phoneset.py. |  | ||||||
| 	#timer_start = time.time() |  | ||||||
| 	#phoneset_lex = get_phoneset_from_lexicon(lexicon_ipa, phoneset='ipa') |  | ||||||
| 	#print("elapsed time: {}".format(time.time() - timer_start)) |  | ||||||
| 	 |  | ||||||
| 	#phoneset_py = fame_phoneset.phoneset_ipa |  | ||||||
| 	#set(phoneset_lex) - set(phoneset_py) |  | ||||||
|  |  | ||||||
| 	##timer_start = time.time() |  | ||||||
| 	##extracted = find_phone(lexicon_ipa, 'ⁿ') |  | ||||||
| 	##print("elapsed time: {}".format(time.time() - timer_start)) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| 	# lex.asr is Kaldi compatible version of lex.ipa. |  | ||||||
| 	# to check...  |  | ||||||
| 	#lexicon_ipa = pd.read_table(lex_ipa, names=['word', 'pronunciation']) |  | ||||||
| 	#with open(lex_ipa_, "w", encoding="utf-8") as fout: |  | ||||||
| 	#	for word, pronunciation in zip(lexicon_ipa['word'], lexicon_ipa['pronunciation']): |  | ||||||
| 	#		# ignore nasalization and '.' |  | ||||||
| 	#		pronunciation_ = pronunciation.replace(u'ⁿ', '') |  | ||||||
| 	#		pronunciation_ = pronunciation_.replace('.', '') |  | ||||||
| 	#		pronunciation_split = convert_phone_set.split_ipa_fame(pronunciation_) |  | ||||||
| 	#		fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split))) |  | ||||||
|  |  | ||||||
| 	# convert each lexicon from ipa description to fame_htk phoneset. | 	# convert each lexicon from ipa description to fame_htk phoneset. | ||||||
| 	#am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk) | 	#am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk) | ||||||
| 	#am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk) | 	#am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk) | ||||||
|   | |||||||
							
								
								
									
										107
									
								
								acoustic_model/fame_ipa.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								acoustic_model/fame_ipa.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | |||||||
|  | """ definition of the phones to be used. """ | ||||||
|  |  | ||||||
|  | phoneset = [ | ||||||
|  | 	# vowels | ||||||
|  | 	'i̯', | ||||||
|  | 	'i̯ⁿ', | ||||||
|  | 	'y', | ||||||
|  | 	'i', | ||||||
|  | 	'i.', | ||||||
|  | 	'iⁿ', | ||||||
|  | 	'i:', | ||||||
|  | 	'i:ⁿ', | ||||||
|  | 	'ɪ', | ||||||
|  | 	'ɪⁿ', | ||||||
|  | 	'ɪ.', | ||||||
|  | 	#'ɪ:', # not included in lex.ipa | ||||||
|  | 	'ɪ:ⁿ', | ||||||
|  | 	'e', | ||||||
|  | 	'e:', | ||||||
|  | 	'e:ⁿ', | ||||||
|  | 	'ə', | ||||||
|  | 	'əⁿ', | ||||||
|  | 	'ə:', | ||||||
|  | 	'ɛ', | ||||||
|  | 	'ɛ.', | ||||||
|  | 	'ɛⁿ', | ||||||
|  | 	'ɛ:', | ||||||
|  | 	'ɛ:ⁿ', | ||||||
|  | 	'a', | ||||||
|  | 	'aⁿ', | ||||||
|  | 	'a.', | ||||||
|  | 	'a:', | ||||||
|  | 	'a:ⁿ', | ||||||
|  | 	'ṷ', | ||||||
|  | 	'ṷ.', | ||||||
|  | 	'ṷⁿ', | ||||||
|  | 	#'ú', # only appears in the words 'feeste'(út) and 'gaste'(út), which are 'f e: s t ə' and 'yn' in lex_asr. The pronunciation in Frisian may be a mistake, so this phone was removed. | ||||||
|  | 	'u', | ||||||
|  | 	'uⁿ', | ||||||
|  | 	'u.', | ||||||
|  | 	'u:', | ||||||
|  | 	'u:ⁿ', | ||||||
|  | 	'ü', | ||||||
|  | 	'ü.', | ||||||
|  | 	'üⁿ', | ||||||
|  | 	'ü:', | ||||||
|  | 	'ü:ⁿ', | ||||||
|  | 	'o', | ||||||
|  | 	'oⁿ', | ||||||
|  | 	'o.', | ||||||
|  | 	'o:', | ||||||
|  | 	'o:ⁿ', | ||||||
|  | 	'ö', | ||||||
|  | 	'ö.', | ||||||
|  | 	'öⁿ', | ||||||
|  | 	'ö:', | ||||||
|  | 	'ö:ⁿ', | ||||||
|  | 	'ɔ', | ||||||
|  | 	'ɔ.', | ||||||
|  | 	'ɔⁿ', | ||||||
|  | 	'ɔ:', | ||||||
|  | 	'ɔ:ⁿ', | ||||||
|  | 	#'ɔ̈', # not included in lex.ipa  | ||||||
|  | 	'ɔ̈.', | ||||||
|  | 	'ɔ̈:', | ||||||
|  |  | ||||||
|  | 	# plosives | ||||||
|  | 	'p',  | ||||||
|  | 	'b',  | ||||||
|  | 	't', | ||||||
|  | 	'tⁿ', | ||||||
|  | 	'd',  | ||||||
|  | 	'k', | ||||||
|  | 	'g', | ||||||
|  | 	'ɡ', # = 'g' | ||||||
|  |  | ||||||
|  | 	# nasals | ||||||
|  | 	'm', | ||||||
|  | 	'n', | ||||||
|  | 	'ŋ', | ||||||
|  | 	 | ||||||
|  | 	# fricatives | ||||||
|  | 	'f', | ||||||
|  | 	'v', | ||||||
|  | 	's', | ||||||
|  | 	's:', | ||||||
|  | 	'z', | ||||||
|  | 	'zⁿ', | ||||||
|  | 	'x', | ||||||
|  | 	'h', | ||||||
|  |  | ||||||
|  | 	# tap and flap | ||||||
|  | 	'r', | ||||||
|  | 	'r.', # only appears in word 'mearpartijestelsel'(does not exist in lex_asr) and 'tenoarpartij'.    | ||||||
|  | 	'r:', # only appears in word 'mûsearflearmûs' and 'sjochdêr'. | ||||||
|  |  | ||||||
|  | 	# approximant | ||||||
|  | 	'j', | ||||||
|  | 	'j.', | ||||||
|  | 	'l' | ||||||
|  | 	] | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## the list of multi character phones.  | ||||||
|  | # for example, the length of 'i̯ⁿ' is 3, but in the code it is treated as one phone. | ||||||
|  | multi_character_phones = [i for i in phoneset if len(i) > 1] | ||||||
|  | multi_character_phones.sort(key=len, reverse=True) | ||||||
							
								
								
									
										93
									
								
								acoustic_model/fame_test.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								acoustic_model/fame_test.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,93 @@ | |||||||
|  | import sys | ||||||
|  | import os | ||||||
|  | os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') | ||||||
|  |  | ||||||
|  | import time | ||||||
|  |  | ||||||
|  | import numpy as np | ||||||
|  | import pandas as pd | ||||||
|  |  | ||||||
|  | import fame_functions | ||||||
|  | import defaultfiles as default | ||||||
|  | sys.path.append(default.toolbox_dir) | ||||||
|  | from phoneset import fame_ipa, fame_asr | ||||||
|  |  | ||||||
|  |  | ||||||
|  | lexicon_dir = os.path.join(default.fame_dir, 'lexicon')  | ||||||
|  | lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa') | ||||||
|  | lexicon_asr = os.path.join(lexicon_dir, 'lex.asr') | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## check if all the phones in lexicon.ipa are in fame_ipa.py. | ||||||
|  | #timer_start = time.time() | ||||||
|  | #phoneset_lex = fame_functions.get_phoneset_from_lexicon(lexicon_ipa, phoneset_name='ipa')	 | ||||||
|  | #phoneset_py = fame_ipa.phoneset | ||||||
|  | #print("phones which is in lexicon.ipa but not in fame_ipa.py:\n{}".format( | ||||||
|  | #	set(phoneset_lex) - set(phoneset_py))) | ||||||
|  | #print("elapsed time: {}".format(time.time() - timer_start)) | ||||||
|  |  | ||||||
|  | # check which word has the phone.  | ||||||
|  | #timer_start = time.time() | ||||||
|  | #extracted = find_phone(lexicon_ipa, 'ⁿ') | ||||||
|  | #print("elapsed time: {}".format(time.time() - timer_start)) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## get the correspondence between lex_ipa and lex_asr.	 | ||||||
|  | lex_asr  = fame_functions.load_lexicon(lexicon_asr) | ||||||
|  | lex_ipa  = fame_functions.load_lexicon(lexicon_ipa)		 | ||||||
|  | if 0: | ||||||
|  | 	timer_start = time.time() | ||||||
|  | 	translation_key_ipa2asr, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr) | ||||||
|  | 	print("elapsed time: {}".format(time.time() - timer_start)) | ||||||
|  |  | ||||||
|  | 	np.save(os.path.join('phoneset', 'output_get_translation_key_translation_key.npy'), translation_key_ipa2asr) | ||||||
|  | 	np.save(os.path.join('phoneset', 'output_get_translation_key_phone_unknown.npy'), phone_unknown) | ||||||
|  | else: | ||||||
|  | 	translation_key_ipa2asr = np.load(os.path.join('phoneset', 'output_get_translation_key_translation_key.npy')).item() | ||||||
|  | 	phone_unknown   = np.load(os.path.join('phoneset', 'output_get_translation_key_phone_unknown.npy')) | ||||||
|  | 	phone_unknown   = list(phone_unknown) | ||||||
|  |  | ||||||
|  | # manually check the correspondence for the phone in phone_unknown. | ||||||
|  | #p = phone_unknown[0] | ||||||
|  | #lex_ipa_ = fame_functions.find_phone(lexicon_ipa, p, phoneset_name='ipa') | ||||||
|  |  | ||||||
|  | #for word in lex_ipa_['word']: | ||||||
|  | #	ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1] | ||||||
|  | #	if np.sum(lex_asr['word'] == word) > 0: | ||||||
|  | #		asr = lex_asr[lex_asr['word'] == word].iat[0, 1] | ||||||
|  | 	 | ||||||
|  | #		ipa_list = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones) | ||||||
|  | #		asr_list = asr.split(' ') | ||||||
|  | #		if p in ipa_list and (len(ipa_list) == len(asr_list)): | ||||||
|  | #			print("{0}: {1} --> {2}".format(word, ipa_list, asr_list)) | ||||||
|  | #			for ipa_, asr_ in zip(ipa_list, asr_list): | ||||||
|  | #				if ipa_ in phone_unknown: | ||||||
|  | #					translation_key_ipa2asr[ipa_] = asr_ | ||||||
|  | #					phone_unknown.remove(ipa_) | ||||||
|  | translation_key_ipa2asr['ə:'] = 'ə' | ||||||
|  | translation_key_ipa2asr['r.'] = 'r' | ||||||
|  | translation_key_ipa2asr['r:'] = 'r' | ||||||
|  | np.save(os.path.join('phoneset', 'fame_ipa2asr.npy'), translation_key_ipa2asr) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## check if all the phones in lexicon.asr are in translation_key_ipa2asr. | ||||||
|  | timer_start = time.time() | ||||||
|  | phoneset_lex = fame_functions.get_phoneset_from_lexicon(lexicon_asr, phoneset='asr') | ||||||
|  | phoneset_lex.remove("") | ||||||
|  | phoneset_asr = list(set(translation_key_ipa2asr.values())) | ||||||
|  | print("phones which is in lexicon.asr but not in the translation_key_ipa2asr:\n{}".format( | ||||||
|  | 	set(phoneset_lex) - set(phoneset_asr))) | ||||||
|  | print("elapsed time: {}".format(time.time() - timer_start)) | ||||||
|  |  | ||||||
|  | ## make the translation key from asr to htk. | ||||||
|  | #multi_character_phones = [i for i in phoneset_asr if len(i) > 1] | ||||||
|  | #multi_character_phones.sort(key=len, reverse=True) | ||||||
|  |  | ||||||
|  | #lexicon_ipa = pd.read_table(lex_ipa, names=['word', 'pronunciation']) | ||||||
|  | #with open(lex_ipa_, "w", encoding="utf-8") as fout: | ||||||
|  | #	for word, pronunciation in zip(lexicon_ipa['word'], lexicon_ipa['pronunciation']): | ||||||
|  | #		# ignore nasalization and '.' | ||||||
|  | #		pronunciation_ = pronunciation.replace(u'ⁿ', '') | ||||||
|  | #		pronunciation_ = pronunciation_.replace('.', '') | ||||||
|  | #		pronunciation_split = convert_phone_set.split_ipa_fame(pronunciation_) | ||||||
|  | #		fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split))) | ||||||
| @@ -1,7 +1,6 @@ | |||||||
| """ definition of the phones to be used. """ | """ definition of the phones to be used. """ | ||||||
| 
 | 
 | ||||||
| ## phones in IPA. | phoneset = [ | ||||||
| phoneset_ipa = [ |  | ||||||
| 	# vowels | 	# vowels | ||||||
| 	'i̯', | 	'i̯', | ||||||
| 	'i̯ⁿ', | 	'i̯ⁿ', | ||||||
| @@ -103,5 +102,5 @@ phoneset_ipa = [ | |||||||
| 
 | 
 | ||||||
| ## the list of multi character phones.  | ## the list of multi character phones.  | ||||||
| # for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter. | # for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter. | ||||||
| multi_character_phones_ipa = [i for i in phoneset_ipa if len(i) > 1] | multi_character_phones = [i for i in phoneset if len(i) > 1] | ||||||
| multi_character_phones_ipa.sort(key=len, reverse=True) | multi_character_phones.sort(key=len, reverse=True) | ||||||
							
								
								
									
										106
									
								
								acoustic_model/phoneset/fame_ipa.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								acoustic_model/phoneset/fame_ipa.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,106 @@ | |||||||
|  | """ definition of the phones to be used. """ | ||||||
|  |  | ||||||
|  | phoneset = [ | ||||||
|  | 	# vowels | ||||||
|  | 	'i̯', | ||||||
|  | 	'i̯ⁿ', | ||||||
|  | 	'y', | ||||||
|  | 	'i', | ||||||
|  | 	'i.', | ||||||
|  | 	'iⁿ', | ||||||
|  | 	'i:', | ||||||
|  | 	'i:ⁿ', | ||||||
|  | 	'ɪ', | ||||||
|  | 	'ɪⁿ', | ||||||
|  | 	'ɪ.', | ||||||
|  | 	#'ɪ:', # not included in lex.ipa | ||||||
|  | 	'ɪ:ⁿ', | ||||||
|  | 	'e', | ||||||
|  | 	'e:', | ||||||
|  | 	'e:ⁿ', | ||||||
|  | 	'ə', | ||||||
|  | 	'əⁿ', | ||||||
|  | 	'ə:', | ||||||
|  | 	'ɛ', | ||||||
|  | 	'ɛ.', | ||||||
|  | 	'ɛⁿ', | ||||||
|  | 	'ɛ:', | ||||||
|  | 	'ɛ:ⁿ', | ||||||
|  | 	'a', | ||||||
|  | 	'aⁿ', | ||||||
|  | 	'a.', | ||||||
|  | 	'a:', | ||||||
|  | 	'a:ⁿ', | ||||||
|  | 	'ṷ', | ||||||
|  | 	'ṷ.', | ||||||
|  | 	'ṷⁿ', | ||||||
|  | 	#'ú', # only appears in word 'feeste'(út) and 'gaste'(út) which are 'f e: s t ə' and 'yn' in lex_asr.  | ||||||
|  | 	'u', | ||||||
|  | 	'uⁿ', | ||||||
|  | 	'u.', | ||||||
|  | 	'u:', | ||||||
|  | 	'u:ⁿ', | ||||||
|  | 	'ü', | ||||||
|  | 	'ü.', | ||||||
|  | 	'üⁿ', | ||||||
|  | 	'ü:', | ||||||
|  | 	'ü:ⁿ', | ||||||
|  | 	'o', | ||||||
|  | 	'oⁿ', | ||||||
|  | 	'o.', | ||||||
|  | 	'o:', | ||||||
|  | 	'o:ⁿ', | ||||||
|  | 	'ö', | ||||||
|  | 	'ö.', | ||||||
|  | 	'öⁿ', | ||||||
|  | 	'ö:', | ||||||
|  | 	'ö:ⁿ', | ||||||
|  | 	'ɔ', | ||||||
|  | 	'ɔ.', | ||||||
|  | 	'ɔⁿ', | ||||||
|  | 	'ɔ:', | ||||||
|  | 	'ɔ:ⁿ', | ||||||
|  | 	#'ɔ̈', # not included in lex.ipa  | ||||||
|  | 	'ɔ̈.', | ||||||
|  | 	'ɔ̈:', | ||||||
|  |  | ||||||
|  | 	# plosives | ||||||
|  | 	'p',  | ||||||
|  | 	'b',  | ||||||
|  | 	't', | ||||||
|  | 	'tⁿ', | ||||||
|  | 	'd',  | ||||||
|  | 	'k', | ||||||
|  | 	'g', | ||||||
|  | 	'ɡ', # = 'g' | ||||||
|  |  | ||||||
|  | 	# nasals | ||||||
|  | 	'm', | ||||||
|  | 	'n', | ||||||
|  | 	'ŋ', | ||||||
|  | 	 | ||||||
|  | 	# fricatives | ||||||
|  | 	'f', | ||||||
|  | 	'v', | ||||||
|  | 	's', | ||||||
|  | 	's:', | ||||||
|  | 	'z', | ||||||
|  | 	'zⁿ', | ||||||
|  | 	'x', | ||||||
|  | 	'h', | ||||||
|  |  | ||||||
|  | 	# tap and flap | ||||||
|  | 	'r', | ||||||
|  | 	'r.', # only appears in word 'mearpartijestelsel'(does not exist in lex_asr) and 'tenoarpartij'.    | ||||||
|  | 	'r:', # only appears in word 'mûsearflearmûs' and 'sjochdêr'. | ||||||
|  |  | ||||||
|  | 	# approximant | ||||||
|  | 	'j', | ||||||
|  | 	'j.', | ||||||
|  | 	'l' | ||||||
|  | 	] | ||||||
|  |  | ||||||
|  | ## the list of multi character phones.  | ||||||
|  | # for example, the length of 'i̯ⁿ' is 3, but in the code it is treated as one phone. | ||||||
|  | multi_character_phones = [i for i in phoneset if len(i) > 1] | ||||||
|  | multi_character_phones.sort(key=len, reverse=True) | ||||||
							
								
								
									
										
											BIN
										
									
								
								acoustic_model/phoneset/fame_ipa2asr.npy
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								acoustic_model/phoneset/fame_ipa2asr.npy
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
		Reference in New Issue
	
	Block a user