The correspondence between lex.asr and lex.ipa is now obtained automatically. Docstring headers are added to the functions in fame_functions.py.
4 binary files changed (contents not shown).
@@ -16,14 +16,14 @@ def multi_character_tokenize(line, multi_character_tokens):
 def split_word(word, multi_character_phones):
 	"""
-	Split a line by given phoneset.
+	Split a word into phones, using the given phoneset.
 
 	Args:
-		word (str): one word written in given phoneset.
-		multi_character_phones:
+		word (str): a word written in the given phoneset.
+		multi_character_phones (list): the multi-character phones which are treated as single phones. This list can be obtained from a phoneset definition such as fame_phoneset.py.
 
 	Returns:
-		word_seperated (str): the word splitted in given phoneset. 
+		word_separated (list): the word split into the phones of the given phoneset.
 	"""
 	return [phone for phone in multi_character_tokenize(word.strip(), multi_character_phones)]
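Note: multi_character_tokenize itself is not part of this diff. A plausible sketch of the longest-match tokenization that split_word relies on (illustrative only; the real implementation may differ):

def multi_character_tokenize_sketch(line, multi_character_tokens):
	# Greedy tokenization: try the known multi-character phones first and
	# fall back to single characters. This assumes the token list is sorted
	# longest-first (see the sort added to fame_phoneset.py further below).
	while line:
		for token in multi_character_tokens:
			if line.startswith(token):
				yield token
				line = line[len(token):]
				break
		else:
			yield line[0]
			line = line[1:]

print(list(multi_character_tokenize_sketch('ö:rṷⁿ', ['ö:ⁿ', 'ṷⁿ', 'ö:'])))
# -> ['ö:', 'r', 'ṷⁿ']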
@@ -33,7 +33,6 @@ repo_dir = r'C:\Users\Aki\source\repos'
 ipa_xsampa_converter_dir    = os.path.join(repo_dir, 'ipa-xsama-converter')
 forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
 accent_classification_dir   = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
-#pyhtk_dir                   = os.path.join(repo_dir, 'pyhtk', 'pyhtk')
 toolbox_dir                 = os.path.join(repo_dir, 'toolbox')
 
 #htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
@@ -12,24 +12,6 @@ import defaultfiles as default
 import fame_phoneset
 import convert_phone_set
 
-#sys.path.append(default.forced_alignment_module_dir)
-#from forced_alignment import convert_phone_set
-
-#def find_phone(lexicon_file, phone):
-#	""" Search where the phone is used in the lexicon. """
-#	with open(lexicon_file, "rt", encoding="utf-8") as fin:
-#		lines = fin.read()
-#		lines = lines.split('\n')
-
-#	extracted = []
-#	for line in lines:
-#		line = line.split('\t')
-#		if len(line) > 1:
-#			pronunciation = line[1]
-#			if phone in pronunciation:
-#				extracted.append(line)
-#	return extracted
-
 
 #def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
 #	""" Convert a lexicon file from IPA to HTK format for FAME! corpus. """
@@ -128,25 +110,6 @@ import convert_phone_set
 #    return ipa
 
-def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
-	""" Make a script file for HCopy using the filelist in FAME! corpus. """
-
-	filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
-	with open(filelist_txt) as fin:
-		filelist = fin.read()
-		filelist = filelist.split('\n')
-
-	with open(hcopy_scp, 'w') as fout:
-		for filename_ in filelist:
-			filename = filename_.replace('.TextGrid', '')
-
-			if len(filename) > 3: # remove '.', '..' and ''
-				wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
-				mfc_file = os.path.join(feature_dir, filename + '.mfc')
-
-				fout.write(wav_file + '\t' + mfc_file + '\n')
-
-
 #def make_filelist(input_dir, output_txt):
 #	""" Make a list of files in the input_dir. """
 #	filenames = os.listdir(input_dir)
@@ -191,98 +154,147 @@ def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
 #                            f.write('{0}\t{1}\n'.format(WORD, key))
 
 
+def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
+	""" Make a script file for HCopy using the filelist in FAME! corpus.
+
+	Args:
+		fame_dir (path): the directory of the FAME corpus.
+		dataset (str): 'devel', 'test' or 'train'.
+		feature_dir (path): the directory where the features will be stored.
+		hcopy_scp (path): the script file for HCopy to be made.
+
+	"""
+	filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
+	with open(filelist_txt) as fin:
+		filelist = fin.read()
+		filelist = filelist.split('\n')
+
+	with open(hcopy_scp, 'w') as fout:
+		for filename_ in filelist:
+			filename = filename_.replace('.TextGrid', '')
+
+			if len(filename) > 3: # remove '.', '..' and ''
+				wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
+				mfc_file = os.path.join(feature_dir, filename + '.mfc')
+
+				fout.write(wav_file + '\t' + mfc_file + '\n')
+
+
 def load_lexicon(lexicon_file):
+	""" Load a lexicon file as a DataFrame.
+
+	Args:
+		lexicon_file (path): lexicon in the format 'word' \t 'pronunciation'.
+
+	Returns:
+		lex (df): the lexicon as a DataFrame with columns 'word' and 'pronunciation'.
+
+	"""
 	lex = pd.read_csv(lexicon_file, delimiter='\t', header=None, encoding="utf-8")
 	lex.rename(columns={0: 'word', 1: 'pronunciation'}, inplace=True)
 	return lex
 
 
-def get_phonelist(lexicon_asr):
-	""" Make a list of phones which appears in the lexicon. """
-
-	#with open(lexicon_file, "rt", encoding="utf-8") as fin:
-	#	lines = fin.read()
-	#	lines = lines.split('\n')
-	#	phonelist = set([])
-	#	for line in lines:
-	#		line = line.split('\t')
-	#		if len(line) > 1:
-	#			pronunciation = set(line[1].split())
-	#			phonelist = phonelist | pronunciation
-	lex = load_lexicon(lexicon_asr)
-	return set(' '.join(lex['pronunciation']).split(' '))
-
-
-def extract_unknown_phones(word_list, known_phones):
-	return [i for i in word_list if not i in known_phones]
-
-
-if __name__ == '__main__':
-	import time
-	timer_start = time.time()
-
-	#def get_translation_key():
-	dir_tmp = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
-	lexicon_ipa = r'd:\_corpus\FAME\lexicon\lex.ipa'
-	lexicon_asr = r'd:\_corpus\FAME\lexicon\lex.asr'
-
-	lex_ipa = load_lexicon(lexicon_ipa)
-	lex_asr = load_lexicon(lexicon_asr)
-	if 1:
-		phone_to_be_searched = fame_phoneset.phoneset_ipa[:]
-		translation_key = dict()
-		for word in lex_ipa['word']:
-			if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
-				ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
-				asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
-
-				ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
-				asr_list = asr.split(' ')
-
-				# if there are phones which is not in phone_to_be_searched
-				#if len([True for i in asr_list if i in phone_to_be_searched]) > 0:
-				if(len(ipa_list) == len(asr_list)):
-					print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
-					for ipa_, asr_ in zip(ipa_list, asr_list):
-						if ipa_ in phone_to_be_searched:
-							translation_key[ipa_] = asr_
-							phone_to_be_searched.remove(ipa_)
-
-		print("elapsed time: {}".format(time.time() - timer_start))
-
-		np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
-		np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched)
-	else:
-		translation_key		 = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item()
-		phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).item()
-
-	#phone_unknown = list(phone_to_be_searched)
-	##phone_unknown.remove('')
-	#phone_known = list(translation_key.keys())
-
-	#p = phone_unknown[0]
-
-	### extract lines which contains 'unknown' phone.
-	#lex_ipa_ = lex_ipa[lex_ipa['pronunciation'].str.count(p)>0]
-	##phone_unknown_ = phone_unknown[:]
-	##phone_unknown_.remove(p)
-	#phone_known_ = phone_known[:]
-	#phone_known_.append(p)
-	#for index, row in lex_ipa_.iterrows():
-	#	ipa = row['pronunciation']
-	#	phone_extract_unknown_phones(asr_list, phone_known_):
-
-	#	# check the number of phones in phone_unknown_
-	#	if len([True for i in asr_list if i in phone_unknown_]) == 0:
-	#		word = row['word']
-	#		ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
-	#		print("{0}: {1} --> {2}".format(word, ipa, asr))
-	#		#print("{0}:{1}".format(index, row['pronunciation']))
+def get_phoneset_from_lexicon(lexicon_file, phoneset='asr'):
+	""" Make a set of the phones which appear in the lexicon.
+
+	Args:
+		lexicon_file (path): lexicon in the format 'word' \t 'pronunciation'.
+		phoneset (str): the phoneset in which lexicon_file is written. 'asr' (default) or 'ipa'.
+
+	Returns:
+		(set): the set of phones included in lexicon_file.
+
+	"""
+	assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
+
+	lex = load_lexicon(lexicon_file)
+	if phoneset == 'asr':
+		return set(' '.join(lex['pronunciation']).split(' '))
+	elif phoneset == 'ipa':
+		join_pronunciations = ''.join(lex['pronunciation'])
+		return set(convert_phone_set.split_word(join_pronunciations, fame_phoneset.multi_character_phones_ipa))
+
+
+def extract_unknown_phones(ipa, known_phones):
+	""" Extract unknown phones from a pronunciation written in IPA.
+
+	Args:
+		ipa (str): a pronunciation written in IPA.
+		known_phones (list): the phones which are already known.
+
+	Returns:
+		(list): the phones not included in 'known_phones'.
+
+	"""
+	ipa_split = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
+	return [i for i in ipa_split if not i in known_phones]
+
+
+def get_translation_key(lexicon_file_ipa, lexicon_file_asr):
+	""" Get the correspondence between lexicon_file_ipa and lexicon_file_asr.
+
+	Args:
+		lexicon_file_ipa (path): lexicon in the format 'word' \t 'pronunciation (IPA)'.
+		lexicon_file_asr (path): lexicon in the format 'word' \t 'pronunciation (asr)'.
+			Each phone of 'pronunciation' should be delimited by ' '.
+
+	Returns:
+		translation_key (dict): the translation key from IPA to asr.
+		(list): the IPA phones which do not appear in lexicon_file_asr.
+
+	"""
+	lex_ipa = load_lexicon(lexicon_file_ipa)
+	lex_asr = load_lexicon(lexicon_file_asr)
+	phone_unknown = fame_phoneset.phoneset_ipa[:]
+	translation_key = dict()
+	for word in lex_ipa['word']:
+		if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
+			ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
+			asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
+
+			ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
+			asr_list = asr.split(' ')
+
+			# only use words whose IPA and asr pronunciations contain the same number of phones.
+			if(len(ipa_list) == len(asr_list)):
+				print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
+				for ipa_, asr_ in zip(ipa_list, asr_list):
+					if ipa_ in phone_unknown:
+						translation_key[ipa_] = asr_
+						phone_unknown.remove(ipa_)
+	return translation_key, list(phone_unknown)
+
+
+def find_phone(lexicon_file, phone, phoneset='ipa'):
+	""" Extract the rows in which the phone is used in the lexicon_file.
+
+	Args:
+		lexicon_file (path): lexicon in the format 'word' \t 'pronunciation'.
+		phone (str): the phone to be searched.
+		phoneset (str): the phoneset in which lexicon_file is written. 'asr' or 'ipa' (default).
+
+	Returns:
+		extracted (df): the rows in which the phone is used.
+
+	ToDo:
+		* implement the case phoneset == 'asr'.
+
+	"""
+	assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
+
+	lex = load_lexicon(lexicon_file)
+
+	# to reduce the calculation time, only look at rows which include 'phone' at least once.
+	lex_ = lex[lex['pronunciation'].str.count(phone)>0]
+
+	extracted = pd.DataFrame(index=[], columns=['word', 'pronunciation'])
+	for index, row in lex_.iterrows():
+		if phoneset == 'ipa':
+			pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_phoneset.multi_character_phones_ipa)
+		if phone in pronunciation:
+			extracted_ = pd.Series([row['word'], pronunciation], index=extracted.columns)
+			extracted  = extracted.append(extracted_, ignore_index=True)
+	return extracted
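Note: taken together, the new helpers compose as in the sketch below. The lexicon paths are the ones used elsewhere in this repository; the manual-inspection step is illustrative.

import fame_functions

lexicon_ipa = r'd:\_corpus\FAME\lexicon\lex.ipa'
lexicon_asr = r'd:\_corpus\FAME\lexicon\lex.asr'

# Build the IPA-to-asr translation key from words which occur exactly once
# in both lexica and whose pronunciations contain the same number of phones.
translation_key, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)

# For phones that could not be resolved automatically, list the rows in
# which they occur so the correspondence can be checked manually.
if len(phone_unknown) > 0:
	extracted = fame_functions.find_phone(lexicon_ipa, phone_unknown[0], phoneset='ipa')
	print(extracted)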
@@ -6,6 +6,7 @@ import tempfile
 #import configparser
 #import subprocess
 #from collections import Counter
+import time
 
 #import numpy as np
 #import pandas as pd
@@ -27,8 +28,8 @@ from htk import pyhtk
 dataset_list = ['devel', 'test', 'train']
 
 # procedure
-extract_features  = 1
-#conv_lexicon	  = 0
+extract_features  = 0
+conv_lexicon	  = 1
 #check_lexicon	  = 0
 #make_mlf		  = 0
 #combine_files	  = 0
@@ -84,16 +85,14 @@ if not os.path.exists(tmp_dir):
 ## ======================= extract features =======================
 if extract_features:
 	for dataset in dataset_list:
-	#for dataset in ['test']:
 		print('==== {} ===='.format(dataset))
 
 		# a script file for HCopy 
 		print(">>> making a script file for HCopy... \n")
 		hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
 		hcopy_scp.close()
-		#hcopy_scp = os.path.join(default.htk_dir, 'tmp', 'HCopy.scp')
 
-		## get a list of features (hcopy.scp) from the filelist in FAME! corpus
+		# get a list of features (hcopy.scp) from the filelist in FAME! corpus
 		feature_dir_ = os.path.join(feature_dir, dataset)
 		if not os.path.exists(feature_dir_):
 			os.makedirs(feature_dir_)
@@ -101,32 +100,70 @@ if extract_features:
 		# extract features
 		print(">>> extracting features... \n")
 		fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
-
-		#subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name
-		#subprocess.call(subprocessStr, shell=True)
 		pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)
 
 		# a script file for HCompV
 		print(">>> making a script file for HCompV... \n")
-
-
-## ======================= make a list of features =======================
-#if make_feature_list:
-#	print("==== make a list of features ====\n")
-
-#	for dataset in dataset_list:
-#		print(dataset)
-
-		#feature_dir = output_dir + '\\mfc\\' + dataset
 		hcompv_scp  = os.path.join(tmp_dir, dataset + '.scp')
-
-		#am_func.make_filelist(feature_dir, hcompv_scp)
 		fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')
 
 
 ## ======================= convert lexicon from ipa to fame_htk =======================
 if conv_lexicon:
 	print('==== convert lexicon from ipa 2 fame ====\n')
+
+	#dir_out = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
+	lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
+	lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
+	lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
+
+	# get the correspondence between lex_ipa and lex_asr.
+	lex_asr  = fame_functions.load_lexicon(lexicon_asr)
+	lex_ipa  = fame_functions.load_lexicon(lexicon_ipa)
+	if 1:
+		timer_start = time.time()
+		translation_key, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)
+		print("elapsed time: {}".format(time.time() - timer_start))
+
+		np.save('translation_key_ipa2asr.npy', translation_key)
+		np.save('phone_unknown.npy', phone_unknown)
+	else:
+		translation_key = np.load('translation_key_ipa2asr.npy').item()
+		phone_unknown   = np.load('phone_unknown.npy')
+		phone_unknown   = list(phone_unknown)
+
+
+	## manually check the correspondence for the phones in phone_unknown.
+	#p = phone_unknown[0]
+	#lex_ipa_ = find_phone(lexicon_ipa, p, phoneset='ipa')
+
+	#for word in lex_ipa_['word']:
+	#	ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
+	#	if np.sum(lex_asr['word'] == word) > 0:
+	#		asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
+
+	#		ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
+	#		asr_list = asr.split(' ')
+	#		if p in ipa_list and (len(ipa_list) == len(asr_list)):
+	#			print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
+	#			for ipa_, asr_ in zip(ipa_list, asr_list):
+	#				if ipa_ in phone_unknown:
+	#					translation_key[ipa_] = asr_
+	#					phone_unknown.remove(ipa_)
+
+
+	## check if all the phones in lexicon_ipa are in fame_phoneset.py.
+	#timer_start = time.time()
+	#phoneset_lex = get_phoneset_from_lexicon(lexicon_ipa, phoneset='ipa')
+	#print("elapsed time: {}".format(time.time() - timer_start))
+
+	#phoneset_py = fame_phoneset.phoneset_ipa
+	#set(phoneset_lex) - set(phoneset_py)
+
+	##timer_start = time.time()
+	##extracted = find_phone(lexicon_ipa, 'ⁿ')
+	##print("elapsed time: {}".format(time.time() - timer_start))
+
+
 	# lex.asr is Kaldi compatible version of lex.ipa.
 	# to check... 
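Note: each line that make_hcopy_scp_from_filelist_in_fame writes pairs a source .wav with a target .mfc, which is the source/target script format HCopy consumes. A quick sanity check could look like this (sketch; hcopy_scp is the temporary file created above):

with open(hcopy_scp.name) as f:
	for line in list(f)[:3]:
		wav_file, mfc_file = line.rstrip('\n').split('\t')
		print(wav_file, '->', mfc_file)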
@@ -140,13 +177,13 @@ if conv_lexicon:
 	#		fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split)))
 
 	# convert each lexicon from ipa description to fame_htk phoneset.
-	am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
-	am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)
+	#am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
+	#am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)
 
 	# combine lexicon
 	# pronunciations which is not found in lex.asr are generated using G2P and listed in lex.oov.
 	# therefore there is no overlap between lex_asr and lex_oov.   
-	am_func.combine_lexicon(lex_asr_htk, lex_oov_htk, lex_htk)
+	#am_func.combine_lexicon(lex_asr_htk, lex_oov_htk, lex_htk)
 
 
 ## ======================= check if all the phones are successfully converted =======================
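Note: as the comments above state, lex.oov holds G2P-generated pronunciations for words missing from lex.asr, so the two lexica are disjoint. am_func.combine_lexicon is not shown in this diff; under that no-overlap assumption a minimal combine step could look like:

import pandas as pd

def combine_lexicon_sketch(lexicon_file1, lexicon_file2, lexicon_out):
	# Concatenate two disjoint 'word<TAB>pronunciation' lexica and sort by word.
	lex1 = pd.read_csv(lexicon_file1, delimiter='\t', header=None, encoding='utf-8')
	lex2 = pd.read_csv(lexicon_file2, delimiter='\t', header=None, encoding='utf-8')
	lex = pd.concat([lex1, lex2]).sort_values(by=0)
	lex.to_csv(lexicon_out, sep='\t', header=False, index=False, encoding='utf-8')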
@@ -1,41 +1,79 @@
+""" definition of the phones to be used. """
+
+## phones in IPA.
 phoneset_ipa = [
 	# vowels
 	'i̯',
+	'i̯ⁿ',
 	'y',
 	'i',
+	'i.',
+	'iⁿ',
 	'i:',
+	'i:ⁿ',
 	'ɪ',
-	'ɪ:',
+	'ɪⁿ',
+	'ɪ.',
+	#'ɪ:', # not included in lex.ipa
+	'ɪ:ⁿ',
 	'e',
 	'e:',
+	'e:ⁿ',
 	'ə',
+	'əⁿ',
 	'ə:',
 	'ɛ',
+	'ɛ.',
+	'ɛⁿ',
 	'ɛ:',
+	'ɛ:ⁿ',
 	'a',
+	'aⁿ',
+	'a.',
 	'a:',
+	'a:ⁿ',
 	'ṷ',
-	'ú',
+	'ṷ.',
+	'ṷⁿ',
+	#'ú', # only appears in the words 'feeste'(út) and 'gaste'(út), which are 'f e: s t ə' and 'yn' in lex_asr.
 	'u',
+	'uⁿ',
+	'u.',
 	'u:',
+	'u:ⁿ',
 	'ü',
+	'ü.',
+	'üⁿ',
 	'ü:',
+	'ü:ⁿ',
 	'o',
+	'oⁿ',
+	'o.',
 	'o:',
+	'o:ⁿ',
 	'ö',
+	'ö.',
+	'öⁿ',
 	'ö:',
+	'ö:ⁿ',
 	'ɔ',
+	'ɔ.',
+	'ɔⁿ',
 	'ɔ:',
-	'ɔ̈',
+	'ɔ:ⁿ',
+	#'ɔ̈', # not included in lex.ipa
+	'ɔ̈.',
 	'ɔ̈:',
 
 	# plosives
 	'p', 
 	'b', 
-	't', 
+	't',
+	'tⁿ',
 	'd', 
 	'k',
 	'g',
+	'ɡ', # = 'g'
 
 	# nasals
 	'm',
@@ -48,8 +86,22 @@ phoneset_ipa = [
 	's',
 	's:',
 	'z',
+	'zⁿ',
 	'x',
 	'h',
+
+	# tap and flip
+	'r',
+	'r.', # only appears in the words 'mearpartijestelsel' (which does not exist in lex_asr) and 'tenoarpartij'.
+	'r:', # only appears in the words 'mûsearflearmûs' and 'sjochdêr'.
+
+	# approximant
+	'j',
+	'j.',
+	'l'
 	]
+
+## the list of multi-character phones.
+# for example, the length of 'i̯ⁿ' is 3, but in the code it is treated as one letter.
 multi_character_phones_ipa = [i for i in phoneset_ipa if len(i) > 1]
+multi_character_phones_ipa.sort(key=len, reverse=True)
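Note: the newly added descending-length sort is what makes longest-match tokenization safe: a longer phone such as 'ö:ⁿ' must be tried before its prefixes 'ö:' and 'ö'. A small illustration, mirroring the two lines above:

phones = ['ö', 'ö:', 'ö:ⁿ', 'r']
multi = [p for p in phones if len(p) > 1]
multi.sort(key=len, reverse=True)
print(multi)  # ['ö:ⁿ', 'ö:']: the nasalized phone is tried first, so it is matched as one unit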