The phoneset is given in fame_phoneset.py; the translation key is obtained based on that information.

2019-01-27 01:34:04 +01:00
parent 7844a56281
commit 813f013d7a
11 changed files with 176 additions and 50 deletions
--- a/.vs/acoustic_model/v15/.suo
+++ b/.vs/acoustic_model/v15/.suo
--- a/_tmp/phone_to_be_searched.npy
+++ b/_tmp/phone_to_be_searched.npy
--- a/_tmp/translation_key.npy
+++ b/_tmp/translation_key.npy
--- a/acoustic_model.sln
+++ b/acoustic_model.sln
@@ -10,7 +10,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
 		..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
 		..\toolbox\evaluation.py = ..\toolbox\evaluation.py
-		..\toolbox\toolbox\file_handling.py = ..\toolbox\toolbox\file_handling.py
 		..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
 		..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
 		..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
@@ -23,7 +22,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
 	EndProjectSection
 EndProject
-Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "pyhtk", "..\pyhtk\pyhtk\pyhtk.pyproj", "{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}"
+Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "toolbox", "..\toolbox\toolbox.pyproj", "{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -33,8 +32,8 @@ Global
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 		{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Release|Any CPU.ActiveCfg = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
--- a/acoustic_model/pycache/defaultfiles.cpython-36.pyc
+++ b/acoustic_model/pycache/defaultfiles.cpython-36.pyc
--- a/acoustic_model/acoustic_model.pyproj
+++ b/acoustic_model/acoustic_model.pyproj
@@ -23,12 +23,18 @@
  </PropertyGroup>
  <ItemGroup>
    <Compile Include="check_novoapi.py" />
+    <Compile Include="convert_phone_set.py">
+      <SubType>Code</SubType>
+    </Compile>
    <Compile Include="convert_xsampa2ipa.py">
      <SubType>Code</SubType>
    </Compile>
    <Compile Include="defaultfiles.py">
      <SubType>Code</SubType>
    </Compile>
+    <Compile Include="fame_phoneset.py">
+      <SubType>Code</SubType>
+    </Compile>
    <Compile Include="fa_test.py">
      <SubType>Code</SubType>
    </Compile>
--- a/acoustic_model/convert_phone_set.py
+++ b/acoustic_model/convert_phone_set.py
@@ -0,0 +1,29 @@
+"""Module to convert phonemes."""
+
+def multi_character_tokenize(line, multi_character_tokens):
+	"""Greedily split line into tokens, yielding them one by one.
+
+	At each position the longest entry of multi_character_tokens that matches is consumed, so 'a:' is
+	never split into 'a' + ':' because of list order; if no entry matches, one character is consumed."""
+	# Longest first; empty tokens are dropped because they would match without consuming anything.
+	tokens = sorted((token for token in multi_character_tokens if token), key=len, reverse=True)
+	while line != '':
+		# Fall back to a single character when no token matches at this position.
+		token = next((t for t in tokens if line.startswith(t)), line[:1])
+		yield token
+		line = line[len(token):]
+
+
+def split_word(word, multi_character_phones):
+	"""
+	Split a word into the phones of a given phoneset.
+
+	Args:
+		word (str): one word written in the given phoneset; surrounding whitespace is stripped.
+		multi_character_phones (list of str): phones longer than one character, matched before falling back to single characters.
+
+	Returns:
+		list of str: the phones of the word, in order.
+	"""
+
+	return list(multi_character_tokenize(word.strip(), multi_character_phones))
--- a/acoustic_model/defaultfiles.py
+++ b/acoustic_model/defaultfiles.py
@@ -4,7 +4,8 @@ import os

 #cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'

-htk_dir = r'C:\Aki\htk_fame'
+#htk_dir = r'C:\Aki\htk_fame'
+htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk'

 config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
 #config_train = os.path.join(cygwin_dir, 'config', 'config.train')
@@ -28,22 +29,22 @@ config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
 #filePhoneList = config['pyHTK']['filePhoneList']
 #AcousticModel = config['pyHTK']['AcousticModel']

-repo_dir = r'C:\Users\A.Kunikoshi\source\repos'
+repo_dir = r'C:\Users\Aki\source\repos'
 ipa_xsampa_converter_dir    = os.path.join(repo_dir, 'ipa-xsama-converter')
 forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
 accent_classification_dir   = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
-pyhtk_dir                   = os.path.join(repo_dir, 'pyhtk', 'pyhtk')
-toolbox_dir					= os.path.join(repo_dir, 'toolbox', 'toolbox')
+#pyhtk_dir                   = os.path.join(repo_dir, 'pyhtk', 'pyhtk')
+toolbox_dir					= os.path.join(repo_dir, 'toolbox')

-htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
-config_hvite = os.path.join(htk_config_dir, 'config.HVite')
+#htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
+#config_hvite = os.path.join(htk_config_dir, 'config.HVite')
 #acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
-acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
-phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
+#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
+#phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')

 WSL_dir   = r'C:\OneDrive\WSL'
 #fame_dir        = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
-fame_dir = r'f:\_corpus\fame'
+fame_dir = r'd:\_corpus\fame'

 fame_s5_dir     = os.path.join(fame_dir, 's5')
 fame_corpus_dir = os.path.join(fame_dir, 'corpus')
--- a/acoustic_model/fame_functions.py
+++ b/acoustic_model/fame_functions.py
@@ -1,5 +1,5 @@
 import os
-os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

 import sys
 from collections import Counter
@@ -9,6 +9,8 @@ import numpy as np
 import pandas as pd

 import defaultfiles as default
+import fame_phoneset
+import convert_phone_set

 #sys.path.append(default.forced_alignment_module_dir)
 #from forced_alignment import convert_phone_set
@@ -213,40 +215,74 @@ def get_phonelist(lexicon_asr):
 	lex = load_lexicon(lexicon_asr)
 	return set(' '.join(lex['pronunciation']).split(' '))

-import time

-timer_start = time.time()
+def extract_unknown_phones(word_list, known_phones):  # keeps the order and duplicates of word_list
+	return [phone for phone in word_list if phone not in known_phones]

-#def get_translation_key():
-dir_tmp = r'c:\Users\A.Kunikoshi\source\repos\acoustic_model\_tmp'
-lexicon_ipa = r'f:\_corpus\FAME\lexicon\lex.ipa'
-lexicon_asr = r'f:\_corpus\FAME\lexicon\lex.asr'

-lex_ipa = load_lexicon(lexicon_ipa)
-lex_asr = load_lexicon(lexicon_asr)
-if 0:
-	phone_to_be_searched = get_phonelist(lexicon_asr)
+if __name__ == '__main__':
+	import time
+	timer_start = time.time()
+
+	#def get_translation_key():
+	dir_tmp = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
+	lexicon_ipa = r'd:\_corpus\FAME\lexicon\lex.ipa'
+	lexicon_asr = r'd:\_corpus\FAME\lexicon\lex.asr'
+
+	lex_ipa = load_lexicon(lexicon_ipa)
+	lex_asr = load_lexicon(lexicon_asr)
+	if 1:
+		phone_to_be_searched = fame_phoneset.phoneset_ipa[:]
 		translation_key = dict()
-	for word in lex_asr['word']:
-		if np.sum(lex_asr['word'] == word) == 1 and np.sum(lex_ipa['word'] == word) == 1:
-			asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
+		for word in lex_ipa['word']:
+			if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
 				ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
+				asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
 	
+				ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
 				asr_list = asr.split(' ')
+
 				# if there are phones which is not in phone_to_be_searched
-			if len([True for i in asr_list if i in phone_to_be_searched]) > 0:
-				if(len(ipa) == len(asr_list)):
-					print("{0}: {1} --> {2}".format(word, ipa, asr))
-					for ipa_, asr_ in zip(ipa, asr_list):
-						if asr_ in phone_to_be_searched:
-							#if not translation_key[ipa_] == asr_:
+				#if len([True for i in asr_list if i in phone_to_be_searched]) > 0:
+				if(len(ipa_list) == len(asr_list)):
+					print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
+					for ipa_, asr_ in zip(ipa_list, asr_list):
+						if ipa_ in phone_to_be_searched:
 							translation_key[ipa_] = asr_
-							phone_to_be_searched.remove(asr_)
+							phone_to_be_searched.remove(ipa_)

 		print("elapsed time: {}".format(time.time() - timer_start))

 		np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
 		np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched)
-else:
+	else:
 		translation_key		 = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item()
 		phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).item()
+
+
+	#phone_unknown = list(phone_to_be_searched)
+	##phone_unknown.remove('')
+	#phone_known = list(translation_key.keys())
+
+	#p = phone_unknown[0]
+	
+	### extract lines which contains 'unknown' phone.
+	#lex_ipa_ = lex_ipa[lex_ipa['pronunciation'].str.count(p)>0]
+	##phone_unknown_ = phone_unknown[:]
+	##phone_unknown_.remove(p)
+	#phone_known_ = phone_known[:]
+	#phone_known_.append(p)
+	#for index, row in lex_ipa_.iterrows():
+	#	ipa = row['pronunciation']
+	#	phone_extract_unknown_phones(asr_list, phone_known_):
+
+	#	# check the number of phones in phone_unknown_
+	#	if len([True for i in asr_list if i in phone_unknown_]) == 0:
+	#		word = row['word']
+	#		ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
+	#		print("{0}: {1} --> {2}".format(word, ipa, asr))
+	#		#print("{0}:{1}".format(index, row['pronunciation']))
+
+
+
+	
--- a/acoustic_model/fame_hmm.py
+++ b/acoustic_model/fame_hmm.py
@@ -1,6 +1,6 @@
 import sys
 import os
-os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

 import tempfile
 #import configparser
@@ -12,10 +12,9 @@ import tempfile

 import fame_functions
 import defaultfiles as default
-sys.path.append(default.pyhtk_dir)
-import pyhtk
 sys.path.append(default.toolbox_dir)
-import file_handling
+import file_handling as fh
+from htk import pyhtk


 ## ======================= user define =======================
@@ -94,7 +93,7 @@ if extract_features:
 		hcopy_scp.close()
 		#hcopy_scp = os.path.join(default.htk_dir, 'tmp', 'HCopy.scp')

-		# get a list of features (hcopy.scp) from the filelist in FAME! corpus
+		## get a list of features (hcopy.scp) from the filelist in FAME! corpus
 		feature_dir_ = os.path.join(feature_dir, dataset)
 		if not os.path.exists(feature_dir_):
 			os.makedirs(feature_dir_)
@@ -110,6 +109,7 @@ if extract_features:
 		# a script file for HCompV
 		print(">>> making a script file for HCompV... \n")

+
 ## ======================= make a list of features =======================
 #if make_feature_list:
 #	print("==== make a list of features ====\n")
@@ -121,7 +121,7 @@ if extract_features:
 		hcompv_scp  = os.path.join(tmp_dir, dataset + '.scp')

 		#am_func.make_filelist(feature_dir, hcompv_scp)
-		file_handling.make_filelist(feature_dir_, hcompv_scp, '.mfc')
+		fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')


 ## ======================= convert lexicon from ipa to fame_htk =======================
--- a/acoustic_model/fame_phoneset.py
+++ b/acoustic_model/fame_phoneset.py
@@ -0,0 +1,55 @@
+phoneset_ipa = [  # IPA phone symbols, one string per phone, grouped by the class comments below
+	# vowels
+	'i̯',
+	'y',
+	'i',
+	'i:',
+	'ɪ',
+	'ɪ:',
+	'e',
+	'e:',
+	'ə',
+	'ə:',
+	'ɛ',
+	'ɛ:',
+	'a',
+	'a:',
+	'ṷ',
+	'ú',
+	'u',
+	'u:',
+	'ü',
+	'ü:',
+	'o',
+	'o:',
+	'ö',
+	'ö:',
+	'ɔ',
+	'ɔ:',
+	'ɔ̈',
+	'ɔ̈:',
+
+	# plosives
+	'p', 
+	'b', 
+	't', 
+	'd', 
+	'k',
+	'g',
+
+	# nasals
+	'm',
+	'n',
+	'ŋ',
+	
+	# fricatives
+	'f',
+	'v',
+	's',
+	's:',
+	'z',
+	'x',
+	'h',
+	]
+
+multi_character_phones_ipa = [i for i in phoneset_ipa if len(i) > 1]  # len() counts code points, so e.g. 'i:' is included