test on stimmen data is added.

2019-03-03 02:05:37 +01:00
parent c185072d5b
commit b1b1942fa0
7 changed files with 133 additions and 76 deletions
--- a/.vs/acoustic_model/v15/.suo
+++ b/.vs/acoustic_model/v15/.suo
--- a/acoustic_model/acoustic_model.pyproj
+++ b/acoustic_model/acoustic_model.pyproj
@ -4,7 +4,7 @@
    <SchemaVersion>2.0</SchemaVersion>
    <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
    <ProjectHome>.</ProjectHome>
-    <StartupFile>htk_vs_kaldi.py</StartupFile>
+    <StartupFile>fame_hmm.py</StartupFile>
    <SearchPath>
    </SearchPath>
    <WorkingDirectory>.</WorkingDirectory>
--- a/acoustic_model/fame_functions.py
+++ b/acoustic_model/fame_functions.py
@ -12,6 +12,10 @@ import defaultfiles as default
 import convert_phoneset
 from phoneset import fame_ipa, fame_asr

+sys.path.append(default.toolbox_dir)
+from htk import pyhtk
+
+
 #def read_fileFA(fileFA):
 #    """
 #    read the result file of HTK forced alignment.
@ -371,4 +375,25 @@ def ipa2htk(ipa):
 	asr_splitted = convert_phoneset.convert_phoneset(ipa_splitted, translation_key_ipa2asr)
 	asr_splitted = fame_asr.phone_reduction(asr_splitted)
 	htk_splitted = convert_phoneset.convert_phoneset(asr_splitted, fame_asr.translation_key_asr2htk)
-	return ''.join(htk_splitted)
+	return ''.join(htk_splitted)
+
+
+def performance_on_stimmen(stimmen_dir, hmmdefs, config_dir=None):
+	"""
+	Run recognition on the stimmen data and return the per-sentence accuracy.
+
+	Args:
+		stimmen_dir (path): directory in which 'word_lattice.ltc', 'hvite.scp',
+			'hresult.scp' and 'lexicon_recognition.dic' are looked up.
+		hmmdefs (path): model definition file handed to the recognition step.
+		config_dir (path): config directory handed to pyhtk.HTK.
+			When omitted, <default.htk_dir>/config is used.
+
+	Returns:
+		the 'accuracy' entry of the per-sentence result returned by
+		calc_recognition_performance().
+	"""
+	# config_dir used to be read as a bare name although this function never
+	# received it; it is now an optional argument with a default location.
+	if config_dir is None:
+		config_dir = os.path.join(default.htk_dir, 'config')
+
+	lattice_file = os.path.join(stimmen_dir, 'word_lattice.ltc')
+	hvite_scp    = os.path.join(stimmen_dir, 'hvite.scp')
+	#fh.make_filelist(os.path.join(stimmen_dir, 'mfc'), hvite_scp, file_type='mfc')
+	hresult_scp  = os.path.join(stimmen_dir, 'hresult.scp')
+	#fh.make_filelist(os.path.join(stimmen_dir, 'mfc'), hresult_scp, file_type='rec')
+	lexicon_file = os.path.join(stimmen_dir, 'lexicon_recognition.dic')
+	chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_file)
+
+	# the recognition step has to run before the scoring step;
+	# its return value is not used here.
+	chtk.recognition(
+		lattice_file,
+		hmmdefs,
+		hvite_scp
+		)
+	per_sentence, per_word = chtk.calc_recognition_performance(hresult_scp)
+
+	return per_sentence['accuracy']
--- a/acoustic_model/fame_hmm.py
+++ b/acoustic_model/fame_hmm.py
@ -22,30 +22,27 @@ from htk import pyhtk
 # procedure
 make_lexicon	  = 0
 make_label		  = 0 # it takes roughly 4800 sec on Surface pro 2.
-make_htk_files    = 0
+make_mlf		  = 0
 extract_features  = 0
 flat_start		  = 0
 train_model_without_sp = 0
 add_sp = 0
 train_model_with_sp    = 0
-train_model_with_sp_align_mlf = 1
+train_model_with_sp_align_mlf = 0
+train_triphone = 0



 # pre-defined values.
-
 dataset_list = ['devel', 'test', 'train']
 hmmdefs_name = 'hmmdefs'
-proto_name   = 'proto39'
+proto_name   = 'proto'

 lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
 lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')

 config_dir = os.path.join(default.htk_dir, 'config')
-config_hcopy = os.path.join(config_dir, 'config.HCopy')
-config_train = os.path.join(config_dir, 'config.train')
-global_ded   = os.path.join(config_dir, 'global.ded')
-mkphones_led = os.path.join(config_dir, 'mkphones.led')
+
 sil_hed		 = os.path.join(config_dir, 'sil.hed')
 prototype    = os.path.join(config_dir, proto_name)

@ -53,25 +50,20 @@ model_dir    = os.path.join(default.htk_dir, 'model')


 # directories / files to be made.
-
 lexicon_dir = os.path.join(default.htk_dir, 'lexicon') 
 lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr')
 lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov')
 lexicon_htk     = os.path.join(lexicon_dir, 'lex.htk')

-phonelist_txt = os.path.join(config_dir, 'phonelist.txt')
-model0_dir	  = os.path.join(model_dir, 'hmm0')
-model1_dir	  = os.path.join(model_dir, 'hmm1')
+
+#model1_dir	  = os.path.join(model_dir, 'hmm1')

 feature_dir = os.path.join(default.htk_dir, 'mfc')
-if not os.path.exists(feature_dir):
-	os.makedirs(feature_dir)
+fh.make_new_directory(feature_dir, existing_dir='leave')
 tmp_dir = os.path.join(default.htk_dir, 'tmp')
-if not os.path.exists(tmp_dir):
-	os.makedirs(tmp_dir)
+fh.make_new_directory(tmp_dir, existing_dir='leave')
 label_dir = os.path.join(default.htk_dir, 'label')
-if not os.path.exists(label_dir):
-	os.makedirs(label_dir)
+fh.make_new_directory(label_dir, existing_dir='leave')

 ## training
 hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
@ -98,20 +90,21 @@ if make_lexicon:
 	# therefore there is no overlap between lex_asr and lex_oov.   
 	fame_functions.combine_lexicon(lexicon_htk_asr, lexicon_htk_oov, lexicon_htk)

-	## ======================= 
-	## manually make changes to the pronunciation dictionary and save it as lex.htk 
-	## =======================
+	## fixing the lexicon for HTK. 
 	# (1) Replace all tabs with single space;
 	# (2) Put a '\' before any dictionary entry beginning with single quote 
-	#http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
+	# http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
 	print('>>> fixing the lexicon...')
 	fame_functions.fix_lexicon(lexicon_htk)
 	print("elapsed time: {}".format(time.time() - timer_start))


+## initialize the instance for HTK.
+chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk)
+
+
 ## ======================= make label files =======================
 if make_label:
-	# train_2002_gongfansaken_10347.lab is empty. should be removed.
 	for dataset in dataset_list:
 		timer_start = time.time()
 		print("==== making label files on dataset {}".format(dataset))
@ -120,7 +113,7 @@ if make_label:
 		wav_dir_	= os.path.join(default.fame_dir, 'fame', 'wav', dataset)
 		label_dir_		= os.path.join(label_dir, dataset)
 		dictionary_file = os.path.join(label_dir_, 'temp.dic')
-		fh.make_new_directory(label_dir_)
+		fh.make_new_directory(label_dir_, existing_dir='leave')

 		# list of scripts 
 		with open(script_list, "rt", encoding="utf-8") as fin:
@ -135,56 +128,48 @@ if make_label:
 			sentence_htk = fame_functions.word2htk(sentence)

 			wav_file = os.path.join(wav_dir_, filename + '.wav')
-			if os.path.exists(wav_file) and pyhtk.can_be_ascii(sentence_htk) == 0:
-				if pyhtk.create_dictionary_without_log(
-					sentence_htk, global_ded, dictionary_file, lexicon_htk) == 0:
+			if os.path.exists(wav_file) and chtk.can_be_ascii(sentence_htk) == 0:
+				if chtk.get_number_of_missing_words(
+					sentence_htk, dictionary_file) == 0:
 					# when the file name is too long, HDMan command does not work.
 					# therefore first temporary dictionary_file is made, then renamed. 
 					shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic'))

 					label_file = os.path.join(label_dir_, filename + '.lab')
-					pyhtk.create_label_file(sentence_htk, label_file)
+					chtk.create_label_file(sentence_htk, label_file)
 				else:
 					os.remove(dictionary_file)
+
 		print("elapsed time: {}".format(time.time() - timer_start))


-## ======================= make other required files =======================
-if make_htk_files:
+## ======================= make master label files =======================
+if make_mlf:
 	timer_start = time.time()
-	print("==== making files required for HTK ====")
+	print("==== making master label files ====")
 	
-	print(">>> making a phonelist...")
-	pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)
+	# train_2002_gongfansaken_10347.lab is empty. should be removed.
+	empty_lab_file = os.path.join(label_dir, 'train', 'train_2002_gongfansaken_10347.lab')
+	empty_dic_file = empty_lab_file.replace('.lab', '.dic')
+
+	if os.path.exists(empty_lab_file):
+		os.remove(empty_lab_file)
+	if os.path.exists(empty_dic_file):
+		os.remove(empty_dic_file)

 	for dataset in dataset_list:
-		wav_dir_	 = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
+		#wav_dir_	 = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
 		feature_dir_ = os.path.join(feature_dir, dataset)
 		label_dir_   = os.path.join(label_dir, dataset)
 		mlf_word  = os.path.join(label_dir, dataset + '_word.mlf')
 		mlf_phone = os.path.join(label_dir, dataset + '_phone.mlf')

-		#print(">>> making a script file for {}...".format(dataset))
-		#listdir    = glob.glob(os.path.join(wav_dir_, '*.dic'))
-		#mfc_list   = [filename.replace(wav_dir_, feature_dir_).replace('.dic', '.mfc') for filename in listdir]
-		#hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
-		#with open(hcompv_scp, 'wb') as f:
-		#	f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))
-
-		print(">>> making a mlf file for {}...".format(dataset))
-		lab_list = glob.glob(os.path.join(label_dir_, '*.lab'))
-		with open(mlf_word, 'wb') as fmlf:
-			fmlf.write(bytes('#!MLF!#\n', 'ascii'))
-			for label_file in lab_list:
-				filename = os.path.basename(label_file)
-				fmlf.write(bytes('\"*/{}\"\n'.format(filename), 'ascii'))
-				with open(label_file) as flab:
-					lines = flab.read()
-				fmlf.write(bytes(lines + '.\n', 'ascii'))
-
-		print(">>> generating phone level transcription for {}...".format(dataset))
-		pyhtk.mlf_word2phone(lexicon_htk, mlf_phone, mlf_word, mkphones_led)
-		print("elapsed time: {}".format(time.time() - timer_start))
+		print(">>> generating a word level mlf file for {}...".format(dataset))
+		chtk.label2mlf(label_dir_, mlf_word)
+		print(">>> generating a phone level mlf file for {}...".format(dataset))
+		chtk.mlf_word2phone(mlf_phone, mlf_word)
+		
+	print("elapsed time: {}".format(time.time() - timer_start))


 ## ======================= extract features =======================
@ -196,7 +181,7 @@ if extract_features:
 		wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
 		label_dir_   = os.path.join(label_dir, dataset)
 		feature_dir_ = os.path.join(feature_dir, dataset)
-		fh.make_new_directory(feature_dir_)
+		fh.make_new_directory(feature_dir_, existing_dir='delete')

 		# a script file for HCopy 
 		print(">>> making a script file for HCopy...")
@ -212,12 +197,15 @@ if extract_features:
 			os.path.join(wav_dir_, os.path.basename(lab_file).replace('.lab', '.wav')) + '\t'
 			+ os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc'))
 				  for lab_file in lab_list]
+
+		# NOTE(review): empty_mfc_file was referenced without ever being assigned.
+		# The path below mirrors empty_lab_file (the empty train utterance) and is
+		# an assumption -- confirm the intended file.
+		empty_mfc_file = os.path.join(
+			feature_dir, 'train', 'train_2002_gongfansaken_10347.mfc')
+		if os.path.exists(empty_mfc_file):
+			os.remove(empty_mfc_file)
 		with open(hcopy_scp.name, 'wb') as f:
 			f.write(bytes('\n'.join(feature_list), 'ascii'))
 		
 		# extract features.
 		print(">>> extracting features on {}...".format(dataset))
-		pyhtk.wav2mfc(config_hcopy, hcopy_scp.name)
+		chtk.wav2mfc(hcopy_scp.name)
 		os.remove(hcopy_scp.name)

 		# make hcompv.scp.
@ -235,21 +223,18 @@ if extract_features:
 if flat_start:
 	timer_start = time.time()
 	print('==== flat start ====')
-	pyhtk.flat_start(config_train, hcompv_scp_train, model0_dir, prototype)
+	feature_size = 39
+	model0_dir	  = os.path.join(model_dir, 'hmm0')
+	fh.make_new_directory(model0_dir, existing_dir='leave')
+
+	chtk.flat_start(hcompv_scp_train, model0_dir, feature_size)

 	# allocate mean & variance to all phones in the phone list
 	print('>>> allocating mean & variance to all phones in the phone list...')
-	pyhtk.create_hmmdefs(
+	chtk.create_hmmdefs(
 		os.path.join(model0_dir, proto_name),
-	    os.path.join(model0_dir, 'hmmdefs'), 
-		phonelist_txt)
-
-	# make macros
-	print('>>> making macros...')
-	with open(os.path.join(model0_dir, 'vFloors')) as f:
-		lines = f.read()
-	with open(os.path.join(model0_dir, 'macros'), 'wb') as f:
-		f.write(bytes('~o <MFCC_0_D_A> <VecSize> 39\n' + lines, 'ascii'))
+	    os.path.join(model0_dir, 'hmmdefs')
+		)
 	
 	print("elapsed time: {}".format(time.time() - timer_start))

@ -362,4 +347,24 @@ if train_model_with_sp_align_mlf:
 			hcompv_scp_train, phonelist_txt,
 			mlf_file=mlf_file_train_aligned, 
 			macros=os.path.join(modeln_dir_pre, 'macros'))
-		print("elapsed time: {}".format(time.time() - timer_start))
+		print("elapsed time: {}".format(time.time() - timer_start))
+
+
+# train triphone.
+# one HERest pass: reads macros/hmmdefs from hmm1_tri/iter0 and writes to hmm1_tri/iter1.
+if train_triphone:
+	# the module-level config_train is no longer defined, so build the path here.
+	config_train = os.path.join(config_dir, 'config.train')
+
+	triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')
+	triphone_dir = os.path.join(model_dir, 'hmm1_tri')
+	macros  = os.path.join(triphone_dir, 'iter0', 'macros')
+	hmmdefs = os.path.join(triphone_dir, 'iter0', 'hmmdefs')
+	model_out_dir = os.path.join(triphone_dir, 'iter1')
+
+	run_command([
+		'HERest', '-B',
+		'-C', config_train,
+		'-I', triphone_mlf,
+		'-t', '250.0', '150.0', '1000.0',
+		# the comma after 'stats' matters: without it Python joins the two
+		# adjacent literals into the single argument 'stats-S'.
+		'-s', 'stats',
+		'-S', hcompv_scp_train,
+		'-H', macros,
+		'-H', hmmdefs,
+		'-M', model_out_dir,
+		os.path.join(config_dir, 'triphonelist.txt')
+	])
--- a/acoustic_model/htk_vs_kaldi.py
+++ b/acoustic_model/htk_vs_kaldi.py
@ -53,7 +53,7 @@ from htk import pyhtk

 # procedure
 make_dic_file = 0
-make_HTK_files = 1
+make_HTK_files = 0
 extract_features = 0
 #make_htk_dict_files = 0
 #do_forced_alignment_htk = 0
@ -171,7 +171,7 @@ if make_HTK_files:
 		filename = row['filename'].replace('.wav', '.lab')
 		label_file = os.path.join(feature_dir, filename)
 		with open(label_file, 'wb') as f:
-			label_string = 'START\n' + row['word'].upper() + '\nEND\n'
+			label_string = 'SILENCE\n' + row['word'].upper() + '\nSILENCE\n'
 			f.write(bytes(label_string, 'ascii'))


@ -249,7 +249,7 @@ with open(hresult_scp, 'wb') as f:

 # calculate result
 performance = np.zeros((1, 2))
-for niter in range(1, 50):
+for niter in range(50, 60):
 	output = pyhtk.recognition(
 		os.path.join(config_dir, 'config.rec'),
 		lattice_file,
@ -265,6 +265,16 @@ for niter in range(1, 50):



+	#output = run_command_with_output([
+	#	'HVite', '-T', '1', 
+	#	'-C', config_rec, 
+	#	'-w', lattice_file, 
+	#	'-H', hmm, 
+	#	dictionary_file, phonelist_txt, 
+	#	'-S', HVite_scp
+	#])
+
+
 ## ======================= forced alignment using HTK =======================
 if do_forced_alignment_htk:
 	
--- a/acoustic_model/phoneset/fame_asr.py
+++ b/acoustic_model/phoneset/fame_asr.py
@ -128,7 +128,11 @@ translation_key_word2htk = {
 	'ä': 'ao', 'ë': 'ee', 'ï': 'ie', 'ö': 'oe', 'ü': 'ue',
 }
 #[translation_key_word2htk.get(i, i) for i in not_in_ascii]
-
+#Stop: p, b, t, d, k, g  
+#Nasal: m, n, ng(ŋ)
+#Fricative: s, z, f, v, h, x
+#Liquid: l, r
+#Vowel: a, a:, e:, i, i:, i_(i̯), o, o:, u, u:, u_(ṷ), oe(ö), oe:(ö:), ue(ü), ue:(ü:), O(ɔ), O:(ɔ:), Oe(ɔ̈), A(ə), E(ɛ), E:(ɛ:), I(ɪ), I:(ɪ:)


 ## the list of multi character phones. 
--- a/acoustic_model/stimmen_test.py
+++ b/acoustic_model/stimmen_test.py
@ -77,4 +77,17 @@ for word in word_list:
 	for key, value in zip(c.keys(), c.values()):
 		if value > 3:
 			pronunciations[key] = value
-	print(pronunciations)
+	print(pronunciations)
+
+
+monophone_mlf = os.path.join(default.htk_dir, 'label', 'train_phone_aligned.mlf')
+triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')
+def filenames_in_mlf(file_mlf):
+	"""
+	Return the file names listed in a master label file (MLF).
+
+	Every line that starts with a double quote is taken as a file entry;
+	the quotes and the '*/' prefix are removed from it.
+	Selecting on the quote (instead of "single token, first and last line
+	dropped") keeps the last entry when the file has no trailing newline and
+	does not report single-token labels as file names.
+
+	Args:
+		file_mlf (path): the master label file to be read.
+
+	Returns:
+		filenames (list): the file entries in the order they appear.
+	"""
+	with open(file_mlf) as f:
+		lines = [line.strip() for line in f.read().splitlines()]
+	return [
+		line.replace('"', '').replace('*/', '')
+		for line in lines
+		if line.startswith('"')]
+filenames_mono = filenames_in_mlf(monophone_mlf)
+filenames_tri  = filenames_in_mlf(triphone_mlf)
+