novoapi_functions.py is adjusted to use convert_phoneset.py.

2019-04-22 00:59:53 +02:00
parent b444b70af9
commit 2004399179
5 changed files with 283 additions and 132 deletions
--- a/acoustic_model/fame_hmm.py
+++ b/acoustic_model/fame_hmm.py
@ -16,50 +16,53 @@ import defaultfiles as default
 sys.path.append(default.toolbox_dir)
 import file_handling as fh
 from htk import pyhtk
+#from scripts import run_command


 ## ======================= user define =======================
 # procedure
+combine_all = 1
+
 make_lexicon	  = 0
 make_label		  = 0 # it takes roughly 4800 sec on Surface pro 2.
 make_mlf		  = 0
 extract_features  = 0
-flat_start		  = 0
-train_monophone_without_sp = 0
-add_sp = 0
-train_monophone_with_re_aligned_mlf = 0
+flat_start		  = 1
+train_monophone_without_sp = 1
+add_sp = 1
+train_monophone_with_re_aligned_mlf = 1
+increase_mixture = 1
 train_triphone = 0
-train_triphone_tied = 1
+train_triphone_tied = 0


 # pre-defined values.
 dataset_list = ['devel', 'test', 'train']
-feature_size = 39
+feature_size = 30
 improvement_threshold = 0.3

-hmmdefs_name = 'hmmdefs'
-proto_name   = 'proto'
-
 lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
 lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')

 config_dir = os.path.join(default.htk_dir, 'config')
 phonelist_full_txt = os.path.join(config_dir, 'phonelist_full.txt')
-tree_hed   = os.path.join(config_dir, 'tree.hed')
-quest_hed  = os.path.join(config_dir, 'quests.hed')
+tree_hed    = os.path.join(config_dir, 'tree.hed')
+quests_hed  = os.path.join(config_dir, 'quests.hed')

 model_dir     = os.path.join(default.htk_dir, 'model')
 model_mono0_dir    = os.path.join(model_dir, 'mono0')
 model_mono1_dir    = os.path.join(model_dir, 'mono1')
 model_mono1sp_dir  = os.path.join(model_dir, 'mono1sp')
 model_mono1sp2_dir = os.path.join(model_dir, 'mono1sp2')
-model_tri1_dir  = os.path.join(model_dir, 'tri1')
+model_tri1_dir	   = os.path.join(model_dir, 'tri1')
+model_tri1tied_dir = os.path.join(model_dir, 'tri1tied')

 # directories / files to be made.
 lexicon_dir = os.path.join(default.htk_dir, 'lexicon') 
 lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr')
 lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov')
 lexicon_htk     = os.path.join(lexicon_dir, 'lex.htk')
+lexicon_htk_with_sp  = os.path.join(lexicon_dir, 'lex_with_sp.htk')
 lexicon_htk_triphone = os.path.join(lexicon_dir, 'lex_triphone.htk')

 feature_dir = os.path.join(default.htk_dir, 'mfc')
@ -71,10 +74,20 @@ fh.make_new_directory(label_dir, existing_dir='leave')


 ## training
-hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
-mlf_file_train   = os.path.join(label_dir, 'train_phone.mlf')
-mlf_file_train_with_sp = os.path.join(label_dir, 'train_phone_with_sp.mlf')
-mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
+if combine_all:
+	hcompv_scp_train		 = os.path.join(tmp_dir, 'all.scp')
+	mlf_file_train			 = os.path.join(label_dir, 'all_phone.mlf')
+	mlf_file_train_word		 = os.path.join(label_dir, 'all_word.mlf')
+	mlf_file_train_with_sp   = os.path.join(label_dir, 'all_phone_with_sp.mlf')
+	mlf_file_train_aligned   = os.path.join(label_dir, 'all_phone_aligned.mlf')
+	triphone_mlf			 = os.path.join(label_dir, 'all_triphone.mlf')
+else:
+	hcompv_scp_train		 = os.path.join(tmp_dir, 'train.scp')
+	mlf_file_train			 = os.path.join(label_dir, 'train_phone.mlf')
+	mlf_file_train_word		 = os.path.join(label_dir, 'train_word.mlf')
+	mlf_file_train_with_sp   = os.path.join(label_dir, 'train_phone_with_sp.mlf')
+	mlf_file_train_aligned   = os.path.join(label_dir, 'train_phone_aligned.mlf')
+	triphone_mlf			 = os.path.join(label_dir, 'train_triphone.mlf')
 hcompv_scp_train_updated = hcompv_scp_train.replace('.scp', '_updated.scp')

 ## testing
@ -104,19 +117,18 @@ if make_lexicon:
 	print('>>> fixing the lexicon...')
 	fame_functions.fix_lexicon(lexicon_htk)

-	## add sp to the end of each line.
-	#print('>>> adding sp...')
-	#with open(lexicon_htk) as f:
-	#	lines = f.read().split('\n')
-	#lines = [line + ' sp' for line in lines]
-	#with open(lexicon_htk_with_sp, 'wb') as f:
-	#	f.write(bytes('\n'.join(lines), 'ascii'))
+	## adding sp to the lexicon for HTK. 
+	print('>>> adding sp to the lexicon...')
+	with open(lexicon_htk) as f:
+		lines = f.read().split('\n')
+	with open(lexicon_htk_with_sp, 'wb') as f:
+		f.write(bytes(' sp\n'.join(lines), 'ascii'))

 	print("elapsed time: {}".format(time.time() - timer_start))
 	

 ## initialize the instance for HTK.
-chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk, feature_size)
+chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk_with_sp, feature_size)


 ## ======================= make label files =======================
@ -152,7 +164,7 @@ if make_label:
 					shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic'))

 					label_file = os.path.join(label_dir_, filename + '.lab')
-					chtk.create_label_file(sentence_htk, label_file)
+					chtk.make_label_file(sentence_htk, label_file)
 				else:
 					os.remove(dictionary_file)

@ -174,7 +186,6 @@ if make_mlf:
 		os.remove(empty_dic_file)

 	for dataset in dataset_list:
-		#wav_dir_	 = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
 		feature_dir_ = os.path.join(feature_dir, dataset)
 		label_dir_   = os.path.join(label_dir, dataset)
 		mlf_word  = os.path.join(label_dir, dataset + '_word.mlf')
@ -183,11 +194,11 @@ if make_mlf:

 		print(">>> generating a word level mlf file for {}...".format(dataset))
 		chtk.label2mlf(label_dir_, mlf_word)
+
 		print(">>> generating a phone level mlf file for {}...".format(dataset))
 		chtk.mlf_word2phone(mlf_phone, mlf_word, with_sp=False)
 		chtk.mlf_word2phone(mlf_phone_with_sp, mlf_word, with_sp=True)
 		
-
 	print("elapsed time: {}".format(time.time() - timer_start))


@ -197,7 +208,7 @@ if extract_features:
 		timer_start = time.time()
 		print('==== extract features on dataset {} ===='.format(dataset))

-		wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
+		wav_dir_	 = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
 		label_dir_   = os.path.join(label_dir, dataset)
 		feature_dir_ = os.path.join(feature_dir, dataset)
 		fh.make_new_directory(feature_dir_, existing_dir='delete')
@ -217,8 +228,8 @@ if extract_features:
 			+ os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc'))
 				  for lab_file in lab_list]

-		if os.path.exists(empty_mfc_file):
-			os.remove(empty_mfc_file)
+		#if os.path.exists(empty_mfc_file):
+		#	os.remove(empty_mfc_file)
 		with open(hcopy_scp.name, 'wb') as f:
 			f.write(bytes('\n'.join(feature_list), 'ascii'))
 		
@ -235,9 +246,64 @@ if extract_features:
 		with open(hcompv_scp, 'wb') as f:
 			f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))

+		print(">>> extracting features on stimmen...")
+		chtk.wav2mfc(os.path.join(htk_stimmen_dir, 'hcopy.scp'))
+
 		print("elapsed time: {}".format(time.time() - timer_start))


+## ======================= combine all datasets =======================
+if combine_all:
+	# script files.
+	fh.concatenate(
+		os.path.join(tmp_dir, 'devel.scp'),
+		os.path.join(tmp_dir, 'test.scp'),
+		hcompv_scp_train
+		)
+	fh.concatenate(
+		hcompv_scp_train,
+		os.path.join(tmp_dir, 'train.scp'),
+		hcompv_scp_train
+		)
+
+	# phone level mlfs.
+	fh.concatenate(
+		os.path.join(label_dir, 'devel_phone.mlf'),
+		os.path.join(label_dir, 'test_phone.mlf'),
+		mlf_file_train
+		)
+	fh.concatenate(
+		mlf_file_train,
+		os.path.join(label_dir, 'train_phone.mlf'),
+		mlf_file_train
+		)
+
+	# phone level mlfs with sp.
+	fh.concatenate(
+		os.path.join(label_dir, 'devel_phone_with_sp.mlf'),
+		os.path.join(label_dir, 'test_phone_with_sp.mlf'),
+		mlf_file_train_with_sp
+		)
+	fh.concatenate(
+		mlf_file_train_with_sp,
+		os.path.join(label_dir, 'train_phone_with_sp.mlf'),
+		mlf_file_train_with_sp
+		)
+
+
+	# word level mlfs.
+	fh.concatenate(
+		os.path.join(label_dir, 'devel_word.mlf'),
+		os.path.join(label_dir, 'test_word.mlf'),
+		mlf_file_train_word
+		)
+	fh.concatenate(
+		mlf_file_train_word,
+		os.path.join(label_dir, 'train_word.mlf'),
+		mlf_file_train_word
+		)
+
+
 ## ======================= flat start monophones =======================
 if flat_start:
 	timer_start = time.time()
@ -246,17 +312,14 @@ if flat_start:

 	chtk.flat_start(hcompv_scp_train, model_mono0_dir)

-	# create macros.
+	# make macros.
 	vFloors = os.path.join(model_mono0_dir, 'vFloors')
 	if os.path.exists(vFloors):
-		chtk.create_macros(vFloors)
+		chtk.make_macros(vFloors)

 	# allocate mean & variance to all phones in the phone list
 	print('>>> allocating mean & variance to all phones in the phone list...')
-	chtk.create_hmmdefs(
-		os.path.join(model_mono0_dir, proto_name),
-	    os.path.join(model_mono0_dir, 'hmmdefs')
-		)
+	chtk.make_hmmdefs(model_mono0_dir)
 	
 	print("elapsed time: {}".format(time.time() - timer_start))

@ -320,8 +383,9 @@ if train_monophone_with_re_aligned_mlf:
 		os.path.join(modeln_dir, 'macros'),
 		os.path.join(modeln_dir, 'hmmdefs'), 
 		mlf_file_train_aligned, 		
-		os.path.join(label_dir,	'train_word.mlf'), 
+		mlf_file_train_word, 
 		hcompv_scp_train)
+	chtk.fix_mlf(mlf_file_train_aligned)

 	print('>>> updating the script file... ')
 	chtk.update_script_file(
@ -349,24 +413,55 @@ if train_monophone_with_re_aligned_mlf:
 	print("elapsed time: {}".format(time.time() - timer_start))


-## ======================= train triphone =======================
-if train_triphone:
-	print('==== traina triphone model ====')
+## ======================= increase mixture =======================
+if increase_mixture:
+	print('==== increase mixture ====')
 	timer_start = time.time()
+	for nmix in [2, 4, 8, 16]:
+		if nmix == 2:
+			modeln_dir_ = model_mono1sp2_dir
+		else:
+			modeln_dir_ = os.path.join(model_dir, 'mono'+str(nmix_))
+		modeln_dir	= os.path.join(model_dir, 'mono'+str(nmix))

-	triphonelist_txt = os.path.join(config_dir, 'triphonelist.txt')
-	triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')
+		print('mixture: {}'.format(nmix))
+		fh.make_new_directory(modeln_dir, existing_dir='delete')	
+		niter = chtk.get_niter_max(modeln_dir_)
+		chtk.increase_mixture(
+			os.path.join(modeln_dir_, 'iter'+str(niter), 'hmmdefs'), 
+			nmix, 
+			os.path.join(modeln_dir, 'iter0'), 
+			model_type='monophone_with_sp')
+		shutil.copy2(os.path.join(modeln_dir_, 'iter'+str(niter), 'macros'), 
+				  os.path.join(modeln_dir, 'iter0', 'macros'))

-	print('>>> making triphone list... ')
-	chtk.make_triphonelist(
-		triphonelist_txt, 
-		triphone_mlf, 
-		mlf_file_train_aligned)
+		#improvement_threshold = -10
+		niter = chtk.re_estimation_until_saturated(
+			modeln_dir, 
+			os.path.join(modeln_dir_, 'iter0'), 
+			improvement_threshold, 
+			hcompv_scp_train_updated, 
+			os.path.join(htk_stimmen_dir, 'mfc'), 
+			'mfc', 
+			os.path.join(htk_stimmen_dir, 'word_lattice.ltc'), 
+			mlf_file=mlf_file_train_aligned, 
+			lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'), 
+			model_type='monophone_with_sp'
+			)
+		nmix_ = nmix

-	print('>>> making triphone header... ')
-	chtk.make_tri_hed(
-		os.path.join(config_dir, 'mktri.hed')
-		)
+	print("elapsed time: {}".format(time.time() - timer_start))
+
+
+## ======================= train triphone =======================
+print('>>> making triphone list... ')
+chtk.make_triphonelist( 
+	mlf_file_train_aligned,
+	triphone_mlf)
+
+if train_triphone:
+	print('==== train triphone model ====')
+	timer_start = time.time()

 	print('>>> init triphone model... ')
 	niter = chtk.get_niter_max(model_mono1sp2_dir)
@ -377,8 +472,8 @@ if train_triphone:
 		)

 	print('>>> re-estimation... ')
-	# I wanted to train until satulated:
-	# 	#niter = chtk.re_estimation_until_saturated(
+	## I wanted to train until saturated:
+	#niter = chtk.re_estimation_until_saturated(
 	#	model_tri1_dir, 
 	#	os.path.join(model_tri1_dir, 'iter0'), 
 	#	improvement_threshold, 
@ -395,7 +490,6 @@ if train_triphone:
 	#   ERROR [+8231]  GetHCIModel: Cannot find hmm [i:-]r[+???]
 	# therefore re-estimation is performed only three times.
 	output_dir = model_tri1_dir
-
 	for niter in range(1, 4):
 		hmm_n = 'iter' + str(niter)
 		hmm_n_pre = 'iter' + str(niter-1)
@ -414,18 +508,59 @@ if train_triphone:
 	print("elapsed time: {}".format(time.time() - timer_start))


-## ======================= train triphone =======================
+## ======================= train tied-state triphones =======================
 if train_triphone_tied:
-	print('==== traina tied-state triphone ====')
+	print('==== train tied-state triphones ====')
 	timer_start = time.time()

 	print('>>> making lexicon for triphone... ')
-	chtk.make_triphone_full(phonelist_full_txt, lexicon_htk_triphone)
+	chtk.make_lexicon_triphone(phonelist_full_txt, lexicon_htk_triphone)
+	chtk.combine_phonelists(phonelist_full_txt)

-	print('>>> making headers... ')
-	chtk.make_tree_header(tree_hed)
-	fame_phonetics.make_quests_hed(quest_hed)
+	print('>>> making a tree header... ')
+	fame_phonetics.make_quests_hed(quests_hed)
+	stats = os.path.join(r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk\model\tri1\iter3', 'stats')
+	chtk.make_tree_header(tree_hed, quests_hed, stats, config_dir)

-	print("elapsed time: {}".format(time.time() - timer_start))
+	print('>>> init triphone model... ')
+	niter = chtk.get_niter_max(model_tri1_dir)
+	fh.make_new_directory(os.path.join(model_tri1tied_dir, 'iter0'), existing_dir='leave')
+	chtk.init_triphone(
+		os.path.join(model_tri1_dir, 'iter'+str(niter)),
+		os.path.join(model_tri1tied_dir, 'iter0'),
+		tied=True)

+	# I wanted to train until saturated:
+	#niter = chtk.re_estimation_until_saturated(
+	#	model_tri1tied_dir, 
+	#	os.path.join(model_tri1tied_dir, 'iter0'), 
+	#	improvement_threshold, 
+	#	hcompv_scp_train_updated, 
+	#	os.path.join(htk_stimmen_dir, 'mfc'), 
+	#	'mfc', 
+	#	os.path.join(htk_stimmen_dir, 'word_lattice.ltc'), 
+	#	mlf_file=triphone_mlf, 
+	#	lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'), 
+	#	model_type='triphone'
+	#	)
+	#
+	# but because the data size is limited, some triphones cannot be trained and the following error occurs:
+	#   ERROR [+8231]  GetHCIModel: Cannot find hmm [i:-]r[+???]
+	# therefore re-estimation is performed only 3 times.
+	output_dir = model_tri1tied_dir
+	for niter in range(1, 4):
+		hmm_n = 'iter' + str(niter)
+		hmm_n_pre = 'iter' + str(niter-1)
+		_modeln_dir	    = os.path.join(output_dir, hmm_n)
+		_modeln_dir_pre = os.path.join(output_dir, hmm_n_pre) 
+		
+		fh.make_new_directory(_modeln_dir, 'leave')
+		chtk.re_estimation(
+			os.path.join(_modeln_dir_pre, 'hmmdefs'), 
+			_modeln_dir,
+			hcompv_scp_train_updated,
+			mlf_file=triphone_mlf,
+			macros=os.path.join(_modeln_dir_pre, 'macros'),
+			model_type='triphone')

+	print("elapsed time: {}".format(time.time() - timer_start))