Label files are extracted and hcompv_scp is made.

2019-02-03 13:54:37 +01:00
parent 22cccfb61d
commit 322a8a0079
3 changed files with 62 additions and 58 deletions
--- a/.vs/acoustic_model/v15/.suo
+++ b/.vs/acoustic_model/v15/.suo
--- a/acoustic_model/fame_hmm.py
+++ b/acoustic_model/fame_hmm.py
@@ -4,6 +4,7 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

 import tempfile
 import shutil
+import glob
 #import configparser
 #import subprocess
 import time
@@ -30,8 +31,9 @@ dataset_list = ['devel', 'test', 'train']

 # procedure
 extract_features  = 0
-make_lexicon	  = 1
-make_mlf		  = 0
+make_lexicon	  = 0
+make_dictionary	  = 0 # 4800 sec
+make_htk_files    = 1
 combine_files	  = 0
 flat_start		  = 0
 train_model		  = 0
@@ -92,11 +94,6 @@ if extract_features:
 		fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
 		pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)

-		# a script file for HCompV
-		print(">>> making a script file for HCompV... \n")
-		hcompv_scp  = os.path.join(tmp_dir, dataset + '.scp')
-		fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')
-
 		os.remove(hcopy_scp.name)


@@ -124,23 +121,11 @@ if make_lexicon:
 	fame_functions.fix_single_quote(lexicon_htk)


-## ======================= make phonelist =======================
-#phonelist_txt = os.path.join(default.htk_dir, 'config', 'phonelist.txt')
-#pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)
-#sentence = 'ien fan de minsken fan it deiferbliuw sels brúntsje visser'
-#log_txt = os.path.join(default.htk_dir, 'config', 'log.txt')
-#dictionary_file = os.path.join(default.htk_dir, 'config', 'test.dic')
-#pyhtk.create_dictionary(
-#	sentence, global_ded, log_txt, dictionary_file, lexicon_htk)
-#pyhtk.create_dictionary_without_log(
-#	sentence, global_ded, dictionary_file, lexicon_htk)
-
-
-## ======================= make label file =======================
-if make_mlf:
+## ======================= make dic files =======================
+if make_dictionary:
 	for dataset in dataset_list:
 		timer_start = time.time()
-		print("==== generating word level transcription on dataset {}\n".format(dataset))
+		print("==== generating HTK dictionary files on dataset {}\n".format(dataset))

 		#hcompv_scp  = output_dir + '\\scp\\' + dataset + '.scp'
 		#hcompv_scp2 = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
@@ -161,25 +146,22 @@ if make_mlf:
 			filename_ = line.split(' ')[0]
 			filename  = '_'.join(filename_.split('_')[1:])
 			sentence  = ' '.join(line.split(' ')[1:])
+			sentence_htk = fame_functions.word2htk(sentence)

 			wav_file = os.path.join(wav_dir, filename + '.wav')
-			if len(re.findall(r'[\w]+[âêûô\'ú]+[\w]+', sentence))==0:
-				try:
-					sentence_ascii = bytes(sentence, 'ascii')
-				except UnicodeEncodeError:
-					print(sentence)
-			#if os.path.exists(wav_file):
-			#	#dictionary_file = os.path.join(wav_dir, filename + '.dic')
-			#	if pyhtk.create_dictionary_without_log(
-			#		sentence, global_ded, dictionary_file, lexicon_htk) == 0:
-			#		# when the file name is too long, HDMan command does not work.
-			#		# therefore first temporary dictionary_file is made, then renamed. 
-			#		shutil.move(dictionary_file, os.path.join(wav_dir, filename + '.dic'))
-			#		label_file = os.path.join(wav_dir, filename + '.lab')
-			#		pyhtk.create_label_file(sentence, label_file)
-			#	else:
-			#		os.remove(dictionary_file)
+			if os.path.exists(wav_file):
+				#dictionary_file = os.path.join(wav_dir, filename + '.dic')
+				if pyhtk.create_dictionary_without_log(
+					sentence, global_ded, dictionary_file, lexicon_htk) == 0:
+					# The HDMan command does not work when the file name is too long,
+					# so a temporary dictionary_file is created first and then renamed.
+					shutil.move(dictionary_file, os.path.join(wav_dir, filename + '.dic'))
+					label_file = os.path.join(wav_dir, filename + '.lab')
+					pyhtk.create_label_file(sentence, label_file)
+				else:
+					os.remove(dictionary_file)
 		print("elapsed time: {}".format(time.time() - timer_start))
+
 		# lexicon
 		#lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation'])

@@ -244,8 +226,30 @@ if make_mlf:
 	#	fscp.close()
 	#	fmlf.close()

+## ======================= make other required files =======================
+if make_htk_files:
+	## phonelist
+	phonelist_txt = os.path.join(default.htk_dir, 'config', 'phonelist.txt')
+	pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)
+
+	## hcomp_v.scp
+	print(">>> making a script file for HCompV... \n")
+	for dataset in dataset_list:
+		#timer_start = time.time()
+
+		wav_dir = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
+
+		listdir = glob.glob(os.path.join(wav_dir, '*.dic'))
+		filelist = [filename.replace(wav_dir, feature_dir).replace('.dic', '.fea') for filename in listdir]
+
+		hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
+		with open(hcompv_scp, 'wt', newline='\r\n') as f:
+			f.write('\n'.join(filelist))
+
+
+## hcomp_scp
+# a script file for HCompV

-		## generate phone level transcription 
 	#	print("generating phone level transcription...\n")
 	#	mkphones = output_dir + '\\label\\mkphones0.txt'
 	#	subprocessStr = r"HLEd -l * -d " + lex_htk_ + ' -i ' + mlf_phone + ' ' + mkphones + ' ' + mlf_word
@@ -253,29 +257,29 @@ if make_mlf:
 	

 ## ======================= combined scps and mlfs =======================
-if combine_files:
-	print("==== combine scps and mlfs ====\n")
+#if combine_files:
+#	print("==== combine scps and mlfs ====\n")

-	fscp = open(hcompv_scp, 'wt')
-	fmlf = open(combined_mlf, 'wt')
+#	fscp = open(hcompv_scp, 'wt')
+#	fmlf = open(combined_mlf, 'wt')

-	for dataset in dataset_list:
-		fmlf.write("#!MLF!#\n")
-		for dataset in dataset_list:
-			each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf'
-			each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
+#	for dataset in dataset_list:
+#		fmlf.write("#!MLF!#\n")
+#		for dataset in dataset_list:
+#			each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf'
+#			each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
 		
-		with open(each_mlf, 'r') as fin:
-			lines = fin.read()
-			lines = lines.split('\n')
-		fmlf.write('\n'.join(lines[1:]))
+#		with open(each_mlf, 'r') as fin:
+#			lines = fin.read()
+#			lines = lines.split('\n')
+#		fmlf.write('\n'.join(lines[1:]))

-		with open(each_scp, 'r') as fin:
-			lines = fin.read()
-		fscp.write(lines)
+#		with open(each_scp, 'r') as fin:
+#			lines = fin.read()
+#		fscp.write(lines)

-	fscp.close()
-	fmlf.close()
+#	fscp.close()
+#	fmlf.close()


 ## ======================= flat start monophones =======================
--- a/acoustic_model/phoneset/fame_asr.py
+++ b/acoustic_model/phoneset/fame_asr.py
@@ -110,7 +110,7 @@ phoneset_htk = [translation_key_asr2htk.get(i, i) for i in phoneset_short]
 #	'ä', 'ë', 'ï', 'ö', 'ü'
 #]
 translation_key_word2htk = {
-	'\'': '\\\'',
+	#'\'': '\\\'',
 	'í':'i1', 'é':'e1', 'ú':'u1', 'ć':'c1',
 	'à':'a2', 'è':'e2', 	
 	'â':'a3', 'ê':'e3', 'ô':'o3', 'û':'u3',