Label files are extracted; hcompv_scp is made.
parent 22cccfb61d
commit 322a8a0079
Binary file not shown.
@@ -4,6 +4,7 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

 import tempfile
 import shutil
+import glob
 #import configparser
 #import subprocess
 import time
@@ -30,8 +31,9 @@ dataset_list = ['devel', 'test', 'train']

 # procedure
 extract_features = 0
-make_lexicon = 1
-make_mlf = 0
+make_lexicon = 0
+make_dictionary = 0 # 4800 sec
+make_htk_files = 1
 combine_files = 0
 flat_start = 0
 train_model = 0
@@ -92,11 +94,6 @@ if extract_features:
         fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
         pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)

-        # a script file for HCompV
-        print(">>> making a script file for HCompV... \n")
-        hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
-        fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')
-
         os.remove(hcopy_scp.name)


@@ -124,23 +121,11 @@ if make_lexicon:
     fame_functions.fix_single_quote(lexicon_htk)


-## ======================= make phonelist =======================
-#phonelist_txt = os.path.join(default.htk_dir, 'config', 'phonelist.txt')
-#pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)
-#sentence = 'ien fan de minsken fan it deiferbliuw sels brúntsje visser'
-#log_txt = os.path.join(default.htk_dir, 'config', 'log.txt')
-#dictionary_file = os.path.join(default.htk_dir, 'config', 'test.dic')
-#pyhtk.create_dictionary(
-#    sentence, global_ded, log_txt, dictionary_file, lexicon_htk)
-#pyhtk.create_dictionary_without_log(
-#    sentence, global_ded, dictionary_file, lexicon_htk)
-
-
-## ======================= make label file =======================
-if make_mlf:
+## ======================= make dic files =======================
+if make_dictionary:
     for dataset in dataset_list:
         timer_start = time.time()
-        print("==== generating word level transcription on dataset {}\n".format(dataset))
+        print("==== generating HTK dictionary files on dataset {}\n".format(dataset))

         #hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
         #hcompv_scp2 = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
@@ -161,25 +146,22 @@ if make_mlf:
             filename_ = line.split(' ')[0]
             filename = '_'.join(filename_.split('_')[1:])
             sentence = ' '.join(line.split(' ')[1:])
+            sentence_htk = fame_functions.word2htk(sentence)
+
             wav_file = os.path.join(wav_dir, filename + '.wav')
-            if len(re.findall(r'[\w]+[âêûô\'ú]+[\w]+', sentence))==0:
-                try:
-                    sentence_ascii = bytes(sentence, 'ascii')
-                except UnicodeEncodeError:
-                    print(sentence)
-            #if os.path.exists(wav_file):
-            #    #dictionary_file = os.path.join(wav_dir, filename + '.dic')
-            #    if pyhtk.create_dictionary_without_log(
-            #            sentence, global_ded, dictionary_file, lexicon_htk) == 0:
-            #        # when the file name is too long, HDMan command does not work.
-            #        # therefore first temporary dictionary_file is made, then renamed.
-            #        shutil.move(dictionary_file, os.path.join(wav_dir, filename + '.dic'))
-            #        label_file = os.path.join(wav_dir, filename + '.lab')
-            #        pyhtk.create_label_file(sentence, label_file)
-            #    else:
-            #        os.remove(dictionary_file)
+            if os.path.exists(wav_file):
+                #dictionary_file = os.path.join(wav_dir, filename + '.dic')
+                if pyhtk.create_dictionary_without_log(
+                        sentence, global_ded, dictionary_file, lexicon_htk) == 0:
+                    # when the file name is too long, HDMan command does not work.
+                    # therefore first temporary dictionary_file is made, then renamed.
+                    shutil.move(dictionary_file, os.path.join(wav_dir, filename + '.dic'))
+                    label_file = os.path.join(wav_dir, filename + '.lab')
+                    pyhtk.create_label_file(sentence, label_file)
+                else:
+                    os.remove(dictionary_file)
         print("elapsed time: {}".format(time.time() - timer_start))

     # lexicon
     #lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation'])

@@ -244,8 +226,30 @@ if make_mlf:
 #    fscp.close()
 #    fmlf.close()

+
+## ======================= make other required files =======================
+if make_htk_files:
+    ## phonelist
+    phonelist_txt = os.path.join(default.htk_dir, 'config', 'phonelist.txt')
+    pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)
+
+    ## hcomp_v.scp
+    print(">>> making a script file for HCompV... \n")
+    for dataset in dataset_list:
+        #timer_start = time.time()
+
+        wav_dir = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
+
+        listdir = glob.glob(os.path.join(wav_dir, '*.dic'))
+        filelist = [filename.replace(wav_dir, feature_dir).replace('.dic', '.fea') for filename in listdir]
+
+        hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
+        with open(hcompv_scp, 'wt', newline='\r\n') as f:
+            f.write('\n'.join(filelist))
+
+    ## hcomp_scp
+    # a script file for HCompV

-## generate phone level transcription
 # print("generating phone level transcription...\n")
 # mkphones = output_dir + '\\label\\mkphones0.txt'
 # subprocessStr = r"HLEd -l * -d " + lex_htk_ + ' -i ' + mlf_phone + ' ' + mkphones + ' ' + mlf_word
@@ -253,29 +257,29 @@ if make_mlf:


 ## ======================= combined scps and mlfs =======================
-if combine_files:
-    print("==== combine scps and mlfs ====\n")
+#if combine_files:
+#    print("==== combine scps and mlfs ====\n")

-    fscp = open(hcompv_scp, 'wt')
-    fmlf = open(combined_mlf, 'wt')
+#    fscp = open(hcompv_scp, 'wt')
+#    fmlf = open(combined_mlf, 'wt')

-    for dataset in dataset_list:
-        fmlf.write("#!MLF!#\n")
-        for dataset in dataset_list:
-            each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf'
-            each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
+#    for dataset in dataset_list:
+#        fmlf.write("#!MLF!#\n")
+#        for dataset in dataset_list:
+#            each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf'
+#            each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'

-            with open(each_mlf, 'r') as fin:
-                lines = fin.read()
-                lines = lines.split('\n')
-                fmlf.write('\n'.join(lines[1:]))
+#            with open(each_mlf, 'r') as fin:
+#                lines = fin.read()
+#                lines = lines.split('\n')
+#                fmlf.write('\n'.join(lines[1:]))

-            with open(each_scp, 'r') as fin:
-                lines = fin.read()
-                fscp.write(lines)
+#            with open(each_scp, 'r') as fin:
+#                lines = fin.read()
+#                fscp.write(lines)

-    fscp.close()
-    fmlf.close()
+#    fscp.close()
+#    fmlf.close()


 ## ======================= flat start monophones =======================

@@ -110,7 +110,7 @@ phoneset_htk = [translation_key_asr2htk.get(i, i) for i in phoneset_short]
 #    'ä', 'ë', 'ï', 'ö', 'ü'
 #]
 translation_key_word2htk = {
-    '\'': '\\\'',
+    #'\'': '\\\'',
     'í':'i1', 'é':'e1', 'ú':'u1', 'ć':'c1',
     'à':'a2', 'è':'e2',
     'â':'a3', 'ê':'e3', 'ô':'o3', 'û':'u3',