|
|
|
@ -22,30 +22,27 @@ from htk import pyhtk
@@ -22,30 +22,27 @@ from htk import pyhtk
|
|
|
|
|
# procedure |
|
|
|
|
make_lexicon = 0 |
|
|
|
|
make_label = 0 # it takes roughly 4800 sec on Surface pro 2. |
|
|
|
|
make_htk_files = 0 |
|
|
|
|
make_mlf = 0 |
|
|
|
|
extract_features = 0 |
|
|
|
|
flat_start = 0 |
|
|
|
|
train_model_without_sp = 0 |
|
|
|
|
add_sp = 0 |
|
|
|
|
train_model_with_sp = 0 |
|
|
|
|
train_model_with_sp_align_mlf = 1 |
|
|
|
|
train_model_with_sp_align_mlf = 0 |
|
|
|
|
train_triphone = 0 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# pre-defined values. |
|
|
|
|
|
|
|
|
|
dataset_list = ['devel', 'test', 'train'] |
|
|
|
|
hmmdefs_name = 'hmmdefs' |
|
|
|
|
proto_name = 'proto39' |
|
|
|
|
proto_name = 'proto' |
|
|
|
|
|
|
|
|
|
lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr') |
|
|
|
|
lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov') |
|
|
|
|
|
|
|
|
|
config_dir = os.path.join(default.htk_dir, 'config') |
|
|
|
|
config_hcopy = os.path.join(config_dir, 'config.HCopy') |
|
|
|
|
config_train = os.path.join(config_dir, 'config.train') |
|
|
|
|
global_ded = os.path.join(config_dir, 'global.ded') |
|
|
|
|
mkphones_led = os.path.join(config_dir, 'mkphones.led') |
|
|
|
|
|
|
|
|
|
sil_hed = os.path.join(config_dir, 'sil.hed') |
|
|
|
|
prototype = os.path.join(config_dir, proto_name) |
|
|
|
|
|
|
|
|
@ -53,25 +50,20 @@ model_dir = os.path.join(default.htk_dir, 'model')
@@ -53,25 +50,20 @@ model_dir = os.path.join(default.htk_dir, 'model')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# directories / files to be made. |
|
|
|
|
|
|
|
|
|
lexicon_dir = os.path.join(default.htk_dir, 'lexicon') |
|
|
|
|
lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr') |
|
|
|
|
lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov') |
|
|
|
|
lexicon_htk = os.path.join(lexicon_dir, 'lex.htk') |
|
|
|
|
|
|
|
|
|
phonelist_txt = os.path.join(config_dir, 'phonelist.txt') |
|
|
|
|
model0_dir = os.path.join(model_dir, 'hmm0') |
|
|
|
|
model1_dir = os.path.join(model_dir, 'hmm1') |
|
|
|
|
|
|
|
|
|
#model1_dir = os.path.join(model_dir, 'hmm1') |
|
|
|
|
|
|
|
|
|
feature_dir = os.path.join(default.htk_dir, 'mfc') |
|
|
|
|
if not os.path.exists(feature_dir): |
|
|
|
|
os.makedirs(feature_dir) |
|
|
|
|
fh.make_new_directory(feature_dir, existing_dir='leave') |
|
|
|
|
tmp_dir = os.path.join(default.htk_dir, 'tmp') |
|
|
|
|
if not os.path.exists(tmp_dir): |
|
|
|
|
os.makedirs(tmp_dir) |
|
|
|
|
fh.make_new_directory(tmp_dir, existing_dir='leave') |
|
|
|
|
label_dir = os.path.join(default.htk_dir, 'label') |
|
|
|
|
if not os.path.exists(label_dir): |
|
|
|
|
os.makedirs(label_dir) |
|
|
|
|
fh.make_new_directory(label_dir, existing_dir='leave') |
|
|
|
|
|
|
|
|
|
## training |
|
|
|
|
hcompv_scp_train = os.path.join(tmp_dir, 'train.scp') |
|
|
|
@ -98,20 +90,21 @@ if make_lexicon:
@@ -98,20 +90,21 @@ if make_lexicon:
|
|
|
|
|
# therefore there is no overlap between lex_asr and lex_oov. |
|
|
|
|
fame_functions.combine_lexicon(lexicon_htk_asr, lexicon_htk_oov, lexicon_htk) |
|
|
|
|
|
|
|
|
|
## ======================= |
|
|
|
|
## manually make changes to the pronunciation dictionary and save it as lex.htk |
|
|
|
|
## ======================= |
|
|
|
|
## fixing the lexicon for HTK. |
|
|
|
|
# (1) Replace all tabs with a single space; |
|
|
|
|
# (2) Put a '\' before any dictionary entry beginning with a single quote |
|
|
|
|
#http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html |
|
|
|
|
# http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html |
|
|
|
|
print('>>> fixing the lexicon...') |
|
|
|
|
fame_functions.fix_lexicon(lexicon_htk) |
|
|
|
|
print("elapsed time: {}".format(time.time() - timer_start)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## initialize the instance for HTK. |
|
|
|
|
chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## ======================= make label files ======================= |
|
|
|
|
if make_label: |
|
|
|
|
# train_2002_gongfansaken_10347.lab is empty and should be removed. |
|
|
|
|
for dataset in dataset_list: |
|
|
|
|
timer_start = time.time() |
|
|
|
|
print("==== making label files on dataset {}".format(dataset)) |
|
|
|
@ -120,7 +113,7 @@ if make_label:
@@ -120,7 +113,7 @@ if make_label:
|
|
|
|
|
wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset) |
|
|
|
|
label_dir_ = os.path.join(label_dir, dataset) |
|
|
|
|
dictionary_file = os.path.join(label_dir_, 'temp.dic') |
|
|
|
|
fh.make_new_directory(label_dir_) |
|
|
|
|
fh.make_new_directory(label_dir_, existing_dir='leave') |
|
|
|
|
|
|
|
|
|
# list of scripts |
|
|
|
|
with open(script_list, "rt", encoding="utf-8") as fin: |
|
|
|
@ -135,56 +128,48 @@ if make_label:
@@ -135,56 +128,48 @@ if make_label:
|
|
|
|
|
sentence_htk = fame_functions.word2htk(sentence) |
|
|
|
|
|
|
|
|
|
wav_file = os.path.join(wav_dir_, filename + '.wav') |
|
|
|
|
if os.path.exists(wav_file) and pyhtk.can_be_ascii(sentence_htk) == 0: |
|
|
|
|
if pyhtk.create_dictionary_without_log( |
|
|
|
|
sentence_htk, global_ded, dictionary_file, lexicon_htk) == 0: |
|
|
|
|
if os.path.exists(wav_file) and chtk.can_be_ascii(sentence_htk) == 0: |
|
|
|
|
if chtk.get_number_of_missing_words( |
|
|
|
|
sentence_htk, dictionary_file) == 0: |
|
|
|
|
# when the file name is too long, the HDMan command does not work. |
|
|
|
|
# therefore a temporary dictionary_file is made first and then renamed. |
|
|
|
|
shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic')) |
|
|
|
|
|
|
|
|
|
label_file = os.path.join(label_dir_, filename + '.lab') |
|
|
|
|
pyhtk.create_label_file(sentence_htk, label_file) |
|
|
|
|
chtk.create_label_file(sentence_htk, label_file) |
|
|
|
|
else: |
|
|
|
|
os.remove(dictionary_file) |
|
|
|
|
|
|
|
|
|
print("elapsed time: {}".format(time.time() - timer_start)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## ======================= make other required files ======================= |
|
|
|
|
if make_htk_files: |
|
|
|
|
## ======================= make master label files ======================= |
|
|
|
|
if make_mlf: |
|
|
|
|
timer_start = time.time() |
|
|
|
|
print("==== making files required for HTK ====") |
|
|
|
|
print("==== making master label files ====") |
|
|
|
|
|
|
|
|
|
print(">>> making a phonelist...") |
|
|
|
|
pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt) |
|
|
|
|
# train_2002_gongfansaken_10347.lab is empty and should be removed. |
|
|
|
|
empty_lab_file = os.path.join(label_dir, 'train', 'train_2002_gongfansaken_10347.lab') |
|
|
|
|
empty_dic_file = empty_lab_file.replace('.lab', '.dic') |
|
|
|
|
|
|
|
|
|
if os.path.exists(empty_lab_file): |
|
|
|
|
os.remove(empty_lab_file) |
|
|
|
|
if os.path.exists(empty_dic_file): |
|
|
|
|
os.remove(empty_dic_file) |
|
|
|
|
|
|
|
|
|
for dataset in dataset_list: |
|
|
|
|
wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset) |
|
|
|
|
#wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset) |
|
|
|
|
feature_dir_ = os.path.join(feature_dir, dataset) |
|
|
|
|
label_dir_ = os.path.join(label_dir, dataset) |
|
|
|
|
mlf_word = os.path.join(label_dir, dataset + '_word.mlf') |
|
|
|
|
mlf_phone = os.path.join(label_dir, dataset + '_phone.mlf') |
|
|
|
|
|
|
|
|
|
#print(">>> making a script file for {}...".format(dataset)) |
|
|
|
|
#listdir = glob.glob(os.path.join(wav_dir_, '*.dic')) |
|
|
|
|
#mfc_list = [filename.replace(wav_dir_, feature_dir_).replace('.dic', '.mfc') for filename in listdir] |
|
|
|
|
#hcompv_scp = os.path.join(tmp_dir, dataset + '.scp') |
|
|
|
|
#with open(hcompv_scp, 'wb') as f: |
|
|
|
|
# f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii')) |
|
|
|
|
|
|
|
|
|
print(">>> making a mlf file for {}...".format(dataset)) |
|
|
|
|
lab_list = glob.glob(os.path.join(label_dir_, '*.lab')) |
|
|
|
|
with open(mlf_word, 'wb') as fmlf: |
|
|
|
|
fmlf.write(bytes('#!MLF!#\n', 'ascii')) |
|
|
|
|
for label_file in lab_list: |
|
|
|
|
filename = os.path.basename(label_file) |
|
|
|
|
fmlf.write(bytes('\"*/{}\"\n'.format(filename), 'ascii')) |
|
|
|
|
with open(label_file) as flab: |
|
|
|
|
lines = flab.read() |
|
|
|
|
fmlf.write(bytes(lines + '.\n', 'ascii')) |
|
|
|
|
|
|
|
|
|
print(">>> generating phone level transcription for {}...".format(dataset)) |
|
|
|
|
pyhtk.mlf_word2phone(lexicon_htk, mlf_phone, mlf_word, mkphones_led) |
|
|
|
|
print("elapsed time: {}".format(time.time() - timer_start)) |
|
|
|
|
print(">>> generating a word level mlf file for {}...".format(dataset)) |
|
|
|
|
chtk.label2mlf(label_dir_, mlf_word) |
|
|
|
|
print(">>> generating a phone level mlf file for {}...".format(dataset)) |
|
|
|
|
chtk.mlf_word2phone(mlf_phone, mlf_word) |
|
|
|
|
|
|
|
|
|
print("elapsed time: {}".format(time.time() - timer_start)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## ======================= extract features ======================= |
|
|
|
@ -196,7 +181,7 @@ if extract_features:
@@ -196,7 +181,7 @@ if extract_features:
|
|
|
|
|
wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset) |
|
|
|
|
label_dir_ = os.path.join(label_dir, dataset) |
|
|
|
|
feature_dir_ = os.path.join(feature_dir, dataset) |
|
|
|
|
fh.make_new_directory(feature_dir_) |
|
|
|
|
fh.make_new_directory(feature_dir_, existing_dir='delete') |
|
|
|
|
|
|
|
|
|
# a script file for HCopy |
|
|
|
|
print(">>> making a script file for HCopy...") |
|
|
|
@ -212,12 +197,15 @@ if extract_features:
@@ -212,12 +197,15 @@ if extract_features:
|
|
|
|
|
os.path.join(wav_dir_, os.path.basename(lab_file).replace('.lab', '.wav')) + '\t' |
|
|
|
|
+ os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc')) |
|
|
|
|
for lab_file in lab_list] |
|
|
|
|
|
|
|
|
|
if os.path.exists(empty_mfc_file): |
|
|
|
|
os.remove(empty_mfc_file) |
|
|
|
|
with open(hcopy_scp.name, 'wb') as f: |
|
|
|
|
f.write(bytes('\n'.join(feature_list), 'ascii')) |
|
|
|
|
|
|
|
|
|
# extract features. |
|
|
|
|
print(">>> extracting features on {}...".format(dataset)) |
|
|
|
|
pyhtk.wav2mfc(config_hcopy, hcopy_scp.name) |
|
|
|
|
chtk.wav2mfc(hcopy_scp.name) |
|
|
|
|
os.remove(hcopy_scp.name) |
|
|
|
|
|
|
|
|
|
# make hcompv.scp. |
|
|
|
@ -235,21 +223,18 @@ if extract_features:
@@ -235,21 +223,18 @@ if extract_features:
|
|
|
|
|
if flat_start: |
|
|
|
|
timer_start = time.time() |
|
|
|
|
print('==== flat start ====') |
|
|
|
|
pyhtk.flat_start(config_train, hcompv_scp_train, model0_dir, prototype) |
|
|
|
|
feature_size = 39 |
|
|
|
|
model0_dir = os.path.join(model_dir, 'hmm0') |
|
|
|
|
fh.make_new_directory(model0_dir, existing_dir='leave') |
|
|
|
|
|
|
|
|
|
chtk.flat_start(hcompv_scp_train, model0_dir, feature_size) |
|
|
|
|
|
|
|
|
|
# allocate mean & variance to all phones in the phone list |
|
|
|
|
print('>>> allocating mean & variance to all phones in the phone list...') |
|
|
|
|
pyhtk.create_hmmdefs( |
|
|
|
|
chtk.create_hmmdefs( |
|
|
|
|
os.path.join(model0_dir, proto_name), |
|
|
|
|
os.path.join(model0_dir, 'hmmdefs'), |
|
|
|
|
phonelist_txt) |
|
|
|
|
|
|
|
|
|
# make macros |
|
|
|
|
print('>>> making macros...') |
|
|
|
|
with open(os.path.join(model0_dir, 'vFloors')) as f: |
|
|
|
|
lines = f.read() |
|
|
|
|
with open(os.path.join(model0_dir, 'macros'), 'wb') as f: |
|
|
|
|
f.write(bytes('~o <MFCC_0_D_A> <VecSize> 39\n' + lines, 'ascii')) |
|
|
|
|
os.path.join(model0_dir, 'hmmdefs') |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
print("elapsed time: {}".format(time.time() - timer_start)) |
|
|
|
|
|
|
|
|
@ -362,4 +347,24 @@ if train_model_with_sp_align_mlf:
@@ -362,4 +347,24 @@ if train_model_with_sp_align_mlf:
|
|
|
|
|
hcompv_scp_train, phonelist_txt, |
|
|
|
|
mlf_file=mlf_file_train_aligned, |
|
|
|
|
macros=os.path.join(modeln_dir_pre, 'macros')) |
|
|
|
|
print("elapsed time: {}".format(time.time() - timer_start)) |
|
|
|
|
print("elapsed time: {}".format(time.time() - timer_start)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# train triphone. |
|
|
|
|
if train_triphone:
    # Run HERest once on the triphone models: the macros/hmmdefs under
    # hmm1_tri/iter0 are the input and hmm1_tri/iter1 is the output directory.
    triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')
    macros = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'macros')
    hmmdefs = os.path.join(model_dir, 'hmm1_tri', 'iter0', 'hmmdefs')
    model_out_dir = os.path.join(model_dir, 'hmm1_tri', 'iter1')

    # NOTE(review): option meanings below follow the HTK Book's HERest
    # reference -- confirm against the installed HTK version.
    run_command([
        'HERest', '-B',                     # binary model output
        '-C', config_train,                 # training configuration
        '-I', triphone_mlf,                 # triphone-level master label file
        '-t', '250.0', '150.0', '1000.0',   # pruning thresholds
        # The comma after 'stats' matters: without it Python concatenates the
        # adjacent literals into 'stats-S', which both renames the statistics
        # file and drops the '-S' flag in front of the script file.
        # NOTE(review): 'stats' is a relative path, so the file lands in the
        # current working directory -- confirm that is intended.
        '-s', 'stats',
        '-S', hcompv_scp_train,             # list of training feature files
        '-H', macros,
        '-H', hmmdefs,
        '-M', model_out_dir,
        os.path.join(config_dir, 'triphonelist.txt'),
    ])