Browse Source

label files are extracted. hcompv_scp is made.

master
yemaozi88 3 years ago
parent
commit
322a8a0079
  1. BIN
      .vs/acoustic_model/v15/.suo
  2. 118
      acoustic_model/fame_hmm.py
  3. 2
      acoustic_model/phoneset/fame_asr.py

BIN
.vs/acoustic_model/v15/.suo

118
acoustic_model/fame_hmm.py

@ -4,6 +4,7 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import tempfile
import shutil
import glob
#import configparser
#import subprocess
import time
@ -30,8 +31,9 @@ dataset_list = ['devel', 'test', 'train']
# procedure
extract_features = 0
make_lexicon = 1
make_mlf = 0
make_lexicon = 0
make_dictionary = 0 # 4800 sec
make_htk_files = 1
combine_files = 0
flat_start = 0
train_model = 0
@ -92,11 +94,6 @@ if extract_features:
fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)
# a script file for HCompV
print(">>> making a script file for HCompV... \n")
hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')
os.remove(hcopy_scp.name)
@ -124,23 +121,11 @@ if make_lexicon:
fame_functions.fix_single_quote(lexicon_htk)
## ======================= make phonelist =======================
#phonelist_txt = os.path.join(default.htk_dir, 'config', 'phonelist.txt')
#pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)
#sentence = 'ien fan de minsken fan it deiferbliuw sels brúntsje visser'
#log_txt = os.path.join(default.htk_dir, 'config', 'log.txt')
#dictionary_file = os.path.join(default.htk_dir, 'config', 'test.dic')
#pyhtk.create_dictionary(
# sentence, global_ded, log_txt, dictionary_file, lexicon_htk)
#pyhtk.create_dictionary_without_log(
# sentence, global_ded, dictionary_file, lexicon_htk)
## ======================= make label file =======================
if make_mlf:
## ======================= make dic files =======================
if make_dictionary:
for dataset in dataset_list:
timer_start = time.time()
print("==== generating word level transcription on dataset {}\n".format(dataset))
print("==== generating HTK dictionary files on dataset {}\n".format(dataset))
#hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
#hcompv_scp2 = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
@ -161,25 +146,22 @@ if make_mlf:
filename_ = line.split(' ')[0]
filename = '_'.join(filename_.split('_')[1:])
sentence = ' '.join(line.split(' ')[1:])
sentence_htk = fame_functions.word2htk(sentence)
wav_file = os.path.join(wav_dir, filename + '.wav')
if len(re.findall(r'[\w]+[âêûô\'ú]+[\w]+', sentence))==0:
try:
sentence_ascii = bytes(sentence, 'ascii')
except UnicodeEncodeError:
print(sentence)
#if os.path.exists(wav_file):
# #dictionary_file = os.path.join(wav_dir, filename + '.dic')
# if pyhtk.create_dictionary_without_log(
# sentence, global_ded, dictionary_file, lexicon_htk) == 0:
# # when the file name is too long, HDMan command does not work.
# # therefore first temporary dictionary_file is made, then renamed.
# shutil.move(dictionary_file, os.path.join(wav_dir, filename + '.dic'))
# label_file = os.path.join(wav_dir, filename + '.lab')
# pyhtk.create_label_file(sentence, label_file)
# else:
# os.remove(dictionary_file)
if os.path.exists(wav_file):
#dictionary_file = os.path.join(wav_dir, filename + '.dic')
if pyhtk.create_dictionary_without_log(
sentence, global_ded, dictionary_file, lexicon_htk) == 0:
# HDMan does not work when the file name is too long, so the dictionary is
# first written to a temporary dictionary_file and then moved to its final name.
shutil.move(dictionary_file, os.path.join(wav_dir, filename + '.dic'))
label_file = os.path.join(wav_dir, filename + '.lab')
pyhtk.create_label_file(sentence, label_file)
else:
os.remove(dictionary_file)
print("elapsed time: {}".format(time.time() - timer_start))
# lexicon
#lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation'])
@ -244,8 +226,30 @@ if make_mlf:
# fscp.close()
# fmlf.close()
## ======================= make other required files =======================
if make_htk_files:
## phonelist
phonelist_txt = os.path.join(default.htk_dir, 'config', 'phonelist.txt')
pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)
## script files (.scp) for HCompV, one per dataset
print(">>> making a script file for HCompV... \n")
for dataset in dataset_list:
#timer_start = time.time()
wav_dir = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
listdir = glob.glob(os.path.join(wav_dir, '*.dic'))
filelist = [filename.replace(wav_dir, feature_dir).replace('.dic', '.fea') for filename in listdir]
hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
with open(hcompv_scp, 'wt', newline='\r\n') as f:
f.write('\n'.join(filelist))
## hcomp_scp
# a script file for HCompV
## generate phone level transcription
# print("generating phone level transcription...\n")
# mkphones = output_dir + '\\label\\mkphones0.txt'
# subprocessStr = r"HLEd -l * -d " + lex_htk_ + ' -i ' + mlf_phone + ' ' + mkphones + ' ' + mlf_word
@ -253,29 +257,29 @@ if make_mlf:
## ======================= combined scps and mlfs =======================
if combine_files:
print("==== combine scps and mlfs ====\n")
#if combine_files:
# print("==== combine scps and mlfs ====\n")
fscp = open(hcompv_scp, 'wt')
fmlf = open(combined_mlf, 'wt')
# fscp = open(hcompv_scp, 'wt')
# fmlf = open(combined_mlf, 'wt')
for dataset in dataset_list:
fmlf.write("#!MLF!#\n")
for dataset in dataset_list:
each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf'
each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
# for dataset in dataset_list:
# fmlf.write("#!MLF!#\n")
# for dataset in dataset_list:
# each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf'
# each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
with open(each_mlf, 'r') as fin:
lines = fin.read()
lines = lines.split('\n')
fmlf.write('\n'.join(lines[1:]))
# with open(each_mlf, 'r') as fin:
# lines = fin.read()
# lines = lines.split('\n')
# fmlf.write('\n'.join(lines[1:]))
with open(each_scp, 'r') as fin:
lines = fin.read()
fscp.write(lines)
# with open(each_scp, 'r') as fin:
# lines = fin.read()
# fscp.write(lines)
fscp.close()
fmlf.close()
# fscp.close()
# fmlf.close()
## ======================= flat start monophones =======================

2
acoustic_model/phoneset/fame_asr.py

@ -110,7 +110,7 @@ phoneset_htk = [translation_key_asr2htk.get(i, i) for i in phoneset_short]
# 'ä', 'ë', 'ï', 'ö', 'ü'
#]
translation_key_word2htk = {
'\'': '\\\'',
#'\'': '\\\'',
'í':'i1', 'é':'e1', 'ú':'u1', 'ć':'c1',
'à':'a2', 'è':'e2',
'â':'a3', 'ê':'e3', 'ô':'o3', 'û':'u3',

Loading…
Cancel
Save