import sys
import os

os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import tempfile
import shutil
import re          # used to filter words with Frisian diacritics in make_mlf
#import configparser
import subprocess  # used by the flat_start and train_model sections
import time

import numpy as np
import pandas as pd

import fame_functions
from phoneset import fame_ipa, fame_asr
import defaultfiles as default
sys.path.append(default.toolbox_dir)
import file_handling as fh
from htk import pyhtk


## ======================= user define =======================
# legacy paths, kept commented for reference; the combine_files, flat_start
# and train_model sections below still rely on some of them (e.g. output_dir).
#repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
#curr_dir = repo_dir + '\\acoustic_model'
#config_ini = curr_dir + '\\config.ini'
#output_dir = 'C:\\OneDrive\\Research\\rug\\experiments\\friesian\\acoustic_model'
#forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment'

dataset_list = ['devel', 'test', 'train']

# procedure: set a flag to 1 to run the corresponding step.
extract_features = 0
make_lexicon = 1
make_mlf = 0
combine_files = 0
flat_start = 0
train_model = 0


## ======================= load variables =======================
lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
lexicon_oov = os.path.join(lexicon_dir, 'lex.oov')
lexicon_htk_asr = os.path.join(default.htk_dir, 'lexicon', 'lex.htk_asr')
lexicon_htk_oov = os.path.join(default.htk_dir, 'lexicon', 'lex.htk_oov')
lexicon_htk = os.path.join(default.htk_dir, 'lexicon', 'lex.htk')

global_ded = os.path.join(default.htk_dir, 'config', 'global.ded')

#hcompv_scp = output_dir + '\\scp\\combined.scp'
#combined_mlf = output_dir + '\\label\\combined.mlf'

#model_dir = output_dir + '\\model'
#model0_dir = model_dir + '\\hmm0'
#proto_init = model_dir + '\\proto38'
#proto_name = 'proto'
#phonelist = output_dir + '\\config\\phonelist_friesian.txt'
#hmmdefs_name = 'hmmdefs'

feature_dir = os.path.join(default.htk_dir, 'mfc')
if not os.path.exists(feature_dir):
    os.makedirs(feature_dir)
tmp_dir = os.path.join(default.htk_dir, 'tmp')
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)
label_dir = os.path.join(default.htk_dir, 'label')
if not os.path.exists(label_dir):
    os.makedirs(label_dir)


## ======================= extract features =======================
if extract_features:
    for dataset in dataset_list:
        print('==== extract features on dataset {} ====\n'.format(dataset))

        # a script file for HCopy
        print(">>> making a script file for HCopy... \n")
        hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
        hcopy_scp.close()

        # get a list of features (hcopy.scp) from the filelist in the FAME! corpus
        feature_dir_ = os.path.join(feature_dir, dataset)
        if not os.path.exists(feature_dir_):
            os.makedirs(feature_dir_)

        # extract features
        print(">>> extracting features... \n")
        fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
        pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)

        # a script file for HCompV
        print(">>> making a script file for HCompV... \n")
        hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
        fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')

        os.remove(hcopy_scp.name)
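
    # For reference: each line of an HCopy script file pairs a source wav with
    # its target feature file. A minimal sketch of writing one by hand
    # (illustrative only; fame_functions.make_hcopy_scp_from_filelist_in_fame
    # does this from the FAME! filelists, and 'wav_files' is a hypothetical
    # input list):
    #
    # def write_hcopy_scp(wav_files, feature_dir_, scp_file):
    #     with open(scp_file, 'w') as fout:
    #         for wav_file in wav_files:
    #             mfc_file = os.path.join(
    #                 feature_dir_,
    #                 os.path.basename(wav_file).replace('.wav', '.mfc'))
    #             fout.write('{} {}\n'.format(wav_file, mfc_file))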


## ======================= make lexicon for HTK =======================
if make_lexicon:
    print('==== make lexicon for HTK ====\n')

    # convert each lexicon from the fame_asr phoneset to the fame_htk phoneset.
    print('>>> converting each lexicon from fame_asr phoneset to fame_htk phoneset... \n')
    fame_functions.lexicon_asr2htk(lexicon_asr, lexicon_htk_asr)
    fame_functions.lexicon_asr2htk(lexicon_oov, lexicon_htk_oov)

    # combine lexicons
    print('>>> combining lexicon files into one lexicon... \n')
    # pronunciations which are not found in lex.asr are generated using G2P
    # and listed in lex.oov, so there is no overlap between lex.asr and lex.oov.
    fame_functions.combine_lexicon(lexicon_htk_asr, lexicon_htk_oov, lexicon_htk)
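
    # lex.htk should now hold one HTK dictionary entry per line: a word
    # followed by its space-separated phone sequence, e.g. (illustrative):
    #   WURD    w u r d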

    ## =======================
    ## manually make changes to the pronunciation dictionary and save it as lex.htk
    ## =======================
    # (1) replace all tabs with a single space;
    # (2) put a '\' before any dictionary entry beginning with a single quote.
    # http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
    fame_functions.fix_single_quote(lexicon_htk)
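
    # A minimal sketch of such a fix (illustrative only, assuming a plain-text
    # lexicon file; the actual implementation lives in fame_functions):
    #
    # def _fix_lexicon(lexicon_file):
    #     with open(lexicon_file, 'r', encoding='utf-8') as fin:
    #         lines = fin.read().split('\n')
    #     fixed = []
    #     for line in lines:
    #         line = line.replace('\t', ' ')     # (1) tabs -> single space
    #         if line.startswith('\''):          # (2) escape a leading quote
    #             line = '\\' + line
    #         fixed.append(line)
    #     with open(lexicon_file, 'w', encoding='utf-8') as fout:
    #         fout.write('\n'.join(fixed))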


## ======================= make phonelist =======================
#phonelist_txt = os.path.join(default.htk_dir, 'config', 'phonelist.txt')
#pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)

#sentence = 'ien fan de minsken fan it deiferbliuw sels brúntsje visser'
#log_txt = os.path.join(default.htk_dir, 'config', 'log.txt')
#dictionary_file = os.path.join(default.htk_dir, 'config', 'test.dic')
#pyhtk.create_dictionary(
#    sentence, global_ded, log_txt, dictionary_file, lexicon_htk)
#pyhtk.create_dictionary_without_log(
#    sentence, global_ded, dictionary_file, lexicon_htk)


## ======================= make label file =======================
if make_mlf:
    for dataset in dataset_list:
        timer_start = time.time()
        print("==== generating word level transcription on dataset {} ====\n".format(dataset))

        #hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
        #hcompv_scp2 = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
        script_list = os.path.join(default.fame_dir, 'data', dataset, 'text')
        #mlf_word = output_dir + '\\label\\' + dataset + '_word.mlf'
        #mlf_phone = output_dir + '\\label\\' + dataset + '_phone.mlf'
        wav_dir = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
        dictionary_file = os.path.join(wav_dir, 'temp.dic')

        # list of scripts
        with open(script_list, "rt", encoding="utf-8") as fin:
            scripts = fin.read().split('\n')

        for line in scripts:
        #for line in ['sp0035m_train_1975_fragmentenvraaggesprekkenruilverkaveling_15413 en dat kan men nog meer']:
            # sample line:
            # sp0457m_test_1968_plakkenfryslanterhorne_2168 en dan begjinne je natuerlik
            filename_ = line.split(' ')[0]
            filename = '_'.join(filename_.split('_')[1:])
            sentence = ' '.join(line.split(' ')[1:])
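            # e.g. for the sample line above:
            #   filename_ = 'sp0457m_test_1968_plakkenfryslanterhorne_2168'
            #   filename  = 'test_1968_plakkenfryslanterhorne_2168'
            #   sentence  = 'en dan begjinne je natuerlik'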

            wav_file = os.path.join(wav_dir, filename + '.wav')
            # only consider sentences without words containing Frisian
            # diacritics; print any sentence that still fails ascii encoding.
            if len(re.findall(r'[\w]+[âêûô\'ú]+[\w]+', sentence)) == 0:
                try:
                    sentence_ascii = bytes(sentence, 'ascii')  # encodability check only
                except UnicodeEncodeError:
                    print(sentence)
                #if os.path.exists(wav_file):
                #    #dictionary_file = os.path.join(wav_dir, filename + '.dic')
                #    if pyhtk.create_dictionary_without_log(
                #            sentence, global_ded, dictionary_file, lexicon_htk) == 0:
                #        # when the file name is too long, the HDMan command does not work;
                #        # therefore a temporary dictionary_file is made first, then renamed.
                #        shutil.move(dictionary_file, os.path.join(wav_dir, filename + '.dic'))
                #        label_file = os.path.join(wav_dir, filename + '.lab')
                #        pyhtk.create_label_file(sentence, label_file)
                #    else:
                #        os.remove(dictionary_file)

        print("elapsed time: {}".format(time.time() - timer_start))

        # lexicon
        #lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation'])

        # list of features
        #with open(hcompv_scp) as fin:
        #    features = fin.read()
        #    features = features.split('\n')

        #i = 0
        #missing_words = []
        #fscp = open(hcompv_scp2, 'wt')
        #fmlf = open(mlf_word, "wt", encoding="utf-8")
        #fmlf.write("#!MLF!#\n")
        #feature_nr = 1
        #for feature in features:
        #    sys.stdout.write("\r%d/%d" % (feature_nr, len(features)))
        #    sys.stdout.flush()
        #    feature_nr += 1
        #    file_basename = os.path.basename(feature).replace('.mfc', '')

        #    # get words from scripts.
        #    try:
        #        script = scripts[scripts.str.contains(file_basename)]
        #    except IndexError:
        #        script = []

        #    if len(script) != 0:
        #        script_id = script.index[0]
        #        script_txt = script.get(script_id)
        #        script_words = script_txt.split(' ')
        #        del script_words[0]

        #        # check if all words can be found in the lexicon.
        #        SCRIPT_WORDS = []
        #        script_prons = []
        #        is_in_lexicon = 1
        #        for word in script_words:
        #            WORD = word.upper()
        #            SCRIPT_WORDS.append(WORD)
        #            extracted = lexicon_htk[lexicon_htk['word']==WORD]
        #            if len(extracted) == 0:
        #                missing_words.append(word)
        #            script_prons.append(extracted)
        #            is_in_lexicon *= len(extracted)

        #        # if all pronunciations are found in the lexicon, update scp and mlf files.
        #        if is_in_lexicon:
        #            # add the feature filename into the .scp file.
        #            fscp.write("{}\n".format(feature))
        #            i += 1

        #            # add the words to the mlf file.
        #            fmlf.write('\"*/{}.lab\"\n'.format(file_basename))
        #            #fmlf.write('{}'.format('\n'.join(SCRIPT_WORDS)))
        #            for word_ in SCRIPT_WORDS:
        #                if word_[0] == '\'':
        #                    word_ = '\\' + word_
        #                fmlf.write('{}\n'.format(word_))
        #            fmlf.write('.\n')
        #print("\n{0} has {1} samples.\n".format(dataset, i))
        #np.save(output_dir + '\\missing_words' + '_' + dataset + '.npy', missing_words)

        #fscp.close()
        #fmlf.close()


## generate phone level transcription
#print("generating phone level transcription...\n")
#mkphones = output_dir + '\\label\\mkphones0.txt'
#subprocessStr = r"HLEd -l * -d " + lex_htk_ + ' -i ' + mlf_phone + ' ' + mkphones + ' ' + mlf_word
#subprocess.call(subprocessStr, shell=True)


## ======================= combine scps and mlfs =======================
if combine_files:
    print("==== combine scps and mlfs ====\n")

    # NOTE: hcompv_scp, combined_mlf and output_dir are defined in the
    # commented-out sections above; they must be uncommented for this step.
    fscp = open(hcompv_scp, 'wt')
    fmlf = open(combined_mlf, 'wt')

    # write the MLF header once, then append the entries of every dataset.
    fmlf.write("#!MLF!#\n")
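    # An MLF starts with a '#!MLF!#' header; each utterance entry is a
    # '"*/<name>.lab"' line, one label per line, terminated by a '.'
    # (cf. the commented-out writer in the make_mlf section above).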
    for dataset in dataset_list:
        each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf'
        each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'

        with open(each_mlf, 'r') as fin:
            lines = fin.read()
            lines = lines.split('\n')
        # lines[1:] drops the '#!MLF!#' header of each individual mlf.
        fmlf.write('\n'.join(lines[1:]))

        with open(each_scp, 'r') as fin:
            lines = fin.read()
        fscp.write(lines)

    fscp.close()
    fmlf.close()


## ======================= flat start monophones =======================
if flat_start:
    # NOTE: config_train and mkhmmdefs_pl are not defined in this script;
    # hcompv_scp, model0_dir, proto_init, phonelist and hmmdefs_name are in
    # the commented-out sections above.
    subprocessStr = 'HCompV -T 1 -C ' + config_train + ' -m -v 0.01 -S ' + hcompv_scp + ' -M ' + model0_dir + ' ' + proto_init
    subprocess.call(subprocessStr, shell=True)
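    # HCompV above computes the global mean and variance of the training data
    # and ('-m') copies them into the prototype model, i.e. a flat start;
    # '-v 0.01' sets the variance floor.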

    # allocate mean & variance to all phones in the phone list
    subprocessStr = 'perl ' + mkhmmdefs_pl + ' ' + model0_dir + '\\proto38' + ' ' + phonelist + ' > ' + model0_dir + '\\' + hmmdefs_name
    subprocess.call(subprocessStr, shell=True)


## ======================= estimate monophones =======================
if train_model:
    # NOTE: config_train, combined_mlf, hcompv_scp, model_dir, model0_dir,
    # phonelist and hmmdefs_name come from the commented-out sections above.
    iter_num_max = 3
    for mix_num in [128, 256, 512, 1024]:
        for iter_num in range(1, iter_num_max+1):
            print("===== mix{}, iter{} =====".format(mix_num, iter_num))
            iter_num_pre = iter_num - 1
            modelN_dir = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num)
            if not os.path.exists(modelN_dir):
                os.makedirs(modelN_dir)

            if iter_num == 1 and mix_num == 1:
                # start from the flat-start model when training from mix 1;
                # with the mixture list above this branch never triggers.
                modelN_dir_pre = model0_dir
            else:
                modelN_dir_pre = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num_pre)

            ## re-estimation
            subprocessStr = 'HERest -T 1 -C ' + config_train + ' -v 0.01 -I ' + combined_mlf + ' -H ' + modelN_dir_pre + '\\' + hmmdefs_name + ' -M ' + modelN_dir + ' ' + phonelist + ' -S ' + hcompv_scp
            subprocess.call(subprocessStr, shell=True)

        # double the number of mixture components for the next round.
        mix_num_next = mix_num * 2
        modelN_dir_next = model_dir + '\\hmm' + str(mix_num_next) + '-0'
        if not os.path.exists(modelN_dir_next):
            os.makedirs(modelN_dir_next)

        header_file = modelN_dir + '\\mix' + str(mix_num_next) + '.hed'
        with open(header_file, 'w') as fout:
            fout.write("MU %d {*.state[2-4].mix}" % (mix_num_next))
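        # HHEd's MU command splits Gaussians until the emitting states 2-4
        # each have mix_num_next mixture components; the edited models are
        # written to the next hmm<mix>-0 directory.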

        subprocessStr = 'HHEd -T 1 -H ' + modelN_dir + '\\' + hmmdefs_name + ' -M ' + modelN_dir_next + ' ' + header_file + ' ' + phonelist
        subprocess.call(subprocessStr, shell=True)
|