Based on recommendations from linguists, the total number of phones has been reduced.
parent 5a587e0422
commit bbed340228
Binary file not shown.
@@ -5,7 +5,21 @@ VisualStudioVersion = 15.0.26730.12
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "acoustic_model", "acoustic_model\acoustic_model.pyproj", "{4D8C8573-32F0-4A62-9E62-3CE5CC680390}"
 EndProject
-Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "forced_alignment", "..\forced_alignment\forced_alignment\forced_alignment.pyproj", "{92E4D819-38D0-467A-ABEE-09662EEAA084}"
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{3DCEA49A-8FD7-4255-A223-573DCD2595E0}"
+    ProjectSection(SolutionItems) = preProject
+        ..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
+        ..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
+        ..\forced_alignment\forced_alignment\defaultfiles.py = ..\forced_alignment\forced_alignment\defaultfiles.py
+        ..\forced_alignment\forced_alignment\forced_alignment.pyproj = ..\forced_alignment\forced_alignment\forced_alignment.pyproj
+        ..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
+        ..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
+        ..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
+        ..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py
+        ..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
+        ..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
+        ..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
+        ..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
+    EndProjectSection
 EndProject
 Global
     GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -15,8 +29,6 @@ Global
     GlobalSection(ProjectConfigurationPlatforms) = postSolution
        {4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
        {4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Release|Any CPU.ActiveCfg = Release|Any CPU
-       {92E4D819-38D0-467A-ABEE-09662EEAA084}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-       {92E4D819-38D0-467A-ABEE-09662EEAA084}.Release|Any CPU.ActiveCfg = Release|Any CPU
    EndGlobalSection
    GlobalSection(SolutionProperties) = preSolution
        HideSolutionNode = FALSE
Binary file not shown.
@@ -3,7 +3,9 @@ import sys
 import tempfile
 import configparser
 import subprocess
+from collections import Counter
 
+import numpy as np
 import pandas as pd
 
 
@@ -12,13 +14,26 @@ repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
 curr_dir = repo_dir + '\\acoustic_model'
 config_ini = curr_dir + '\\config.ini'
 output_dir = 'd:\\OneDrive\\Research\\rug\\experiments\\friesian\\acoustic_model'
-forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced-alignment'
+forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment'
 
+dataset_list = ['devel', 'test', 'train']
+
+# procedure
+extract_features = 0
+make_feature_list = 0
+conv_lexicon = 0
+check_lexicon = 0
+make_mlf = 0
+combine_files = 0
+flat_start = 0
+train_model = 1
+forced_alignment = 0
 
 
 sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir))
 sys.path.append(forced_alignment_module)
 from forced_alignment import convert_phone_set
 
 
 import acoustic_model_functions as am_func
 
 
@@ -30,88 +45,294 @@ config.read(config_ini)
 
 config_hcopy = config['Settings']['config_hcopy']
 config_train = config['Settings']['config_train']
+mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl']
 FAME_dir = config['Settings']['FAME_dir']
 
-lexicon_file = FAME_dir + '\\lexicon\\lex.asr'
-dataset_list = ['devel', 'test', 'train']
+lex_asr = FAME_dir + '\\lexicon\\lex.asr'
+lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'
+lex_oov = FAME_dir + '\\lexicon\\lex.oov'
+lex_oov_htk = FAME_dir + '\\lexicon\\lex.oov_htk'
+#lex_ipa = FAME_dir + '\\lexicon\\lex.ipa'
+#lex_ipa_ = FAME_dir + '\\lexicon\\lex.ipa_'
+#lex_ipa_htk = FAME_dir + '\\lexicon\\lex.ipa_htk'
+lex_htk = FAME_dir + '\\lexicon\\lex_original.htk'
+lex_htk_ = FAME_dir + '\\lexicon\\lex.htk'
+
+hcompv_scp = output_dir + '\\scp\\combined.scp'
+combined_mlf = output_dir + '\\label\\combined.mlf'
+
+model_dir = output_dir + '\\model'
+model0_dir = model_dir + '\\hmm0'
+proto_init = model_dir + '\\proto38'
+proto_name = 'proto'
+phonelist = output_dir + '\\config\\phonelist_friesian.txt'
+hmmdefs_name = 'hmmdefs'
 
 
 ## ======================= extract features =======================
-##dataset = dataset_list[0]
-#for dataset in dataset_list:
-#    print(dataset)
-
-## make a script file for HCopy
-#hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
-#hcopy_scp.close()
-
-## using the filelist in FAME! corpus
-#feature_dir = output_dir + '\\mfc\\' + dataset
-#am_func.make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp.name)
-
-## extract features
-#subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name
-#subprocess.call(subprocessStr, shell=True)
-
-#os.remove(hcopy_scp.name)
+if extract_features:
+    print("==== extract features ====\n")
+
+    for dataset in dataset_list:
+        print(dataset)
+
+        # a script file for HCopy
+        hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
+        hcopy_scp.close()
+
+        # get a list of features (hcopy.scp) from the filelist in FAME! corpus
+        feature_dir = output_dir + '\\mfc\\' + dataset
+        am_func.make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp.name)
+
+        # extract features
+        subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name
+        subprocess.call(subprocessStr, shell=True)
 
 
 ## ======================= make a list of features =======================
-##dataset = dataset_list[2]
-#for dataset in dataset_list:
-#    print(dataset)
-
-#    feature_dir = output_dir + '\\mfc\\' + dataset
-#    hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
-
-#    am_func.make_filelist(feature_dir, hcompv_scp)
+if make_feature_list:
+    print("==== make a list of features ====\n")
+
+    for dataset in dataset_list:
+        print(dataset)
+
+        feature_dir = output_dir + '\\mfc\\' + dataset
+        hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
+
+        am_func.make_filelist(feature_dir, hcompv_scp)
 
 
-## ======================= check the phonemes used in the lexicon =======================
-phonelist = am_func.get_phonelist(lexicon_file) # 49
-phonelist_list = list(phonelist)
-
-#lines_g1 = am_func.find_phone(lexicon_file, 'g')
-#lines_g2 = am_func.find_phone(lexicon_file, 'ɡ')
+## ======================= convert lexicon from ipa to fame_htk =======================
+if conv_lexicon:
+    print('==== convert lexicon from ipa 2 fame ====\n')
+
+    # lex.asr is Kaldi compatible version of lex.ipa.
+    # to check...
+    #lexicon_ipa = pd.read_table(lex_ipa, names=['word', 'pronunciation'])
+    #with open(lex_ipa_, "w", encoding="utf-8") as fout:
+    #    for word, pronunciation in zip(lexicon_ipa['word'], lexicon_ipa['pronunciation']):
+    #        # ignore nasalization and '.'
+    #        pronunciation_ = pronunciation.replace(u'ⁿ', '')
+    #        pronunciation_ = pronunciation_.replace('.', '')
+    #        pronunciation_split = convert_phone_set.split_ipa_fame(pronunciation_)
+    #        fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split)))
+
+    # convert each lexicon from ipa description to fame_htk phoneset.
+    am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
+    am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)
+
+    # combine lexicon
+    # pronunciations which is not found in lex.asr are generated using G2P and listed in lex.oov.
+    # therefore there is no overlap between lex_asr and lex_oov.
+    am_func.combine_lexicon(lex_asr_htk, lex_oov_htk, lex_htk)
+
+
+## ======================= check if all the phones are successfully converted =======================
+if check_lexicon:
+    print("==== check if all the phones are successfully converted. ====\n")
+
+    # the phones used in the lexicon.
+    phonelist = am_func.get_phonelist(lex_htk)
+
+    # the lines which include a specific phone.
+    lines = am_func.find_phone(lex_asr, 'g')
+
+    # statistics over the lexicon
+    lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation'])
+    pronunciation = lexicon_htk['pronunciation']
+    phones_all = []
+    for word in pronunciation:
+        phones_all = phones_all + word.split()
+    c = Counter(phones_all)
+
+
+    ## =======================
+    ## manually make changes to the pronunciation dictionary and save it as lex.htk
+    ## =======================
+    # (1) Replace all tabs with single space;
+    # (2) Put a '\' before any dictionary entry beginning with single quote
+    #http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
 
 
 ## ======================= make label file =======================
-dataset = 'train'
-hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
-script_list = FAME_dir + '\\data\\' + dataset + '\\text'
-
-lexicon = pd.read_table(lexicon_file, names=['word', 'pronunciation'])
-
-with open(hcompv_scp) as fin:
-    features = fin.read()
-    features = features.split('\n')
-
-with open(script_list, "rt", encoding="utf-8") as fin:
-    scripts = fin.read()
-    scripts = pd.Series(scripts.split('\n'))
-
-feature = features[0]
-file_basename = os.path.basename(feature).replace('.mfc', '')
-
-# get words from scripts.
-script = scripts[scripts.str.contains(file_basename)]
-script_id = script.index[0]
-script_txt = script.get(script_id)
-script_words = script_txt.split(' ')
-del script_words[0]
-
-# make the label file.
-SCRIPT_WORDS = []
-script_prons = []
-all_prons_found = 1
-for word in script_words:
-    SCRIPT_WORDS.append(word.upper())
-    extracted = lexicon[lexicon['word']==word]
-    script_prons.append(extracted)
-    all_prons_found *= len(extracted)
-# make the dict file.
-
-convert_phone_set.ipa2fame(phonelist_list)
-phonelist_list
+if make_mlf:
+    print("==== make mlf ====\n")
+
+    print("generating word level transcription...\n")
+    for dataset in dataset_list:
+        hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
+        hcompv_scp2 = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
+        script_list = FAME_dir + '\\data\\' + dataset + '\\text'
+        mlf_word = output_dir + '\\label\\' + dataset + '_word.mlf'
+        mlf_phone = output_dir + '\\label\\' + dataset + '_phone.mlf'
+
+        # lexicon
+        lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation'])
+
+        # list of features
+        with open(hcompv_scp) as fin:
+            features = fin.read()
+            features = features.split('\n')
+
+        # list of scripts
+        with open(script_list, "rt", encoding="utf-8") as fin:
+            scripts = fin.read()
+            scripts = pd.Series(scripts.split('\n'))
+
+        i = 0
+        missing_words = []
+        fscp = open(hcompv_scp2, 'wt')
+        fmlf = open(mlf_word, "wt", encoding="utf-8")
+        fmlf.write("#!MLF!#\n")
+        feature_nr = 1
+        for feature in features:
+            sys.stdout.write("\r%d/%d" % (feature_nr, len(features)))
+            sys.stdout.flush()
+            feature_nr += 1
+            file_basename = os.path.basename(feature).replace('.mfc', '')
+
+            # get words from scripts.
+            try:
+                script = scripts[scripts.str.contains(file_basename)]
+            except IndexError:
+                script = []
+
+            if len(script) != 0:
+                script_id = script.index[0]
+                script_txt = script.get(script_id)
+                script_words = script_txt.split(' ')
+                del script_words[0]
+
+                # check if all words can be found in the lexicon.
+                SCRIPT_WORDS = []
+                script_prons = []
+                is_in_lexicon = 1
+                for word in script_words:
+                    WORD = word.upper()
+                    SCRIPT_WORDS.append(WORD)
+                    extracted = lexicon_htk[lexicon_htk['word']==WORD]
+                    if len(extracted) == 0:
+                        missing_words.append(word)
+                    script_prons.append(extracted)
+                    is_in_lexicon *= len(extracted)
+
+                # if all pronunciations are found in the lexicon, update scp and mlf files.
+                if is_in_lexicon:
+                    # add the feature filename into the .scp file.
+                    fscp.write("{}\n".format(feature))
+                    i += 1
+
+                    # add the words to the mlf file.
+                    fmlf.write('\"*/{}.lab\"\n'.format(file_basename))
+                    #fmlf.write('{}'.format('\n'.join(SCRIPT_WORDS)))
+                    for word_ in SCRIPT_WORDS:
+                        if word_[0] == '\'':
+                            word_ = '\\' + word_
+                        fmlf.write('{}\n'.format(word_))
+                    fmlf.write('.\n')
+        print("\n{0} has {1} samples.\n".format(dataset, i))
+        np.save(output_dir + '\\missing_words' + '_' + dataset + '.npy', missing_words)
+
+        fscp.close()
+        fmlf.close()
+
+    ## generate phone level transcription
+    print("generating phone level transcription...\n")
+    mkphones = output_dir + '\\label\\mkphones0.txt'
+    subprocessStr = r"HLEd -l * -d " + lex_htk_ + ' -i ' + mlf_phone + ' ' + mkphones + ' ' + mlf_word
+    subprocess.call(subprocessStr, shell=True)
+
+
+## ======================= combined scps and mlfs =======================
+if combine_files:
+    print("==== combine scps and mlfs ====\n")
+
+    fscp = open(hcompv_scp, 'wt')
+    fmlf = open(combined_mlf, 'wt')
+
+    fmlf.write("#!MLF!#\n")
+    for dataset in dataset_list:
+        each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf'
+        each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
+
+        with open(each_mlf, 'r') as fin:
+            lines = fin.read()
+            lines = lines.split('\n')
+        fmlf.write('\n'.join(lines[1:]))
+
+        with open(each_scp, 'r') as fin:
+            lines = fin.read()
+        fscp.write(lines)
+
+    fscp.close()
+    fmlf.close()
+
+
+## ======================= flat start monophones =======================
+if flat_start:
+    subprocessStr = 'HCompV -T 1 -C ' + config_train + ' -m -v 0.01 -S ' + hcompv_scp + ' -M ' + model0_dir + ' ' + proto_init
+    subprocess.call(subprocessStr, shell=True)
+
+    # allocate mean & variance to all phones in the phone list
+    subprocessStr = 'perl ' + mkhmmdefs_pl + ' ' + model0_dir + '\\proto38' + ' ' + phonelist + ' > ' + model0_dir + '\\' + hmmdefs_name
+    subprocess.call(subprocessStr, shell=True)
+
+
+## ======================= estimate monophones =======================
+if train_model:
+    iter_num_max = 3
+    for mix_num in [16, 32, 64, 128]:
+        for iter_num in range(1, iter_num_max+1):
+            print("===== mix{}, iter{} =====".format(mix_num, iter_num))
+            iter_num_pre = iter_num - 1
+            modelN_dir = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num)
+            if not os.path.exists(modelN_dir):
+                os.makedirs(modelN_dir)
+
+            if iter_num == 1 and mix_num == 1:
+                modelN_dir_pre = model0_dir
+            else:
+                modelN_dir_pre = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num_pre)
+
+            ## re-estimation
+            subprocessStr = 'HERest -T 1 -C ' + config_train + ' -v 0.01 -I ' + combined_mlf + ' -H ' + modelN_dir_pre + '\\' + hmmdefs_name + ' -M ' + modelN_dir + ' ' + phonelist + ' -S ' + hcompv_scp
+            subprocess.call(subprocessStr, shell=True)
+
+        mix_num_next = mix_num * 2
+        modelN_dir_next = model_dir + '\\hmm' + str(mix_num_next) + '-0'
+        if not os.path.exists(modelN_dir_next):
+            os.makedirs(modelN_dir_next)
+
+        header_file = modelN_dir + '\\mix' + str(mix_num_next) + '.hed'
+        with open(header_file, 'w') as fout:
+            fout.write("MU %d {*.state[2-4].mix}" % (mix_num_next))
+
+        subprocessStr = 'HHEd -T 1 -H ' + modelN_dir + '\\' + hmmdefs_name + ' -M ' + modelN_dir_next + ' ' + header_file + ' ' + phonelist
+        subprocess.call(subprocessStr, shell=True)
+
+
+### ======================= forced alignment =======================
+#if forced_alignment:
+#    try:
+#        scripts.run_command([
+#            'HVite','-T', '1', '-a', '-C', configHVite,
+#            '-H', AcousticModel, '-m', '-I',
+#            mlf_file, '-i', fa_file, '-S',
+#            script_file, htk_dict_file, filePhoneList
+#        ])
+#    except:
+#        print("\033[91mHVite command failed with these input files:\033[0m")
+#        print(_debug_show_file('HVite config', configHVite))
+#        print(_debug_show_file('Accoustic model', AcousticModel))
+#        print(_debug_show_file('Master Label file', mlf_file))
+#        print(_debug_show_file('Output', fa_file))
+#        print(_debug_show_file('Script file', script_file))
+#        print(_debug_show_file('HTK dictionary', htk_dict_file))
+#        print(_debug_show_file('Phoneme list', filePhoneList))
+#        raise
+
+
+##os.remove(hcopy_scp.name)
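A note on the HTK invocations above: every tool (HCopy, HLEd, HCompV, HERest, HHEd) is launched by concatenating paths into a single string and calling subprocess.call(subprocessStr, shell=True), which breaks as soon as any path contains a space and silently ignores non-zero exit codes. A minimal sketch of the HCopy call from the extract-features block written as an argument list instead; the scp path is a placeholder, not a file from this repository:

    import subprocess

    config_hcopy = 'c:\\cygwin64\\home\\Aki\\acoustic_model\\config\\config.HCopy'
    hcopy_scp_name = 'd:\\temp\\hcopy.scp'  # placeholder for the NamedTemporaryFile used above

    # Each argument reaches HCopy verbatim, so paths containing spaces need no quoting.
    # HCopy must be on PATH, as the shell=True calls in the script already assume.
    retcode = subprocess.call(['HCopy', '-C', config_hcopy, '-S', hcopy_scp_name])
    if retcode != 0:
        raise RuntimeError('HCopy failed with exit code {}'.format(retcode))

On Python 3.5+, subprocess.run([...], check=True) achieves the same in one call; the list form is shown here to stay close to the subprocess.call style already used in the script.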
@@ -1,9 +1,18 @@
 import os
 import sys
 
+import pandas as pd
+
+
+## ======================= user define =======================
 repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
 curr_dir = repo_dir + '\\acoustic_model'
+forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment'
 
 
 sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir))
+sys.path.append(forced_alignment_module)
+from forced_alignment import convert_phone_set
 
 
 def make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp):
@@ -62,3 +71,32 @@ def find_phone(lexicon_file, phone):
         if phone in pron:
             extracted.append(line)
     return extracted
+
+
+def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
+    """ Convert a lexicon file from IPA to HTK format for FAME! corpus. """
+
+    lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation'])
+    with open(lexicon_file_out, "w", encoding="utf-8") as fout:
+        for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
+            pronunciation_no_space = pronunciation.replace(' ', '')
+            pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)
+            if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk:
+                fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))
+
+
+def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
+    """ Combine two lexicon files and sort by words. """
+
+    with open(lexicon_file1, "rt", encoding="utf-8") as fin:
+        lines1 = fin.read()
+        lines1 = lines1.split('\n')
+    with open(lexicon_file2, "rt", encoding="utf-8") as fin:
+        lines2 = fin.read()
+        lines2 = lines2.split('\n')
+
+    lex1 = pd.read_table(lexicon_file1, names=['word', 'pronunciation'])
+    lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation'])
+    lex = pd.concat([lex1, lex2])
+    lex = lex.sort_values(by='word', ascending=True)
+    lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')
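combine_lexicon relies on the assumption stated in the main script's conversion block: pronunciations missing from lex.asr are generated by G2P into lex.oov, so the two lexica should be disjoint. A small sketch for verifying that assumption before combining, using the same pandas calls as combine_lexicon (the file names are shorthand for the lex_asr_htk and lex_oov_htk paths defined in the main script):

    import pandas as pd

    lex1 = pd.read_table('lex.asr_htk', names=['word', 'pronunciation'])
    lex2 = pd.read_table('lex.oov_htk', names=['word', 'pronunciation'])

    # Words present in both lexica; expected to be empty per the comment above.
    overlap = sorted(set(lex1['word']) & set(lex2['word']))
    if overlap:
        print('{0} words appear in both lexica, e.g. {1}'.format(len(overlap), overlap[:10]))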
@@ -1,4 +1,5 @@
 [Settings]
 config_hcopy = c:\cygwin64\home\Aki\acoustic_model\config\config.HCopy
 config_train = c:\cygwin64\home\Aki\acoustic_model\config\config.train
+mkhmmdefs_pl = c:\cygwin64\home\Aki\acoustic_model\src\acoustic_model\mkhmmdefs.pl
 FAME_dir = d:\OneDrive\Research\rug\experiments\friesian\corpus
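The new mkhmmdefs_pl key is read by the main script as config['Settings']['mkhmmdefs_pl'], which raises KeyError against a config.ini that predates this commit. A hedged sketch of a more forgiving lookup (the error message is illustrative, not from the repository):

    import configparser

    config = configparser.ConfigParser()
    config.read('config.ini')

    # SectionProxy.get returns the fallback instead of raising when the key is absent.
    mkhmmdefs_pl = config['Settings'].get('mkhmmdefs_pl', fallback=None)
    if mkhmmdefs_pl is None:
        raise KeyError('mkhmmdefs_pl is missing from [Settings]; update config.ini')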
acoustic_model/performance_check.py (new file, 22 lines)
@@ -0,0 +1,22 @@
+### ======================= forced alignment =======================
+#if forced_alignment:
+#    try:
+#        scripts.run_command([
+#            'HVite','-T', '1', '-a', '-C', configHVite,
+#            '-H', AcousticModel, '-m', '-I',
+#            mlf_file, '-i', fa_file, '-S',
+#            script_file, htk_dict_file, filePhoneList
+#        ])
+#    except:
+#        print("\033[91mHVite command failed with these input files:\033[0m")
+#        print(_debug_show_file('HVite config', configHVite))
+#        print(_debug_show_file('Accoustic model', AcousticModel))
+#        print(_debug_show_file('Master Label file', mlf_file))
+#        print(_debug_show_file('Output', fa_file))
+#        print(_debug_show_file('Script file', script_file))
+#        print(_debug_show_file('HTK dictionary', htk_dict_file))
+#        print(_debug_show_file('Phoneme list', filePhoneList))
+#        raise
+
+
+##os.remove(hcopy_scp.name)
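_debug_show_file is called in this stub but defined nowhere in the commit, so the block cannot run as-is even when uncommented. One plausible shape for it, entirely hypothetical, is a helper that labels each HVite input and reports whether the file exists:

    import os

    def _debug_show_file(label, path):
        # Hypothetical helper: one diagnostic line per HVite input file.
        status = 'exists' if os.path.exists(path) else 'MISSING'
        return '{0}: {1} ({2})'.format(label, path, status)

    print(_debug_show_file('HVite config', 'config.HVite'))  # illustrative path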