10 changed files with 364 additions and 285 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,105 +1,127 @@
import os
import sys
import os
os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')

import tempfile
import configparser
import subprocess
from collections import Counter
#import configparser
#import subprocess
#from collections import Counter

#import numpy as np
#import pandas as pd

import numpy as np
import pandas as pd
import fame_functions
import defaultfiles as default
sys.path.append(default.pyhtk_dir)
import pyhtk
sys.path.append(default.toolbox_dir)
import file_handling


## ======================= user define =======================
repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
curr_dir = repo_dir + '\\acoustic_model'
config_ini = curr_dir + '\\config.ini'
output_dir = 'C:\\OneDrive\\Research\\rug\\experiments\\friesian\\acoustic_model'
forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment'
#repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
#curr_dir = repo_dir + '\\acoustic_model'
#config_ini = curr_dir + '\\config.ini'
#output_dir = 'C:\\OneDrive\\Research\\rug\\experiments\\friesian\\acoustic_model'
#forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment'

dataset_list = ['devel', 'test', 'train']

# procedure
extract_features = 0
make_feature_list = 0
conv_lexicon = 0
check_lexicon = 0
make_mlf = 0
combine_files = 0
flat_start = 0
train_model = 1
extract_features = 1
#conv_lexicon = 0
#check_lexicon = 0
#make_mlf = 0
#combine_files = 0
#flat_start = 0
#train_model = 1


sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir))
sys.path.append(forced_alignment_module)
from forced_alignment import convert_phone_set
#sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir))
#sys.path.append(forced_alignment_module)
#from forced_alignment import convert_phone_set

import acoustic_model_functions as am_func

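# --- Illustrative note (not part of the original script) ---
# The `default` module imported above (defaultfiles.py) is expected to expose
# the directory constants used in this script. A minimal sketch, with
# placeholder paths rather than the author's actual configuration:
#
#   # defaultfiles.py
#   pyhtk_dir = r'C:\path\to\pyhtk'          # repo providing the pyhtk module
#   toolbox_dir = r'C:\path\to\toolbox'      # repo providing file_handling
#   htk_dir = r'C:\path\to\htk_working_dir'  # mfc/ and tmp/ are created under this
#   fame_dir = r'C:\path\to\fame_corpus'     # root of the FAME! corpus
#   config_hcopy = r'C:\path\to\config.HCopy'
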
## ======================= load variables =======================

config = configparser.ConfigParser()
config.sections()
config.read(config_ini)

config_hcopy = config['Settings']['config_hcopy']
config_train = config['Settings']['config_train']
mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl']
FAME_dir = config['Settings']['FAME_dir']

lex_asr = FAME_dir + '\\lexicon\\lex.asr'
lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'
lex_oov = FAME_dir + '\\lexicon\\lex.oov'
lex_oov_htk = FAME_dir + '\\lexicon\\lex.oov_htk'
#lex_ipa = FAME_dir + '\\lexicon\\lex.ipa'
#lex_ipa_ = FAME_dir + '\\lexicon\\lex.ipa_'
#lex_ipa_htk = FAME_dir + '\\lexicon\\lex.ipa_htk'
lex_htk = FAME_dir + '\\lexicon\\lex_original.htk'
lex_htk_ = FAME_dir + '\\lexicon\\lex.htk'

hcompv_scp = output_dir + '\\scp\\combined.scp'
combined_mlf = output_dir + '\\label\\combined.mlf'

model_dir = output_dir + '\\model'
model0_dir = model_dir + '\\hmm0'
proto_init = model_dir + '\\proto38'
proto_name = 'proto'
phonelist = output_dir + '\\config\\phonelist_friesian.txt'
hmmdefs_name = 'hmmdefs'

#config = configparser.ConfigParser()
#config.sections()
#config.read(config_ini)

#config_hcopy = config['Settings']['config_hcopy']
#config_train = config['Settings']['config_train']
#mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl']
#FAME_dir = config['Settings']['FAME_dir']

#lex_asr = FAME_dir + '\\lexicon\\lex.asr'
#lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'
#lex_oov = FAME_dir + '\\lexicon\\lex.oov'
#lex_oov_htk = FAME_dir + '\\lexicon\\lex.oov_htk'
##lex_ipa = FAME_dir + '\\lexicon\\lex.ipa'
##lex_ipa_ = FAME_dir + '\\lexicon\\lex.ipa_'
##lex_ipa_htk = FAME_dir + '\\lexicon\\lex.ipa_htk'
#lex_htk = FAME_dir + '\\lexicon\\lex_original.htk'
#lex_htk_ = FAME_dir + '\\lexicon\\lex.htk'

#hcompv_scp = output_dir + '\\scp\\combined.scp'
#combined_mlf = output_dir + '\\label\\combined.mlf'

#model_dir = output_dir + '\\model'
#model0_dir = model_dir + '\\hmm0'
#proto_init = model_dir + '\\proto38'
#proto_name = 'proto'
#phonelist = output_dir + '\\config\\phonelist_friesian.txt'
#hmmdefs_name = 'hmmdefs'

feature_dir = os.path.join(default.htk_dir, 'mfc')
if not os.path.exists(feature_dir):
    os.makedirs(feature_dir)
tmp_dir = os.path.join(default.htk_dir, 'tmp')
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

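# --- Illustrative note (not part of the original script) ---
# The configparser calls at the top of this section assume a config.ini with a
# [Settings] section providing the four keys read above. A minimal example,
# with placeholder values only, would be:
#
#   [Settings]
#   config_hcopy = C:\path\to\config.HCopy
#   config_train = C:\path\to\config.train
#   mkhmmdefs_pl = C:\path\to\mkhmmdefs.pl
#   FAME_dir = C:\path\to\fame_corpus
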
## ======================= extract features =======================
if extract_features:
    print("==== extract features ====\n")

    for dataset in dataset_list:
        print(dataset)

    #for dataset in ['test']:
        print('==== {} ===='.format(dataset))

        # a script file for HCopy
        print(">>> making a script file for HCopy... \n")
        hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
        hcopy_scp.close()
        #hcopy_scp = os.path.join(default.htk_dir, 'tmp', 'HCopy.scp')

        # get a list of features (hcopy.scp) from the filelist in FAME! corpus
        feature_dir = output_dir + '\\mfc\\' + dataset
        am_func.make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp.name)
        feature_dir_ = os.path.join(feature_dir, dataset)
        if not os.path.exists(feature_dir_):
            os.makedirs(feature_dir_)

        # extract features
        subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name
        subprocess.call(subprocessStr, shell=True)
        print(">>> extracting features... \n")
        fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)

        #subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name
        #subprocess.call(subprocessStr, shell=True)
        pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)

        # a script file for HCompV
        print(">>> making a script file for HCompV... \n")

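# --- Illustrative sketch (not part of the original script) ---
# An HCopy script file such as the one built above lists one "source target"
# pair per line. The helper below is an assumption-based stand-in for the
# idea, not the actual make_hcopy_scp_from_filelist_in_fame implementation;
# the name write_hcopy_scp is hypothetical.
def write_hcopy_scp(wav_paths, feature_dir, scp_path):
    """Write an HCopy-style scp mapping each source wav to the mfc file to be created."""
    lines = []
    for wav in wav_paths:
        basename = os.path.splitext(os.path.basename(wav))[0]
        lines.append('{} {}'.format(wav, os.path.join(feature_dir, basename + '.mfc')))
    with open(scp_path, 'w') as scp:
        scp.write('\n'.join(lines) + '\n')
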
## ======================= make a list of features =======================
if make_feature_list:
    print("==== make a list of features ====\n")
#if make_feature_list:
# print("==== make a list of features ====\n")

    for dataset in dataset_list:
        print(dataset)
# for dataset in dataset_list:
# print(dataset)

        feature_dir = output_dir + '\\mfc\\' + dataset
        hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
        #feature_dir = output_dir + '\\mfc\\' + dataset
        hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')

        am_func.make_filelist(feature_dir, hcompv_scp)
        #am_func.make_filelist(feature_dir, hcompv_scp)
        file_handling.make_filelist(feature_dir_, hcompv_scp, '.mfc')

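# --- Illustrative sketch (not part of the original script) ---
# file_handling.make_filelist is called above with (feature_dir, scp_path,
# extension); its implementation is not part of this diff. A minimal stand-in
# could look like this (assumption; the real helper may differ):
def make_filelist_sketch(feature_dir, scp_path, extension):
    """List all files with the given extension and write their full paths, one per line."""
    paths = sorted(os.path.join(feature_dir, f)
                   for f in os.listdir(feature_dir) if f.endswith(extension))
    with open(scp_path, 'w') as scp:
        scp.write('\n'.join(paths) + '\n')
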
## ======================= convert lexicon from ipa to fame_htk =======================