64 lines
1.8 KiB
Python
64 lines
1.8 KiB
Python
import os
|
|
import sys
|
|
|
|
# Hard-coded location of the author's local checkout of the repository;
# the package directory is the 'acoustic_model' folder inside it.
repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
curr_dir = repo_dir + '\\acoustic_model'

# Append the package directory to the module search path.
# NOTE: os.path.join drops the preceding components when a later component is
# an absolute path for the running platform, so on Windows (where curr_dir is
# absolute) the appended entry is simply curr_dir.
_script_parent = os.path.dirname(sys.path[0])
sys.path.append(os.path.join(_script_parent, curr_dir))
|
|
|
|
|
|
def make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp):
    """Make a script file for HCopy using the filelist in FAME! corpus.

    The filelist ``<FAME_dir>/fame/filelists/<dataset>list.txt`` is read and,
    for every usable entry, one line is written to ``hcopy_scp`` consisting of
    the wav path, a tab, the mfc path and a newline.

    Paths are assembled with ``os.path.join`` so the separator follows the
    running platform instead of being fixed to a backslash.

    Args:
        FAME_dir: Root directory of the FAME! corpus.
        dataset: Dataset name; it prefixes the filelist name
            (``<dataset>list.txt``) and names the wav sub-directory.
        feature_dir: Directory used for the ``.mfc`` paths in the script.
        hcopy_scp: Path of the script file to write (opened in ``'w'`` mode,
            so an existing file is overwritten).

    Raises:
        OSError: If the filelist cannot be read or ``hcopy_scp`` cannot be
            opened for writing.
    """
    filelist_txt = os.path.join(FAME_dir, 'fame', 'filelists', dataset + 'list.txt')
    # Read the whole list first so a missing filelist fails before the
    # output file is created or truncated.
    with open(filelist_txt) as fin:
        filelist = fin.read().split('\n')

    wav_dir = os.path.join(FAME_dir, 'fame', 'wav', dataset)

    with open(hcopy_scp, 'w') as fout:
        for filename_ in filelist:
            filename = filename_.replace('.TextGrid', '')

            # Entries of three characters or fewer are skipped; this removes
            # '.', '..' and '' (e.g. the empty string after the last newline).
            if len(filename) <= 3:
                continue

            wav_file = os.path.join(wav_dir, filename + '.wav')
            mfc_file = os.path.join(feature_dir, filename + '.mfc')
            fout.write(wav_file + '\t' + mfc_file + '\n')
|
|
|
|
|
|
def make_filelist(input_dir, output_txt):
    """Make a list of files in the input_dir.

    Every name returned by ``os.listdir(input_dir)`` is joined to
    ``input_dir`` and written to ``output_txt``, one path per line, in the
    order ``os.listdir`` returns them.

    Paths are assembled with ``os.path.join`` so the separator follows the
    running platform instead of being fixed to a backslash.

    Args:
        input_dir: Directory whose entries are listed.
        output_txt: Path of the text file to write (opened in ``'w'`` mode,
            so an existing file is overwritten).

    Raises:
        OSError: If ``input_dir`` cannot be listed or ``output_txt`` cannot
            be opened for writing.
    """
    # List before opening the output, so a newly created output file located
    # inside input_dir is not part of its own listing.
    filenames = os.listdir(input_dir)

    with open(output_txt, 'w') as fout:
        fout.writelines(
            os.path.join(input_dir, filename) + '\n' for filename in filenames
        )
|
|
|
|
|
|
def get_phonelist(lexicon_file):
    """Collect the set of phones that appear in a lexicon file.

    The lexicon is read as UTF-8 text. Each line is split on tabs; lines with
    fewer than two tab-separated fields are ignored. The second field is
    split on whitespace and every resulting token is added to the result.

    Args:
        lexicon_file: Path of the tab-separated lexicon file.

    Returns:
        A ``set`` of the phone strings found (empty if none were found).
    """
    with open(lexicon_file, "rt", encoding="utf-8") as lexicon:
        entries = lexicon.read().split('\n')

    phones = set()
    for entry in entries:
        columns = entry.split('\t')
        if len(columns) < 2:
            # No pronunciation field on this line.
            continue
        phones.update(columns[1].split())
    return phones
|
|
|
|
|
|
def find_phone(lexicon_file, phone):
    """Search where the phone is used in the lexicon.

    The lexicon is read as UTF-8 text and each line is split on tabs. A line
    is reported when it has at least two fields and ``phone`` occurs in the
    second field. The check is a plain substring test on the whole
    pronunciation string, so ``phone`` also matches inside longer symbols.

    Args:
        lexicon_file: Path of the tab-separated lexicon file.
        phone: String to look for in the pronunciation field.

    Returns:
        A list with one item per matching line, each item being the list of
        that line's tab-separated fields, in file order.
    """
    with open(lexicon_file, "rt", encoding="utf-8") as lexicon:
        rows = [raw.split('\t') for raw in lexicon.read().split('\n')]

    # Keep rows that have a pronunciation field containing the query.
    return [row for row in rows if len(row) > 1 and phone in row[1]]