import os
import sys

# Machine-specific location of the acoustic_model repository. The inner
# package directory is appended to sys.path, presumably so that modules
# living there can be imported -- TODO confirm against the callers.
repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
curr_dir = repo_dir + '\\acoustic_model'
sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir))


def make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp):
    """ Make a script file for HCopy using the filelist in FAME! corpus.

    Reads ``<FAME_dir>\\fame\\filelists\\<dataset>list.txt`` and writes one
    line per listed file to *hcopy_scp*, in the form
    ``<wav path><TAB><mfc path>``.

    Args:
        FAME_dir: root directory of the FAME! corpus.
        dataset: dataset name; used both as the prefix of the filelist
            name and as the sub-directory of ``fame\\wav``.
        feature_dir: directory that the ``.mfc`` paths point into.
        hcopy_scp: path of the script file to write (overwritten).
    """
    filelist_txt = FAME_dir + '\\fame\\filelists\\' + dataset + 'list.txt'
    with open(filelist_txt) as fin:
        entries = fin.read().split('\n')

    wav_dir = FAME_dir + '\\fame\\wav\\' + dataset
    with open(hcopy_scp, 'w') as fout:
        for entry in entries:
            filename = entry.replace('.TextGrid', '')
            # Names of three characters or fewer are skipped; this drops
            # '.', '..' and empty lines from the list.
            if len(filename) <= 3:
                continue
            wav_file = wav_dir + '\\' + filename + '.wav'
            mfc_file = feature_dir + '\\' + filename + '.mfc'
            fout.write(wav_file + '\t' + mfc_file + '\n')


def make_filelist(input_dir, output_txt):
    """ Make a list of files in the input_dir.

    Every entry returned by ``os.listdir(input_dir)`` is written to
    *output_txt* (overwritten), one per line, prefixed with *input_dir*
    and a backslash. The order is whatever ``os.listdir`` returns.
    """
    filenames = os.listdir(input_dir)
    with open(output_txt, 'w') as fout:
        for filename in filenames:
            fout.write(input_dir + '\\' + filename + '\n')


def _read_lexicon_entries(lexicon_file):
    """ Return the tab-separated fields of each usable lexicon line.

    The file is read as UTF-8. Lines with fewer than two tab-separated
    fields (e.g. blank lines) are dropped; for the remaining lines the
    second field (index 1) is the pronunciation.
    """
    with open(lexicon_file, "rt", encoding="utf-8") as fin:
        lines = fin.read().split('\n')
    rows = (line.split('\t') for line in lines)
    return [fields for fields in rows if len(fields) > 1]


def get_phonelist(lexicon_file):
    """ Make a list of phones which appears in the lexicon.

    Args:
        lexicon_file: path to a UTF-8 lexicon whose lines are
            tab-separated, with whitespace-separated phones in the
            second field.

    Returns:
        set of str: every distinct phone found in the second field.
    """
    phonelist = set()
    for fields in _read_lexicon_entries(lexicon_file):
        # Update in place rather than building a new set per line.
        phonelist.update(fields[1].split())
    return phonelist


def find_phone(lexicon_file, phone, *, whole_phone=False):
    """ Search where the phone is used in the lexicon.

    Args:
        lexicon_file: path to a UTF-8, tab-separated lexicon.
        phone: the phone (string) to look for in the second field.
        whole_phone: when False (the default, and the historical
            behaviour) *phone* is matched as a substring of the
            pronunciation, so ``'a'`` also matches ``'a:'``. When True,
            *phone* must equal one of the whitespace-separated phones.

    Returns:
        list of list of str: the tab-split fields of each matching line,
        in file order.
    """
    extracted = []
    for fields in _read_lexicon_entries(lexicon_file):
        pron = fields[1]
        haystack = pron.split() if whole_phone else pron
        if phone in haystack:
            extracted.append(fields)
    return extracted