import os
import sys
from collections import Counter

import numpy as np
import pandas as pd

import defaultfiles as default
sys.path.append(default.forced_alignment_module_dir)
from forced_alignment import convert_phone_set


def make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp):
    """ Make a script file for HCopy using the filelist in FAME! corpus.

    param FAME_dir: root directory of the FAME! corpus.
    param dataset: dataset name; the file list '<dataset>list.txt' is read
        from the 'fame/filelists' directory under FAME_dir.
    param feature_dir: directory used to build the path of each .mfc file.
    param hcopy_scp: output script file; one 'wav_file<TAB>mfc_file' pair
        is written per line.
    """
    filelist_txt = FAME_dir + '\\fame\\filelists\\' + dataset + 'list.txt'
    with open(filelist_txt) as fin:
        filelist = fin.read().split('\n')

    wav_dir = FAME_dir + '\\fame\\wav\\' + dataset + '\\'
    with open(hcopy_scp, 'w') as fout:
        for entry in filelist:
            filename = entry.replace('.TextGrid', '')
            if len(filename) <= 3:
                # remove '.', '..' and ''
                continue
            wav_file = wav_dir + filename + '.wav'
            mfc_file = feature_dir + '\\' + filename + '.mfc'
            fout.write(wav_file + '\t' + mfc_file + '\n')


def make_filelist(input_dir, output_txt):
    """ Make a list of files in the input_dir.

    param input_dir: directory whose entries are listed (os.listdir).
    param output_txt: output text file; one 'input_dir + separator + name'
        path is written per line.
    """
    with open(output_txt, 'w') as fout:
        for filename in os.listdir(input_dir):
            fout.write(input_dir + '\\' + filename + '\n')


def make_htk_dict(word, pronvar_, fileDic, output_type):
    """ Make dict files which can be used for HTK.

    param word: target word; it is written in upper case.
    param pronvar_: pronunciation variants of the word. The entries are
        counted with collections.Counter, so they must be hashable
        (e.g. strings).
        NOTE(review): the previous documentation described this as an
        nx2 (WORD, pronunciation) ndarray -- confirm against the callers.
    param fileDic: output dic file.
    param output_type:
        0: full (every unique variant),
        1: statistics (count, percentage, word, variant),
        2: variants whose frequency is below 2% are removed,
        3: the three most frequent variants.
    raises ValueError: when output_type is not one of 0, 1, 2, 3.
    """
    if output_type not in (0, 1, 2, 3):
        raise ValueError('output_type should be an integer between 0 and 3.')

    WORD = word.upper()

    if output_type == 0:  # full
        with open(fileDic, 'w') as f:
            for pvar in np.unique(pronvar_):
                f.write('{0}\t{1}\n'.format(WORD, pvar))
        return

    c = Counter(pronvar_)
    total_num = sum(c.values())
    with open(fileDic, 'w') as f:
        if output_type == 3:  # top 3
            for key, _ in c.most_common(3):
                f.write('{0}\t{1}\n'.format(WORD, key))
            return

        for key, value in c.items():
            percentage = value / total_num * 100
            if output_type == 1:  # all, with statistics
                f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
            elif percentage >= 2:
                # output_type == 2: keep only the variants that reach 2 percent;
                # the rarer ones are the entries to be removed.
                f.write('{0}\t{1}\n'.format(WORD, key))


def get_phonelist(lexicon_file):
    """ Make a list of phones which appears in the lexicon.

    param lexicon_file: tab-separated lexicon file (utf-8); the second
        column holds the pronunciation as whitespace-separated phones.
    return: set of all phones found in the second column.
    """
    with open(lexicon_file, "rt", encoding="utf-8") as fin:
        lines = fin.read().split('\n')

    phonelist = set()
    for line in lines:
        fields = line.split('\t')
        if len(fields) > 1:
            phonelist.update(fields[1].split())
    return phonelist


def find_phone(lexicon_file, phone):
    """ Search where the phone is used in the lexicon.

    param lexicon_file: tab-separated lexicon file (utf-8).
    param phone: string searched for in the second column. This is a
        substring test on the whole pronunciation string, not a match
        against individual phones.
    return: list of the matching lines, each split on tabs.
    """
    with open(lexicon_file, "rt", encoding="utf-8") as fin:
        lines = fin.read().split('\n')

    extracted = []
    for line in lines:
        fields = line.split('\t')
        if len(fields) > 1 and phone in fields[1]:
            extracted.append(fields)
    return extracted


def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
    """ Convert a lexicon file from IPA to HTK format for FAME! corpus.

    param lexicon_file_in: tab-separated lexicon (word, pronunciation).
    param lexicon_file_out: output lexicon (utf-8); the word is written in
        upper case followed by the converted pronunciation.

    Spaces are removed from the pronunciation before it is passed to
    convert_phone_set.ipa2famehtk. Entries whose converted pronunciation
    contains 'ceh' or 'sh' are not written.
    """
    lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation'])
    with open(lexicon_file_out, "w", encoding="utf-8") as fout:
        for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
            pronunciation_no_space = pronunciation.replace(' ', '')
            pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)
            if 'ceh' in pronunciation_famehtk or 'sh' in pronunciation_famehtk:
                continue
            fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))


def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
    """ Combine two lexicon files and sort by words.

    param lexicon_file1: first tab-separated lexicon (word, pronunciation).
    param lexicon_file2: second tab-separated lexicon (word, pronunciation).
    param lexicon_out: output file; tab-separated, utf-8, without header
        and index, sorted by word in ascending order.
    """
    columns = ['word', 'pronunciation']
    lex1 = pd.read_table(lexicon_file1, names=columns)
    lex2 = pd.read_table(lexicon_file2, names=columns)
    lex = pd.concat([lex1, lex2]).sort_values(by='word', ascending=True)
    lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')


def read_fileFA(fileFA):
    """ Read the result file of HTK forced alignment.

    This function only works when input is one word.

    param fileFA: result file of the forced alignment.
    return: the third whitespace-separated field of every line that has
        at least three fields, joined with single spaces.
    """
    with open(fileFA, 'r') as f:
        lines = f.read().split('\n')

    phones = []
    for line in lines:
        line_split = line.split()
        # The third field is read, so at least three fields are required.
        if len(line_split) > 2:
            phones.append(line_split[2])
    return ' '.join(phones)


def fame_pronunciation_variant(ipa):
    """ Make the FAME! pronunciation variants of an IPA pronunciation.

    Symbols with a single counterpart are replaced directly. The symbols
    'ø:', 'œ' and 'ɒ' each have a short and a long counterpart, so every
    combination of them is generated.

    param ipa: pronunciation as a string of IPA symbols.
    return: [ipa] (a list with the converted string) when none of 'ø:',
        'œ', 'ɒ' is present; otherwise a numpy array with the unique
        variants (np.unique output).
    """
    one_to_one = (
        ('æ', 'ɛ'),
        ('ɐ', 'a'),
        ('ɑ', 'a'),
        ('ɾ', 'r'),
        ('ɹ', 'r'),  # ???
        ('ʁ', 'r'),
        ('ʀ', 'r'),  # ???
        ('ʊ', 'u'),
        ('χ', 'x'),
    )
    for source, target in one_to_one:
        ipa = ipa.replace(source, target)

    # Symbols which are expanded into both a short and a long variant.
    one_to_two = (
        ('ø:', ('ö', 'ö:')),
        ('œ', ('ɔ̈', 'ɔ̈:')),
        ('ɒ', ('ɔ̈', 'ɔ̈:')),
    )

    pronvar_list = [ipa]
    # Each pass resolves one kind of symbol per variant; repeat until no
    # expandable symbol is left in any of the variants.
    while any(source in ' '.join(pronvar_list) for source, _ in one_to_two):
        expanded = []
        for p in pronvar_list:
            for source, targets in one_to_two:
                if source in p:
                    for target in targets:
                        expanded.append(p.replace(source, target))
        pronvar_list = np.unique(expanded)
    return pronvar_list


def make_fame2ipa_variants(fame):
    """ Make the IPA variants of a pronunciation written in FAME! symbols.

    param fame: pronunciation as a string of FAME! symbols.
    return: list whose first element is fame itself, followed by one
        string per replacement rule. A rule which does not apply yields
        a copy of fame, so the list may contain duplicates.
    """
    replacements = (
        ('ɛ', 'æ'),
        ('a', 'ɐ'),
        ('a', 'ɑ'),
        ('r', 'ɾ'),
        ('r', 'ɹ'),
        ('r', 'ʁ'),
        ('r', 'ʀ'),
        ('u', 'ʊ'),
        ('x', 'χ'),
        ('ö', 'ø:'),
        ('ö:', 'ø:'),
        ('ɔ̈', 'œ'),
        ('ɔ̈:', 'œ'),
        ('ɔ̈', 'ɒ'),
        ('ɔ̈:', 'ɒ'),
    )
    ipa = [fame]
    for source, target in replacements:
        ipa.append(fame.replace(source, target))
    return ipa