import os
os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')

import sys
from collections import Counter
import pickle

import numpy as np
import pandas as pd

import defaultfiles as default

#sys.path.append(default.forced_alignment_module_dir)
#from forced_alignment import convert_phone_set


#def find_phone(lexicon_file, phone):
#    """ Search where the phone is used in the lexicon. """
#    with open(lexicon_file, "rt", encoding="utf-8") as fin:
#        lines = fin.read()
#        lines = lines.split('\n')
#
#    extracted = []
#    for line in lines:
#        line = line.split('\t')
#        if len(line) > 1:
#            pronunciation = line[1]
#            if phone in pronunciation:
#                extracted.append(line)
#    return extracted


#def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
#    """ Convert a lexicon file from IPA to HTK format for FAME! corpus. """
#    lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation'])
#    with open(lexicon_file_out, "w", encoding="utf-8") as fout:
#        for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
#            pronunciation_no_space = pronunciation.replace(' ', '')
#            pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)
#            if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk:
#                fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))


#def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
#    """ Combine two lexicon files and sort by words. """
#    with open(lexicon_file1, "rt", encoding="utf-8") as fin:
#        lines1 = fin.read()
#        lines1 = lines1.split('\n')
#    with open(lexicon_file2, "rt", encoding="utf-8") as fin:
#        lines2 = fin.read()
#        lines2 = lines2.split('\n')
#
#    lex1 = pd.read_table(lexicon_file1, names=['word', 'pronunciation'])
#    lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation'])
#    lex = pd.concat([lex1, lex2])
#    lex = lex.sort_values(by='word', ascending=True)
#    lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')


#def read_fileFA(fileFA):
#    """
#    read the result file of HTK forced alignment.
#    this function only works when input is one word.
#    """
#    with open(fileFA, 'r') as f:
#        lines = f.read()
#        lines = lines.split('\n')
#
#    phones = []
#    for line in lines:
#        line_split = line.split()
#        if len(line_split) > 1:
#            phones.append(line_split[2])
#    return ' '.join(phones)


#def fame_pronunciation_variant(ipa):
#    ipa = ipa.replace('æ', 'ɛ')
#    ipa = ipa.replace('ɐ', 'a')
#    ipa = ipa.replace('ɑ', 'a')
#    ipa = ipa.replace('ɾ', 'r')
#    ipa = ipa.replace('ɹ', 'r')  # ???
#    ipa = ipa.replace('ʁ', 'r')
#    ipa = ipa.replace('ʀ', 'r')  # ???
#    ipa = ipa.replace('ʊ', 'u')
#    ipa = ipa.replace('χ', 'x')
#
#    pronvar_list = [ipa]
#    while 'ø:' in ' '.join(pronvar_list) or 'œ' in ' '.join(pronvar_list) or 'ɒ' in ' '.join(pronvar_list):
#        pronvar_list_ = []
#        for p in pronvar_list:
#            if 'ø:' in p:
#                pronvar_list_.append(p.replace('ø:', 'ö'))
#                pronvar_list_.append(p.replace('ø:', 'ö:'))
#            if 'œ' in p:
#                pronvar_list_.append(p.replace('œ', 'ɔ̈'))
#                pronvar_list_.append(p.replace('œ', 'ɔ̈:'))
#            if 'ɒ' in p:
#                pronvar_list_.append(p.replace('ɒ', 'ɔ̈'))
#                pronvar_list_.append(p.replace('ɒ', 'ɔ̈:'))
#        pronvar_list = np.unique(pronvar_list_)
#    return pronvar_list


#def make_fame2ipa_variants(fame):
#    fame = 'rɛös'
#    ipa = [fame]
#    ipa.append(fame.replace('ɛ', 'æ'))
#    ipa.append(fame.replace('a', 'ɐ'))
#    ipa.append(fame.replace('a', 'ɑ'))
#    ipa.append(fame.replace('r', 'ɾ'))
#    ipa.append(fame.replace('r', 'ɹ'))
#    ipa.append(fame.replace('r', 'ʁ'))
#    ipa.append(fame.replace('r', 'ʀ'))
#    ipa.append(fame.replace('u', 'ʊ'))
#    ipa.append(fame.replace('x', 'χ'))
#    ipa.append(fame.replace('ö', 'ø:'))
#    ipa.append(fame.replace('ö:', 'ø:'))
#    ipa.append(fame.replace('ɔ̈', 'œ'))
#    ipa.append(fame.replace('ɔ̈:', 'œ'))
#    ipa.append(fame.replace('ɔ̈', 'ɒ'))
#    ipa.append(fame.replace('ɔ̈:', 'ɒ'))
#    return ipa


def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
    """ Make a script (.scp) file for HCopy using the file list of the FAME! corpus. """
    filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
    with open(filelist_txt) as fin:
        filelist = fin.read()
        filelist = filelist.split('\n')

    with open(hcopy_scp, 'w') as fout:
        for filename_ in filelist:
            filename = filename_.replace('.TextGrid', '')
            if len(filename) > 3:  # remove '.', '..' and ''
                wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
                mfc_file = os.path.join(feature_dir, filename + '.mfc')
                fout.write(wav_file + '\t' + mfc_file + '\n')


#def make_filelist(input_dir, output_txt):
#    """ Make a list of files in the input_dir. """
#    filenames = os.listdir(input_dir)
#    with open(output_txt, 'w') as fout:
#        for filename in filenames:
#            fout.write(input_dir + '\\' + filename + '\n')


#def make_htk_dict(word, pronvar_, fileDic, output_type):
#    """
#    make dict files which can be used for HTK.
#    param word: target word.
#    param pronvar_: pronunciation variants. nx2 (WORD \t pronunciation) ndarray.
#    param fileDic: output dic file.
#    param output_type: 0:full, 1:statistics, 2:frequency <2% entries are removed, 3:top 3.
#    """
#    #assert(output_type < 4 and output_type >= 0, 'output_type should be an integer between 0 and 3.')
#    WORD = word.upper()
#    if output_type == 0:  # full
#        pronvar = np.unique(pronvar_)
#        with open(fileDic, 'w') as f:
#            for pvar in pronvar:
#                f.write('{0}\t{1}\n'.format(WORD, pvar))
#    else:
#        c = Counter(pronvar_)
#        total_num = sum(c.values())
#        with open(fileDic, 'w') as f:
#            if output_type == 3:
#                for key, value in c.most_common(3):
#                    f.write('{0}\t{1}\n'.format(WORD, key))
#            else:
#                for key, value in c.items():
#                    percentage = value / total_num * 100
#                    if output_type == 1:  # all
#                        f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
#                    elif output_type == 2:  # less than 2 percent
#                        if percentage < 2:
#                            f.write('{0}\t{1}\n'.format(WORD, key))


def load_lexicon(lexicon_file):
    """ Load a tab-separated lexicon (word \t pronunciation) into a DataFrame. """
    lex = pd.read_csv(lexicon_file, delimiter='\t', header=None, encoding="utf-8")
    lex.rename(columns={0: 'word', 1: 'pronunciation'}, inplace=True)
    return lex


def get_phonelist(lexicon_asr):
    """ Return the set of phones which appear in the lexicon. """
    #with open(lexicon_file, "rt", encoding="utf-8") as fin:
    #    lines = fin.read()
    #    lines = lines.split('\n')
    #
    #    phonelist = set([])
    #    for line in lines:
    #        line = line.split('\t')
    #        if len(line) > 1:
    #            pronunciation = set(line[1].split())
    #            phonelist = phonelist | pronunciation
    lex = load_lexicon(lexicon_asr)
    return set(' '.join(lex['pronunciation']).split(' '))
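
# A minimal usage sketch (not part of the original pipeline) of the two lexicon
# helpers above, kept behind "if 0:" in the same guard style used below so it
# never runs by accident. It only assumes that lex.asr is a tab-separated
# word/pronunciation file, as it is loaded further down in this script.
if 0:
    lex_example = load_lexicon(r'f:\_corpus\FAME\lexicon\lex.asr')
    print(lex_example.head())                                          # first few word/pronunciation rows
    print(sorted(get_phonelist(r'f:\_corpus\FAME\lexicon\lex.asr')))   # phone inventory of lex.asr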
""" #with open(lexicon_file, "rt", encoding="utf-8") as fin: # lines = fin.read() # lines = lines.split('\n') # phonelist = set([]) # for line in lines: # line = line.split('\t') # if len(line) > 1: # pronunciation = set(line[1].split()) # phonelist = phonelist | pronunciation lex = load_lexicon(lexicon_asr) return set(' '.join(lex['pronunciation']).split(' ')) import time timer_start = time.time() #def get_translation_key(): dir_tmp = r'c:\Users\A.Kunikoshi\source\repos\acoustic_model\_tmp' lexicon_ipa = r'f:\_corpus\FAME\lexicon\lex.ipa' lexicon_asr = r'f:\_corpus\FAME\lexicon\lex.asr' lex_ipa = load_lexicon(lexicon_ipa) lex_asr = load_lexicon(lexicon_asr) if 0: phone_to_be_searched = get_phonelist(lexicon_asr) translation_key = dict() for word in lex_asr['word']: if np.sum(lex_asr['word'] == word) == 1 and np.sum(lex_ipa['word'] == word) == 1: asr = lex_asr[lex_asr['word'] == word].iat[0, 1] ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1] asr_list = asr.split(' ') # if there are phones which is not in phone_to_be_searched if len([True for i in asr_list if i in phone_to_be_searched]) > 0: if(len(ipa) == len(asr_list)): print("{0}: {1} --> {2}".format(word, ipa, asr)) for ipa_, asr_ in zip(ipa, asr_list): if asr_ in phone_to_be_searched: #if not translation_key[ipa_] == asr_: translation_key[ipa_] = asr_ phone_to_be_searched.remove(asr_) print("elapsed time: {}".format(time.time() - timer_start)) np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key) np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched) else: translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item() phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).item()