acoustic_model/acoustic_model/fame_functions.py

import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import sys
from collections import Counter
import pickle
import numpy as np
import pandas as pd
import defaultfiles as default
import fame_phoneset
import convert_phone_set
#sys.path.append(default.forced_alignment_module_dir)
#from forced_alignment import convert_phone_set

#def find_phone(lexicon_file, phone):
#    """ Search where the phone is used in the lexicon. """
#    with open(lexicon_file, "rt", encoding="utf-8") as fin:
#        lines = fin.read()
#    lines = lines.split('\n')
#
#    extracted = []
#    for line in lines:
#        line = line.split('\t')
#        if len(line) > 1:
#            pronunciation = line[1]
#            if phone in pronunciation:
#                extracted.append(line)
#    return extracted

#def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
#    """ Convert a lexicon file from IPA to HTK format for the FAME! corpus. """
#    lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation'])
#    with open(lexicon_file_out, "w", encoding="utf-8") as fout:
#        for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
#            pronunciation_no_space = pronunciation.replace(' ', '')
#            pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)
#            if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk:
#                fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))

#def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
#    """ Combine two lexicon files and sort by words. """
#    # NOTE: lines1/lines2 are read but never used below.
#    with open(lexicon_file1, "rt", encoding="utf-8") as fin:
#        lines1 = fin.read()
#    lines1 = lines1.split('\n')
#    with open(lexicon_file2, "rt", encoding="utf-8") as fin:
#        lines2 = fin.read()
#    lines2 = lines2.split('\n')
#
#    lex1 = pd.read_table(lexicon_file1, names=['word', 'pronunciation'])
#    lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation'])
#    lex = pd.concat([lex1, lex2])
#    lex = lex.sort_values(by='word', ascending=True)
#    lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')

#def read_fileFA(fileFA):
#    """
#    Read the result file of HTK forced alignment.
#    This function only works when the input is one word.
#    """
#    with open(fileFA, 'r') as f:
#        lines = f.read()
#    lines = lines.split('\n')
#
#    phones = []
#    for line in lines:
#        line_split = line.split()
#        if len(line_split) > 1:  # NOTE: indexing [2] below really requires len > 2.
#            phones.append(line_split[2])
#    return ' '.join(phones)

#def fame_pronunciation_variant(ipa):
#    ipa = ipa.replace('æ', 'ɛ')
#    ipa = ipa.replace('ɐ', 'a')
#    ipa = ipa.replace('ɑ', 'a')
#    ipa = ipa.replace('ɾ', 'r')
#    ipa = ipa.replace('ɹ', 'r')  # ???
#    ipa = ipa.replace('ʁ', 'r')
#    ipa = ipa.replace('ʀ', 'r')  # ???
#    ipa = ipa.replace('ʊ', 'u')
#    ipa = ipa.replace('χ', 'x')
#
#    pronvar_list = [ipa]
#    while 'ø:' in ' '.join(pronvar_list) or 'œ' in ' '.join(pronvar_list) or 'ɒ' in ' '.join(pronvar_list):
#        pronvar_list_ = []
#        for p in pronvar_list:
#            if 'ø:' in p:
#                pronvar_list_.append(p.replace('ø:', 'ö'))
#                pronvar_list_.append(p.replace('ø:', 'ö:'))
#            if 'œ' in p:
#                pronvar_list_.append(p.replace('œ', 'ɔ̈'))
#                pronvar_list_.append(p.replace('œ', 'ɔ̈:'))
#            if 'ɒ' in p:
#                pronvar_list_.append(p.replace('ɒ', 'ɔ̈'))
#                pronvar_list_.append(p.replace('ɒ', 'ɔ̈:'))
#        pronvar_list = np.unique(pronvar_list_)
#    return pronvar_list

#def make_fame2ipa_variants(fame):
#    fame = 'rɛös'  # NOTE: overwrites the argument; presumably a debug leftover.
#    ipa = [fame]
#    ipa.append(fame.replace('ɛ', 'æ'))
#    ipa.append(fame.replace('a', 'ɐ'))
#    ipa.append(fame.replace('a', 'ɑ'))
#    ipa.append(fame.replace('r', 'ɾ'))
#    ipa.append(fame.replace('r', 'ɹ'))
#    ipa.append(fame.replace('r', 'ʁ'))
#    ipa.append(fame.replace('r', 'ʀ'))
#    ipa.append(fame.replace('u', 'ʊ'))
#    ipa.append(fame.replace('x', 'χ'))
#    ipa.append(fame.replace('ö', 'ø:'))
#    ipa.append(fame.replace('ö:', 'ø:'))
#    ipa.append(fame.replace('ɔ̈', 'œ'))
#    ipa.append(fame.replace('ɔ̈:', 'œ'))
#    ipa.append(fame.replace('ɔ̈', 'ɒ'))
#    ipa.append(fame.replace('ɔ̈:', 'ɒ'))
#    return ipa

def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
    """ Make a script file for HCopy using the filelist in the FAME! corpus. """
    filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
    with open(filelist_txt) as fin:
        filelist = fin.read()
    filelist = filelist.split('\n')

    with open(hcopy_scp, 'w') as fout:
        for filename_ in filelist:
            filename = filename_.replace('.TextGrid', '')
            if len(filename) > 3:  # skip '.', '..' and ''
                wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
                mfc_file = os.path.join(feature_dir, filename + '.mfc')
                fout.write(wav_file + '\t' + mfc_file + '\n')
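
# A minimal usage sketch (paths and the 'train' dataset name are hypothetical;
# it assumes the FAME! layout fame/filelists/<dataset>list.txt and
# fame/wav/<dataset>/ referenced above):
#
#   make_hcopy_scp_from_filelist_in_fame(
#       r'd:\_corpus\FAME', 'train',
#       r'c:\Users\Aki\source\repos\acoustic_model\_tmp\mfc',
#       r'c:\Users\Aki\source\repos\acoustic_model\_tmp\hcopy_train.scp')
#
#   # each line of the resulting .scp is '<wav path>\t<mfc path>' for HCopy.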

#def make_filelist(input_dir, output_txt):
#    """ Make a list of files in input_dir. """
#    filenames = os.listdir(input_dir)
#    with open(output_txt, 'w') as fout:
#        for filename in filenames:
#            fout.write(input_dir + '\\' + filename + '\n')

#def make_htk_dict(word, pronvar_, fileDic, output_type):
#    """
#    Make dict files which can be used for HTK.
#    param word: target word.
#    param pronvar_: pronunciation variants. nx2 (WORD \t pronunciation) ndarray.
#    param fileDic: output dic file.
#    param output_type: 0: full, 1: statistics, 2: only entries with frequency < 2%, 3: top 3.
#    """
#    #assert(output_type < 4 and output_type >= 0, 'output_type should be an integer between 0 and 3.')
#    WORD = word.upper()
#
#    if output_type == 0:  # full
#        pronvar = np.unique(pronvar_)
#        with open(fileDic, 'w') as f:
#            for pvar in pronvar:
#                f.write('{0}\t{1}\n'.format(WORD, pvar))
#    else:
#        c = Counter(pronvar_)
#        total_num = sum(c.values())
#        with open(fileDic, 'w') as f:
#            if output_type == 3:
#                for key, value in c.most_common(3):
#                    f.write('{0}\t{1}\n'.format(WORD, key))
#            else:
#                for key, value in c.items():
#                    percentage = value/total_num*100
#                    if output_type == 1:  # all
#                        f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
#                    elif output_type == 2:  # less than 2 percent
#                        if percentage < 2:
#                            f.write('{0}\t{1}\n'.format(WORD, key))

def load_lexicon(lexicon_file):
    """ Load a tab-separated lexicon (word \t pronunciation, no header) into a DataFrame. """
    lex = pd.read_csv(lexicon_file, delimiter='\t', header=None, encoding="utf-8")
    lex.rename(columns={0: 'word', 1: 'pronunciation'}, inplace=True)
    return lex
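
# Usage sketch (the path is the one used in __main__ below; the file is assumed
# to be 'word<TAB>pronunciation' per line):
#
#   lex = load_lexicon(r'd:\_corpus\FAME\lexicon\lex.asr')
#   print(lex['word'].head())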

def get_phonelist(lexicon_asr):
    """ Make a set of the phones that appear in the lexicon. """
    #with open(lexicon_file, "rt", encoding="utf-8") as fin:
    #    lines = fin.read()
    #lines = lines.split('\n')
    #
    #phonelist = set([])
    #for line in lines:
    #    line = line.split('\t')
    #    if len(line) > 1:
    #        pronunciation = set(line[1].split())
    #        phonelist = phonelist | pronunciation
    lex = load_lexicon(lexicon_asr)
    return set(' '.join(lex['pronunciation']).split(' '))

def extract_unknown_phones(word_list, known_phones):
    """ Return the phones in word_list that do not appear in known_phones. """
    return [i for i in word_list if i not in known_phones]
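
# Sketch of how the two helpers above combine (the phones shown are
# hypothetical, not necessarily in the FAME! phone set):
#
#   known_phones = get_phonelist(r'd:\_corpus\FAME\lexicon\lex.asr')
#   unknown = extract_unknown_phones(['b', 'oo', 'k'], known_phones)
#   # -> the subset of ['b', 'oo', 'k'] not present in the lex.asr phone set.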

if __name__ == '__main__':
    import time
    timer_start = time.time()

    #def get_translation_key():
    dir_tmp = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
    lexicon_ipa = r'd:\_corpus\FAME\lexicon\lex.ipa'
    lexicon_asr = r'd:\_corpus\FAME\lexicon\lex.asr'

    lex_ipa = load_lexicon(lexicon_ipa)
    lex_asr = load_lexicon(lexicon_asr)
    if 1:  # set to 0 to load the previously saved results instead of recomputing.
        phone_to_be_searched = fame_phoneset.phoneset_ipa[:]
        translation_key = dict()
        for word in lex_ipa['word']:
            # only use words which occur exactly once in both lexicons.
            if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
                ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
                asr = lex_asr[lex_asr['word'] == word].iat[0, 1]

                ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
                asr_list = asr.split(' ')

                # if there are phones which are not in phone_to_be_searched
                #if len([True for i in asr_list if i in phone_to_be_searched]) > 0:
                if len(ipa_list) == len(asr_list):
                    print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
                    for ipa_, asr_ in zip(ipa_list, asr_list):
                        if ipa_ in phone_to_be_searched:
                            translation_key[ipa_] = asr_
                            phone_to_be_searched.remove(ipa_)

        print("elapsed time: {}".format(time.time() - timer_start))

        np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
        np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched)
    else:
        # np.save pickles these objects, so allow_pickle=True is needed on load;
        # the dict comes back via .item(), the list via .tolist().
        translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy'), allow_pickle=True).item()
        phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), allow_pickle=True).tolist()
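
    # Illustration of the alignment above (the transcriptions are made up, not
    # taken from the actual lexicons): for a word occurring exactly once in
    # both lexicons,
    #   ipa_list = ['b', 'oː', 'k']   # from lex.ipa, split on multi-character phones
    #   asr_list = ['b', 'oo', 'k']   # from lex.asr, space-separated
    # equal lengths allow a one-to-one zip, yielding e.g. translation_key['oː'] = 'oo'.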

    #phone_unknown = list(phone_to_be_searched)
    ##phone_unknown.remove('')
    #phone_known = list(translation_key.keys())

    #p = phone_unknown[0]

    ### extract lines which contain the 'unknown' phone.
    #lex_ipa_ = lex_ipa[lex_ipa['pronunciation'].str.count(p) > 0]

    ##phone_unknown_ = phone_unknown[:]
    ##phone_unknown_.remove(p)
    #phone_known_ = phone_known[:]
    #phone_known_.append(p)

    #for index, row in lex_ipa_.iterrows():
    #    ipa = row['pronunciation']
    #    phone_extract_unknown_phones(asr_list, phone_known_):
    #    # check the number of phones in phone_unknown_
    #    if len([True for i in asr_list if i in phone_unknown_]) == 0:
    #        word = row['word']
    #        ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
    #        print("{0}: {1} --> {2}".format(word, ipa, asr))
    #        #print("{0}:{1}".format(index, row['pronunciation']))