import os

os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import sys
from collections import Counter
import pickle

import numpy as np
import pandas as pd

import defaultfiles as default
import fame_phoneset
import convert_phone_set

#sys.path.append(default.forced_alignment_module_dir)
#from forced_alignment import convert_phone_set


#def find_phone(lexicon_file, phone):
#    """ Search where the phone is used in the lexicon. """
#    with open(lexicon_file, "rt", encoding="utf-8") as fin:
#        lines = fin.read()
#        lines = lines.split('\n')
#
#    extracted = []
#    for line in lines:
#        line = line.split('\t')
#        if len(line) > 1:
#            pronunciation = line[1]
#            if phone in pronunciation:
#                extracted.append(line)
#    return extracted


#def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
#    """ Convert a lexicon file from IPA to HTK format for FAME! corpus. """
#    lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation'])
#    with open(lexicon_file_out, "w", encoding="utf-8") as fout:
#        for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
#            pronunciation_no_space = pronunciation.replace(' ', '')
#            pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)
#            if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk:
#                fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))


#def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
#    """ Combine two lexicon files and sort by words. """
#    with open(lexicon_file1, "rt", encoding="utf-8") as fin:
#        lines1 = fin.read()
#        lines1 = lines1.split('\n')
#
#    with open(lexicon_file2, "rt", encoding="utf-8") as fin:
#        lines2 = fin.read()
#        lines2 = lines2.split('\n')
#
#    lex1 = pd.read_table(lexicon_file1, names=['word', 'pronunciation'])
#    lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation'])
#    lex = pd.concat([lex1, lex2])
#    lex = lex.sort_values(by='word', ascending=True)
#    lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')


#def read_fileFA(fileFA):
#    """
#    Read the result file of HTK forced alignment.
#    This function only works when the input is one word.
#    """
#    with open(fileFA, 'r') as f:
#        lines = f.read()
#        lines = lines.split('\n')
#
#    phones = []
#    for line in lines:
#        line_split = line.split()
#        if len(line_split) > 2:  # start, end, phone (guards the [2] access)
#            phones.append(line_split[2])
#    return ' '.join(phones)


#def fame_pronunciation_variant(ipa):
#    ipa = ipa.replace('æ', 'ɛ')
#    ipa = ipa.replace('ɐ', 'a')
#    ipa = ipa.replace('ɑ', 'a')
#    ipa = ipa.replace('ɾ', 'r')
#    ipa = ipa.replace('ɹ', 'r')  # ???
#    ipa = ipa.replace('ʁ', 'r')
#    ipa = ipa.replace('ʀ', 'r')  # ???
#    ipa = ipa.replace('ʊ', 'u')
#    ipa = ipa.replace('χ', 'x')
#
#    pronvar_list = [ipa]
#    while 'ø:' in ' '.join(pronvar_list) or 'œ' in ' '.join(pronvar_list) or 'ɒ' in ' '.join(pronvar_list):
#        pronvar_list_ = []
#        for p in pronvar_list:
#            if 'ø:' in p:
#                pronvar_list_.append(p.replace('ø:', 'ö'))
#                pronvar_list_.append(p.replace('ø:', 'ö:'))
#            elif 'œ' in p:
#                pronvar_list_.append(p.replace('œ', 'ɔ̈'))
#                pronvar_list_.append(p.replace('œ', 'ɔ̈:'))
#            elif 'ɒ' in p:
#                pronvar_list_.append(p.replace('ɒ', 'ɔ̈'))
#                pronvar_list_.append(p.replace('ɒ', 'ɔ̈:'))
#            else:
#                pronvar_list_.append(p)  # keep variants that are already fully converted
#        pronvar_list = np.unique(pronvar_list_)
#    return pronvar_list


#def make_fame2ipa_variants(fame):
#    fame = 'rɛös'  # debug leftover: overwrites the argument
#    ipa = [fame]
#    ipa.append(fame.replace('ɛ', 'æ'))
#    ipa.append(fame.replace('a', 'ɐ'))
#    ipa.append(fame.replace('a', 'ɑ'))
#    ipa.append(fame.replace('r', 'ɾ'))
#    ipa.append(fame.replace('r', 'ɹ'))
#    ipa.append(fame.replace('r', 'ʁ'))
#    ipa.append(fame.replace('r', 'ʀ'))
#    ipa.append(fame.replace('u', 'ʊ'))
#    ipa.append(fame.replace('x', 'χ'))
#
#    ipa.append(fame.replace('ö', 'ø:'))
#    ipa.append(fame.replace('ö:', 'ø:'))
#    ipa.append(fame.replace('ɔ̈', 'œ'))
#    ipa.append(fame.replace('ɔ̈:', 'œ'))
#    ipa.append(fame.replace('ɔ̈', 'ɒ'))
#    ipa.append(fame.replace('ɔ̈:', 'ɒ'))
#
#    return ipa


def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
    """ Make a script file for HCopy using the filelist in the FAME! corpus. """
    filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
    with open(filelist_txt) as fin:
        filelist = fin.read()
        filelist = filelist.split('\n')

    with open(hcopy_scp, 'w') as fout:
        for filename_ in filelist:
            filename = filename_.replace('.TextGrid', '')

            if len(filename) > 3:  # remove '.', '..' and ''
                wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
                mfc_file = os.path.join(feature_dir, filename + '.mfc')
                fout.write(wav_file + '\t' + mfc_file + '\n')
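
# A minimal usage sketch. The dataset name 'train' and the output paths below are
# assumptions (the function expects a filelist at fame/filelists/<dataset>list.txt);
# the resulting .scp lists "source<TAB>target" pairs for HTK feature extraction.
#     make_hcopy_scp_from_filelist_in_fame(
#         r'd:\_corpus\FAME', 'train',
#         r'c:\Users\Aki\source\repos\acoustic_model\_tmp\mfc',
#         r'c:\Users\Aki\source\repos\acoustic_model\_tmp\hcopy.scp')
#     # then, on the command line:  HCopy -C <mfcc config> -S hcopy.scp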


#def make_filelist(input_dir, output_txt):
#    """ Make a list of files in the input_dir. """
#    filenames = os.listdir(input_dir)
#
#    with open(output_txt, 'w') as fout:
#        for filename in filenames:
#            fout.write(input_dir + '\\' + filename + '\n')


#def make_htk_dict(word, pronvar_, fileDic, output_type):
#    """
#    Make dict files which can be used for HTK.
#    param word: target word.
#    param pronvar_: pronunciation variants as an ndarray of strings.
#    param fileDic: output dic file.
#    param output_type: 0: full, 1: statistics, 2: only entries with frequency < 2%, 3: top 3.
#    """
#    #assert 0 <= output_type < 4, 'output_type should be an integer between 0 and 3.'
#    WORD = word.upper()
#
#    if output_type == 0:  # full
#        pronvar = np.unique(pronvar_)
#
#        with open(fileDic, 'w') as f:
#            for pvar in pronvar:
#                f.write('{0}\t{1}\n'.format(WORD, pvar))
#    else:
#        c = Counter(pronvar_)
#        total_num = sum(c.values())
#
#        with open(fileDic, 'w') as f:
#            if output_type == 3:
#                for key, value in c.most_common(3):
#                    f.write('{0}\t{1}\n'.format(WORD, key))
#            else:
#                for key, value in c.items():
#                    percentage = value / total_num * 100
#
#                    if output_type == 1:  # statistics
#                        f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
#                    elif output_type == 2:  # only the rare (< 2%) variants
#                        if percentage < 2:
#                            f.write('{0}\t{1}\n'.format(WORD, key))


def load_lexicon(lexicon_file):
    """ Load a tab-separated lexicon into a DataFrame with columns 'word' and 'pronunciation'. """
    lex = pd.read_csv(lexicon_file, delimiter='\t', header=None, encoding="utf-8")
    lex.rename(columns={0: 'word', 1: 'pronunciation'}, inplace=True)
    return lex
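
# A minimal usage sketch, assuming a two-column (word TAB pronunciation) lexicon
# file such as the FAME! lex.asr used in __main__ below:
#     lex = load_lexicon(r'd:\_corpus\FAME\lexicon\lex.asr')
#     print(lex['word'][0], lex['pronunciation'][0])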


def get_phonelist(lexicon_asr):
    """ Make a list of phones which appear in the lexicon. """
    #with open(lexicon_file, "rt", encoding="utf-8") as fin:
    #    lines = fin.read()
    #    lines = lines.split('\n')
    #
    #phonelist = set([])
    #for line in lines:
    #    line = line.split('\t')
    #    if len(line) > 1:
    #        pronunciation = set(line[1].split())
    #        phonelist = phonelist | pronunciation
    lex = load_lexicon(lexicon_asr)
    return set(' '.join(lex['pronunciation']).split(' '))
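
# A minimal usage sketch; the returned set contains every whitespace-separated
# phone symbol occurring in the 'pronunciation' column:
#     phone_set = get_phonelist(r'd:\_corpus\FAME\lexicon\lex.asr')
#     print(sorted(phone_set))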


def extract_unknown_phones(word_list, known_phones):
    """ List the phones in word_list which are not in known_phones. """
    return [i for i in word_list if i not in known_phones]
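
# A minimal usage sketch with made-up phones:
#     extract_unknown_phones(['s', 'o:', 'x'], ['s', 'x'])  # -> ['o:']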


if __name__ == '__main__':
    import time

    timer_start = time.time()

    #def get_translation_key():
    dir_tmp = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
    lexicon_ipa = r'd:\_corpus\FAME\lexicon\lex.ipa'
    lexicon_asr = r'd:\_corpus\FAME\lexicon\lex.asr'

    lex_ipa = load_lexicon(lexicon_ipa)
    lex_asr = load_lexicon(lexicon_asr)

    if 1:  # build the translation key from scratch; set to 0 to load saved results
        phone_to_be_searched = fame_phoneset.phoneset_ipa[:]
        translation_key = dict()
        for word in lex_ipa['word']:
            if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
                ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
                asr = lex_asr[lex_asr['word'] == word].iat[0, 1]

                ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
                asr_list = asr.split(' ')

                # if there are phones which are not in phone_to_be_searched
                #if len([True for i in asr_list if i in phone_to_be_searched]) > 0:
                if len(ipa_list) == len(asr_list):
                    print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
                    for ipa_, asr_ in zip(ipa_list, asr_list):
                        if ipa_ in phone_to_be_searched:
                            translation_key[ipa_] = asr_
                            phone_to_be_searched.remove(ipa_)

        print("elapsed time: {}".format(time.time() - timer_start))

        np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
        np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched)
    else:
        # note: on newer numpy, np.load may need allow_pickle=True here.
        translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item()
        # phone_to_be_searched was saved as a list, so it loads back as a
        # 1-D array; .tolist() restores it (.item() only works for the dict).
        phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).tolist()
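
    # Applying the key is then a per-phone lookup; a minimal sketch, assuming
    # 'ipa' holds an IPA pronunciation string as in the loop above (phones
    # without a mapping are kept unchanged):
    #     ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
    #     asr_pron = ' '.join(translation_key.get(p, p) for p in ipa_list)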

    #phone_unknown = list(phone_to_be_searched)
    ##phone_unknown.remove('')
    #phone_known = list(translation_key.keys())

    #p = phone_unknown[0]

    ### extract lines which contain the 'unknown' phone.
    #lex_ipa_ = lex_ipa[lex_ipa['pronunciation'].str.count(p)>0]
    ##phone_unknown_ = phone_unknown[:]
    ##phone_unknown_.remove(p)
    #phone_known_ = phone_known[:]
    #phone_known_.append(p)
    #for index, row in lex_ipa_.iterrows():
    #    ipa = row['pronunciation']
    #    phone_unknown_ = extract_unknown_phones(asr_list, phone_known_)
    #
    #    # check the number of phones in phone_unknown_
    #    if len([True for i in asr_list if i in phone_unknown_]) == 0:
    #        word = row['word']
    #        ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
    #        print("{0}: {1} --> {2}".format(word, ipa, asr))
    #        #print("{0}:{1}".format(index, row['pronunciation']))