import os
import sys

import numpy as np
import pandas as pd

import defaultfiles as default

import fame_phoneset
import convert_phone_set

#sys.path.append(default.forced_alignment_module_dir)
#from forced_alignment import convert_phone_set


def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
    """ Make a script file for HCopy using the filelist of the FAME! corpus.

    Args:
        fame_dir (path): the directory of the FAME! corpus.
        dataset (str): 'devel', 'test' or 'train'.
        feature_dir (path): the directory where the features will be stored.
        hcopy_scp (path): the script file for HCopy to be made.

    """
    filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
    with open(filelist_txt) as fin:
        filelist = fin.read().split('\n')

    with open(hcopy_scp, 'w') as fout:
        for filename_ in filelist:
            filename = filename_.replace('.TextGrid', '')

            if len(filename) > 3:  # skip '.', '..' and ''
                wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
                mfc_file = os.path.join(feature_dir, filename + '.mfc')

                fout.write(wav_file + '\t' + mfc_file + '\n')
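

# A minimal usage sketch (hypothetical output paths and config file). HCopy reads
# the generated script file via its -S option:
#   make_hcopy_scp_from_filelist_in_fame(r'd:\_corpus\FAME', 'train',
#                                        r'd:\_corpus\FAME\mfc\train', 'hcopy_train.scp')
#   > HCopy -C mfcc.conf -S hcopy_train.scp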


def load_lexicon(lexicon_file):
    """ Load a lexicon file as a pandas DataFrame.

    Args:
        lexicon_file (path): lexicon in the format 'word' \t 'pronunciation'.

    Returns:
        lex (pd.DataFrame): lexicon with the columns 'word' and 'pronunciation'.

    """
    lex = pd.read_csv(lexicon_file, delimiter='\t', header=None, encoding="utf-8")
    lex.rename(columns={0: 'word', 1: 'pronunciation'}, inplace=True)
    return lex
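

# A minimal usage sketch (hypothetical lexicon entry). Each line of the lexicon
# file is tab-delimited, e.g. 'wurd\tw u r d':
#   lex = load_lexicon(r'd:\_corpus\FAME\lexicon\lex.asr')
#   print(lex[lex['word'] == 'wurd'])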


def get_phoneset_from_lexicon(lexicon_file, phoneset='asr'):
    """ Make a set of the phones which appear in the lexicon.

    Args:
        lexicon_file (path): lexicon in the format 'word' \t 'pronunciation'.
        phoneset (str): the phoneset in which lexicon_file is written. 'asr' (default) or 'ipa'.

    Returns:
        (set): the phones included in lexicon_file.

    """
    assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''

    lex = load_lexicon(lexicon_file)
    if phoneset == 'asr':
        return set(' '.join(lex['pronunciation']).split(' '))
    elif phoneset == 'ipa':
        join_pronunciations = ''.join(lex['pronunciation'])
        return set(convert_phone_set.split_word(join_pronunciations, fame_phoneset.multi_character_phones_ipa))
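

# A minimal usage sketch: compare the phone inventories of the two lexicons
# (paths as used in __main__ below):
#   phones_asr = get_phoneset_from_lexicon(r'd:\_corpus\FAME\lexicon\lex.asr', phoneset='asr')
#   phones_ipa = get_phoneset_from_lexicon(r'd:\_corpus\FAME\lexicon\lex.ipa', phoneset='ipa')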


def extract_unknown_phones(ipa, known_phones):
    """ Extract the unknown phones from a pronunciation written in IPA.

    Args:
        ipa (str): a pronunciation written in IPA.
        known_phones (list): the phones which are already known.

    Returns:
        (list): the phones which are not included in known_phones.

    """
    ipa_split = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
    return [i for i in ipa_split if i not in known_phones]
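

# A minimal usage sketch (hypothetical pronunciation; assumes split_word yields
# single-character phones for this input):
#   extract_unknown_phones('bole', known_phones=['b', 'l', 'e'])  # -> ['o']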


def get_translation_key(lexicon_file_ipa, lexicon_file_asr):
    """ Get the correspondence between lexicon_file_ipa and lexicon_file_asr.

    Args:
        lexicon_file_ipa (path): lexicon in the format 'word' \t 'pronunciation (IPA)'.
        lexicon_file_asr (path): lexicon in the format 'word' \t 'pronunciation (asr)',
            in which each phone of 'pronunciation' is delimited by ' '.

    Returns:
        translation_key (dict): translation key from IPA to asr.
        phone_unknown (list): the IPA phones which do not appear in lexicon_file_asr.

    """
    lex_ipa = load_lexicon(lexicon_file_ipa)
    lex_asr = load_lexicon(lexicon_file_asr)
    phone_unknown = fame_phoneset.phoneset_ipa[:]
    translation_key = dict()

    for word in lex_ipa['word']:
        # only use words which occur exactly once in both lexicons.
        if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
            ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
            asr = lex_asr[lex_asr['word'] == word].iat[0, 1]

            ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
            asr_list = asr.split(' ')

            # only use entries in which the phones correspond one-to-one.
            if len(ipa_list) == len(asr_list):
                print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
                for ipa_, asr_ in zip(ipa_list, asr_list):
                    if ipa_ in phone_unknown:
                        translation_key[ipa_] = asr_
                        phone_unknown.remove(ipa_)
    return translation_key, list(phone_unknown)
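

# A minimal usage sketch (paths as used in __main__ below): build the IPA-->asr
# key and inspect which IPA phones could not be mapped:
#   translation_key, phone_unknown = get_translation_key(
#       r'd:\_corpus\FAME\lexicon\lex.ipa', r'd:\_corpus\FAME\lexicon\lex.asr')
#   print(len(translation_key), phone_unknown)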


def find_phone(lexicon_file, phone, phoneset='ipa'):
    """ Extract the rows in which the phone is used in lexicon_file.

    Args:
        lexicon_file (path): lexicon in the format 'word' \t 'pronunciation'.
        phone (str): the phone to be searched.
        phoneset (str): the phoneset in which lexicon_file is written. 'asr' or 'ipa' (default).

    Returns:
        extracted (pd.DataFrame): the rows in which the phone is used.

    ToDo:
        * implement the case in which phoneset == 'asr'.

    """
    assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''

    lex = load_lexicon(lexicon_file)

    # to reduce the calculation time, only check the rows which contain 'phone' at least once.
    lex_ = lex[lex['pronunciation'].str.count(phone) > 0]

    extracted = pd.DataFrame(index=[], columns=['word', 'pronunciation'])
    for index, row in lex_.iterrows():
        if phoneset == 'ipa':
            pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_phoneset.multi_character_phones_ipa)
            if phone in pronunciation:
                extracted_ = pd.Series([row['word'], pronunciation], index=extracted.columns)
                extracted = extracted.append(extracted_, ignore_index=True)
    return extracted


if __name__ == '__main__':
    import time
    timer_start = time.time()

    dir_tmp = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
    lexicon_ipa = r'd:\_corpus\FAME\lexicon\lex.ipa'
    lexicon_asr = r'd:\_corpus\FAME\lexicon\lex.asr'

    if 1:
        translation_key, phone_unknown = get_translation_key(lexicon_ipa, lexicon_asr)
        np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
        np.save(os.path.join(dir_tmp, 'phone_unknown.npy'), phone_unknown)
    else:
        translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item()
        phone_unknown = np.load(os.path.join(dir_tmp, 'phone_unknown.npy')).tolist()

    print("elapsed time: {}".format(time.time() - timer_start))
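

# A minimal sketch of applying the translation key to one IPA pronunciation
# (assumes every phone of 'ipa' is covered by translation_key, i.e. none of
# them remains in phone_unknown):
#   ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
#   asr_pronunciation = ' '.join(translation_key[p] for p in ipa_list)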