fame_asr phoneset is added, including a reduced version and an HTK-compatible version.
This commit is contained in:
parent 87abbbb95a
commit 8cda93de75
Binary file not shown.
@@ -32,7 +32,9 @@
     <Compile Include="defaultfiles.py">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="fame_phoneset.py">
+    <Compile Include="fame_asr.py" />
+    <Compile Include="fame_ipa.py" />
+    <Compile Include="fame_test.py">
       <SubType>Code</SubType>
     </Compile>
     <Compile Include="fa_test.py">
@@ -20,7 +20,7 @@ def split_word(word, multi_character_phones):

     Args:
         word (str): a word written in the given phoneset.
-        multi_character_phones (list): the list of multi-character phones which are treated as one phone. This can be obtained from a phoneset definition such as fame_phoneset.py.
+        multi_character_phones (list): the list of multi-character phones which are treated as one phone. This can be obtained from a phoneset definition such as fame_ipa.py.

     Returns:
         (word_seperated) (list): the word split into phones of the given phoneset.
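For context, split_word is the consumer of the multi_character_phones lists defined below. A minimal sketch of the expected greedy longest-match behaviour (an assumption; the body of convert_phone_set.split_word is not part of this diff):

def split_word(word, multi_character_phones):
    """ Split a pronunciation string into phones, longest match first. """
    phones = []
    i = 0
    while i < len(word):
        # multi_character_phones is sorted by length, descending,
        # so the first match found is the longest possible phone.
        for phone in multi_character_phones:
            if word.startswith(phone, i):
                phones.append(phone)
                i += len(phone)
                break
        else:
            phones.append(word[i])
            i += 1
    return phones

# e.g. split_word('sɔ:n', ['ɔ:']) --> ['s', 'ɔ:', 'n']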
127 acoustic_model/fame_asr.py Normal file
@@ -0,0 +1,127 @@
+""" definition of the phones to be used. """
+
+# phones in {FAME}/lexicon/lex.asr
+phoneset = [
+    # vowels
+    'a',
+    'a:',
+    'e',
+    'e:',
+    'i',
+    'i:',
+    'i̯',
+    'o',
+    'o:',
+    'ö',
+    'ö:',
+    'u',
+    'u:',
+    'ü',
+    'ü:',
+    #'ú', # only appears in the words 'feeste'(út) and 'gaste'(út), which are 'f e: s t ə' and 'yn' in lex_asr. The Frisian pronunciation may be a mistake, so this phone is removed.
+    'ṷ',
+    'y',
+    'ɔ',
+    'ɔ:',
+    'ɔ̈',
+    'ɔ̈:',
+    'ə',
+    'ɛ',
+    'ɛ:',
+    'ɪ',
+    'ɪ:',
+
+    # plosives
+    'p',
+    'b',
+    't',
+    'd',
+    'k',
+    'g',
+    'ɡ', # = 'g'
+
+    # nasals
+    'm',
+    'n',
+    'ŋ',
+
+    # fricatives
+    'f',
+    'v',
+    's',
+    's:',
+    'z',
+    'x',
+    'h',
+
+    # tap and flip
+    'r',
+    'r:',
+
+    # approximant
+    'j',
+    'l'
+]
+
+
+## reduce the number of phones.
+# phones which seldom occur are replaced with other, more common phones.
+# replacements are based on the advice from Martijn Wieling.
+reduction_key = {
+    'y': 'i:', 'e': 'e:', 'ə:': 'ɛ:', 'r:': 'r', 'ɡ': 'g'
+}
+# already removed beforehand in phoneset. Just to be sure.
+phones_to_be_removed = ['ú', 's:', 'ɔ̈:']
+
+phoneset_short = [reduction_key.get(i, i) for i in phoneset
+                  if i not in phones_to_be_removed]
+phoneset_short = list(set(phoneset_short))
+phoneset_short.sort()
+
+
+## translation_key to htk format (ascii).
+# phones which give UnicodeEncodeError on phone.encode("ascii")
+# are replaced with other characters.
+translation_key_asr2htk = {
+    'i̯': 'i_',
+    'ṷ': 'u_',
+
+    # on the analogy of German umlauts, 'e' is appended.
+    'ö': 'oe', 'ö:': 'oe:',
+    'ü': 'ue', 'ü:': 'ue:',
+
+    # on the analogy of Chinese...
+    'ŋ': 'ng',
+
+    # refer to X-SAMPA.
+    'ɔ': 'O', 'ɔ:': 'O:', 'ɔ̈': 'Oe',
+    'ɛ': 'E', 'ɛ:': 'E:',
+    'ɪ': 'I', 'ɪ:': 'I:',
+
+    # it is '@' in X-SAMPA, but that is not handy in HTK.
+    'ə': 'A'
+}
+phoneset_htk = [translation_key_asr2htk.get(i, i) for i in phoneset_short]
+
+## check
+#for i in phoneset_short:
+#    try:
+#        print("{0} --> {1}".format(i, i.encode("ascii")))
+#    except UnicodeEncodeError:
+#        print(">>> {}".format(i))
+
+
+## the list of multi-character phones.
+# for example, 'a:' has length 2, but in the code it is treated as one phone.
+
+# original.
+multi_character_phones = [i for i in phoneset if len(i) > 1]
+multi_character_phones.sort(key=len, reverse=True)
+
+# phoneset reduced.
+multi_character_phones_short = [i for i in phoneset_short if len(i) > 1]
+multi_character_phones_short.sort(key=len, reverse=True)
+
+# htk compatible.
+multi_character_phones_htk = [i for i in phoneset_htk if len(i) > 1]
+multi_character_phones_htk.sort(key=len, reverse=True)
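A quick sanity check of the new module (hypothetical usage; the import path is an assumption, fame_test.py below uses 'from phoneset import fame_asr'). Every phone in phoneset_htk should now be plain ASCII, and the key maps reduced phones to HTK-safe labels:

import fame_asr  # assumed import path

# every phone in the HTK phoneset should encode to ASCII.
for phone in fame_asr.phoneset_htk:
    phone.encode("ascii")  # would raise UnicodeEncodeError on a missing mapping

# converting one pronunciation, phone by phone:
pronunciation = ['b', 'ɔ̈', 'l', 'ə']
htk = [fame_asr.translation_key_asr2htk.get(p, p) for p in pronunciation]
print(htk)  # ['b', 'Oe', 'l', 'A']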
@@ -1,5 +1,4 @@
 import os
-os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

 import sys
 from collections import Counter
@@ -9,7 +8,7 @@ import numpy as np
 import pandas as pd

 import defaultfiles as default
-import fame_phoneset
+from phoneset import fame_ipa
 import convert_phone_set


@@ -110,14 +109,6 @@ import convert_phone_set

 #    return ipa

-#def make_filelist(input_dir, output_txt):
-#    """ Make a list of files in the input_dir. """
-#    filenames = os.listdir(input_dir)
-
-#    with open(output_txt, 'w') as fout:
-#        for filename in filenames:
-#            fout.write(input_dir + '\\' + filename + '\n')
-

 #def make_htk_dict(word, pronvar_, fileDic, output_type):
 #    """
@@ -179,10 +170,11 @@ def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_s

             fout.write(wav_file + '\t' + mfc_file + '\n')

+    return


 def load_lexicon(lexicon_file):
-    """ load lexicon file as Data Frame.
+    """ load a lexicon file as a data frame.

     Args:
         lexicon_file (path): lexicon in the format of 'word' \t 'pronunciation'.
@@ -196,25 +188,27 @@ def load_lexicon(lexicon_file):
     return lex


-def get_phoneset_from_lexicon(lexicon_file, phoneset='asr'):
+def get_phoneset_from_lexicon(lexicon_file, phoneset_name='asr'):
     """ Make a list of phones which appear in the lexicon.

     Args:
         lexicon_file (path): lexicon in the format of 'word' \t 'pronunciation'.
-        phoneset (str): the phoneset with which lexicon_file is written. 'asr'(default) or 'ipa'.
+        phoneset_name (str): the name of the phoneset with which lexicon_file is written. 'asr'(default) or 'ipa'.

     Returns:
         (list_of_phones) (set): the set of phones included in the lexicon_file.

     """
-    assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
+    assert phoneset_name in ['asr', 'ipa'], 'phoneset_name should be \'asr\' or \'ipa\''

     lex = load_lexicon(lexicon_file)
-    if phoneset == 'asr':
+    if phoneset_name == 'asr':
         return set(' '.join(lex['pronunciation']).split(' '))
-    elif phoneset == 'ipa':
+    elif phoneset_name == 'ipa':
         join_pronunciations = ''.join(lex['pronunciation'])
-        return set(convert_phone_set.split_word(join_pronunciations, fame_phoneset.multi_character_phones_ipa))
+        return set(convert_phone_set.split_word(join_pronunciations, fame_ipa.multi_character_phones))
+
+    return


 def extract_unknown_phones(ipa, known_phones):
@@ -228,7 +222,7 @@ def extract_unknown_phones(ipa, known_phones):
         (list_of_phones) (list): unknown phones not included in 'known_phones'.

     """
-    ipa_split = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
+    ipa_split = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones)
     return [i for i in ipa_split if not i in known_phones]


@@ -247,14 +241,14 @@ def get_translation_key(lexicon_file_ipa, lexicon_file_asr):
     """
     lex_ipa = load_lexicon(lexicon_file_ipa)
     lex_asr = load_lexicon(lexicon_file_asr)
-    phone_unknown = fame_phoneset.phoneset_ipa[:]
+    phone_unknown = fame_ipa.phoneset[:]
     translation_key = dict()
     for word in lex_ipa['word']:
         if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
             ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
             asr = lex_asr[lex_asr['word'] == word].iat[0, 1]

-            ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
+            ipa_list = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones)
             asr_list = asr.split(' ')

             # if there are phones which are not in phone_unknown
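The core idea of get_translation_key, shown in isolation: when the IPA and the ASR pronunciation of a word contain the same number of phones, they are zipped position by position to learn the ipa-to-asr correspondence. A sketch with illustrative values (not taken from the lexicon):

ipa_list = ['m', 'ö:', 'l', 'ə']   # hypothetical IPA pronunciation of one word
asr_list = ['m', 'ö:', 'l', 'ə']   # hypothetical lex.asr pronunciation of the same word

translation_key = dict()
if len(ipa_list) == len(asr_list):
    for ipa_, asr_ in zip(ipa_list, asr_list):
        # each IPA phone maps to the ASR phone at the same position.
        translation_key[ipa_] = asr_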
@@ -268,13 +262,13 @@ def get_translation_key(lexicon_file_ipa, lexicon_file_asr):
     return translation_key, list(phone_unknown)


-def find_phone(lexicon_file, phone, phoneset='ipa'):
+def find_phone(lexicon_file, phone, phoneset_name='ipa'):
     """ extract rows where the phone is used in the lexicon_file.

     Args:
         lexicon_file (path): lexicon in the format of 'word' \t 'pronunciation'.
         phone (str): the phone to be searched.
-        phoneset (str): the phoneset with which lexicon_file is written. 'asr' or 'ipa'(default).
+        phoneset_name (str): the name of the phoneset with which lexicon_file is written. 'asr' or 'ipa'(default).

     Returns:
         extracted (df): rows where the phone is used.
@@ -283,7 +277,7 @@ def find_phone(lexicon_file, phone, phoneset='ipa'):
         * develop when the phoneset == 'asr'.

     """
-    assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
+    assert phoneset_name in ['asr', 'ipa'], 'phoneset_name should be \'asr\' or \'ipa\''

     lex = load_lexicon(lexicon_file)

@@ -292,8 +286,8 @@ def find_phone(lexicon_file, phone, phoneset='ipa'):

     extracted = pd.DataFrame(index=[], columns=['word', 'pronunciation'])
     for index, row in lex_.iterrows():
-        if phoneset == 'ipa':
-            pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_phoneset.multi_character_phones_ipa)
+        if phoneset_name == 'ipa':
+            pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_ipa.multi_character_phones)
             if phone in pronunciation:
                 extracted_ = pd.Series([row['word'], pronunciation], index=extracted.columns)
                 extracted = extracted.append(extracted_, ignore_index=True)
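Hypothetical usage of the renamed keyword argument after this change (the lexicon path follows the repository's defaultfiles conventions; treat it as an assumption):

import os
import fame_functions
import defaultfiles as default

lexicon_ipa = os.path.join(default.fame_dir, 'lexicon', 'lex.ipa')

# the keyword is now phoneset_name, not phoneset:
phones = fame_functions.get_phoneset_from_lexicon(lexicon_ipa, phoneset_name='ipa')
rows = fame_functions.find_phone(lexicon_ipa, 'ö:', phoneset_name='ipa')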
@@ -8,8 +8,8 @@ import tempfile
 #from collections import Counter
 import time

-#import numpy as np
-#import pandas as pd
+import numpy as np
+import pandas as pd

 import fame_functions
 import defaultfiles as default
@@ -54,6 +54,10 @@ conv_lexicon = 1
 #mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl']
 #FAME_dir = config['Settings']['FAME_dir']

+#lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
+#lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
+#lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
+
 #lex_asr = FAME_dir + '\\lexicon\\lex.asr'
 #lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'
 #lex_oov = FAME_dir + '\\lexicon\\lex.oov'
@@ -111,71 +115,6 @@ if extract_features:
 ## ======================= convert lexicon from ipa to fame_htk =======================
 if conv_lexicon:
     print('==== convert lexicon from ipa 2 fame ====\n')
-
-    #dir_out = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
-    lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
-    lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
-    lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
-
-    # get the correspondence between lex_ipa and lex_asr.
-    lex_asr = fame_functions.load_lexicon(lexicon_asr)
-    lex_ipa = fame_functions.load_lexicon(lexicon_ipa)
-    if 1:
-        timer_start = time.time()
-        translation_key, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)
-        print("elapsed time: {}".format(time.time() - timer_start))
-
-        np.save('translation_key_ipa2asr.npy', translation_key)
-        np.save('phone_unknown.npy', phone_unknown)
-    else:
-        translation_key = np.load('translation_key_ipa2asr.npy').item()
-        phone_unknown = np.load('phone_unknown.npy')
-        phone_unknown = list(phone_unknown)
-
-
-    ## manually check the correspondence for the phone in phone_unknown.
-    #p = phone_unknown[0]
-    #lex_ipa_ = find_phone(lexicon_ipa, p, phoneset='ipa')
-
-    #for word in lex_ipa_['word']:
-    #    ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
-    #    if np.sum(lex_asr['word'] == word) > 0:
-    #        asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
-
-    #        ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
-    #        asr_list = asr.split(' ')
-    #        if p in ipa_list and (len(ipa_list) == len(asr_list)):
-    #            print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
-    #            for ipa_, asr_ in zip(ipa_list, asr_list):
-    #                if ipa_ in phone_unknown:
-    #                    translation_key[ipa_] = asr_
-    #                    phone_unknown.remove(ipa_)
-
-
-    ## check if all the phones in lexicon_ipa are in fame_phoneset.py.
-    #timer_start = time.time()
-    #phoneset_lex = get_phoneset_from_lexicon(lexicon_ipa, phoneset='ipa')
-    #print("elapsed time: {}".format(time.time() - timer_start))
-
-    #phoneset_py = fame_phoneset.phoneset_ipa
-    #set(phoneset_lex) - set(phoneset_py)
-
-    ##timer_start = time.time()
-    ##extracted = find_phone(lexicon_ipa, 'ⁿ')
-    ##print("elapsed time: {}".format(time.time() - timer_start))
-
-
-    # lex.asr is Kaldi compatible version of lex.ipa.
-    # to check...
-    #lexicon_ipa = pd.read_table(lex_ipa, names=['word', 'pronunciation'])
-    #with open(lex_ipa_, "w", encoding="utf-8") as fout:
-    #    for word, pronunciation in zip(lexicon_ipa['word'], lexicon_ipa['pronunciation']):
-    #        # ignore nasalization and '.'
-    #        pronunciation_ = pronunciation.replace(u'ⁿ', '')
-    #        pronunciation_ = pronunciation_.replace('.', '')
-    #        pronunciation_split = convert_phone_set.split_ipa_fame(pronunciation_)
-    #        fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split)))
-
     # convert each lexicon from ipa description to fame_htk phoneset.
     #am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
     #am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)
107 acoustic_model/fame_ipa.py Normal file
@@ -0,0 +1,107 @@
+""" definition of the phones to be used. """
+
+phoneset = [
+    # vowels
+    'i̯',
+    'i̯ⁿ',
+    'y',
+    'i',
+    'i.',
+    'iⁿ',
+    'i:',
+    'i:ⁿ',
+    'ɪ',
+    'ɪⁿ',
+    'ɪ.',
+    #'ɪ:', # not included in lex.ipa
+    'ɪ:ⁿ',
+    'e',
+    'e:',
+    'e:ⁿ',
+    'ə',
+    'əⁿ',
+    'ə:',
+    'ɛ',
+    'ɛ.',
+    'ɛⁿ',
+    'ɛ:',
+    'ɛ:ⁿ',
+    'a',
+    'aⁿ',
+    'a.',
+    'a:',
+    'a:ⁿ',
+    'ṷ',
+    'ṷ.',
+    'ṷⁿ',
+    #'ú', # only appears in the words 'feeste'(út) and 'gaste'(út), which are 'f e: s t ə' and 'yn' in lex_asr. The Frisian pronunciation may be a mistake, so this phone is removed.
+    'u',
+    'uⁿ',
+    'u.',
+    'u:',
+    'u:ⁿ',
+    'ü',
+    'ü.',
+    'üⁿ',
+    'ü:',
+    'ü:ⁿ',
+    'o',
+    'oⁿ',
+    'o.',
+    'o:',
+    'o:ⁿ',
+    'ö',
+    'ö.',
+    'öⁿ',
+    'ö:',
+    'ö:ⁿ',
+    'ɔ',
+    'ɔ.',
+    'ɔⁿ',
+    'ɔ:',
+    'ɔ:ⁿ',
+    #'ɔ̈', # not included in lex.ipa
+    'ɔ̈.',
+    'ɔ̈:',
+
+    # plosives
+    'p',
+    'b',
+    't',
+    'tⁿ',
+    'd',
+    'k',
+    'g',
+    'ɡ', # = 'g'
+
+    # nasals
+    'm',
+    'n',
+    'ŋ',
+
+    # fricatives
+    'f',
+    'v',
+    's',
+    's:',
+    'z',
+    'zⁿ',
+    'x',
+    'h',
+
+    # tap and flip
+    'r',
+    'r.', # only appears in the words 'mearpartijestelsel' (does not exist in lex_asr) and 'tenoarpartij'.
+    'r:', # only appears in the words 'mûsearflearmûs' and 'sjochdêr'.
+
+    # approximant
+    'j',
+    'j.',
+    'l'
+]
+
+
+## the list of multi-character phones.
+# for example, 'i̯ⁿ' has length 3, but in the code it is treated as one phone.
+multi_character_phones = [i for i in phoneset if len(i) > 1]
+multi_character_phones.sort(key=len, reverse=True)
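The descending length sort at the end of the module matters for split_word: a shorter prefix such as 'ɔ:' must not be tried before the full nasalized phone 'ɔ:ⁿ'. A small illustration:

phones = ['ɔ', 'ɔ:', 'ɔ:ⁿ']
phones.sort(key=len, reverse=True)
print(phones)  # ['ɔ:ⁿ', 'ɔ:', 'ɔ'] -- the longest candidate is matched first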
93 acoustic_model/fame_test.py Normal file
@@ -0,0 +1,93 @@
+import sys
+import os
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+
+import time
+
+import numpy as np
+import pandas as pd
+
+import fame_functions
+import defaultfiles as default
+sys.path.append(default.toolbox_dir)
+from phoneset import fame_ipa, fame_asr
+
+
+lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
+lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
+lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
+
+
+## check if all the phones in lexicon.ipa are in fame_ipa.py.
+#timer_start = time.time()
+#phoneset_lex = fame_functions.get_phoneset_from_lexicon(lexicon_ipa, phoneset_name='ipa')
+#phoneset_py = fame_ipa.phoneset
+#print("phones which are in lexicon.ipa but not in fame_ipa.py:\n{}".format(
+#    set(phoneset_lex) - set(phoneset_py)))
+#print("elapsed time: {}".format(time.time() - timer_start))
+
+# check which word has the phone.
+#timer_start = time.time()
+#extracted = find_phone(lexicon_ipa, 'ⁿ')
+#print("elapsed time: {}".format(time.time() - timer_start))
+
+
+## get the correspondence between lex_ipa and lex_asr.
+lex_asr = fame_functions.load_lexicon(lexicon_asr)
+lex_ipa = fame_functions.load_lexicon(lexicon_ipa)
+if 0:
+    timer_start = time.time()
+    translation_key_ipa2asr, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)
+    print("elapsed time: {}".format(time.time() - timer_start))
+
+    np.save(os.path.join('phoneset', 'output_get_translation_key_translation_key.npy'), translation_key_ipa2asr)
+    np.save(os.path.join('phoneset', 'output_get_translation_key_phone_unknown.npy'), phone_unknown)
+else:
+    translation_key_ipa2asr = np.load(os.path.join('phoneset', 'output_get_translation_key_translation_key.npy')).item()
+    phone_unknown = np.load(os.path.join('phoneset', 'output_get_translation_key_phone_unknown.npy'))
+    phone_unknown = list(phone_unknown)
+
+# manually check the correspondence for the phones in phone_unknown.
+#p = phone_unknown[0]
+#lex_ipa_ = find_phone(lexicon_ipa, p, phoneset_name='ipa')
+
+#for word in lex_ipa_['word']:
+#    ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
+#    if np.sum(lex_asr['word'] == word) > 0:
+#        asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
+
+#        ipa_list = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones)
+#        asr_list = asr.split(' ')
+#        if p in ipa_list and (len(ipa_list) == len(asr_list)):
+#            print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
+#            for ipa_, asr_ in zip(ipa_list, asr_list):
+#                if ipa_ in phone_unknown:
+#                    translation_key_ipa2asr[ipa_] = asr_
+#                    phone_unknown.remove(ipa_)
+translation_key_ipa2asr['ə:'] = 'ə'
+translation_key_ipa2asr['r.'] = 'r'
+translation_key_ipa2asr['r:'] = 'r'
+np.save(os.path.join('phoneset', 'fame_ipa2asr.npy'), translation_key_ipa2asr)
+
+
+## check if all the phones in lexicon.asr are in translation_key_ipa2asr.
+timer_start = time.time()
+phoneset_lex = fame_functions.get_phoneset_from_lexicon(lexicon_asr, phoneset_name='asr')
+phoneset_lex.remove("")
+phoneset_asr = list(set(translation_key_ipa2asr.values()))
+print("phones which are in lexicon.asr but not in translation_key_ipa2asr:\n{}".format(
+    set(phoneset_lex) - set(phoneset_asr)))
+print("elapsed time: {}".format(time.time() - timer_start))
+
+## make the translation key from asr to htk.
+#multi_character_phones = [i for i in phoneset_asr if len(i) > 1]
+#multi_character_phones.sort(key=len, reverse=True)
+
+#lexicon_ipa = pd.read_table(lex_ipa, names=['word', 'pronunciation'])
+#with open(lex_ipa_, "w", encoding="utf-8") as fout:
+#    for word, pronunciation in zip(lexicon_ipa['word'], lexicon_ipa['pronunciation']):
+#        # ignore nasalization and '.'
+#        pronunciation_ = pronunciation.replace(u'ⁿ', '')
+#        pronunciation_ = pronunciation_.replace('.', '')
+#        pronunciation_split = convert_phone_set.split_ipa_fame(pronunciation_)
+#        fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split)))
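A note on the np.save/np.load pattern used in fame_test.py above: saving a dict wraps it in a 0-d object array, which is why .item() is needed on load. A self-contained sketch (on newer NumPy versions, allow_pickle=True must be passed explicitly):

import numpy as np

translation_key = {'ə:': 'ə', 'r.': 'r', 'r:': 'r'}
np.save('tmp_translation_key.npy', translation_key)

# .item() unwraps the dict from the 0-d object array.
restored = np.load('tmp_translation_key.npy', allow_pickle=True).item()
assert restored == translation_key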
@@ -1,7 +1,6 @@
 """ definition of the phones to be used. """

-## phones in IPA.
-phoneset_ipa = [
+phoneset = [
     # vowels
     'i̯',
     'i̯ⁿ',
@@ -103,5 +102,5 @@ phoneset_ipa = [

 ## the list of multi-character phones.
 # for example, 'i̯ⁿ' has length 3, but in the code it is treated as one phone.
-multi_character_phones_ipa = [i for i in phoneset_ipa if len(i) > 1]
-multi_character_phones_ipa.sort(key=len, reverse=True)
+multi_character_phones = [i for i in phoneset if len(i) > 1]
+multi_character_phones.sort(key=len, reverse=True)
106 acoustic_model/phoneset/fame_ipa.py Normal file
@@ -0,0 +1,106 @@
+""" definition of the phones to be used. """
+
+phoneset = [
+    # vowels
+    'i̯',
+    'i̯ⁿ',
+    'y',
+    'i',
+    'i.',
+    'iⁿ',
+    'i:',
+    'i:ⁿ',
+    'ɪ',
+    'ɪⁿ',
+    'ɪ.',
+    #'ɪ:', # not included in lex.ipa
+    'ɪ:ⁿ',
+    'e',
+    'e:',
+    'e:ⁿ',
+    'ə',
+    'əⁿ',
+    'ə:',
+    'ɛ',
+    'ɛ.',
+    'ɛⁿ',
+    'ɛ:',
+    'ɛ:ⁿ',
+    'a',
+    'aⁿ',
+    'a.',
+    'a:',
+    'a:ⁿ',
+    'ṷ',
+    'ṷ.',
+    'ṷⁿ',
+    #'ú', # only appears in the words 'feeste'(út) and 'gaste'(út), which are 'f e: s t ə' and 'yn' in lex_asr.
+    'u',
+    'uⁿ',
+    'u.',
+    'u:',
+    'u:ⁿ',
+    'ü',
+    'ü.',
+    'üⁿ',
+    'ü:',
+    'ü:ⁿ',
+    'o',
+    'oⁿ',
+    'o.',
+    'o:',
+    'o:ⁿ',
+    'ö',
+    'ö.',
+    'öⁿ',
+    'ö:',
+    'ö:ⁿ',
+    'ɔ',
+    'ɔ.',
+    'ɔⁿ',
+    'ɔ:',
+    'ɔ:ⁿ',
+    #'ɔ̈', # not included in lex.ipa
+    'ɔ̈.',
+    'ɔ̈:',
+
+    # plosives
+    'p',
+    'b',
+    't',
+    'tⁿ',
+    'd',
+    'k',
+    'g',
+    'ɡ', # = 'g'
+
+    # nasals
+    'm',
+    'n',
+    'ŋ',
+
+    # fricatives
+    'f',
+    'v',
+    's',
+    's:',
+    'z',
+    'zⁿ',
+    'x',
+    'h',
+
+    # tap and flip
+    'r',
+    'r.', # only appears in the words 'mearpartijestelsel' (does not exist in lex_asr) and 'tenoarpartij'.
+    'r:', # only appears in the words 'mûsearflearmûs' and 'sjochdêr'.
+
+    # approximant
+    'j',
+    'j.',
+    'l'
+]
+
+## the list of multi-character phones.
+# for example, 'i̯ⁿ' has length 3, but in the code it is treated as one phone.
+multi_character_phones = [i for i in phoneset if len(i) > 1]
+multi_character_phones.sort(key=len, reverse=True)
BIN acoustic_model/phoneset/fame_ipa2asr.npy Normal file
Binary file not shown.