Compare commits


No commits in common. "87abbbb95aeb0b72cd5b553cbec8e0901d472e4e" and "7844a56281e4f11df12e52b856512d7c30fe5f31" have entirely different histories.

11 changed files with 118 additions and 344 deletions

BIN  (file name not shown): binary file not shown
BIN  (file name not shown): binary file not shown
BIN  _tmp/translation_key.npy (new file): binary file not shown
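Note: the committed _tmp/translation_key.npy (see the np.save call in fame_functions.py below) stores a Python dict wrapped in a 0-dimensional object array, which is why the code reloads it with .item(). A minimal sketch of reading it back; on NumPy 1.16.3 and later, allow_pickle=True must additionally be passed for pickled arrays like this:

    import numpy as np

    # the dict comes back as a 0-d object array; .item() unwraps it
    translation_key = np.load('_tmp/translation_key.npy', allow_pickle=True).item()
    print(type(translation_key))  # <class 'dict'>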

File: (Visual Studio solution; name not shown)

@@ -10,6 +10,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
 		..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
 		..\toolbox\evaluation.py = ..\toolbox\evaluation.py
+		..\toolbox\toolbox\file_handling.py = ..\toolbox\toolbox\file_handling.py
 		..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
 		..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
 		..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
@@ -22,7 +23,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
 	EndProjectSection
 EndProject
-Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "toolbox", "..\toolbox\toolbox.pyproj", "{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}"
+Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "pyhtk", "..\pyhtk\pyhtk\pyhtk.pyproj", "{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -32,8 +33,8 @@ Global
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 		{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Release|Any CPU.ActiveCfg = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE

File: (Python project file, .pyproj; name not shown)

@@ -23,18 +23,12 @@
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="check_novoapi.py" />
-    <Compile Include="convert_phone_set.py">
-      <SubType>Code</SubType>
-    </Compile>
     <Compile Include="convert_xsampa2ipa.py">
       <SubType>Code</SubType>
     </Compile>
     <Compile Include="defaultfiles.py">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="fame_phoneset.py">
-      <SubType>Code</SubType>
-    </Compile>
     <Compile Include="fa_test.py">
       <SubType>Code</SubType>
     </Compile>

File: convert_phone_set.py (deleted)

@@ -1,29 +0,0 @@
-"""Module to convert phonemes."""
-
-
-def multi_character_tokenize(line, multi_character_tokens):
-    """Tries to match one of the tokens in multi_character_tokens at each position of line, starting at position 0,
-    if so tokenizes and eats that token. Otherwise tokenizes a single character"""
-    while line != '':
-        for token in multi_character_tokens:
-            if line.startswith(token) and len(token) > 0:
-                yield token
-                line = line[len(token):]
-                break
-        else:
-            yield line[:1]
-            line = line[1:]
-
-
-def split_word(word, multi_character_phones):
-    """
-    split a line by given phoneset.
-
-    Args:
-        word (str): a word written in given phoneset.
-        multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_phoneset.py.
-
-    Returns:
-        (word_seperated) (list): the word splitted in given phoneset.
-    """
-    return [phone for phone in multi_character_tokenize(word.strip(), multi_character_phones)]
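Note on the deleted tokenizer: it is greedy, trying every multi-character phone at each position and falling back to a single character only when none matches, so the candidate list must be ordered longest-first. A minimal sketch of how split_word behaved, assuming the two functions above are defined; the phone list here is an invented subset, not the real fame_phoneset inventory:

    # invented subset, sorted longest-first as fame_phoneset.py sorts them
    multi_character_phones = ['i̯ⁿ', 'i:', 's:']

    print(split_word('si:t', multi_character_phones))
    # -> ['s', 'i:', 't']   ('i:' stays one phone instead of 'i' + ':')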

File: defaultfiles.py

@@ -4,8 +4,7 @@ import os
 #cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
-#htk_dir = r'C:\Aki\htk_fame'
-htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk'
+htk_dir = r'C:\Aki\htk_fame'
 config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
 #config_train = os.path.join(cygwin_dir, 'config', 'config.train')
@@ -29,21 +28,22 @@ config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
 #filePhoneList = config['pyHTK']['filePhoneList']
 #AcousticModel = config['pyHTK']['AcousticModel']

-repo_dir = r'C:\Users\Aki\source\repos'
+repo_dir = r'C:\Users\A.Kunikoshi\source\repos'
 ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
 forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
 accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
-toolbox_dir = os.path.join(repo_dir, 'toolbox')
+pyhtk_dir = os.path.join(repo_dir, 'pyhtk', 'pyhtk')
+toolbox_dir = os.path.join(repo_dir, 'toolbox', 'toolbox')

-#htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
-#config_hvite = os.path.join(htk_config_dir, 'config.HVite')
+htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
+config_hvite = os.path.join(htk_config_dir, 'config.HVite')
 #acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
-#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
+acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
-#phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
+phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')

 WSL_dir = r'C:\OneDrive\WSL'
 #fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
-fame_dir = r'd:\_corpus\fame'
+fame_dir = r'f:\_corpus\fame'

 fame_s5_dir = os.path.join(fame_dir, 's5')
 fame_corpus_dir = os.path.join(fame_dir, 'corpus')

File: fame_functions.py

@@ -1,5 +1,5 @@
 import os
-os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
 import sys
 from collections import Counter
@@ -9,8 +9,24 @@ import numpy as np
 import pandas as pd

 import defaultfiles as default
-import fame_phoneset
-import convert_phone_set
+#sys.path.append(default.forced_alignment_module_dir)
+#from forced_alignment import convert_phone_set
+
+#def find_phone(lexicon_file, phone):
+#    """ Search where the phone is used in the lexicon. """
+#    with open(lexicon_file, "rt", encoding="utf-8") as fin:
+#        lines = fin.read()
+#        lines = lines.split('\n')
+#
+#    extracted = []
+#    for line in lines:
+#        line = line.split('\t')
+#        if len(line) > 1:
+#            pronunciation = line[1]
+#            if phone in pronunciation:
+#                extracted.append(line)
+#    return extracted

 #def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
@@ -110,6 +126,25 @@ import convert_phone_set
 #    return ipa

+def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
+    """ Make a script file for HCopy using the filelist in FAME! corpus. """
+    filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
+    with open(filelist_txt) as fin:
+        filelist = fin.read()
+        filelist = filelist.split('\n')
+
+    with open(hcopy_scp, 'w') as fout:
+        for filename_ in filelist:
+            filename = filename_.replace('.TextGrid', '')
+
+            if len(filename) > 3: # remove '.', '..' and ''
+                wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
+                mfc_file = os.path.join(feature_dir, filename + '.mfc')
+
+                fout.write(wav_file + '\t' + mfc_file + '\n')
+
 #def make_filelist(input_dir, output_txt):
 #    """ Make a list of files in the input_dir. """
 #    filenames = os.listdir(input_dir)
@@ -154,147 +189,64 @@ import convert_phone_set
 #            f.write('{0}\t{1}\n'.format(WORD, key))

-def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
-    """ Make a script file for HCopy using the filelist in FAME! corpus.
-
-    Args:
-        fame_dir (path): the directory of FAME corpus.
-        dataset (str): 'devel', 'test' or 'train'.
-        feature_dir (path): the directory where feature will be stored.
-        hcopy_scp (path): a script file for HCopy to be made.
-
-    """
-    filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
-    with open(filelist_txt) as fin:
-        filelist = fin.read()
-        filelist = filelist.split('\n')
-
-    with open(hcopy_scp, 'w') as fout:
-        for filename_ in filelist:
-            filename = filename_.replace('.TextGrid', '')
-
-            if len(filename) > 3: # remove '.', '..' and ''
-                wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
-                mfc_file = os.path.join(feature_dir, filename + '.mfc')
-
-                fout.write(wav_file + '\t' + mfc_file + '\n')
-
-
 def load_lexicon(lexicon_file):
-    """ load lexicon file as Data Frame.
-
-    Args:
-        lexicon_file (path): lexicon in the format of 'word' \t 'pronunciation'.
-
-    Returns:
-        lex (df): lexicon as Data Frame, which has columns 'word' and 'pronunciation'.
-
-    """
     lex = pd.read_csv(lexicon_file, delimiter='\t', header=None, encoding="utf-8")
     lex.rename(columns={0: 'word', 1: 'pronunciation'}, inplace=True)
     return lex


-def get_phoneset_from_lexicon(lexicon_file, phoneset='asr'):
-    """ Make a list of phones which appears in the lexicon.
-
-    Args:
-        lexicon_file (path): lexicon in the format of 'word' \t 'pronunciation'.
-        phoneset (str): the phoneset with which lexicon_file is written. 'asr'(default) or 'ipa'.
-
-    Returns:
-        (list_of_phones) (set): the set of phones included in the lexicon_file.
-
-    """
-    assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
-
-    lex = load_lexicon(lexicon_file)
-    if phoneset == 'asr':
-        return set(' '.join(lex['pronunciation']).split(' '))
-    elif phoneset == 'ipa':
-        join_pronunciations = ''.join(lex['pronunciation'])
-        return set(convert_phone_set.split_word(join_pronunciations, fame_phoneset.multi_character_phones_ipa))
-
-
-def extract_unknown_phones(ipa, known_phones):
-    """extract unknown phones in the pronunciation written in IPA.
-
-    Args:
-        ipa (str): a pronunciation written in IPA.
-        known_phones (list): list of phones already know.
-
-    Returns:
-        (list_of_phones) (list): unknown phones not included in 'known_phones'.
-
-    """
-    ipa_split = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
-    return [i for i in ipa_split if not i in known_phones]
-
-
-def get_translation_key(lexicon_file_ipa, lexicon_file_asr):
-    """ get correspondence between lexicon_file_ipa and lexicon_file_asr.
-
-    Args:
-        lexicon_file_ipa (path): lexicon in the format of 'word' \t 'pronunciation (IPA)'.
-        lexicon_file_asr (path): lexicon in the format of 'word' \t 'pronunciation (asr)'.
-            the each character of 'pronunciation' should be delimited by ' '.
-
-    Returns:
-        translation_key (dict): translation key from ipa to asr.
-        (phone_unknown) (list): the list of IPA phones, which does not appear in lexicon_file_asr.
-
-    """
-    lex_ipa = load_lexicon(lexicon_file_ipa)
-    lex_asr = load_lexicon(lexicon_file_asr)
-    phone_unknown = fame_phoneset.phoneset_ipa[:]
-    translation_key = dict()
-
-    for word in lex_ipa['word']:
-        if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
-            ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
-            asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
-
-            ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
-            asr_list = asr.split(' ')
-
-            # if there are phones which is not in phone_unknown
-            #if len([True for i in asr_list if i in phone_unknown]) > 0:
-            if(len(ipa_list) == len(asr_list)):
-                print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
-                for ipa_, asr_ in zip(ipa_list, asr_list):
-                    if ipa_ in phone_unknown:
-                        translation_key[ipa_] = asr_
-                        phone_unknown.remove(ipa_)
-
-    return translation_key, list(phone_unknown)
-
-
-def find_phone(lexicon_file, phone, phoneset='ipa'):
-    """ extract rows where the phone is used in the lexicon_file.
-
-    Args:
-        lexicon_file (path): lexicon in the format of 'word' \t 'pronunciation'.
-        phone (str): the phone to be searched.
-        phoneset (str): the phoneset with which lexicon_file is written. 'asr' or 'ipa'(default).
-
-    Returns:
-        extracted (df): rows where the phone is used.
-
-    ToDo:
-        * develop when the phonset == 'asr'.
-
-    """
-    assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
-
-    lex = load_lexicon(lexicon_file)
-
-    # to reduce the calculation time, only target rows which include 'phone' at least once.
-    lex_ = lex[lex['pronunciation'].str.count(phone)>0]
-
-    extracted = pd.DataFrame(index=[], columns=['word', 'pronunciation'])
-    for index, row in lex_.iterrows():
-        if phoneset == 'ipa':
-            pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_phoneset.multi_character_phones_ipa)
-            if phone in pronunciation:
-                extracted_ = pd.Series([row['word'], pronunciation], index=extracted.columns)
-                extracted = extracted.append(extracted_, ignore_index=True)
-    return extracted
+def get_phonelist(lexicon_asr):
+    """ Make a list of phones which appears in the lexicon. """
+    #with open(lexicon_file, "rt", encoding="utf-8") as fin:
+    #    lines = fin.read()
+    #    lines = lines.split('\n')
+    #    phonelist = set([])
+    #    for line in lines:
+    #        line = line.split('\t')
+    #        if len(line) > 1:
+    #            pronunciation = set(line[1].split())
+    #            phonelist = phonelist | pronunciation
+    lex = load_lexicon(lexicon_asr)
+    return set(' '.join(lex['pronunciation']).split(' '))
+
+
+import time
+timer_start = time.time()
+
+#def get_translation_key():
+dir_tmp = r'c:\Users\A.Kunikoshi\source\repos\acoustic_model\_tmp'
+lexicon_ipa = r'f:\_corpus\FAME\lexicon\lex.ipa'
+lexicon_asr = r'f:\_corpus\FAME\lexicon\lex.asr'
+
+lex_ipa = load_lexicon(lexicon_ipa)
+lex_asr = load_lexicon(lexicon_asr)
+if 0:
+    phone_to_be_searched = get_phonelist(lexicon_asr)
+    translation_key = dict()
+    for word in lex_asr['word']:
+        if np.sum(lex_asr['word'] == word) == 1 and np.sum(lex_ipa['word'] == word) == 1:
+            asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
+            ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
+
+            asr_list = asr.split(' ')
+            # if there are phones which is not in phone_to_be_searched
+            if len([True for i in asr_list if i in phone_to_be_searched]) > 0:
+                if(len(ipa) == len(asr_list)):
+                    print("{0}: {1} --> {2}".format(word, ipa, asr))
+                    for ipa_, asr_ in zip(ipa, asr_list):
+                        if asr_ in phone_to_be_searched:
+                            #if not translation_key[ipa_] == asr_:
+                            translation_key[ipa_] = asr_
+                            phone_to_be_searched.remove(asr_)
+
+    print("elapsed time: {}".format(time.time() - timer_start))
+
+    np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
+    np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched)
+else:
+    translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item()
+    phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).item()
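Note on the change above: both versions build the translation key the same way. For every word that occurs exactly once in each lexicon, the IPA and ASR pronunciations are split into phone lists and, when the two lists have equal length, paired position by position. A self-contained toy sketch of that idea (invented pronunciations, not the FAME lex.ipa / lex.asr entries):

    # toy lexicons: word -> phone list (invented for illustration)
    ipa_lex = {'lab': ['l', 'a:', 'b'], 'sok': ['s', 'ɔ', 'k']}
    asr_lex = {'lab': ['l', 'aa', 'b'], 'sok': ['s', 'o', 'k']}

    translation_key = {}
    for word, ipa_list in ipa_lex.items():
        asr_list = asr_lex.get(word, [])
        if len(ipa_list) == len(asr_list):  # pairing is only safe when lengths match
            for ipa_, asr_ in zip(ipa_list, asr_list):
                translation_key.setdefault(ipa_, asr_)

    print(translation_key)
    # {'l': 'l', 'a:': 'aa', 'b': 'b', 's': 's', 'ɔ': 'o', 'k': 'k'}

One difference worth noting: the removed version splits the IPA string with convert_phone_set.split_word before zipping, while the added version zips the raw IPA string character by character, which silently misaligns any multi-character phone.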

File: (HTK pipeline driver script; name not shown)

@@ -1,21 +1,21 @@
 import sys
 import os
-os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
 import tempfile
 #import configparser
 #import subprocess
 #from collections import Counter
-import time

 #import numpy as np
 #import pandas as pd

 import fame_functions
 import defaultfiles as default
+sys.path.append(default.pyhtk_dir)
+import pyhtk
 sys.path.append(default.toolbox_dir)
-import file_handling as fh
-from htk import pyhtk
+import file_handling


 ## ======================= user define =======================
@@ -28,8 +28,8 @@ from htk import pyhtk
 dataset_list = ['devel', 'test', 'train']

 # procedure
-extract_features = 0
-conv_lexicon = 1
+extract_features = 1
+#conv_lexicon = 0
 #check_lexicon = 0
 #make_mlf = 0
 #combine_files = 0
@@ -85,12 +85,14 @@ if not os.path.exists(tmp_dir):
 ## ======================= extract features =======================
 if extract_features:
     for dataset in dataset_list:
+    #for dataset in ['test']:
         print('==== {} ===='.format(dataset))

         # a script file for HCopy
         print(">>> making a script file for HCopy... \n")
         hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
         hcopy_scp.close()
+        #hcopy_scp = os.path.join(default.htk_dir, 'tmp', 'HCopy.scp')

         # get a list of features (hcopy.scp) from the filelist in FAME! corpus
         feature_dir_ = os.path.join(feature_dir, dataset)
@@ -100,71 +102,32 @@ if extract_features:
         # extract features
         print(">>> extracting features... \n")
         fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
+        #subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name
+        #subprocess.call(subprocessStr, shell=True)
         pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)

         # a script file for HCompV
         print(">>> making a script file for HCompV... \n")
+
+        ## ======================= make a list of features =======================
+        #if make_feature_list:
+        #    print("==== make a list of features ====\n")
+        #    for dataset in dataset_list:
+        #        print(dataset)
+        #        #feature_dir = output_dir + '\\mfc\\' + dataset
         hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
-        fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')
+        #am_func.make_filelist(feature_dir, hcompv_scp)
+        file_handling.make_filelist(feature_dir_, hcompv_scp, '.mfc')


 ## ======================= convert lexicon from ipa to fame_htk =======================
 if conv_lexicon:
     print('==== convert lexicon from ipa 2 fame ====\n')

-    #dir_out = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
-    lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
-    lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
-    lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
-
-    # get the correspondence between lex_ipa and lex_asr.
-    lex_asr = fame_functions.load_lexicon(lexicon_asr)
-    lex_ipa = fame_functions.load_lexicon(lexicon_ipa)
-    if 1:
-        timer_start = time.time()
-        translation_key, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)
-        print("elapsed time: {}".format(time.time() - timer_start))
-
-        np.save('translation_key_ipa2asr.npy', translation_key)
-        np.save('phone_unknown.npy', phone_unknown)
-    else:
-        translation_key = np.load('translation_key_ipa2asr.npy').item()
-        phone_unknown = np.load('phone_unknown.npy')
-        phone_unknown = list(phone_unknown)
-
-    ## manually check the correspondence for the phone in phone_unknown.
-    #p = phone_unknown[0]
-    #lex_ipa_ = find_phone(lexicon_ipa, p, phoneset='ipa')
-    #for word in lex_ipa_['word']:
-    #    ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
-    #    if np.sum(lex_asr['word'] == word) > 0:
-    #        asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
-
-    #        ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
-    #        asr_list = asr.split(' ')
-    #        if p in ipa_list and (len(ipa_list) == len(asr_list)):
-    #            print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
-    #            for ipa_, asr_ in zip(ipa_list, asr_list):
-    #                if ipa_ in phone_unknown:
-    #                    translation_key[ipa_] = asr_
-    #                    phone_unknown.remove(ipa_)
-
-    ## check if all the phones in lexicon_ipa are in fame_phoneset.py.
-    #timer_start = time.time()
-    #phoneset_lex = get_phoneset_from_lexicon(lexicon_ipa, phoneset='ipa')
-    #print("elapsed time: {}".format(time.time() - timer_start))
-    #phoneset_py = fame_phoneset.phoneset_ipa
-    #set(phoneset_lex) - set(phoneset_py)
-
-    ##timer_start = time.time()
-    ##extracted = find_phone(lexicon_ipa, 'ⁿ')
-    ##print("elapsed time: {}".format(time.time() - timer_start))

     # lex.asr is Kaldi compatible version of lex.ipa.
     # to check...
@@ -177,13 +140,13 @@ if conv_lexicon:
     #        fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split)))

     # convert each lexicon from ipa description to fame_htk phoneset.
-    #am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
-    #am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)
+    am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
+    am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)

     # combine lexicon
     # pronunciations which is not found in lex.asr are generated using G2P and listed in lex.oov.
     # therefore there is no overlap between lex_asr and lex_oov.
-    #am_func.combine_lexicon(lex_asr_htk, lex_oov_htk, lex_htk)
+    am_func.combine_lexicon(lex_asr_htk, lex_oov_htk, lex_htk)

 ## ======================= check if all the phones are successfully converted =======================
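Note on the feature-extraction step above: the .scp file written by make_hcopy_scp_from_filelist_in_fame is a plain two-column list, one source wav and one target mfc per line, which pyhtk.wav2mfc passes to HTK's HCopy together with the config file; the commented subprocess lines show the equivalent direct call. With invented file names, one tab-separated scp entry and the corresponding invocation would look like:

    f:\_corpus\fame\fame\wav\devel\utt001.wav    c:\Aki\htk_fame\mfc\devel\utt001.mfc

    HCopy -C C:\Aki\htk_fame\config\config.HCopy -S hcopy.scp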

File: fame_phoneset.py (deleted)

@@ -1,107 +0,0 @@
-""" definition of the phones to be used. """
-
-## phones in IPA.
-phoneset_ipa = [
-    # vowels
-    '',
-    'i̯ⁿ',
-    'y',
-    'i',
-    'i.',
-    'iⁿ',
-    'i:',
-    'i:ⁿ',
-    'ɪ',
-    'ɪⁿ',
-    'ɪ.',
-    #'ɪ:', # not included in lex.ipa
-    'ɪ:ⁿ',
-    'e',
-    'e:',
-    'e:ⁿ',
-    'ə',
-    'əⁿ',
-    'ə:',
-    'ɛ',
-    'ɛ.',
-    'ɛⁿ',
-    'ɛ:',
-    'ɛ:ⁿ',
-    'a',
-    'aⁿ',
-    'a.',
-    'a:',
-    'a:ⁿ',
-    '',
-    'ṷ.',
-    'ṷⁿ',
-    #'ú', # only appears in word 'feeste'(út) and 'gaste'(út) which are 'f e: s t ə' and 'yn' in lex_asr.
-    'u',
-    'uⁿ',
-    'u.',
-    'u:',
-    'u:ⁿ',
-    'ü',
-    'ü.',
-    'üⁿ',
-    'ü:',
-    'ü:ⁿ',
-    'o',
-    'oⁿ',
-    'o.',
-    'o:',
-    'o:ⁿ',
-    'ö',
-    'ö.',
-    'öⁿ',
-    'ö:',
-    'ö:ⁿ',
-    'ɔ',
-    'ɔ.',
-    'ɔⁿ',
-    'ɔ:',
-    'ɔ:ⁿ',
-    #'ɔ̈', # not included in lex.ipa
-    'ɔ̈.',
-    'ɔ̈:',
-
-    # plosives
-    'p',
-    'b',
-    't',
-    'tⁿ',
-    'd',
-    'k',
-    'g',
-    'ɡ', # = 'g'
-
-    # nasals
-    'm',
-    'n',
-    'ŋ',
-
-    # fricatives
-    'f',
-    'v',
-    's',
-    's:',
-    'z',
-    'zⁿ',
-    'x',
-    'h',
-
-    # tap and flip
-    'r',
-    'r.', # only appears in word 'mearpartijestelsel'(does not exist in lex_asr) and 'tenoarpartij'.
-    'r:', # only appears in word 'mûsearflearmûs' and 'sjochdêr'.
-
-    # approximant
-    'j',
-    'j.',
-    'l'
-    ]
-
-
-## the list of multi character phones.
-# for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter.
-multi_character_phones_ipa = [i for i in phoneset_ipa if len(i) > 1]
-multi_character_phones_ipa.sort(key=len, reverse=True)
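The final two deleted lines are what made the greedy tokenizer in convert_phone_set.py work: candidates have to be tried longest-first, otherwise a shorter phone such as 'i:' would shadow 'i:ⁿ' and leave a stray 'ⁿ' token. A small sketch of the effect, using an invented subset rather than the full inventory:

    phones = ['i', 'i:', 'i:ⁿ', 'ɛ', 'ɛ:']   # invented subset
    multi = [p for p in phones if len(p) > 1]

    multi.sort(key=len, reverse=True)        # longest first
    print(multi)                             # ['i:ⁿ', 'i:', 'ɛ:']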