Compare commits
No commits in common. "87abbbb95aeb0b72cd5b553cbec8e0901d472e4e" and "7844a56281e4f11df12e52b856512d7c30fe5f31" have entirely different histories.
87abbbb95a ... 7844a56281
_tmp/phone_to_be_searched.npy (new binary file, not shown)
_tmp/translation_key.npy (new binary file, not shown)
@@ -10,6 +10,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
 		..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
 		..\toolbox\evaluation.py = ..\toolbox\evaluation.py
+		..\toolbox\toolbox\file_handling.py = ..\toolbox\toolbox\file_handling.py
 		..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
 		..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
 		..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
@@ -22,7 +23,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
 	EndProjectSection
 EndProject
-Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "toolbox", "..\toolbox\toolbox.pyproj", "{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}"
+Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "pyhtk", "..\pyhtk\pyhtk\pyhtk.pyproj", "{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -32,8 +33,8 @@ Global
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 		{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Release|Any CPU.ActiveCfg = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
Binary file not shown.
@@ -23,18 +23,12 @@
   </PropertyGroup>
   <ItemGroup>
     <Compile Include="check_novoapi.py" />
-    <Compile Include="convert_phone_set.py">
-      <SubType>Code</SubType>
-    </Compile>
     <Compile Include="convert_xsampa2ipa.py">
       <SubType>Code</SubType>
     </Compile>
     <Compile Include="defaultfiles.py">
      <SubType>Code</SubType>
    </Compile>
-    <Compile Include="fame_phoneset.py">
-      <SubType>Code</SubType>
-    </Compile>
    <Compile Include="fa_test.py">
      <SubType>Code</SubType>
    </Compile>
@@ -1,29 +0,0 @@
-"""Module to convert phonemes."""
-
-def multi_character_tokenize(line, multi_character_tokens):
-    """Tries to match one of the tokens in multi_character_tokens at each position of line, starting at position 0,
-    if so tokenizes and eats that token. Otherwise tokenizes a single character"""
-    while line != '':
-        for token in multi_character_tokens:
-            if line.startswith(token) and len(token) > 0:
-                yield token
-                line = line[len(token):]
-                break
-        else:
-            yield line[:1]
-            line = line[1:]
-
-
-def split_word(word, multi_character_phones):
-    """
-    split a line by given phoneset.
-
-    Args:
-        word (str): a word written in given phoneset.
-        multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_phoneset.py.
-
-    Returns:
-        (word_seperated) (list): the word splitted in given phoneset.
-
-    """
-    return [phone for phone in multi_character_tokenize(word.strip(), multi_character_phones)]
|
@@ -4,8 +4,7 @@ import os
 
 #cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
 
-#htk_dir = r'C:\Aki\htk_fame'
-htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk'
+htk_dir = r'C:\Aki\htk_fame'
 
 config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
 #config_train = os.path.join(cygwin_dir, 'config', 'config.train')
@@ -29,21 +28,22 @@ config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
 #filePhoneList = config['pyHTK']['filePhoneList']
 #AcousticModel = config['pyHTK']['AcousticModel']
 
-repo_dir = r'C:\Users\Aki\source\repos'
+repo_dir = r'C:\Users\A.Kunikoshi\source\repos'
 ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
 forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
 accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
-toolbox_dir = os.path.join(repo_dir, 'toolbox')
+pyhtk_dir = os.path.join(repo_dir, 'pyhtk', 'pyhtk')
+toolbox_dir = os.path.join(repo_dir, 'toolbox', 'toolbox')
 
-#htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
-#config_hvite = os.path.join(htk_config_dir, 'config.HVite')
+htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
+config_hvite = os.path.join(htk_config_dir, 'config.HVite')
 #acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
-#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
-#phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
+acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
+phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
 
 WSL_dir = r'C:\OneDrive\WSL'
 #fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
-fame_dir = r'd:\_corpus\fame'
+fame_dir = r'f:\_corpus\fame'
 
 fame_s5_dir = os.path.join(fame_dir, 's5')
 fame_corpus_dir = os.path.join(fame_dir, 'corpus')
@@ -1,5 +1,5 @@
 import os
-os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
 
 import sys
 from collections import Counter
@@ -9,8 +9,24 @@ import numpy as np
 import pandas as pd
 
 import defaultfiles as default
-import fame_phoneset
-import convert_phone_set
+#sys.path.append(default.forced_alignment_module_dir)
+#from forced_alignment import convert_phone_set
+
+#def find_phone(lexicon_file, phone):
+#    """ Search where the phone is used in the lexicon. """
+#    with open(lexicon_file, "rt", encoding="utf-8") as fin:
+#        lines = fin.read()
+#        lines = lines.split('\n')
+
+#    extracted = []
+#    for line in lines:
+#        line = line.split('\t')
+#        if len(line) > 1:
+#            pronunciation = line[1]
+#            if phone in pronunciation:
+#                extracted.append(line)
+#    return extracted
 
 
 #def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
@@ -110,6 +126,25 @@ import convert_phone_set
 
 #    return ipa
 
+def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
+    """ Make a script file for HCopy using the filelist in FAME! corpus. """
+
+    filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
+    with open(filelist_txt) as fin:
+        filelist = fin.read()
+        filelist = filelist.split('\n')
+
+    with open(hcopy_scp, 'w') as fout:
+        for filename_ in filelist:
+            filename = filename_.replace('.TextGrid', '')
+
+            if len(filename) > 3: # remove '.', '..' and ''
+                wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
+                mfc_file = os.path.join(feature_dir, filename + '.mfc')
+
+                fout.write(wav_file + '\t' + mfc_file + '\n')
+
+
 #def make_filelist(input_dir, output_txt):
 #    """ Make a list of files in the input_dir. """
 #    filenames = os.listdir(input_dir)
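The hunk above moves make_hcopy_scp_from_filelist_in_fame up in the file (its docstring-heavy twin is removed further down). The script file it writes pairs one source wav with one target feature file per line, tab-separated; HCopy then consumes it in batch, as the commented subprocess line in the next file suggests. Illustrative contents (utterance names and feature directory invented):

```
f:\_corpus\fame\fame\wav\devel\utt0001.wav	<feature_dir>\utt0001.mfc
f:\_corpus\fame\fame\wav\devel\utt0002.wav	<feature_dir>\utt0002.mfc
```

and the corresponding batch call, equivalent to pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name):

```
HCopy -C config.HCopy -S hcopy.scp
```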
@@ -154,147 +189,64 @@ import convert_phone_set
 #            f.write('{0}\t{1}\n'.format(WORD, key))
 
 
-def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
-    """ Make a script file for HCopy using the filelist in FAME! corpus.
-
-    Args:
-        fame_dir (path): the directory of FAME corpus.
-        dataset (str): 'devel', 'test' or 'train'.
-        feature_dir (path): the directory where feature will be stored.
-        hcopy_scp (path): a script file for HCopy to be made.
-
-    """
-    filelist_txt = os.path.join(fame_dir, 'fame', 'filelists', dataset + 'list.txt')
-    with open(filelist_txt) as fin:
-        filelist = fin.read()
-        filelist = filelist.split('\n')
-
-    with open(hcopy_scp, 'w') as fout:
-        for filename_ in filelist:
-            filename = filename_.replace('.TextGrid', '')
-
-            if len(filename) > 3: # remove '.', '..' and ''
-                wav_file = os.path.join(fame_dir, 'fame', 'wav', dataset, filename + '.wav')
-                mfc_file = os.path.join(feature_dir, filename + '.mfc')
-
-                fout.write(wav_file + '\t' + mfc_file + '\n')
-
-
 def load_lexicon(lexicon_file):
-    """ load lexicon file as Data Frame.
-
-    Args:
-        lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'.
-
-    Returns:
-        lex (df): lexicon as Data Frame, which has columns 'word' and 'pronunciation'.
-
-    """
     lex = pd.read_csv(lexicon_file, delimiter='\t', header=None, encoding="utf-8")
     lex.rename(columns={0: 'word', 1: 'pronunciation'}, inplace=True)
     return lex
 
 
-def get_phoneset_from_lexicon(lexicon_file, phoneset='asr'):
-    """ Make a list of phones which appears in the lexicon.
-
-    Args:
-        lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'.
-        phoneset (str): the phoneset with which lexicon_file is written. 'asr'(default) or 'ipa'.
-
-    Returns:
-        (list_of_phones) (set): the set of phones included in the lexicon_file.
-
-    """
-    assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
-
-    lex = load_lexicon(lexicon_file)
-    if phoneset == 'asr':
-        return set(' '.join(lex['pronunciation']).split(' '))
-    elif phoneset == 'ipa':
-        join_pronunciations = ''.join(lex['pronunciation'])
-        return set(convert_phone_set.split_word(join_pronunciations, fame_phoneset.multi_character_phones_ipa))
-
-
-def extract_unknown_phones(ipa, known_phones):
-    """extract unknown phones in the pronunciation written in IPA.
-
-    Args:
-        ipa (str): a pronunciation written in IPA.
-        known_phones (list): list of phones already know.
-
-    Returns:
-        (list_of_phones) (list): unknown phones not included in 'known_phones'.
-
-    """
-    ipa_split = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
-    return [i for i in ipa_split if not i in known_phones]
-
-
-def get_translation_key(lexicon_file_ipa, lexicon_file_asr):
-    """ get correspondence between lexicon_file_ipa and lexicon_file_asr.
-
-    Args:
-        lexicon_file_ipa (path): lexicon in the format of 'word' /t 'pronunciation (IPA)'.
-        lexicon_file_asr (path): lexicon in the format of 'word' /t 'pronunciation (asr)'.
-            the each character of 'pronunciation' should be delimited by ' '.
-
-    Returns:
-        translation_key (dict): translation key from ipa to asr.
-        (phone_unknown) (list): the list of IPA phones, which does not appear in lexicon_file_asr.
-
-    """
-    lex_ipa = load_lexicon(lexicon_file_ipa)
-    lex_asr = load_lexicon(lexicon_file_asr)
-    phone_unknown = fame_phoneset.phoneset_ipa[:]
+def get_phonelist(lexicon_asr):
+    """ Make a list of phones which appears in the lexicon. """
+    #with open(lexicon_file, "rt", encoding="utf-8") as fin:
+    #    lines = fin.read()
+    #    lines = lines.split('\n')
+    #    phonelist = set([])
+    #    for line in lines:
+    #        line = line.split('\t')
+    #        if len(line) > 1:
+    #            pronunciation = set(line[1].split())
+    #            phonelist = phonelist | pronunciation
+    lex = load_lexicon(lexicon_asr)
+    return set(' '.join(lex['pronunciation']).split(' '))
+
+
+import time
+timer_start = time.time()
+
+#def get_translation_key():
+dir_tmp = r'c:\Users\A.Kunikoshi\source\repos\acoustic_model\_tmp'
+lexicon_ipa = r'f:\_corpus\FAME\lexicon\lex.ipa'
+lexicon_asr = r'f:\_corpus\FAME\lexicon\lex.asr'
+
+lex_ipa = load_lexicon(lexicon_ipa)
+lex_asr = load_lexicon(lexicon_asr)
+if 0:
+    phone_to_be_searched = get_phonelist(lexicon_asr)
     translation_key = dict()
-    for word in lex_ipa['word']:
-        if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
-            ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
+    for word in lex_asr['word']:
+        if np.sum(lex_asr['word'] == word) == 1 and np.sum(lex_ipa['word'] == word) == 1:
             asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
+            ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
 
-            ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
             asr_list = asr.split(' ')
+            # if there are phones which is not in phone_to_be_searched
+            if len([True for i in asr_list if i in phone_to_be_searched]) > 0:
+                if(len(ipa) == len(asr_list)):
+                    print("{0}: {1} --> {2}".format(word, ipa, asr))
+                    for ipa_, asr_ in zip(ipa, asr_list):
+                        if asr_ in phone_to_be_searched:
+                            #if not translation_key[ipa_] == asr_:
+                            translation_key[ipa_] = asr_
+                            phone_to_be_searched.remove(asr_)
 
-            # if there are phones which is not in phone_unknown
-            #if len([True for i in asr_list if i in phone_unknown]) > 0:
-            if(len(ipa_list) == len(asr_list)):
-                print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
-                for ipa_, asr_ in zip(ipa_list, asr_list):
-                    if ipa_ in phone_unknown:
-                        translation_key[ipa_] = asr_
-                        phone_unknown.remove(ipa_)
-    return translation_key, list(phone_unknown)
-
-
-def find_phone(lexicon_file, phone, phoneset='ipa'):
-    """ extract rows where the phone is used in the lexicon_file.
-
-    Args:
-        lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'.
-        phone (str): the phone to be searched.
-        phoneset (str): the phoneset with which lexicon_file is written. 'asr' or 'ipa'(default).
-
-    Returns:
-        extracted (df): rows where the phone is used.
-
-    ToDo:
-        * develop when the phonset == 'asr'.
-
-    """
-    assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
-
-    lex = load_lexicon(lexicon_file)
-
-    # to reduce the calculation time, only target rows which include 'phone' at least once.
-    lex_ = lex[lex['pronunciation'].str.count(phone)>0]
-
-    extracted = pd.DataFrame(index=[], columns=['word', 'pronunciation'])
-    for index, row in lex_.iterrows():
-        if phoneset == 'ipa':
-            pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_phoneset.multi_character_phones_ipa)
-            if phone in pronunciation:
-                extracted_ = pd.Series([row['word'], pronunciation], index=extracted.columns)
-                extracted = extracted.append(extracted_, ignore_index=True)
-    return extracted
+    print("elapsed time: {}".format(time.time() - timer_start))
+
+    np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
+    np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched)
+else:
+    translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item()
+    phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).item()
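The new script-level block above derives an IPA-to-ASR phone mapping from words whose two pronunciations align one-to-one, pairing symbols by position. A toy sketch of that idea (pronunciations invented, and using lists for both sides):

```python
# Toy sketch of the position-by-position phone mapping (invented data).
lex_ipa = {'baan': ['b', 'a:', 'n'], 'pit': ['p', 'ɪ', 't']}
lex_asr = {'baan': ['b', 'aa', 'n'], 'pit': ['p', 'i', 't']}

translation_key = {}
for word, ipa_list in lex_ipa.items():
    asr_list = lex_asr[word]
    if len(ipa_list) == len(asr_list):   # only unambiguous 1:1 alignments
        for ipa_, asr_ in zip(ipa_list, asr_list):
            translation_key[ipa_] = asr_

print(translation_key)  # {'b': 'b', 'a:': 'aa', 'n': 'n', 'p': 'p', 'ɪ': 'i', 't': 't'}
```

Note that the committed version iterates zip(ipa, asr_list) with ipa as a plain string, so multi-character IPA phones get paired per character; the older get_translation_key split ipa with convert_phone_set.split_word first.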
@@ -1,21 +1,21 @@
 import sys
 import os
-os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
 
 import tempfile
 #import configparser
 #import subprocess
 #from collections import Counter
-import time
 
 #import numpy as np
 #import pandas as pd
 
 import fame_functions
 import defaultfiles as default
+sys.path.append(default.pyhtk_dir)
+import pyhtk
 sys.path.append(default.toolbox_dir)
-import file_handling as fh
-from htk import pyhtk
+import file_handling
 
 
 ## ======================= user define =======================
@@ -28,8 +28,8 @@ from htk import pyhtk
 dataset_list = ['devel', 'test', 'train']
 
 # procedure
-extract_features = 0
-conv_lexicon = 1
+extract_features = 1
+#conv_lexicon = 0
 #check_lexicon = 0
 #make_mlf = 0
 #combine_files = 0
@@ -85,12 +85,14 @@ if not os.path.exists(tmp_dir):
 ## ======================= extract features =======================
 if extract_features:
     for dataset in dataset_list:
+    #for dataset in ['test']:
         print('==== {} ===='.format(dataset))
 
         # a script file for HCopy
         print(">>> making a script file for HCopy... \n")
         hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
         hcopy_scp.close()
+        #hcopy_scp = os.path.join(default.htk_dir, 'tmp', 'HCopy.scp')
 
         # get a list of features (hcopy.scp) from the filelist in FAME! corpus
         feature_dir_ = os.path.join(feature_dir, dataset)
@@ -100,70 +102,31 @@ if extract_features:
         # extract features
         print(">>> extracting features... \n")
         fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
 
+        #subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name
+        #subprocess.call(subprocessStr, shell=True)
         pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)
 
         # a script file for HCompV
         print(">>> making a script file for HCompV... \n")
 
+        ## ======================= make a list of features =======================
+        #if make_feature_list:
+        #    print("==== make a list of features ====\n")
+
+        #    for dataset in dataset_list:
+        #        print(dataset)
+
+        #feature_dir = output_dir + '\\mfc\\' + dataset
         hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
-        fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')
+        #am_func.make_filelist(feature_dir, hcompv_scp)
+        file_handling.make_filelist(feature_dir_, hcompv_scp, '.mfc')
 
 
 ## ======================= convert lexicon from ipa to fame_htk =======================
 if conv_lexicon:
     print('==== convert lexicon from ipa 2 fame ====\n')
 
-    #dir_out = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
-    lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
-    lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
-    lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
-
-    # get the correspondence between lex_ipa and lex_asr.
-    lex_asr = fame_functions.load_lexicon(lexicon_asr)
-    lex_ipa = fame_functions.load_lexicon(lexicon_ipa)
-    if 1:
-        timer_start = time.time()
-        translation_key, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)
-        print("elapsed time: {}".format(time.time() - timer_start))
-
-        np.save('translation_key_ipa2asr.npy', translation_key)
-        np.save('phone_unknown.npy', phone_unknown)
-    else:
-        translation_key = np.load('translation_key_ipa2asr.npy').item()
-        phone_unknown = np.load('phone_unknown.npy')
-        phone_unknown = list(phone_unknown)
-
-
-    ## manually check the correspondence for the phone in phone_unknown.
-    #p = phone_unknown[0]
-    #lex_ipa_ = find_phone(lexicon_ipa, p, phoneset='ipa')
-
-    #for word in lex_ipa_['word']:
-    #    ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
-    #    if np.sum(lex_asr['word'] == word) > 0:
-    #        asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
-
-    #        ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
-    #        asr_list = asr.split(' ')
-    #        if p in ipa_list and (len(ipa_list) == len(asr_list)):
-    #            print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
-    #            for ipa_, asr_ in zip(ipa_list, asr_list):
-    #                if ipa_ in phone_unknown:
-    #                    translation_key[ipa_] = asr_
-    #                    phone_unknown.remove(ipa_)
-
-
-    ## check if all the phones in lexicon_ipa are in fame_phoneset.py.
-    #timer_start = time.time()
-    #phoneset_lex = get_phoneset_from_lexicon(lexicon_ipa, phoneset='ipa')
-    #print("elapsed time: {}".format(time.time() - timer_start))
-
-    #phoneset_py = fame_phoneset.phoneset_ipa
-    #set(phoneset_lex) - set(phoneset_py)
-
-    ##timer_start = time.time()
-    ##extracted = find_phone(lexicon_ipa, 'ⁿ')
-    ##print("elapsed time: {}".format(time.time() - timer_start))
-
 
     # lex.asr is Kaldi compatible version of lex.ipa.
     # to check...
@@ -177,13 +140,13 @@ if conv_lexicon:
 #        fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split)))
 
     # convert each lexicon from ipa description to fame_htk phoneset.
-    #am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
-    #am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)
+    am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
+    am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)
 
     # combine lexicon
     # pronunciations which is not found in lex.asr are generated using G2P and listed in lex.oov.
     # therefore there is no overlap between lex_asr and lex_oov.
-    #am_func.combine_lexicon(lex_asr_htk, lex_oov_htk, lex_htk)
+    am_func.combine_lexicon(lex_asr_htk, lex_oov_htk, lex_htk)
 
 
 ## ======================= check if all the phones are successfully converted =======================
@@ -1,107 +0,0 @@
-""" definition of the phones to be used. """
-
-## phones in IPA.
-phoneset_ipa = [
-    # vowels
-    'i̯',
-    'i̯ⁿ',
-    'y',
-    'i',
-    'i.',
-    'iⁿ',
-    'i:',
-    'i:ⁿ',
-    'ɪ',
-    'ɪⁿ',
-    'ɪ.',
-    #'ɪ:', # not included in lex.ipa
-    'ɪ:ⁿ',
-    'e',
-    'e:',
-    'e:ⁿ',
-    'ə',
-    'əⁿ',
-    'ə:',
-    'ɛ',
-    'ɛ.',
-    'ɛⁿ',
-    'ɛ:',
-    'ɛ:ⁿ',
-    'a',
-    'aⁿ',
-    'a.',
-    'a:',
-    'a:ⁿ',
-    'ṷ',
-    'ṷ.',
-    'ṷⁿ',
-    #'ú', # only appears in word 'feeste'(út) and 'gaste'(út) which are 'f e: s t ə' and 'yn' in lex_asr.
-    'u',
-    'uⁿ',
-    'u.',
-    'u:',
-    'u:ⁿ',
-    'ü',
-    'ü.',
-    'üⁿ',
-    'ü:',
-    'ü:ⁿ',
-    'o',
-    'oⁿ',
-    'o.',
-    'o:',
-    'o:ⁿ',
-    'ö',
-    'ö.',
-    'öⁿ',
-    'ö:',
-    'ö:ⁿ',
-    'ɔ',
-    'ɔ.',
-    'ɔⁿ',
-    'ɔ:',
-    'ɔ:ⁿ',
-    #'ɔ̈', # not included in lex.ipa
-    'ɔ̈.',
-    'ɔ̈:',
-
-    # plosives
-    'p',
-    'b',
-    't',
-    'tⁿ',
-    'd',
-    'k',
-    'g',
-    'ɡ', # = 'g'
-
-    # nasals
-    'm',
-    'n',
-    'ŋ',
-
-    # fricatives
-    'f',
-    'v',
-    's',
-    's:',
-    'z',
-    'zⁿ',
-    'x',
-    'h',
-
-    # tap and flip
-    'r',
-    'r.', # only appears in word 'mearpartijestelsel'(does not exist in lex_asr) and 'tenoarpartij'.
-    'r:', # only appears in word 'mûsearflearmûs' and 'sjochdêr'.
-
-    # approximant
-    'j',
-    'j.',
-    'l'
-    ]
-
-## the list of multi character phones.
-# for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter.
-multi_character_phones_ipa = [i for i in phoneset_ipa if len(i) > 1]
-multi_character_phones_ipa.sort(key=len, reverse=True)
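The deleted fame_phoneset.py ends by collecting the multi-character phones and sorting them longest-first; that ordering is what lets the greedy tokenizer in convert_phone_set.py match 'i:ⁿ' before its prefixes 'i:' and 'i'. A small illustration (toy inventory):

```python
# Why the longest-first sort matters for greedy matching (toy inventory).
phoneset = ['i', 'i:', 'i:ⁿ']
multi_character_phones = [p for p in phoneset if len(p) > 1]
multi_character_phones.sort(key=len, reverse=True)
print(multi_character_phones)  # ['i:ⁿ', 'i:'] (longest first, so 'i:ⁿ' is tried before 'i:')
```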