The phone set is given in fame_phoneset.py; the translation key is obtained based on that information.
This commit is contained in:
parent
7844a56281
commit
813f013d7a
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -10,7 +10,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
|
|||||||
..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
|
..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
|
||||||
..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
|
..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
|
||||||
..\toolbox\evaluation.py = ..\toolbox\evaluation.py
|
..\toolbox\evaluation.py = ..\toolbox\evaluation.py
|
||||||
..\toolbox\toolbox\file_handling.py = ..\toolbox\toolbox\file_handling.py
|
|
||||||
..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
|
..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
|
||||||
..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
|
..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
|
||||||
..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
|
..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
|
||||||
@ -23,7 +22,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
|
|||||||
..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
|
..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
|
||||||
EndProjectSection
|
EndProjectSection
|
||||||
EndProject
|
EndProject
|
||||||
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "pyhtk", "..\pyhtk\pyhtk\pyhtk.pyproj", "{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}"
|
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "toolbox", "..\toolbox\toolbox.pyproj", "{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}"
|
||||||
EndProject
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
@ -33,8 +32,8 @@ Global
|
|||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{F0D46C9C-51C6-4989-8A2F-35F2A0C048BE}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
|
Binary file not shown.
@ -23,12 +23,18 @@
|
|||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Compile Include="check_novoapi.py" />
|
<Compile Include="check_novoapi.py" />
|
||||||
|
<Compile Include="convert_phone_set.py">
|
||||||
|
<SubType>Code</SubType>
|
||||||
|
</Compile>
|
||||||
<Compile Include="convert_xsampa2ipa.py">
|
<Compile Include="convert_xsampa2ipa.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
<Compile Include="defaultfiles.py">
|
<Compile Include="defaultfiles.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
|
<Compile Include="fame_phoneset.py">
|
||||||
|
<SubType>Code</SubType>
|
||||||
|
</Compile>
|
||||||
<Compile Include="fa_test.py">
|
<Compile Include="fa_test.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
|
29
acoustic_model/convert_phone_set.py
Normal file
29
acoustic_model/convert_phone_set.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
"""Module to convert phonemes."""
|
||||||
|
|
||||||
|
def multi_character_tokenize(line, multi_character_tokens):
    """Split ``line`` into tokens, keeping known multi-character tokens whole.

    At each position of ``line``, starting at position 0, the longest entry
    of ``multi_character_tokens`` that the remaining text starts with is
    yielded and consumed. When no entry matches, a single character is
    yielded instead.

    Args:
        line (str): the text to tokenize.
        multi_character_tokens (iterable of str): tokens that must be kept
            together. Empty strings are ignored.

    Yields:
        str: the next token of ``line``.
    """
    # Materialize once (the argument may be a one-shot iterable) and try the
    # longer tokens first, so that a token which is a prefix of another one
    # cannot shadow it. sorted() is stable, so tokens of equal length keep
    # the order given by the caller.
    tokens = sorted(
        (token for token in multi_character_tokens if token),
        key=len,
        reverse=True,
    )

    # Walk the string with an index instead of re-slicing it at every step.
    position = 0
    end = len(line)
    while position < end:
        for token in tokens:
            if line.startswith(token, position):
                yield token
                position += len(token)
                break
        else:
            # No multi-character token starts here: emit one character.
            yield line[position]
            position += 1
|
||||||
|
|
||||||
|
|
||||||
|
def split_word(word, multi_character_phones):
    """
    Break a word into the phones of the given phoneset.

    Args:
        word (str): one word written in the given phoneset. Leading and
            trailing whitespace is stripped before splitting.
        multi_character_phones: the phones that are longer than one
            character; passed on to multi_character_tokenize().

    Returns:
        list of str: the phones of the word, in order of appearance.
    """
    stripped = word.strip()
    return list(multi_character_tokenize(stripped, multi_character_phones))
|
@ -4,7 +4,8 @@ import os
|
|||||||
|
|
||||||
#cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
|
#cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
|
||||||
|
|
||||||
htk_dir = r'C:\Aki\htk_fame'
|
#htk_dir = r'C:\Aki\htk_fame'
|
||||||
|
htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk'
|
||||||
|
|
||||||
config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
|
config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
|
||||||
#config_train = os.path.join(cygwin_dir, 'config', 'config.train')
|
#config_train = os.path.join(cygwin_dir, 'config', 'config.train')
|
||||||
@ -28,22 +29,22 @@ config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
|
|||||||
#filePhoneList = config['pyHTK']['filePhoneList']
|
#filePhoneList = config['pyHTK']['filePhoneList']
|
||||||
#AcousticModel = config['pyHTK']['AcousticModel']
|
#AcousticModel = config['pyHTK']['AcousticModel']
|
||||||
|
|
||||||
repo_dir = r'C:\Users\A.Kunikoshi\source\repos'
|
repo_dir = r'C:\Users\Aki\source\repos'
|
||||||
ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
|
ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
|
||||||
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
|
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
|
||||||
accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
|
accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
|
||||||
pyhtk_dir = os.path.join(repo_dir, 'pyhtk', 'pyhtk')
|
#pyhtk_dir = os.path.join(repo_dir, 'pyhtk', 'pyhtk')
|
||||||
toolbox_dir = os.path.join(repo_dir, 'toolbox', 'toolbox')
|
toolbox_dir = os.path.join(repo_dir, 'toolbox')
|
||||||
|
|
||||||
htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
|
#htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
|
||||||
config_hvite = os.path.join(htk_config_dir, 'config.HVite')
|
#config_hvite = os.path.join(htk_config_dir, 'config.HVite')
|
||||||
#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
|
#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
|
||||||
acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
|
#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
|
||||||
phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
|
#phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
|
||||||
|
|
||||||
WSL_dir = r'C:\OneDrive\WSL'
|
WSL_dir = r'C:\OneDrive\WSL'
|
||||||
#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
|
#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
|
||||||
fame_dir = r'f:\_corpus\fame'
|
fame_dir = r'd:\_corpus\fame'
|
||||||
|
|
||||||
fame_s5_dir = os.path.join(fame_dir, 's5')
|
fame_s5_dir = os.path.join(fame_dir, 's5')
|
||||||
fame_corpus_dir = os.path.join(fame_dir, 'corpus')
|
fame_corpus_dir = os.path.join(fame_dir, 'corpus')
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
|
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
@ -9,6 +9,8 @@ import numpy as np
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
import defaultfiles as default
|
import defaultfiles as default
|
||||||
|
import fame_phoneset
|
||||||
|
import convert_phone_set
|
||||||
|
|
||||||
#sys.path.append(default.forced_alignment_module_dir)
|
#sys.path.append(default.forced_alignment_module_dir)
|
||||||
#from forced_alignment import convert_phone_set
|
#from forced_alignment import convert_phone_set
|
||||||
@ -213,40 +215,74 @@ def get_phonelist(lexicon_asr):
|
|||||||
lex = load_lexicon(lexicon_asr)
|
lex = load_lexicon(lexicon_asr)
|
||||||
return set(' '.join(lex['pronunciation']).split(' '))
|
return set(' '.join(lex['pronunciation']).split(' '))
|
||||||
|
|
||||||
import time
|
|
||||||
|
|
||||||
timer_start = time.time()
|
def extract_unknown_phones(word_list, known_phones):
    """Return the items of word_list that are not in known_phones, in order."""
    return [phone for phone in word_list if phone not in known_phones]
|
||||||
|
|
||||||
#def get_translation_key():
|
|
||||||
dir_tmp = r'c:\Users\A.Kunikoshi\source\repos\acoustic_model\_tmp'
|
|
||||||
lexicon_ipa = r'f:\_corpus\FAME\lexicon\lex.ipa'
|
|
||||||
lexicon_asr = r'f:\_corpus\FAME\lexicon\lex.asr'
|
|
||||||
|
|
||||||
lex_ipa = load_lexicon(lexicon_ipa)
|
if __name__ == '__main__':
|
||||||
lex_asr = load_lexicon(lexicon_asr)
|
import time
|
||||||
if 0:
|
timer_start = time.time()
|
||||||
phone_to_be_searched = get_phonelist(lexicon_asr)
|
|
||||||
translation_key = dict()
|
#def get_translation_key():
|
||||||
for word in lex_asr['word']:
|
dir_tmp = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
|
||||||
if np.sum(lex_asr['word'] == word) == 1 and np.sum(lex_ipa['word'] == word) == 1:
|
lexicon_ipa = r'd:\_corpus\FAME\lexicon\lex.ipa'
|
||||||
asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
|
lexicon_asr = r'd:\_corpus\FAME\lexicon\lex.asr'
|
||||||
ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
|
|
||||||
|
lex_ipa = load_lexicon(lexicon_ipa)
|
||||||
|
lex_asr = load_lexicon(lexicon_asr)
|
||||||
|
if 1:
|
||||||
|
phone_to_be_searched = fame_phoneset.phoneset_ipa[:]
|
||||||
|
translation_key = dict()
|
||||||
|
for word in lex_ipa['word']:
|
||||||
|
if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
|
||||||
|
ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
|
||||||
|
asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
|
||||||
|
|
||||||
asr_list = asr.split(' ')
|
ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
|
||||||
# if there are phones which is not in phone_to_be_searched
|
asr_list = asr.split(' ')
|
||||||
if len([True for i in asr_list if i in phone_to_be_searched]) > 0:
|
|
||||||
if(len(ipa) == len(asr_list)):
|
# if there are phones which is not in phone_to_be_searched
|
||||||
print("{0}: {1} --> {2}".format(word, ipa, asr))
|
#if len([True for i in asr_list if i in phone_to_be_searched]) > 0:
|
||||||
for ipa_, asr_ in zip(ipa, asr_list):
|
if(len(ipa_list) == len(asr_list)):
|
||||||
if asr_ in phone_to_be_searched:
|
print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
|
||||||
#if not translation_key[ipa_] == asr_:
|
for ipa_, asr_ in zip(ipa_list, asr_list):
|
||||||
|
if ipa_ in phone_to_be_searched:
|
||||||
translation_key[ipa_] = asr_
|
translation_key[ipa_] = asr_
|
||||||
phone_to_be_searched.remove(asr_)
|
phone_to_be_searched.remove(ipa_)
|
||||||
|
|
||||||
print("elapsed time: {}".format(time.time() - timer_start))
|
print("elapsed time: {}".format(time.time() - timer_start))
|
||||||
|
|
||||||
np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
|
np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
|
||||||
np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched)
|
np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched)
|
||||||
else:
|
else:
|
||||||
translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item()
|
translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy')).item()
|
||||||
phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).item()
|
phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy')).item()
|
||||||
|
|
||||||
|
|
||||||
|
#phone_unknown = list(phone_to_be_searched)
|
||||||
|
##phone_unknown.remove('')
|
||||||
|
#phone_known = list(translation_key.keys())
|
||||||
|
|
||||||
|
#p = phone_unknown[0]
|
||||||
|
|
||||||
|
### extract lines which contains 'unknown' phone.
|
||||||
|
#lex_ipa_ = lex_ipa[lex_ipa['pronunciation'].str.count(p)>0]
|
||||||
|
##phone_unknown_ = phone_unknown[:]
|
||||||
|
##phone_unknown_.remove(p)
|
||||||
|
#phone_known_ = phone_known[:]
|
||||||
|
#phone_known_.append(p)
|
||||||
|
#for index, row in lex_ipa_.iterrows():
|
||||||
|
# ipa = row['pronunciation']
|
||||||
|
# phone_extract_unknown_phones(asr_list, phone_known_):
|
||||||
|
|
||||||
|
# # check the number of phones in phone_unknown_
|
||||||
|
# if len([True for i in asr_list if i in phone_unknown_]) == 0:
|
||||||
|
# word = row['word']
|
||||||
|
# ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
|
||||||
|
# print("{0}: {1} --> {2}".format(word, ipa, asr))
|
||||||
|
# #print("{0}:{1}".format(index, row['pronunciation']))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
|
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||||
|
|
||||||
import tempfile
|
import tempfile
|
||||||
#import configparser
|
#import configparser
|
||||||
@ -12,10 +12,9 @@ import tempfile
|
|||||||
|
|
||||||
import fame_functions
|
import fame_functions
|
||||||
import defaultfiles as default
|
import defaultfiles as default
|
||||||
sys.path.append(default.pyhtk_dir)
|
|
||||||
import pyhtk
|
|
||||||
sys.path.append(default.toolbox_dir)
|
sys.path.append(default.toolbox_dir)
|
||||||
import file_handling
|
import file_handling as fh
|
||||||
|
from htk import pyhtk
|
||||||
|
|
||||||
|
|
||||||
## ======================= user define =======================
|
## ======================= user define =======================
|
||||||
@ -94,7 +93,7 @@ if extract_features:
|
|||||||
hcopy_scp.close()
|
hcopy_scp.close()
|
||||||
#hcopy_scp = os.path.join(default.htk_dir, 'tmp', 'HCopy.scp')
|
#hcopy_scp = os.path.join(default.htk_dir, 'tmp', 'HCopy.scp')
|
||||||
|
|
||||||
# get a list of features (hcopy.scp) from the filelist in FAME! corpus
|
## get a list of features (hcopy.scp) from the filelist in FAME! corpus
|
||||||
feature_dir_ = os.path.join(feature_dir, dataset)
|
feature_dir_ = os.path.join(feature_dir, dataset)
|
||||||
if not os.path.exists(feature_dir_):
|
if not os.path.exists(feature_dir_):
|
||||||
os.makedirs(feature_dir_)
|
os.makedirs(feature_dir_)
|
||||||
@ -110,6 +109,7 @@ if extract_features:
|
|||||||
# a script file for HCompV
|
# a script file for HCompV
|
||||||
print(">>> making a script file for HCompV... \n")
|
print(">>> making a script file for HCompV... \n")
|
||||||
|
|
||||||
|
|
||||||
## ======================= make a list of features =======================
|
## ======================= make a list of features =======================
|
||||||
#if make_feature_list:
|
#if make_feature_list:
|
||||||
# print("==== make a list of features ====\n")
|
# print("==== make a list of features ====\n")
|
||||||
@ -121,7 +121,7 @@ if extract_features:
|
|||||||
hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
|
hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
|
||||||
|
|
||||||
#am_func.make_filelist(feature_dir, hcompv_scp)
|
#am_func.make_filelist(feature_dir, hcompv_scp)
|
||||||
file_handling.make_filelist(feature_dir_, hcompv_scp, '.mfc')
|
fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')
|
||||||
|
|
||||||
|
|
||||||
## ======================= convert lexicon from ipa to fame_htk =======================
|
## ======================= convert lexicon from ipa to fame_htk =======================
|
||||||
|
55
acoustic_model/fame_phoneset.py
Normal file
55
acoustic_model/fame_phoneset.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# Phone symbols (IPA notation) used by this project, grouped by the
# categories named in the comments below. A long phone is written as the
# base symbol followed by ':'.
phoneset_ipa = [
    # vowels
    'i̯',
    'y',
    'i',
    'i:',
    'ɪ',
    'ɪ:',
    'e',
    'e:',
    'ə',
    'ə:',
    'ɛ',
    'ɛ:',
    'a',
    'a:',
    'ṷ',
    'ú',
    'u',
    'u:',
    'ü',
    'ü:',
    'o',
    'o:',
    'ö',
    'ö:',
    'ɔ',
    'ɔ:',
    'ɔ̈',
    'ɔ̈:',

    # plosives
    'p',
    'b',
    't',
    'd',
    'k',
    'g',

    # nasals
    'm',
    'n',
    'ŋ',

    # fricatives
    'f',
    'v',
    's',
    's:',
    'z',
    'x',
    'h',
    ]

# Subset of phoneset_ipa whose string length exceeds one. len() counts code
# points, so besides the ':'-suffixed phones this also selects any entry
# written as a base character plus a combining diacritic.
# NOTE(review): the list keeps the order of phoneset_ipa, so a phone appears
# before its ':'-suffixed variant -- confirm that consumers which match
# tokens in list order handle that as intended.
multi_character_phones_ipa = [i for i in phoneset_ipa if len(i) > 1]
|
Loading…
Reference in New Issue
Block a user