Compare commits
5 Commits
87abbbb95a
...
f6e7c8eefa
Author | SHA1 | Date | |
---|---|---|---|
|
f6e7c8eefa | ||
|
322a8a0079 | ||
|
22cccfb61d | ||
|
dc6b7b84b6 | ||
|
8cda93de75 |
Binary file not shown.
Binary file not shown.
@ -4,8 +4,7 @@
|
||||
<SchemaVersion>2.0</SchemaVersion>
|
||||
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
||||
<ProjectHome>.</ProjectHome>
|
||||
<StartupFile>
|
||||
</StartupFile>
|
||||
<StartupFile>fame_hmm.py</StartupFile>
|
||||
<SearchPath>
|
||||
</SearchPath>
|
||||
<WorkingDirectory>.</WorkingDirectory>
|
||||
@ -23,7 +22,7 @@
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="check_novoapi.py" />
|
||||
<Compile Include="convert_phone_set.py">
|
||||
<Compile Include="convert_phoneset.py">
|
||||
<SubType>Code</SubType>
|
||||
</Compile>
|
||||
<Compile Include="convert_xsampa2ipa.py">
|
||||
@ -32,7 +31,7 @@
|
||||
<Compile Include="defaultfiles.py">
|
||||
<SubType>Code</SubType>
|
||||
</Compile>
|
||||
<Compile Include="fame_phoneset.py">
|
||||
<Compile Include="fame_test.py">
|
||||
<SubType>Code</SubType>
|
||||
</Compile>
|
||||
<Compile Include="fa_test.py">
|
||||
@ -50,9 +49,20 @@
|
||||
<SubType>Code</SubType>
|
||||
</Compile>
|
||||
<Compile Include="fame_hmm.py" />
|
||||
<Compile Include="phoneset\fame_asr.py" />
|
||||
<Compile Include="phoneset\fame_ipa.py" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Content Include="config.ini" />
|
||||
<Content Include="phoneset\fame_ipa2asr.npy" />
|
||||
<Content Include="phoneset\output_get_translation_key_phone_unknown.npy" />
|
||||
<Content Include="phoneset\output_get_translation_key_translation_key.npy" />
|
||||
<Content Include="phoneset\__pycache__\fame_asr.cpython-36.pyc" />
|
||||
<Content Include="phoneset\__pycache__\fame_ipa.cpython-36.pyc" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Folder Include="phoneset\" />
|
||||
<Folder Include="phoneset\__pycache__\" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets" />
|
||||
<!-- Uncomment the CoreCompile target to enable the Build command in
|
||||
|
@ -20,10 +20,21 @@ def split_word(word, multi_character_phones):
|
||||
|
||||
Args:
|
||||
word (str): a word written in given phoneset.
|
||||
multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_phoneset.py.
|
||||
multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_ipa.py.
|
||||
|
||||
Returns:
|
||||
(word_seperated) (list): the word splitted in given phoneset.
|
||||
|
||||
"""
|
||||
return [phone for phone in multi_character_tokenize(word.strip(), multi_character_phones)]
|
||||
return [phone
|
||||
for phone in multi_character_tokenize(word.strip(), multi_character_phones)
|
||||
]
|
||||
|
||||
|
||||
def convert_phoneset(word_list, translation_key):
    """ Translate each phone in word_list with translation_key.

    Args:
        word_list (list): phones written in the source phoneset.
        translation_key (dict): maps a source phone to its replacement.

    Returns:
        (list): the translated phones, in the same order.
            a phone which is not a key of translation_key is kept as it is.

    """
    converted = []
    for phone in word_list:
        converted.append(translation_key.get(phone, phone))
    return converted
|
@ -1,14 +1,13 @@
|
||||
import os
|
||||
|
||||
#default_hvite_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'htk', 'config.HVite')
|
||||
# add path of the parent directory
|
||||
#os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
#cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
|
||||
|
||||
#htk_dir = r'C:\Aki\htk_fame'
|
||||
htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk'
|
||||
|
||||
config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
|
||||
#config_train = os.path.join(cygwin_dir, 'config', 'config.train')
|
||||
|
||||
#config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
|
||||
#mkhmmdefs_pl = os.path.join(cygwin_dir, 'src', 'acoustic_model', 'mkhmmdefs.pl')
|
||||
|
||||
@ -39,11 +38,11 @@ toolbox_dir = os.path.join(repo_dir, 'toolbox')
|
||||
#config_hvite = os.path.join(htk_config_dir, 'config.HVite')
|
||||
#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
|
||||
#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
|
||||
#phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
|
||||
phonelist_txt = os.path.join(htk_dir, 'config', 'phonelist.txt')
|
||||
|
||||
WSL_dir = r'C:\OneDrive\WSL'
|
||||
#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
|
||||
fame_dir = r'd:\_corpus\fame'
|
||||
fame_dir = r'c:\OneDrive\Research\rug\_data\FAME'
|
||||
|
||||
fame_s5_dir = os.path.join(fame_dir, 's5')
|
||||
fame_corpus_dir = os.path.join(fame_dir, 'corpus')
|
||||
|
@ -9,38 +9,8 @@ import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import defaultfiles as default
|
||||
import fame_phoneset
|
||||
import convert_phone_set
|
||||
|
||||
|
||||
#def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
|
||||
# """ Convert a lexicon file from IPA to HTK format for FAME! corpus. """
|
||||
|
||||
# lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation'])
|
||||
# with open(lexicon_file_out, "w", encoding="utf-8") as fout:
|
||||
# for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
|
||||
# pronunciation_no_space = pronunciation.replace(' ', '')
|
||||
# pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)
|
||||
# if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk:
|
||||
# fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))
|
||||
|
||||
|
||||
#def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
|
||||
# """ Combine two lexicon files and sort by words. """
|
||||
|
||||
# with open(lexicon_file1, "rt", encoding="utf-8") as fin:
|
||||
# lines1 = fin.read()
|
||||
# lines1 = lines1.split('\n')
|
||||
# with open(lexicon_file2, "rt", encoding="utf-8") as fin:
|
||||
# lines2 = fin.read()
|
||||
# lines2 = lines2.split('\n')
|
||||
|
||||
# lex1 = pd.read_table(lexicon_file1, names=['word', 'pronunciation'])
|
||||
# lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation'])
|
||||
# lex = pd.concat([lex1, lex2])
|
||||
# lex = lex.sort_values(by='word', ascending=True)
|
||||
# lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')
|
||||
|
||||
import convert_phoneset
|
||||
from phoneset import fame_ipa, fame_asr
|
||||
|
||||
#def read_fileFA(fileFA):
|
||||
# """
|
||||
@ -110,14 +80,6 @@ import convert_phone_set
|
||||
|
||||
# return ipa
|
||||
|
||||
#def make_filelist(input_dir, output_txt):
|
||||
# """ Make a list of files in the input_dir. """
|
||||
# filenames = os.listdir(input_dir)
|
||||
|
||||
# with open(output_txt, 'w') as fout:
|
||||
# for filename in filenames:
|
||||
# fout.write(input_dir + '\\' + filename + '\n')
|
||||
|
||||
|
||||
#def make_htk_dict(word, pronvar_, fileDic, output_type):
|
||||
# """
|
||||
@ -179,10 +141,11 @@ def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_s
|
||||
|
||||
fout.write(wav_file + '\t' + mfc_file + '\n')
|
||||
|
||||
return
|
||||
|
||||
|
||||
def load_lexicon(lexicon_file):
|
||||
""" load lexicon file as Data Frame.
|
||||
""" load lexicon file as data frame.
|
||||
|
||||
Args:
|
||||
lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'.
|
||||
@ -196,25 +159,27 @@ def load_lexicon(lexicon_file):
|
||||
return lex
|
||||
|
||||
|
||||
def get_phoneset_from_lexicon(lexicon_file, phoneset_name='asr'):
    """ Make a list of phones which appears in the lexicon.

    Args:
        lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'.
        phoneset_name (str): the name of phoneset with which lexicon_file is written. 'asr'(default) or 'ipa'.

    Returns:
        (list_of_phones) (set): the set of phones included in the lexicon_file.

    """
    assert phoneset_name in ['asr', 'ipa'], 'phoneset_name should be \'asr\' or \'ipa\''

    lex = load_lexicon(lexicon_file)
    if phoneset_name == 'asr':
        # asr pronunciations are space delimited, so splitting on ' ' gives the phones.
        return set(' '.join(lex['pronunciation']).split(' '))
    elif phoneset_name == 'ipa':
        # ipa pronunciations are not delimited; split_word tokenizes them using
        # the list of multi-character phones.
        join_pronunciations = ''.join(lex['pronunciation'])
        # split_word is called through the convert_phoneset module, which is the
        # module this file imports (the former name convert_phone_set is no longer imported).
        return set(convert_phoneset.split_word(join_pronunciations, fame_ipa.multi_character_phones))

    return
|
||||
|
||||
|
||||
def extract_unknown_phones(ipa, known_phones):
|
||||
@ -228,7 +193,7 @@ def extract_unknown_phones(ipa, known_phones):
|
||||
(list_of_phones) (list): unknown phones not included in 'known_phones'.
|
||||
|
||||
"""
|
||||
ipa_split = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
|
||||
ipa_split = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones)
|
||||
return [i for i in ipa_split if not i in known_phones]
|
||||
|
||||
|
||||
@ -247,14 +212,14 @@ def get_translation_key(lexicon_file_ipa, lexicon_file_asr):
|
||||
"""
|
||||
lex_ipa = load_lexicon(lexicon_file_ipa)
|
||||
lex_asr = load_lexicon(lexicon_file_asr)
|
||||
phone_unknown = fame_phoneset.phoneset_ipa[:]
|
||||
phone_unknown = fame_ipa.phoneset[:]
|
||||
translation_key = dict()
|
||||
for word in lex_ipa['word']:
|
||||
if np.sum(lex_ipa['word'] == word) == 1 and np.sum(lex_asr['word'] == word) == 1:
|
||||
ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
|
||||
asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
|
||||
|
||||
ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
|
||||
ipa_list = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones)
|
||||
asr_list = asr.split(' ')
|
||||
|
||||
# if there are phones which is not in phone_unknown
|
||||
@ -268,13 +233,13 @@ def get_translation_key(lexicon_file_ipa, lexicon_file_asr):
|
||||
return translation_key, list(phone_unknown)
|
||||
|
||||
|
||||
def find_phone(lexicon_file, phone, phoneset='ipa'):
|
||||
def find_phone(lexicon_file, phone, phoneset_name='ipa'):
|
||||
""" extract rows where the phone is used in the lexicon_file.
|
||||
|
||||
Args:
|
||||
lexicon_file (path): lexicon in the format of 'word' /t 'pronunciation'.
|
||||
phone (str): the phone to be searched.
|
||||
phoneset (str): the phoneset with which lexicon_file is written. 'asr' or 'ipa'(default).
|
||||
phoneset_name (str): the name of phoneset_name with which lexicon_file is written. 'asr' or 'ipa'(default).
|
||||
|
||||
Returns:
|
||||
extracted (df): rows where the phone is used.
|
||||
@ -283,7 +248,7 @@ def find_phone(lexicon_file, phone, phoneset='ipa'):
|
||||
* develop when the phonset == 'asr'.
|
||||
|
||||
"""
|
||||
assert phoneset in ['asr', 'ipa'], 'phoneset should be \'asr\' or \'ipa\''
|
||||
assert phoneset_name in ['asr', 'ipa'], 'phoneset_name should be \'asr\' or \'ipa\''
|
||||
|
||||
lex = load_lexicon(lexicon_file)
|
||||
|
||||
@ -292,9 +257,87 @@ def find_phone(lexicon_file, phone, phoneset='ipa'):
|
||||
|
||||
extracted = pd.DataFrame(index=[], columns=['word', 'pronunciation'])
|
||||
for index, row in lex_.iterrows():
|
||||
if phoneset == 'ipa':
|
||||
pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_phoneset.multi_character_phones_ipa)
|
||||
if phoneset_name == 'ipa':
|
||||
pronunciation = convert_phone_set.split_word(row['pronunciation'], fame_ipa.multi_character_phones)
|
||||
if phone in pronunciation:
|
||||
extracted_ = pd.Series([row['word'], pronunciation], index=extracted.columns)
|
||||
extracted = extracted.append(extracted_, ignore_index=True)
|
||||
return extracted
|
||||
|
||||
|
||||
def asr2htk_space_delimited(pronunciation):
    """ Convert a space delimited asr pronunciation into htk phones.

    Phones listed in fame_asr.phones_to_be_removed are dropped,
    the remaining phones are mapped with fame_asr.reduction_key
    (phones without an entry are kept) and then translated with
    fame_asr.translation_key_asr2htk.

    Args:
        pronunciation (str): space delimited asr phones.

    Returns:
        (pronunciation) (str): space delimited phones after the conversion.

    """
    reduced_phones = []
    for asr_phone in pronunciation.split(' '):
        if asr_phone in fame_asr.phones_to_be_removed:
            continue
        reduced_phones.append(fame_asr.reduction_key.get(asr_phone, asr_phone))

    htk_phones = convert_phoneset.convert_phoneset(
        reduced_phones, fame_asr.translation_key_asr2htk)
    return ' '.join(htk_phones)
|
||||
|
||||
|
||||
def lexicon_asr2htk(lexicon_file_asr, lexicon_file_htk):
    """ Convert a lexicon file from asr to htk format (ascii).

    Each word is converted with word2htk and upper-cased, each pronunciation
    is converted with asr2htk_space_delimited. The result is written to
    lexicon_file_htk as tab separated utf-8 text without header and index.

    Args:
        lexicon_file_asr (path): a lexicon file written in asr format e.g. fame/lex.asr.
        lexicon_file_htk (path): a lexicon file written in htk format (ascii).

    """
    lex_asr = load_lexicon(lexicon_file_asr)

    def word2htk_(row):
        return word2htk(row['word'])

    def asr2htk_space_delimited_(row):
        return asr2htk_space_delimited(row['pronunciation'])

    lex_htk = pd.DataFrame({
        'word': lex_asr.apply(word2htk_, axis=1).str.upper(),
        'pronunciation': lex_asr.apply(asr2htk_space_delimited_, axis=1)
        })
    # fix the column order to word, pronunciation.
    # DataFrame.ix was removed in pandas 1.0, therefore label based .loc is used.
    lex_htk = lex_htk.loc[:, ['word', 'pronunciation']]
    lex_htk.to_csv(lexicon_file_htk, header=None, index=None, sep='\t', encoding='utf-8')
    return
|
||||
|
||||
|
||||
def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
    """ Merge two lexicon files into one lexicon file sorted by words.

    Args:
        lexicon_file1, lexicon_file2 (path): input lexicon files, loaded with load_lexicon.
        lexicon_out (path): output lexicon file. the rows of both inputs are
            sorted by 'word' in ascending order and written as tab separated
            utf-8 text without header and index.

    """
    lexicons = [load_lexicon(lexicon_file)
                for lexicon_file in (lexicon_file1, lexicon_file2)]
    combined = pd.concat(lexicons).sort_values(by='word', ascending=True)
    combined.to_csv(lexicon_out, sep='\t', encoding='utf-8', header=False, index=False)
|
||||
|
||||
|
||||
def fix_single_quote(lexicon_file):
    """ escape single quotes in the words which begin with a single quote.

    Rows containing N/A are removed. In every word which starts with a single
    quote, each single quote is prefixed with a backslash (str.replace is
    applied to the whole word, not only to its first character).

    Args:
        lexicon_file (path): lexicon file, which will be overwritten.

    """
    lex = load_lexicon(lexicon_file)
    lex = lex.dropna() # remove N/A.

    # dropna() keeps the original index labels, so the labels cannot be used as
    # positions (.iat) any more. the rows are selected with a boolean mask instead.
    starts_with_quote = lex['word'].str.startswith('\'')
    lex.loc[starts_with_quote, 'word'] = lex.loc[starts_with_quote, 'word'].map(
        lambda word: word.replace('\'', '\\\''))

    # to_csv does not work with space seperator. therefore all tabs should manually be replaced.
    #lex.to_csv(lexicon_file, index=False, header=False, encoding="utf-8", sep=' ', quoting=csv.QUOTE_NONE, escapechar='\\')
    lex.to_csv(lexicon_file, index=False, header=False, sep='\t', encoding='utf-8')
    return
|
||||
|
||||
|
||||
def word2htk(word):
    """ Convert a word character by character with fame_asr.translation_key_word2htk.

    Args:
        word (str): the word to be converted.

    Returns:
        (str): the converted word. characters which are not a key of
            fame_asr.translation_key_word2htk are kept as they are.

    """
    htk_characters = []
    for character in word:
        htk_characters.append(fame_asr.translation_key_word2htk.get(character, character))
    return ''.join(htk_characters)
|
||||
|
@ -3,15 +3,15 @@ import os
|
||||
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||
|
||||
import tempfile
|
||||
#import configparser
|
||||
#import subprocess
|
||||
#from collections import Counter
|
||||
import shutil
|
||||
import glob
|
||||
import time
|
||||
|
||||
#import numpy as np
|
||||
#import pandas as pd
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import fame_functions
|
||||
from phoneset import fame_ipa, fame_asr
|
||||
import defaultfiles as default
|
||||
sys.path.append(default.toolbox_dir)
|
||||
import file_handling as fh
|
||||
@ -19,60 +19,42 @@ from htk import pyhtk
|
||||
|
||||
|
||||
## ======================= user define =======================
|
||||
#repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
|
||||
#curr_dir = repo_dir + '\\acoustic_model'
|
||||
#config_ini = curr_dir + '\\config.ini'
|
||||
#output_dir = 'C:\\OneDrive\\Research\\rug\\experiments\\friesian\\acoustic_model'
|
||||
#forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment'
|
||||
# procedure
|
||||
make_lexicon = 0
|
||||
make_label = 0 # it takes roughly 4800 sec on Surface pro 2.
|
||||
make_htk_files = 0
|
||||
extract_features = 0
|
||||
flat_start = 0
|
||||
train_model_without_sp = 1
|
||||
|
||||
|
||||
# pre-defined values.
|
||||
|
||||
dataset_list = ['devel', 'test', 'train']
|
||||
hmmdefs_name = 'hmmdefs'
|
||||
|
||||
# procedure
|
||||
extract_features = 0
|
||||
conv_lexicon = 1
|
||||
#check_lexicon = 0
|
||||
#make_mlf = 0
|
||||
#combine_files = 0
|
||||
#flat_start = 0
|
||||
#train_model = 1
|
||||
lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
|
||||
lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')
|
||||
|
||||
config_dir = os.path.join(default.htk_dir, 'config')
|
||||
config_hcopy = os.path.join(config_dir, 'config.HCopy')
|
||||
config_train = os.path.join(config_dir, 'config.train')
|
||||
global_ded = os.path.join(config_dir, 'global.ded')
|
||||
mkphones_led = os.path.join(config_dir, 'mkphones.led')
|
||||
prototype = os.path.join(config_dir, 'proto39')
|
||||
|
||||
model_dir = os.path.join(default.htk_dir, 'model')
|
||||
|
||||
|
||||
#sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir))
|
||||
#sys.path.append(forced_alignment_module)
|
||||
#from forced_alignment import convert_phone_set
|
||||
# directories / files to be made.
|
||||
|
||||
lexicon_dir = os.path.join(default.htk_dir, 'lexicon')
|
||||
lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr')
|
||||
lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov')
|
||||
lexicon_htk = os.path.join(lexicon_dir, 'lex.htk')
|
||||
|
||||
|
||||
## ======================= load variables =======================
|
||||
|
||||
#config = configparser.ConfigParser()
|
||||
#config.sections()
|
||||
#config.read(config_ini)
|
||||
|
||||
#config_hcopy = config['Settings']['config_hcopy']
|
||||
#config_train = config['Settings']['config_train']
|
||||
#mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl']
|
||||
#FAME_dir = config['Settings']['FAME_dir']
|
||||
|
||||
#lex_asr = FAME_dir + '\\lexicon\\lex.asr'
|
||||
#lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'
|
||||
#lex_oov = FAME_dir + '\\lexicon\\lex.oov'
|
||||
#lex_oov_htk = FAME_dir + '\\lexicon\\lex.oov_htk'
|
||||
##lex_ipa = FAME_dir + '\\lexicon\\lex.ipa'
|
||||
##lex_ipa_ = FAME_dir + '\\lexicon\\lex.ipa_'
|
||||
##lex_ipa_htk = FAME_dir + '\\lexicon\\lex.ipa_htk'
|
||||
#lex_htk = FAME_dir + '\\lexicon\\lex_original.htk'
|
||||
#lex_htk_ = FAME_dir + '\\lexicon\\lex.htk'
|
||||
|
||||
#hcompv_scp = output_dir + '\\scp\\combined.scp'
|
||||
#combined_mlf = output_dir + '\\label\\combined.mlf'
|
||||
|
||||
#model_dir = output_dir + '\\model'
|
||||
#model0_dir = model_dir + '\\hmm0'
|
||||
#proto_init = model_dir + '\\proto38'
|
||||
#proto_name = 'proto'
|
||||
#phonelist = output_dir + '\\config\\phonelist_friesian.txt'
|
||||
#hmmdefs_name = 'hmmdefs'
|
||||
phonelist_txt = os.path.join(config_dir, 'phonelist.txt')
|
||||
model0_dir = os.path.join(model_dir, 'hmm0')
|
||||
|
||||
feature_dir = os.path.join(default.htk_dir, 'mfc')
|
||||
if not os.path.exists(feature_dir):
|
||||
@ -80,134 +62,26 @@ if not os.path.exists(feature_dir):
|
||||
tmp_dir = os.path.join(default.htk_dir, 'tmp')
|
||||
if not os.path.exists(tmp_dir):
|
||||
os.makedirs(tmp_dir)
|
||||
label_dir = os.path.join(default.htk_dir, 'label')
|
||||
if not os.path.exists(label_dir):
|
||||
os.makedirs(label_dir)
|
||||
|
||||
|
||||
## ======================= extract features =======================
|
||||
if extract_features:
|
||||
for dataset in dataset_list:
|
||||
print('==== {} ===='.format(dataset))
|
||||
|
||||
# a script file for HCopy
|
||||
print(">>> making a script file for HCopy... \n")
|
||||
hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
|
||||
hcopy_scp.close()
|
||||
|
||||
# get a list of features (hcopy.scp) from the filelist in FAME! corpus
|
||||
feature_dir_ = os.path.join(feature_dir, dataset)
|
||||
if not os.path.exists(feature_dir_):
|
||||
os.makedirs(feature_dir_)
|
||||
|
||||
# extract features
|
||||
print(">>> extracting features... \n")
|
||||
fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
|
||||
pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)
|
||||
|
||||
# a script file for HCompV
|
||||
print(">>> making a script file for HCompV... \n")
|
||||
hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
|
||||
fh.make_filelist(feature_dir_, hcompv_scp, '.mfc')
|
||||
|
||||
|
||||
## ======================= convert lexicon from ipa to fame_htk =======================
|
||||
if conv_lexicon:
|
||||
print('==== convert lexicon from ipa 2 fame ====\n')
|
||||
|
||||
#dir_out = r'c:\Users\Aki\source\repos\acoustic_model\_tmp'
|
||||
lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
|
||||
lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
|
||||
lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
|
||||
|
||||
# get the correspondence between lex_ipa and lex_asr.
|
||||
lex_asr = fame_functions.load_lexicon(lexicon_asr)
|
||||
lex_ipa = fame_functions.load_lexicon(lexicon_ipa)
|
||||
if 1:
|
||||
## ======================= make lexicon for HTK =======================
|
||||
if make_lexicon:
|
||||
timer_start = time.time()
|
||||
translation_key, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)
|
||||
print("elapsed time: {}".format(time.time() - timer_start))
|
||||
print('==== making lexicon for HTK ====')
|
||||
|
||||
np.save('translation_key_ipa2asr.npy', translation_key)
|
||||
np.save('phone_unknown.npy', phone_unknown)
|
||||
else:
|
||||
translation_key = np.load('translation_key_ipa2asr.npy').item()
|
||||
phone_unknown = np.load('phone_unknown.npy')
|
||||
phone_unknown = list(phone_unknown)
|
||||
|
||||
|
||||
## manually check the correspondence for the phone in phone_unknown.
|
||||
#p = phone_unknown[0]
|
||||
#lex_ipa_ = find_phone(lexicon_ipa, p, phoneset='ipa')
|
||||
|
||||
#for word in lex_ipa_['word']:
|
||||
# ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
|
||||
# if np.sum(lex_asr['word'] == word) > 0:
|
||||
# asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
|
||||
|
||||
# ipa_list = convert_phone_set.split_word(ipa, fame_phoneset.multi_character_phones_ipa)
|
||||
# asr_list = asr.split(' ')
|
||||
# if p in ipa_list and (len(ipa_list) == len(asr_list)):
|
||||
# print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
|
||||
# for ipa_, asr_ in zip(ipa_list, asr_list):
|
||||
# if ipa_ in phone_unknown:
|
||||
# translation_key[ipa_] = asr_
|
||||
# phone_unknown.remove(ipa_)
|
||||
|
||||
|
||||
## check if all the phones in lexicon_ipa are in fame_phoneset.py.
|
||||
#timer_start = time.time()
|
||||
#phoneset_lex = get_phoneset_from_lexicon(lexicon_ipa, phoneset='ipa')
|
||||
#print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
#phoneset_py = fame_phoneset.phoneset_ipa
|
||||
#set(phoneset_lex) - set(phoneset_py)
|
||||
|
||||
##timer_start = time.time()
|
||||
##extracted = find_phone(lexicon_ipa, 'ⁿ')
|
||||
##print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
|
||||
# lex.asr is Kaldi compatible version of lex.ipa.
|
||||
# to check...
|
||||
#lexicon_ipa = pd.read_table(lex_ipa, names=['word', 'pronunciation'])
|
||||
#with open(lex_ipa_, "w", encoding="utf-8") as fout:
|
||||
# for word, pronunciation in zip(lexicon_ipa['word'], lexicon_ipa['pronunciation']):
|
||||
# # ignore nasalization and '.'
|
||||
# pronunciation_ = pronunciation.replace(u'ⁿ', '')
|
||||
# pronunciation_ = pronunciation_.replace('.', '')
|
||||
# pronunciation_split = convert_phone_set.split_ipa_fame(pronunciation_)
|
||||
# fout.write("{0}\t{1}\n".format(word, ' '.join(pronunciation_split)))
|
||||
|
||||
# convert each lexicon from ipa description to fame_htk phoneset.
|
||||
#am_func.ipa2famehtk_lexicon(lex_oov, lex_oov_htk)
|
||||
#am_func.ipa2famehtk_lexicon(lex_asr, lex_asr_htk)
|
||||
# convert each lexicon from fame_asr phoneset to fame_htk phoneset.
|
||||
print('>>> converting each lexicon from fame_asr phoneset to fame_htk phoneset...')
|
||||
fame_functions.lexicon_asr2htk(lexicon_asr, lexicon_htk_asr)
|
||||
fame_functions.lexicon_asr2htk(lexicon_oov, lexicon_htk_oov)
|
||||
|
||||
# combine lexicon
|
||||
print('>>> combining lexicon files into one lexicon...')
|
||||
# pronunciations which is not found in lex.asr are generated using G2P and listed in lex.oov.
|
||||
# therefore there is no overlap between lex_asr and lex_oov.
|
||||
#am_func.combine_lexicon(lex_asr_htk, lex_oov_htk, lex_htk)
|
||||
|
||||
|
||||
## ======================= check if all the phones are successfully converted =======================
|
||||
if check_lexicon:
|
||||
print("==== check if all the phones are successfully converted. ====\n")
|
||||
|
||||
# the phones used in the lexicon.
|
||||
phonelist_asr = am_func.get_phonelist(lex_asr)
|
||||
phonelist_oov = am_func.get_phonelist(lex_oov)
|
||||
phonelist_htk = am_func.get_phonelist(lex_htk)
|
||||
|
||||
phonelist = phonelist_asr.union(phonelist_oov)
|
||||
|
||||
# the lines which include a specific phone.
|
||||
lines = am_func.find_phone(lex_asr, 'g')
|
||||
|
||||
# statistics over the lexicon
|
||||
lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation'])
|
||||
pronunciation = lexicon_htk['pronunciation']
|
||||
phones_all = []
|
||||
for word in pronunciation:
|
||||
phones_all = phones_all + word.split()
|
||||
c = Counter(phones_all)
|
||||
|
||||
fame_functions.combine_lexicon(lexicon_htk_asr, lexicon_htk_oov, lexicon_htk)
|
||||
|
||||
## =======================
|
||||
## manually make changes to the pronunciation dictionary and save it as lex.htk
|
||||
@ -215,164 +89,172 @@ if check_lexicon:
|
||||
# (1) Replace all tabs with single space;
|
||||
# (2) Put a '\' before any dictionary entry beginning with single quote
|
||||
#http://electroblaze.blogspot.nl/2013/03/understanding-htk-error-messages.html
|
||||
fame_functions.fix_single_quote(lexicon_htk)
|
||||
print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
|
||||
## ======================= make label file =======================
|
||||
if make_mlf:
|
||||
print("==== make mlf ====\n")
|
||||
|
||||
print("generating word level transcription...\n")
|
||||
## ======================= make label files =======================
|
||||
if make_label:
|
||||
for dataset in dataset_list:
|
||||
hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
|
||||
hcompv_scp2 = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
|
||||
script_list = FAME_dir + '\\data\\' + dataset + '\\text'
|
||||
mlf_word = output_dir + '\\label\\' + dataset + '_word.mlf'
|
||||
mlf_phone = output_dir + '\\label\\' + dataset + '_phone.mlf'
|
||||
timer_start = time.time()
|
||||
print("==== making label files on dataset {}".format(dataset))
|
||||
|
||||
# lexicon
|
||||
lexicon_htk = pd.read_table(lex_htk, names=['word', 'pronunciation'])
|
||||
|
||||
# list of features
|
||||
with open(hcompv_scp) as fin:
|
||||
features = fin.read()
|
||||
features = features.split('\n')
|
||||
script_list = os.path.join(default.fame_dir, 'data', dataset, 'text')
|
||||
wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
|
||||
label_dir_ = os.path.join(label_dir, dataset)
|
||||
dictionary_file = os.path.join(label_dir_, 'temp.dic')
|
||||
fh.make_new_directory(label_dir_)
|
||||
|
||||
# list of scripts
|
||||
with open(script_list, "rt", encoding="utf-8") as fin:
|
||||
scripts = fin.read()
|
||||
scripts = pd.Series(scripts.split('\n'))
|
||||
scripts = fin.read().split('\n')
|
||||
|
||||
i = 0
|
||||
missing_words = []
|
||||
fscp = open(hcompv_scp2, 'wt')
|
||||
fmlf = open(mlf_word, "wt", encoding="utf-8")
|
||||
fmlf.write("#!MLF!#\n")
|
||||
feature_nr = 1
|
||||
for feature in features:
|
||||
sys.stdout.write("\r%d/%d" % (feature_nr, len(features)))
|
||||
sys.stdout.flush()
|
||||
feature_nr += 1
|
||||
file_basename = os.path.basename(feature).replace('.mfc', '')
|
||||
for line in scripts:
|
||||
# sample line:
|
||||
# sp0457m_test_1968_plakkenfryslanterhorne_2168 en dan begjinne je natuerlik
|
||||
filename_ = line.split(' ')[0]
|
||||
filename = '_'.join(filename_.split('_')[1:])
|
||||
sentence = ' '.join(line.split(' ')[1:])
|
||||
sentence_htk = fame_functions.word2htk(sentence)
|
||||
|
||||
# get words from scripts.
|
||||
try:
|
||||
script = scripts[scripts.str.contains(file_basename)]
|
||||
except IndexError:
|
||||
script = []
|
||||
wav_file = os.path.join(wav_dir_, filename + '.wav')
|
||||
if os.path.exists(wav_file) and pyhtk.can_be_ascii(sentence_htk) == 0:
|
||||
if pyhtk.create_dictionary_without_log(
|
||||
sentence_htk, global_ded, dictionary_file, lexicon_htk) == 0:
|
||||
# when the file name is too long, HDMan command does not work.
|
||||
# therefore first temporary dictionary_file is made, then renamed.
|
||||
shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic'))
|
||||
|
||||
if len(script) != 0:
|
||||
script_id = script.index[0]
|
||||
script_txt = script.get(script_id)
|
||||
script_words = script_txt.split(' ')
|
||||
del script_words[0]
|
||||
|
||||
# check if all words can be found in the lexicon.
|
||||
SCRIPT_WORDS = []
|
||||
script_prons = []
|
||||
is_in_lexicon = 1
|
||||
for word in script_words:
|
||||
WORD = word.upper()
|
||||
SCRIPT_WORDS.append(WORD)
|
||||
extracted = lexicon_htk[lexicon_htk['word']==WORD]
|
||||
if len(extracted) == 0:
|
||||
missing_words.append(word)
|
||||
script_prons.append(extracted)
|
||||
is_in_lexicon *= len(extracted)
|
||||
|
||||
# if all pronunciations are found in the lexicon, update scp and mlf files.
|
||||
if is_in_lexicon:
|
||||
# add the feature filename into the .scp file.
|
||||
fscp.write("{}\n".format(feature))
|
||||
i += 1
|
||||
|
||||
# add the words to the mlf file.
|
||||
fmlf.write('\"*/{}.lab\"\n'.format(file_basename))
|
||||
#fmlf.write('{}'.format('\n'.join(SCRIPT_WORDS)))
|
||||
for word_ in SCRIPT_WORDS:
|
||||
if word_[0] == '\'':
|
||||
word_ = '\\' + word_
|
||||
fmlf.write('{}\n'.format(word_))
|
||||
fmlf.write('.\n')
|
||||
print("\n{0} has {1} samples.\n".format(dataset, i))
|
||||
np.save(output_dir + '\\missing_words' + '_' + dataset + '.npy', missing_words)
|
||||
|
||||
fscp.close()
|
||||
fmlf.close()
|
||||
label_file = os.path.join(label_dir_, filename + '.lab')
|
||||
pyhtk.create_label_file(sentence_htk, label_file)
|
||||
else:
|
||||
os.remove(dictionary_file)
|
||||
print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
|
||||
## generate phone level transcription
|
||||
print("generating phone level transcription...\n")
|
||||
mkphones = output_dir + '\\label\\mkphones0.txt'
|
||||
subprocessStr = r"HLEd -l * -d " + lex_htk_ + ' -i ' + mlf_phone + ' ' + mkphones + ' ' + mlf_word
|
||||
subprocess.call(subprocessStr, shell=True)
|
||||
## ======================= make other required files =======================
|
||||
if make_htk_files:
|
||||
timer_start = time.time()
|
||||
print("==== making files required for HTK ====")
|
||||
|
||||
|
||||
## ======================= combined scps and mlfs =======================
|
||||
if combine_files:
|
||||
print("==== combine scps and mlfs ====\n")
|
||||
|
||||
fscp = open(hcompv_scp, 'wt')
|
||||
fmlf = open(combined_mlf, 'wt')
|
||||
print(">>> making a phonelist...")
|
||||
pyhtk.create_phonelist_file(fame_asr.phoneset_htk, phonelist_txt)
|
||||
|
||||
for dataset in dataset_list:
|
||||
fmlf.write("#!MLF!#\n")
|
||||
wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
|
||||
feature_dir_ = os.path.join(feature_dir, dataset)
|
||||
label_dir_ = os.path.join(label_dir, dataset)
|
||||
mlf_word = os.path.join(label_dir, dataset + '_word.mlf')
|
||||
mlf_phone = os.path.join(label_dir, dataset + '_phone.mlf')
|
||||
|
||||
#print(">>> making a script file for {}...".format(dataset))
|
||||
#listdir = glob.glob(os.path.join(wav_dir_, '*.dic'))
|
||||
#mfc_list = [filename.replace(wav_dir_, feature_dir_).replace('.dic', '.mfc') for filename in listdir]
|
||||
#hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
|
||||
#with open(hcompv_scp, 'wb') as f:
|
||||
# f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))
|
||||
|
||||
print(">>> making a mlf file for {}...".format(dataset))
|
||||
lab_list = glob.glob(os.path.join(label_dir_, '*.lab'))
|
||||
with open(mlf_word, 'wb') as fmlf:
|
||||
fmlf.write(bytes('#!MLF!#\n', 'ascii'))
|
||||
for label_file in lab_list:
|
||||
filename = os.path.basename(label_file)
|
||||
fmlf.write(bytes('\"*/{}\"\n'.format(filename), 'ascii'))
|
||||
with open(label_file) as flab:
|
||||
lines = flab.read()
|
||||
fmlf.write(bytes(lines + '.\n', 'ascii'))
|
||||
|
||||
print(">>> generating phone level transcription for {}...".format(dataset))
|
||||
pyhtk.mlf_word2phone(lexicon_htk, mlf_phone, mlf_word, mkphones_led)
|
||||
print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
|
||||
## ======================= extract features =======================
|
||||
if extract_features:
|
||||
for dataset in dataset_list:
|
||||
each_mlf = output_dir + '\\label\\' + dataset + '_phone.mlf'
|
||||
each_scp = output_dir + '\\scp\\' + dataset + '_all_words_in_lexicon.scp'
|
||||
timer_start = time.time()
|
||||
print('==== extract features on dataset {} ===='.format(dataset))
|
||||
|
||||
with open(each_mlf, 'r') as fin:
|
||||
lines = fin.read()
|
||||
lines = lines.split('\n')
|
||||
fmlf.write('\n'.join(lines[1:]))
|
||||
wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
|
||||
label_dir_ = os.path.join(label_dir, dataset)
|
||||
feature_dir_ = os.path.join(feature_dir, dataset)
|
||||
fh.make_new_directory(feature_dir_)
|
||||
|
||||
with open(each_scp, 'r') as fin:
|
||||
lines = fin.read()
|
||||
fscp.write(lines)
|
||||
# a script file for HCopy
|
||||
print(">>> making a script file for HCopy...")
|
||||
hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
|
||||
hcopy_scp.close()
|
||||
|
||||
fscp.close()
|
||||
fmlf.close()
|
||||
# get a list of features (hcopy.scp)
|
||||
# from the filelist in FAME! corpus.
|
||||
#fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
|
||||
# from the list of label files.
|
||||
lab_list = glob.glob(os.path.join(label_dir_, '*.lab'))
|
||||
feature_list = [
|
||||
os.path.join(wav_dir_, os.path.basename(lab_file).replace('.lab', '.wav')) + '\t'
|
||||
+ os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc'))
|
||||
for lab_file in lab_list]
|
||||
with open(hcopy_scp.name, 'wb') as f:
|
||||
f.write(bytes('\n'.join(feature_list), 'ascii'))
|
||||
|
||||
# extract features.
|
||||
print(">>> extracting features on {}...".format(dataset))
|
||||
pyhtk.wav2mfc(config_hcopy, hcopy_scp.name)
|
||||
os.remove(hcopy_scp.name)
|
||||
|
||||
# make hcompv.scp.
|
||||
print(">>> making a script file for {}...".format(dataset))
|
||||
listdir = glob.glob(os.path.join(label_dir_, '*.dic'))
|
||||
mfc_list = [filename.replace(label_dir_, feature_dir_).replace('.dic', '.mfc') for filename in listdir]
|
||||
hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
|
||||
with open(hcompv_scp, 'wb') as f:
|
||||
f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))
|
||||
|
||||
print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
|
||||
## ======================= flat start monophones =======================
|
||||
if flat_start:
|
||||
subprocessStr = 'HCompV -T 1 -C ' + config_train + ' -m -v 0.01 -S ' + hcompv_scp + ' -M ' + model0_dir + ' ' + proto_init
|
||||
subprocess.call(subprocessStr, shell=True)
|
||||
hcompv_scp = os.path.join(tmp_dir, 'test.scp')
|
||||
|
||||
timer_start = time.time()
|
||||
print('==== flat start ====')
|
||||
pyhtk.flat_start(config_train, hcompv_scp, model0_dir, prototype)
|
||||
|
||||
# allocate mean & variance to all phones in the phone list
|
||||
subprocessStr = 'perl ' + mkhmmdefs_pl + ' ' + model0_dir + '\\proto38' + ' ' + phonelist + ' > ' + model0_dir + '\\' + hmmdefs_name
|
||||
subprocess.call(subprocessStr, shell=True)
|
||||
pyhtk.create_hmmdefs(
|
||||
os.path.join(model0_dir, 'proto39'),
|
||||
os.path.join(model0_dir, 'hmmdefs'),
|
||||
phonelist_txt)
|
||||
print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
|
||||
## ======================= estimate monophones =======================
|
||||
if train_model:
|
||||
iter_num_max = 3
|
||||
for mix_num in [128, 256, 512, 1024]:
|
||||
for iter_num in range(1, iter_num_max+1):
|
||||
print("===== mix{}, iter{} =====".format(mix_num, iter_num))
|
||||
iter_num_pre = iter_num - 1
|
||||
modelN_dir = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num)
|
||||
if not os.path.exists(modelN_dir):
|
||||
os.makedirs(modelN_dir)
|
||||
if train_model_without_sp:
|
||||
hcompv_scp = os.path.join(tmp_dir, 'test.scp')
|
||||
mlf_file = os.path.join(label_dir, 'test_phone.mlf')
|
||||
output_dir = os.path.join(model_dir, 'hmm1')
|
||||
fh.make_new_directory(output_dir)
|
||||
|
||||
if iter_num == 1 and mix_num == 1:
|
||||
modelN_dir_pre = model0_dir
|
||||
else:
|
||||
modelN_dir_pre = model_dir + '\\hmm' + str(mix_num) + '-' + str(iter_num_pre)
|
||||
|
||||
## re-estimation
|
||||
subprocessStr = 'HERest -T 1 -C ' + config_train + ' -v 0.01 -I ' + combined_mlf + ' -H ' + modelN_dir_pre + '\\' + hmmdefs_name + ' -M ' + modelN_dir + ' ' + phonelist + ' -S ' + hcompv_scp
|
||||
subprocess.call(subprocessStr, shell=True)
|
||||
|
||||
mix_num_next = mix_num * 2
|
||||
modelN_dir_next = model_dir + '\\hmm' + str(mix_num_next) + '-0'
|
||||
if not os.path.exists(modelN_dir_next):
|
||||
os.makedirs(modelN_dir_next)
|
||||
|
||||
header_file = modelN_dir + '\\mix' + str(mix_num_next) + '.hed'
|
||||
with open(header_file, 'w') as fout:
|
||||
fout.write("MU %d {*.state[2-4].mix}" % (mix_num_next))
|
||||
|
||||
subprocessStr = 'HHEd -T 1 -H ' + modelN_dir + '\\' + hmmdefs_name + ' -M ' + modelN_dir_next + ' ' + header_file + ' ' + phonelist
|
||||
|
||||
subprocess.call(subprocessStr, shell=True)
|
||||
print('==== train model without sp ====')
|
||||
if not os.path.exists(os.path.join(output_dir, 'iter0')):
|
||||
shutil.copytree(model0_dir, os.path.join(output_dir, 'iter0'))
|
||||
niter = 1
|
||||
for niter in range(1, 5):
|
||||
timer_start = time.time()
|
||||
hmm_n = 'iter' + str(niter)
|
||||
hmm_n_pre = 'iter' + str(niter-1)
|
||||
modeln_dir = os.path.join(output_dir, hmm_n)
|
||||
modeln_dir_pre = os.path.join(output_dir, hmm_n_pre)
|
||||
|
||||
# re-estimation
|
||||
fh.make_new_directory(modeln_dir)
|
||||
pyhtk.re_estimation(
|
||||
config_train,
|
||||
os.path.join(modeln_dir_pre, 'proto39'),
|
||||
os.path.join(modeln_dir_pre, hmmdefs_name),
|
||||
modeln_dir,
|
||||
hcompv_scp, phonelist_txt,
|
||||
mlf_file=mlf_file)
|
||||
print("elapsed time: {}".format(time.time() - timer_start))
|
134
acoustic_model/fame_test.py
Normal file
134
acoustic_model/fame_test.py
Normal file
@ -0,0 +1,134 @@
|
||||
import sys
|
||||
import os
|
||||
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||
from collections import Counter
|
||||
import time
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import fame_functions
|
||||
import defaultfiles as default
|
||||
sys.path.append(default.toolbox_dir)
|
||||
from phoneset import fame_ipa, fame_asr
|
||||
import convert_phoneset
|
||||
|
||||
# locations of the lexicons (IPA / ASR notation from the FAME! corpus, HTK notation from the HTK working directory).
lexicon_dir = os.path.join(default.fame_dir, 'lexicon')
lexicon_ipa = os.path.join(lexicon_dir, 'lex.ipa')
lexicon_asr = os.path.join(lexicon_dir, 'lex.asr')
lexicon_htk = os.path.join(default.htk_dir, 'lexicon', 'lex.htk')


## check if all the phones in lexicon.ipa are in fame_ipa.py.
#timer_start = time.time()
#phoneset_lex = fame_functions.get_phoneset_from_lexicon(lexicon_ipa, phoneset='ipa')
#phoneset_py = fame_ipa.phoneset
#print("phones which is in lexicon.ipa but not in fame_ipa.py:\n{}".format(
#    set(phoneset_lex) - set(phoneset_py)))
#print("elapsed time: {}".format(time.time() - timer_start))

# check which word has the phone.
#timer_start = time.time()
#extracted = find_phone(lexicon_ipa, 'ⁿ')
#print("elapsed time: {}".format(time.time() - timer_start))


## get the correspondence between lex_ipa and lex_asr.
lex_asr = fame_functions.load_lexicon(lexicon_asr)
lex_ipa = fame_functions.load_lexicon(lexicon_ipa)

# cached results of fame_functions.get_translation_key().
translation_key_npy = os.path.join('phoneset', 'output_get_translation_key_translation_key.npy')
phone_unknown_npy = os.path.join('phoneset', 'output_get_translation_key_phone_unknown.npy')

# set to True to recompute the translation key from the lexicons
# instead of loading the cached result from the .npy files.
recompute_translation_key = False
if recompute_translation_key:
    timer_start = time.time()
    translation_key_ipa2asr, phone_unknown = fame_functions.get_translation_key(lexicon_ipa, lexicon_asr)
    print("elapsed time: {}".format(time.time() - timer_start))

    np.save(translation_key_npy, translation_key_ipa2asr)
    np.save(phone_unknown_npy, phone_unknown)
else:
    # the dict was stored by np.save() as a pickled 0-d object array.
    # numpy >= 1.16.3 refuses to unpickle unless allow_pickle=True is given.
    # NOTE: unpickling executes arbitrary code; only load .npy files written by this script.
    translation_key_ipa2asr = np.load(translation_key_npy, allow_pickle=True).item()
    phone_unknown = np.load(phone_unknown_npy)
    phone_unknown = list(phone_unknown)

# manually check the correspondence for the phone in phone_unknown.
# NOTE(review): the commented code below still refers to 'convert_phone_set' and a bare 'find_phone';
# the module is imported as 'convert_phoneset' in this file -- update the names before re-enabling.
#p = phone_unknown[0]
#lex_ipa_ = find_phone(lexicon_ipa, p, phoneset='ipa')

#for word in lex_ipa_['word']:
#    ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
#    if np.sum(lex_asr['word'] == word) > 0:
#        asr = lex_asr[lex_asr['word'] == word].iat[0, 1]

#        ipa_list = convert_phone_set.split_word(ipa, fame_ipa.multi_character_phones)
#        asr_list = asr.split(' ')
#        if p in ipa_list and (len(ipa_list) == len(asr_list)):
#            print("{0}: {1} --> {2}".format(word, ipa_list, asr_list))
#            for ipa_, asr_ in zip(ipa_list, asr_list):
#                if ipa_ in phone_unknown:
#                    translation_key_ipa2asr[ipa_] = asr_
#                    phone_unknown.remove(ipa_)

# correspondences added by hand, then the completed key is stored.
translation_key_ipa2asr['ə:'] = 'ə'
translation_key_ipa2asr['r.'] = 'r'
translation_key_ipa2asr['r:'] = 'r'
np.save(os.path.join('phoneset', 'fame_ipa2asr.npy'), translation_key_ipa2asr)
|
||||
|
||||
|
||||
## check if all the phones in lexicon.asr are in translation_key_ipa2asr.
|
||||
#timer_start = time.time()
|
||||
#phoneset_lex = fame_functions.get_phoneset_from_lexicon(lexicon_asr, phoneset='asr')
|
||||
#phoneset_lex.remove("")
|
||||
#phoneset_asr = list(set(translation_key_ipa2asr.values()))
|
||||
#print("phones which is in lexicon.asr but not in the translation_key_ipa2asr:\n{}".format(
|
||||
# set(phoneset_lex) - set(phoneset_asr)))
|
||||
#print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
|
||||
## check if all the phones in lexicon.htk are in fame_asr.py.
|
||||
#timer_start = time.time()
|
||||
#phoneset_htk = fame_asr.phoneset_htk
|
||||
#phoneset_lex = fame_functions.get_phoneset_from_lexicon(lexicon_htk)
|
||||
#phoneset_lex.remove('')
|
||||
#print("phones which is in lexicon.htk but not in the fame_asr.py are:\n{}".format(
|
||||
# set(phoneset_htk) - set(phoneset_lex)))
|
||||
#print("elapsed time: {}".format(time.time() - timer_start))
|
||||
|
||||
## statistics over the lexicon
|
||||
#lex_htk = fame_functions.load_lexicon(lexicon_htk)
|
||||
#phones_all = (' '.join(lex_htk['pronunciation'])).split(' ')
|
||||
#c = Counter(phones_all)
|
||||
|
||||
#lexicon_out = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk\lexicon\lex.htk2'
|
||||
#for i in lex_htk[lex_htk['word'].str.startswith('\'')].index.values:
|
||||
# lex_htk.iat[i, 0] = lex_htk.iat[i, 0].replace('\'', '\\\'')
|
||||
## to_csv does not work with a space separator; therefore all tabs should be replaced manually.
|
||||
##lex_htk.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep=' ', quoting=csv.QUOTE_NONE, escapechar='\\')
|
||||
#lex_htk.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')
|
||||
|
||||
|
||||
## report the asr phones which cannot be encoded in ascii.
print('asr phones which cannot be coded in ascii:\n')
for phone in fame_asr.phoneset_short:
    try:
        # only the success/failure of the encoding matters here.
        phone.encode("ascii")
        #print("{0} --> {1}".format(phone, phone.encode("ascii")))
    except UnicodeEncodeError:
        print(">>> {}".format(phone))
|
||||
|
||||
# report every script sentence whose HTK form still cannot be encoded in ascii.
print("letters in the scripts which is not coded in ascii:\n")
for dataset in ['train', 'devel', 'test']:
    timer_start = time.time()

    # the script file of each dataset: {fame_dir}/data/{dataset}/text
    script_list = os.path.join(default.fame_dir, 'data', dataset, 'text')
    with open(script_list, "rt", encoding="utf-8") as fin:
        scripts = fin.read().split('\n')

    for line in scripts:
        # everything after the first space is the sentence;
        # the first field is dropped.
        _, _, sentence = line.partition(' ')
        sentence_htk = fame_functions.word2htk(sentence)

        #if len(re.findall(r'[âêôûč\'àéèúćäëïöü]', sentence))==0:
        try:
            sentence_htk.encode('ascii')
        except UnicodeEncodeError:
            # show both the original and the converted sentence.
            print(sentence)
            print(sentence_htk)
|
||||
|
137
acoustic_model/phoneset/fame_asr.py
Normal file
137
acoustic_model/phoneset/fame_asr.py
Normal file
@ -0,0 +1,137 @@
|
||||
""" definition of the phones to be used. """
|
||||
|
||||
# phones in {FAME}/lexicon/lex.asr
phoneset = [
    # vowels
    'a',
    'a:',
    'e',
    'e:',
    'i',
    'i:',
    'i̯',
    'o',
    'o:',
    'ö',
    'ö:',
    'u',
    'u:',
    'ü',
    'ü:',
    #'ú', # only appears in word 'feeste'(út) and 'gaste'(út) which are 'f e: s t ə' and 'yn' in lex_asr. The pronunciation in Fries may be mistakes so I removed this phone.
    'ṷ',
    'y',
    'ɔ',
    'ɔ:',
    'ɔ̈',
    'ɔ̈:',
    'ə',
    'ɛ',
    'ɛ:',
    'ɪ',
    'ɪ:',

    # plosives
    'p',
    'b',
    't',
    'd',
    'k',
    'g',
    'ɡ',  # = 'g'

    # nasals
    'm',
    'n',
    'ŋ',

    # fricatives
    'f',
    'v',
    's',
    's:',
    'z',
    'x',
    'h',

    # tap and flap
    'r',
    'r:',

    # approximant
    'j',
    'l'
]


## reduce the number of phones.
# phones which seldom occur are replaced with more frequent ones.
# replacements are based on the advice from Martijn Wieling.
reduction_key = {
    'y': 'i:', 'e': 'e:', 'ə:': 'ɛ:', 'r:': 'r', 'ɡ': 'g'
}
# phones which are dropped instead of being replaced.
# 'ú' is already commented out in phoneset; it is listed here just to be sure.
phones_to_be_removed = ['ú', 's:', 'ɔ̈:']

# apply the removal and the reduction, then drop duplicates and sort.
phoneset_short = sorted({
    reduction_key.get(phone, phone)
    for phone in phoneset
    if phone not in phones_to_be_removed})


## translation_key to htk format (ascii).
# phones which give UnicodeEncodeError on phone.encode("ascii")
# are replaced with other characters.
translation_key_asr2htk = {
    'i̯': 'i_',
    'ṷ': 'u_',

    # on the analogy of German umlaut, 'e' is used.
    'ö': 'oe', 'ö:': 'oe:',
    'ü': 'ue', 'ü:': 'ue:',

    # on the analogy of Chinese...
    'ŋ': 'ng',

    # refer to Xsampa.
    'ɔ': 'O', 'ɔ:': 'O:', 'ɔ̈': 'Oe',
    'ɛ': 'E', 'ɛ:': 'E:',
    'ɪ': 'I', 'ɪ:': 'I:',

    # it is @ in Xsampa, but that is not handy on HTK.
    'ə': 'A'
}
phoneset_htk = [translation_key_asr2htk.get(phone, phone) for phone in phoneset_short]

#not_in_ascii = [
#    '\'',
#    'â', 'ê', 'ô', 'û', 'č',
#    'à', 'í', 'é', 'è', 'ú', 'ć',
#    'ä', 'ë', 'ï', 'ö', 'ü'
#]
# replacements for the non-ascii letters which occur in the words (not in the phones).
# NOTE(review): the other umlauts are written with a trailing 'e' ('ee', 'ie', 'oe', 'ue')
# while 'ä' maps to 'ao' -- confirm that this is intended and not a typo for 'ae'.
translation_key_word2htk = {
    #'\'': '\\\'',
    'í': 'i1', 'é': 'e1', 'ú': 'u1', 'ć': 'c1',
    'à': 'a2', 'è': 'e2',
    'â': 'a3', 'ê': 'e3', 'ô': 'o3', 'û': 'u3',
    'č': 'c4',
    'ä': 'ao', 'ë': 'ee', 'ï': 'ie', 'ö': 'oe', 'ü': 'ue',
}
#[translation_key_word2htk.get(i, i) for i in not_in_ascii]


## the list of multi character phones.
# for example, the length of 'a:' is 2, but in the codes it is treated as one letter.
# each list is sorted so that the longest phones come first.

# original.
multi_character_phones = sorted(
    (phone for phone in phoneset if len(phone) > 1),
    key=len, reverse=True)

# phoneset reduced.
multi_character_phones_short = sorted(
    (phone for phone in phoneset_short if len(phone) > 1),
    key=len, reverse=True)

# htk compatible.
multi_character_phones_htk = sorted(
    (phone for phone in phoneset_htk if len(phone) > 1),
    key=len, reverse=True)
|
@ -1,7 +1,6 @@
|
||||
""" definition of the phones to be used. """
|
||||
|
||||
## phones in IPA.
|
||||
phoneset_ipa = [
|
||||
phoneset = [
|
||||
# vowels
|
||||
'i̯',
|
||||
'i̯ⁿ',
|
||||
@ -35,7 +34,7 @@ phoneset_ipa = [
|
||||
'ṷ',
|
||||
'ṷ.',
|
||||
'ṷⁿ',
|
||||
#'ú', # only appears in word 'feeste'(út) and 'gaste'(út) which are 'f e: s t ə' and 'yn' in lex_asr.
|
||||
#'ú', # only appears in word 'feeste'(út) and 'gaste'(út) which are 'f e: s t ə' and 'yn' in lex_asr. The pronunciation in Fries may be mistakes so I removed this phone.
|
||||
'u',
|
||||
'uⁿ',
|
||||
'u.',
|
||||
@ -101,7 +100,8 @@ phoneset_ipa = [
|
||||
'l'
|
||||
]
|
||||
|
||||
|
||||
## the list of multi character phones.
|
||||
# for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter.
|
||||
multi_character_phones_ipa = [i for i in phoneset_ipa if len(i) > 1]
|
||||
multi_character_phones_ipa.sort(key=len, reverse=True)
|
||||
multi_character_phones = [i for i in phoneset if len(i) > 1]
|
||||
multi_character_phones.sort(key=len, reverse=True)
|
BIN
acoustic_model/phoneset/fame_ipa2asr.npy
Normal file
BIN
acoustic_model/phoneset/fame_ipa2asr.npy
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user