diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo
index a64dccd..6764d7f 100644
Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ
diff --git a/_tmp/phone_to_be_searched.npy b/_tmp/phone_to_be_searched.npy
new file mode 100644
index 0000000..17bfbfa
Binary files /dev/null and b/_tmp/phone_to_be_searched.npy differ
diff --git a/_tmp/translation_key.npy b/_tmp/translation_key.npy
new file mode 100644
index 0000000..fffeade
Binary files /dev/null and b/_tmp/translation_key.npy differ
diff --git a/acoustic_model.sln b/acoustic_model.sln
index 406d9e5..886b9ee 100644
--- a/acoustic_model.sln
+++ b/acoustic_model.sln
@@ -10,19 +10,21 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
..\toolbox\evaluation.py = ..\toolbox\evaluation.py
+ ..\toolbox\toolbox\file_handling.py = ..\toolbox\toolbox\file_handling.py
..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
- reus-test\reus-test.py = reus-test\reus-test.py
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
EndProjectSection
EndProject
+Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "pyhtk", "..\pyhtk\pyhtk\pyhtk.pyproj", "{75FCEFAF-9397-43FC-8189-DE97ADB77AA5}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -31,6 +33,8 @@ Global
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{4D8C8573-32F0-4A62-9E62-3CE5CC680390}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {75FCEFAF-9397-43FC-8189-DE97ADB77AA5}.Release|Any CPU.ActiveCfg = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc
index ef367cd..f41e244 100644
Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ
diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj
index 17163f2..4dd80ce 100644
--- a/acoustic_model/acoustic_model.pyproj
+++ b/acoustic_model/acoustic_model.pyproj
@@ -4,7 +4,8 @@
2.0
4d8c8573-32f0-4a62-9e62-3ce5cc680390
.
- forced_aligner_comparison.py
+
+
.
@@ -21,10 +22,6 @@
false
-
-
- Code
-
Code
@@ -35,9 +32,8 @@
Code
-
- Code
-
+
+
Code
@@ -47,6 +43,7 @@
Code
+
diff --git a/acoustic_model/acoustic_model_function.py b/acoustic_model/acoustic_model_function.py
deleted file mode 100644
index 4fced38..0000000
--- a/acoustic_model/acoustic_model_function.py
+++ /dev/null
@@ -1,202 +0,0 @@
-import os
-import sys
-from collections import Counter
-
-import numpy as np
-import pandas as pd
-
-import defaultfiles as default
-
-sys.path.append(default.forced_alignment_module_dir)
-from forced_alignment import convert_phone_set
-
-
-def make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp):
- """ Make a script file for HCopy using the filelist in FAME! corpus. """
- filelist_txt = FAME_dir + '\\fame\\filelists\\' + dataset + 'list.txt'
- with open(filelist_txt) as fin:
- filelist = fin.read()
- filelist = filelist.split('\n')
-
- with open(hcopy_scp, 'w') as fout:
- for filename_ in filelist:
- filename = filename_.replace('.TextGrid', '')
-
- if len(filename) > 3: # remove '.', '..' and ''
- wav_file = FAME_dir + '\\fame\\wav\\' + dataset + '\\' + filename + '.wav'
- mfc_file = feature_dir + '\\' + filename + '.mfc'
-
- fout.write(wav_file + '\t' + mfc_file + '\n')
-
-
-def make_filelist(input_dir, output_txt):
- """ Make a list of files in the input_dir. """
- filenames = os.listdir(input_dir)
-
- with open(output_txt, 'w') as fout:
- for filename in filenames:
- fout.write(input_dir + '\\' + filename + '\n')
-
-
-def make_htk_dict(word, pronvar_, fileDic, output_type):
- """
- make dict files which can be used for HTK.
- param word: target word.
- param pronvar_: pronunciation variant. nx2 (WORD /t pronunciation) ndarray.
- param fileDic: output dic file.
- param output_type: 0:full, 1:statistics, 2:frequency <2% entries are removed. 3:top 3.
- """
- #assert(output_type < 4 and output_type >= 0, 'output_type should be an integer between 0 and 3.')
- WORD = word.upper()
-
- if output_type == 0: # full
- pronvar = np.unique(pronvar_)
-
- with open(fileDic, 'w') as f:
- for pvar in pronvar:
- f.write('{0}\t{1}\n'.format(WORD, pvar))
- else:
- c = Counter(pronvar_)
- total_num = sum(c.values())
- with open(fileDic, 'w') as f:
- if output_type == 3:
- for key, value in c.most_common(3):
- f.write('{0}\t{1}\n'.format(WORD, key))
- else:
- for key, value in c.items():
- percentage = value/total_num*100
-
- if output_type == 1: # all
- f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
- elif output_type == 2: # less than 2 percent
- if percentage < 2:
- f.write('{0}\t{1}\n'.format(WORD, key))
-
-
-def get_phonelist(lexicon_file):
- """ Make a list of phones which appears in the lexicon. """
-
- with open(lexicon_file, "rt", encoding="utf-8") as fin:
- lines = fin.read()
- lines = lines.split('\n')
- phonelist = set([])
- for line in lines:
- line = line.split('\t')
- if len(line) > 1:
- pronunciation = set(line[1].split())
- phonelist = phonelist | pronunciation
- return phonelist
-
-
-def find_phone(lexicon_file, phone):
- """ Search where the phone is used in the lexicon. """
- with open(lexicon_file, "rt", encoding="utf-8") as fin:
- lines = fin.read()
- lines = lines.split('\n')
-
- extracted = []
- for line in lines:
- line = line.split('\t')
- if len(line) > 1:
- pronunciation = line[1]
- if phone in pronunciation:
- extracted.append(line)
- return extracted
-
-
-def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
- """ Convert a lexicon file from IPA to HTK format for FAME! corpus. """
-
- lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation'])
- with open(lexicon_file_out, "w", encoding="utf-8") as fout:
- for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
- pronunciation_no_space = pronunciation.replace(' ', '')
- pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)
- if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk:
- fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))
-
-
-def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
- """ Combine two lexicon files and sort by words. """
-
- with open(lexicon_file1, "rt", encoding="utf-8") as fin:
- lines1 = fin.read()
- lines1 = lines1.split('\n')
- with open(lexicon_file2, "rt", encoding="utf-8") as fin:
- lines2 = fin.read()
- lines2 = lines2.split('\n')
-
- lex1 = pd.read_table(lexicon_file1, names=['word', 'pronunciation'])
- lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation'])
- lex = pd.concat([lex1, lex2])
- lex = lex.sort_values(by='word', ascending=True)
- lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')
-
-
-def read_fileFA(fileFA):
- """
- read the result file of HTK forced alignment.
- this function only works when input is one word.
- """
- with open(fileFA, 'r') as f:
- lines = f.read()
- lines = lines.split('\n')
-
- phones = []
- for line in lines:
- line_split = line.split()
- if len(line_split) > 1:
- phones.append(line_split[2])
-
- return ' '.join(phones)
-
-
-def fame_pronunciation_variant(ipa):
- ipa = ipa.replace('æ', 'ɛ')
- ipa = ipa.replace('ɐ', 'a')
- ipa = ipa.replace('ɑ', 'a')
- ipa = ipa.replace('ɾ', 'r')
- ipa = ipa.replace('ɹ', 'r') # ???
- ipa = ipa.replace('ʁ', 'r')
- ipa = ipa.replace('ʀ', 'r') # ???
- ipa = ipa.replace('ʊ', 'u')
- ipa = ipa.replace('χ', 'x')
-
- pronvar_list = [ipa]
- while 'ø:' in ' '.join(pronvar_list) or 'œ' in ' '.join(pronvar_list) or 'ɒ' in ' '.join(pronvar_list):
- pronvar_list_ = []
- for p in pronvar_list:
- if 'ø:' in p:
- pronvar_list_.append(p.replace('ø:', 'ö'))
- pronvar_list_.append(p.replace('ø:', 'ö:'))
- if 'œ' in p:
- pronvar_list_.append(p.replace('œ', 'ɔ̈'))
- pronvar_list_.append(p.replace('œ', 'ɔ̈:'))
- if 'ɒ' in p:
- pronvar_list_.append(p.replace('ɒ', 'ɔ̈'))
- pronvar_list_.append(p.replace('ɒ', 'ɔ̈:'))
- pronvar_list = np.unique(pronvar_list_)
- return pronvar_list
-
-
-def make_fame2ipa_variants(fame):
- fame = 'rɛös'
- ipa = [fame]
- ipa.append(fame.replace('ɛ', 'æ'))
- ipa.append(fame.replace('a', 'ɐ'))
- ipa.append(fame.replace('a', 'ɑ'))
- ipa.append(fame.replace('r', 'ɾ'))
- ipa.append(fame.replace('r', 'ɹ'))
- ipa.append(fame.replace('r', 'ʁ'))
- ipa.append(fame.replace('r', 'ʀ'))
- ipa.append(fame.replace('u', 'ʊ'))
- ipa.append(fame.replace('x', 'χ'))
-
- ipa.append(fame.replace('ö', 'ø:'))
- ipa.append(fame.replace('ö:', 'ø:'))
- ipa.append(fame.replace('ɔ̈', 'œ'))
- ipa.append(fame.replace('ɔ̈:', 'œ'))
- ipa.append(fame.replace('ɔ̈', 'ɒ'))
- ipa.append(fame.replace('ɔ̈:', 'ɒ'))
-
- return ipa
diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py
index f53100f..1046c29 100644
--- a/acoustic_model/defaultfiles.py
+++ b/acoustic_model/defaultfiles.py
@@ -2,11 +2,13 @@ import os
#default_hvite_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'htk', 'config.HVite')
-cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
+#cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
-#config_hcopy = os.path.join(cygwin_dir, 'config', 'config.HCopy')
+htk_dir = r'C:\Aki\htk_fame'
+
+config_hcopy = os.path.join(htk_dir, 'config', 'config.HCopy')
#config_train = os.path.join(cygwin_dir, 'config', 'config.train')
-config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
+#config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
#mkhmmdefs_pl = os.path.join(cygwin_dir, 'src', 'acoustic_model', 'mkhmmdefs.pl')
#dbLexicon = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\lexicon.accdb
@@ -26,19 +28,23 @@ config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
#filePhoneList = config['pyHTK']['filePhoneList']
#AcousticModel = config['pyHTK']['AcousticModel']
-repo_dir = r'C:\Users\Aki\source\repos'
+repo_dir = r'C:\Users\A.Kunikoshi\source\repos'
ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
+pyhtk_dir = os.path.join(repo_dir, 'pyhtk', 'pyhtk')
+toolbox_dir = os.path.join(repo_dir, 'toolbox', 'toolbox')
-htk_config_dir = r'c:\Users\Aki\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
+htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
config_hvite = os.path.join(htk_config_dir, 'config.HVite')
#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
-acoustic_model = r'c:\cygwin64\home\Aki\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
+acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
WSL_dir = r'C:\OneDrive\WSL'
-fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
+#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
+fame_dir = r'f:\_corpus\fame'
+
fame_s5_dir = os.path.join(fame_dir, 's5')
fame_corpus_dir = os.path.join(fame_dir, 'corpus')
diff --git a/acoustic_model/fame_functions.py b/acoustic_model/fame_functions.py
new file mode 100644
index 0000000..4a16a95
--- /dev/null
+++ b/acoustic_model/fame_functions.py
@@ -0,0 +1,252 @@
+import os
+os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
+
+import sys
+from collections import Counter
+import pickle
+
+import numpy as np
+import pandas as pd
+
+import defaultfiles as default
+
+#sys.path.append(default.forced_alignment_module_dir)
+#from forced_alignment import convert_phone_set
+
+#def find_phone(lexicon_file, phone):
+# """ Search where the phone is used in the lexicon. """
+# with open(lexicon_file, "rt", encoding="utf-8") as fin:
+# lines = fin.read()
+# lines = lines.split('\n')
+
+# extracted = []
+# for line in lines:
+# line = line.split('\t')
+# if len(line) > 1:
+# pronunciation = line[1]
+# if phone in pronunciation:
+# extracted.append(line)
+# return extracted
+
+
+#def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
+# """ Convert a lexicon file from IPA to HTK format for FAME! corpus. """
+
+# lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation'])
+# with open(lexicon_file_out, "w", encoding="utf-8") as fout:
+# for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
+# pronunciation_no_space = pronunciation.replace(' ', '')
+# pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)
+# if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk:
+# fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))
+
+
+#def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
+# """ Combine two lexicon files and sort by words. """
+
+# with open(lexicon_file1, "rt", encoding="utf-8") as fin:
+# lines1 = fin.read()
+# lines1 = lines1.split('\n')
+# with open(lexicon_file2, "rt", encoding="utf-8") as fin:
+# lines2 = fin.read()
+# lines2 = lines2.split('\n')
+
+# lex1 = pd.read_table(lexicon_file1, names=['word', 'pronunciation'])
+# lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation'])
+# lex = pd.concat([lex1, lex2])
+# lex = lex.sort_values(by='word', ascending=True)
+# lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')
+
+
+#def read_fileFA(fileFA):
+# """
+# read the result file of HTK forced alignment.
+# this function only works when input is one word.
+# """
+# with open(fileFA, 'r') as f:
+# lines = f.read()
+# lines = lines.split('\n')
+
+# phones = []
+# for line in lines:
+# line_split = line.split()
+# if len(line_split) > 1:
+# phones.append(line_split[2])
+
+# return ' '.join(phones)
+
+
+#def fame_pronunciation_variant(ipa):
+# ipa = ipa.replace('æ', 'ɛ')
+# ipa = ipa.replace('ɐ', 'a')
+# ipa = ipa.replace('ɑ', 'a')
+# ipa = ipa.replace('ɾ', 'r')
+# ipa = ipa.replace('ɹ', 'r') # ???
+# ipa = ipa.replace('ʁ', 'r')
+# ipa = ipa.replace('ʀ', 'r') # ???
+# ipa = ipa.replace('ʊ', 'u')
+# ipa = ipa.replace('χ', 'x')
+
+# pronvar_list = [ipa]
+# while 'ø:' in ' '.join(pronvar_list) or 'œ' in ' '.join(pronvar_list) or 'ɒ' in ' '.join(pronvar_list):
+# pronvar_list_ = []
+# for p in pronvar_list:
+# if 'ø:' in p:
+# pronvar_list_.append(p.replace('ø:', 'ö'))
+# pronvar_list_.append(p.replace('ø:', 'ö:'))
+# if 'œ' in p:
+# pronvar_list_.append(p.replace('œ', 'ɔ̈'))
+# pronvar_list_.append(p.replace('œ', 'ɔ̈:'))
+# if 'ɒ' in p:
+# pronvar_list_.append(p.replace('ɒ', 'ɔ̈'))
+# pronvar_list_.append(p.replace('ɒ', 'ɔ̈:'))
+# pronvar_list = np.unique(pronvar_list_)
+# return pronvar_list
+
+
+#def make_fame2ipa_variants(fame):
+# fame = 'rɛös'
+# ipa = [fame]
+# ipa.append(fame.replace('ɛ', 'æ'))
+# ipa.append(fame.replace('a', 'ɐ'))
+# ipa.append(fame.replace('a', 'ɑ'))
+# ipa.append(fame.replace('r', 'ɾ'))
+# ipa.append(fame.replace('r', 'ɹ'))
+# ipa.append(fame.replace('r', 'ʁ'))
+# ipa.append(fame.replace('r', 'ʀ'))
+# ipa.append(fame.replace('u', 'ʊ'))
+# ipa.append(fame.replace('x', 'χ'))
+
+# ipa.append(fame.replace('ö', 'ø:'))
+# ipa.append(fame.replace('ö:', 'ø:'))
+# ipa.append(fame.replace('ɔ̈', 'œ'))
+# ipa.append(fame.replace('ɔ̈:', 'œ'))
+# ipa.append(fame.replace('ɔ̈', 'ɒ'))
+# ipa.append(fame.replace('ɔ̈:', 'ɒ'))
+
+# return ipa
+
+def make_hcopy_scp_from_filelist_in_fame(fame_dir, dataset, feature_dir, hcopy_scp):
+    """ Write a script file for HCopy from a filelist of the FAME! corpus.
+
+    Entries come from <fame_dir>/fame/filelists/<dataset>list.txt with every '.TextGrid'
+    removed; each kept entry becomes one "wav path, tab, mfc path" line in hcopy_scp.
+    """
+    corpus_root = os.path.join(fame_dir, 'fame')
+    with open(os.path.join(corpus_root, 'filelists', dataset + 'list.txt')) as fin:
+        entries = fin.read().split('\n')
+
+    with open(hcopy_scp, 'w') as fout:
+        for entry in entries:
+            stem = entry.replace('.TextGrid', '')
+            if len(stem) <= 3:  # drops '', '.' and '..' (and any other name of 3 chars or fewer)
+                continue
+            wav_file = os.path.join(corpus_root, 'wav', dataset, stem + '.wav')
+            fout.write(wav_file + '\t' + os.path.join(feature_dir, stem + '.mfc') + '\n')
+
+
+#def make_filelist(input_dir, output_txt):
+# """ Make a list of files in the input_dir. """
+# filenames = os.listdir(input_dir)
+
+# with open(output_txt, 'w') as fout:
+# for filename in filenames:
+# fout.write(input_dir + '\\' + filename + '\n')
+
+
+#def make_htk_dict(word, pronvar_, fileDic, output_type):
+# """
+# make dict files which can be used for HTK.
+# param word: target word.
+# param pronvar_: pronunciation variant. nx2 (WORD /t pronunciation) ndarray.
+# param fileDic: output dic file.
+# param output_type: 0:full, 1:statistics, 2:frequency <2% entries are removed. 3:top 3.
+# """
+# #assert(output_type < 4 and output_type >= 0, 'output_type should be an integer between 0 and 3.')
+# WORD = word.upper()
+
+# if output_type == 0: # full
+# pronvar = np.unique(pronvar_)
+
+# with open(fileDic, 'w') as f:
+# for pvar in pronvar:
+# f.write('{0}\t{1}\n'.format(WORD, pvar))
+# else:
+# c = Counter(pronvar_)
+# total_num = sum(c.values())
+# with open(fileDic, 'w') as f:
+# if output_type == 3:
+# for key, value in c.most_common(3):
+# f.write('{0}\t{1}\n'.format(WORD, key))
+# else:
+# for key, value in c.items():
+# percentage = value/total_num*100
+
+# if output_type == 1: # all
+# f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
+# elif output_type == 2: # less than 2 percent
+# if percentage < 2:
+# f.write('{0}\t{1}\n'.format(WORD, key))
+
+
+
+
+
+def load_lexicon(lexicon_file):
+    """ Read a headerless tab-separated lexicon; columns 0 and 1 are named 'word' and 'pronunciation', all cells stay text. """
+    lex = pd.read_csv(lexicon_file, delimiter='\t', header=None, encoding="utf-8", dtype=str, keep_default_na=False)
+    return lex.rename(columns={0: 'word', 1: 'pronunciation'})
+
+
+def get_phonelist(lexicon_asr):
+    """ Collect the set of phones which appear in the lexicon.
+
+    Args:
+        lexicon_asr: path to a tab-separated lexicon file whose second
+            column holds blank-separated phones (read with load_lexicon).
+
+    Returns:
+        set of str: every distinct phone found in the 'pronunciation' column.
+    """
+    lex = load_lexicon(lexicon_asr)
+    # split() without an argument collapses repeated/leading/trailing blanks, so no
+    # empty-string "phone" ends up in the set; rows without a pronunciation are skipped.
+    return set(' '.join(lex['pronunciation'].dropna().astype(str)).split())
+
+import time
+# NOTE: everything below runs at import time (fame_hmm.py does `import fame_functions`).
+timer_start = time.time()
+# Build, or load from the cache files in dir_tmp, the IPA -> ASR phone translation key.
+dir_tmp = r'c:\Users\A.Kunikoshi\source\repos\acoustic_model\_tmp'
+lexicon_ipa = r'f:\_corpus\FAME\lexicon\lex.ipa'
+lexicon_asr = r'f:\_corpus\FAME\lexicon\lex.asr'
+
+lex_ipa = load_lexicon(lexicon_ipa)
+lex_asr = load_lexicon(lexicon_asr)
+if 0:  # set to 1 to rebuild the cache files instead of loading them
+    phone_to_be_searched = get_phonelist(lexicon_asr)
+    translation_key = dict()
+    for word in lex_asr['word']:
+        if np.sum(lex_asr['word'] == word) == 1 and np.sum(lex_ipa['word'] == word) == 1:
+            asr = lex_asr[lex_asr['word'] == word].iat[0, 1]
+            ipa = lex_ipa[lex_ipa['word'] == word].iat[0, 1]
+
+            asr_list = asr.split(' ')
+            # if the word contains at least one phone which is still in phone_to_be_searched
+            if any(phone in phone_to_be_searched for phone in asr_list):
+                if(len(ipa) == len(asr_list)):  # ipa is compared per character, asr per phone
+                    print("{0}: {1} --> {2}".format(word, ipa, asr))
+                    for ipa_, asr_ in zip(ipa, asr_list):
+                        if asr_ in phone_to_be_searched:
+                            # an IPA character seen again later overwrites its earlier entry
+                            translation_key[ipa_] = asr_
+                            phone_to_be_searched.remove(asr_)
+
+    print("elapsed time: {}".format(time.time() - timer_start))
+    # np.save pickles the dict / set into 0-d object arrays
+    np.save(os.path.join(dir_tmp, 'translation_key.npy'), translation_key)
+    np.save(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), phone_to_be_searched)
+else:
+    # object arrays need allow_pickle=True (default False since NumPy 1.16.3); unpickling is only safe for trusted files
+    translation_key = np.load(os.path.join(dir_tmp, 'translation_key.npy'), allow_pickle=True).item()
+    phone_to_be_searched = np.load(os.path.join(dir_tmp, 'phone_to_be_searched.npy'), allow_pickle=True).item()
diff --git a/acoustic_model/train_hmm_fame.py b/acoustic_model/fame_hmm.py
similarity index 71%
rename from acoustic_model/train_hmm_fame.py
rename to acoustic_model/fame_hmm.py
index 8cf7789..5f69329 100644
--- a/acoustic_model/train_hmm_fame.py
+++ b/acoustic_model/fame_hmm.py
@@ -1,105 +1,127 @@
-import os
import sys
-import tempfile
-import configparser
-import subprocess
-from collections import Counter
+import os
+os.chdir(r'C:\Users\A.Kunikoshi\source\repos\acoustic_model\acoustic_model')
-import numpy as np
-import pandas as pd
+import tempfile
+#import configparser
+#import subprocess
+#from collections import Counter
+
+#import numpy as np
+#import pandas as pd
+
+import fame_functions
+import defaultfiles as default
+sys.path.append(default.pyhtk_dir)
+import pyhtk
+sys.path.append(default.toolbox_dir)
+import file_handling
## ======================= user define =======================
-repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
-curr_dir = repo_dir + '\\acoustic_model'
-config_ini = curr_dir + '\\config.ini'
-output_dir = 'C:\\OneDrive\\Research\\rug\\experiments\\friesian\\acoustic_model'
-forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment'
+#repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
+#curr_dir = repo_dir + '\\acoustic_model'
+#config_ini = curr_dir + '\\config.ini'
+#output_dir = 'C:\\OneDrive\\Research\\rug\\experiments\\friesian\\acoustic_model'
+#forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment'
dataset_list = ['devel', 'test', 'train']
# procedure
-extract_features = 0
-make_feature_list = 0
-conv_lexicon = 0
-check_lexicon = 0
-make_mlf = 0
-combine_files = 0
-flat_start = 0
-train_model = 1
+extract_features = 1
+#conv_lexicon = 0
+#check_lexicon = 0
+#make_mlf = 0
+#combine_files = 0
+#flat_start = 0
+#train_model = 1
-sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir))
-sys.path.append(forced_alignment_module)
-from forced_alignment import convert_phone_set
+#sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir))
+#sys.path.append(forced_alignment_module)
+#from forced_alignment import convert_phone_set
-import acoustic_model_functions as am_func
## ======================= load variables =======================
-config = configparser.ConfigParser()
-config.sections()
-config.read(config_ini)
+#config = configparser.ConfigParser()
+#config.sections()
+#config.read(config_ini)
-config_hcopy = config['Settings']['config_hcopy']
-config_train = config['Settings']['config_train']
-mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl']
-FAME_dir = config['Settings']['FAME_dir']
+#config_hcopy = config['Settings']['config_hcopy']
+#config_train = config['Settings']['config_train']
+#mkhmmdefs_pl = config['Settings']['mkhmmdefs_pl']
+#FAME_dir = config['Settings']['FAME_dir']
-lex_asr = FAME_dir + '\\lexicon\\lex.asr'
-lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'
-lex_oov = FAME_dir + '\\lexicon\\lex.oov'
-lex_oov_htk = FAME_dir + '\\lexicon\\lex.oov_htk'
-#lex_ipa = FAME_dir + '\\lexicon\\lex.ipa'
-#lex_ipa_ = FAME_dir + '\\lexicon\\lex.ipa_'
-#lex_ipa_htk = FAME_dir + '\\lexicon\\lex.ipa_htk'
-lex_htk = FAME_dir + '\\lexicon\\lex_original.htk'
-lex_htk_ = FAME_dir + '\\lexicon\\lex.htk'
+#lex_asr = FAME_dir + '\\lexicon\\lex.asr'
+#lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'
+#lex_oov = FAME_dir + '\\lexicon\\lex.oov'
+#lex_oov_htk = FAME_dir + '\\lexicon\\lex.oov_htk'
+##lex_ipa = FAME_dir + '\\lexicon\\lex.ipa'
+##lex_ipa_ = FAME_dir + '\\lexicon\\lex.ipa_'
+##lex_ipa_htk = FAME_dir + '\\lexicon\\lex.ipa_htk'
+#lex_htk = FAME_dir + '\\lexicon\\lex_original.htk'
+#lex_htk_ = FAME_dir + '\\lexicon\\lex.htk'
-hcompv_scp = output_dir + '\\scp\\combined.scp'
-combined_mlf = output_dir + '\\label\\combined.mlf'
+#hcompv_scp = output_dir + '\\scp\\combined.scp'
+#combined_mlf = output_dir + '\\label\\combined.mlf'
-model_dir = output_dir + '\\model'
-model0_dir = model_dir + '\\hmm0'
-proto_init = model_dir + '\\proto38'
-proto_name = 'proto'
-phonelist = output_dir + '\\config\\phonelist_friesian.txt'
-hmmdefs_name = 'hmmdefs'
+#model_dir = output_dir + '\\model'
+#model0_dir = model_dir + '\\hmm0'
+#proto_init = model_dir + '\\proto38'
+#proto_name = 'proto'
+#phonelist = output_dir + '\\config\\phonelist_friesian.txt'
+#hmmdefs_name = 'hmmdefs'
+feature_dir = os.path.join(default.htk_dir, 'mfc')
+if not os.path.exists(feature_dir):
+ os.makedirs(feature_dir)
+tmp_dir = os.path.join(default.htk_dir, 'tmp')
+if not os.path.exists(tmp_dir):
+ os.makedirs(tmp_dir)
## ======================= extract features =======================
if extract_features:
- print("==== extract features ====\n")
-
for dataset in dataset_list:
- print(dataset)
-
+ #for dataset in ['test']:
+ print('==== {} ===='.format(dataset))
+
# a script file for HCopy
+ print(">>> making a script file for HCopy... \n")
hcopy_scp = tempfile.NamedTemporaryFile(mode='w', delete=False)
hcopy_scp.close()
+ #hcopy_scp = os.path.join(default.htk_dir, 'tmp', 'HCopy.scp')
# get a list of features (hcopy.scp) from the filelist in FAME! corpus
- feature_dir = output_dir + '\\mfc\\' + dataset
- am_func.make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp.name)
+ feature_dir_ = os.path.join(feature_dir, dataset)
+ if not os.path.exists(feature_dir_):
+ os.makedirs(feature_dir_)
# extract features
- subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name
- subprocess.call(subprocessStr, shell=True)
+ print(">>> extracting features... \n")
+ fame_functions.make_hcopy_scp_from_filelist_in_fame(default.fame_dir, dataset, feature_dir_, hcopy_scp.name)
+ #subprocessStr = 'HCopy -C ' + config_hcopy + ' -S ' + hcopy_scp.name
+ #subprocess.call(subprocessStr, shell=True)
+ pyhtk.wav2mfc(default.config_hcopy, hcopy_scp.name)
+
+ # a script file for HCompV
+ print(">>> making a script file for HCompV... \n")
## ======================= make a list of features =======================
-if make_feature_list:
- print("==== make a list of features ====\n")
+#if make_feature_list:
+# print("==== make a list of features ====\n")
- for dataset in dataset_list:
- print(dataset)
+# for dataset in dataset_list:
+# print(dataset)
- feature_dir = output_dir + '\\mfc\\' + dataset
- hcompv_scp = output_dir + '\\scp\\' + dataset + '.scp'
+ #feature_dir = output_dir + '\\mfc\\' + dataset
+ hcompv_scp = os.path.join(tmp_dir, dataset + '.scp')
- am_func.make_filelist(feature_dir, hcompv_scp)
+ #am_func.make_filelist(feature_dir, hcompv_scp)
+ file_handling.make_filelist(feature_dir_, hcompv_scp, '.mfc')
## ======================= convert lexicon from ipa to fame_htk =======================