With the bug-fixed xsampa->ipa conversion, forced alignment (FA) is performed.

yemaozi88 2018-09-02 12:16:37 +02:00
parent df0e96c4f1
commit 3a98e184fe
12 changed files with 352 additions and 352 deletions

Binary file not shown.


@@ -9,14 +9,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 	ProjectSection(SolutionItems) = preProject
 		..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
 		..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
-		..\ipa-xsama-converter\converter.py = ..\ipa-xsama-converter\converter.py
-		..\forced_alignment\forced_alignment\defaultfiles.py = ..\forced_alignment\forced_alignment\defaultfiles.py
+		..\toolbox\evaluation.py = ..\toolbox\evaluation.py
 		..\forced_alignment\forced_alignment\forced_alignment.pyproj = ..\forced_alignment\forced_alignment\forced_alignment.pyproj
-		..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
 		..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
 		..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
-		..\accent_classification\accent_classification\output_confusion_matrix.py = ..\accent_classification\accent_classification\output_confusion_matrix.py
 		..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py
+		..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
 		..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
 		..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
 		..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py

Binary file not shown.


@@ -4,7 +4,7 @@
   <SchemaVersion>2.0</SchemaVersion>
   <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
   <ProjectHome>.</ProjectHome>
-  <StartupFile>phone_conversion_check.py</StartupFile>
+  <StartupFile>performance_check.py</StartupFile>
   <SearchPath>
   </SearchPath>
   <WorkingDirectory>.</WorkingDirectory>
@@ -28,10 +28,13 @@
     <Compile Include="convert_xsampa2ipa.py">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="performance_check.py">
+    <Compile Include="defaultfiles.py">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="phone_conversion_check.py">
+    <Compile Include="fa_test.py">
+      <SubType>Code</SubType>
+    </Compile>
+    <Compile Include="performance_check.py">
       <SubType>Code</SubType>
     </Compile>
   </ItemGroup>

acoustic_model/acoustic_model_functions.py

@@ -1,17 +1,13 @@
 import os
 import sys
+from collections import Counter
+
+import numpy as np
 import pandas as pd

-## ======================= user define =======================
-repo_dir = 'C:\\Users\\Aki\\source\\repos\\acoustic_model'
-curr_dir = repo_dir + '\\acoustic_model'
-forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced_alignment'
-sys.path.append(os.path.join(os.path.dirname(sys.path[0]), curr_dir))
-sys.path.append(forced_alignment_module)
+import defaultfiles as default
+
+sys.path.append(default.forced_alignment_module_dir)
 from forced_alignment import convert_phone_set

@@ -42,6 +38,41 @@ def make_filelist(input_dir, output_txt):
             fout.write(input_dir + '\\' + filename + '\n')


+def make_dic(word, pronvar_, fileDic, output_type):
+    """
+    make dict files which can be used for HTK.
+    param word: target word.
+    param pronvar_: pronunciation variant. nx2 (WORD /t pronunciation) ndarray.
+    param fileDic: output dic file.
+    param output_type: 0:full, 1:statistics, 2:frequency <2% entries are removed. 3:top 3.
+    """
+    #assert(output_type < 4 and output_type >= 0, 'output_type should be an integer between 0 and 3.')
+    WORD = word.upper()
+
+    if output_type == 0:  # full
+        pronvar = np.unique(pronvar_)
+
+        with open(fileDic, 'w') as f:
+            for pvar in pronvar:
+                f.write('{0}\t{1}\n'.format(WORD, pvar))
+    else:
+        c = Counter(pronvar_)
+        total_num = sum(c.values())
+
+        with open(fileDic, 'w') as f:
+            if output_type == 3:
+                for key, value in c.most_common(3):
+                    f.write('{0}\t{1}\n'.format(WORD, key))
+            else:
+                for key, value in c.items():
+                    percentage = value/total_num*100
+
+                    if output_type == 1:  # all
+                        f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
+                    elif output_type == 2:  # less than 2 percent
+                        if percentage < 2:
+                            f.write('{0}\t{1}\n'.format(WORD, key))
+
+
 def get_phonelist(lexicon_file):
     """ Make a list of phones which appears in the lexicon. """
@@ -99,4 +130,22 @@ def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
     lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation'])
     lex = pd.concat([lex1, lex2])
     lex = lex.sort_values(by='word', ascending=True)
     lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')
+
+
+def read_fileFA(fileFA):
+    """
+    read the result file of HTK forced alignment.
+    this function only works when input is one word.
+    """
+    with open(fileFA, 'r') as f:
+        lines = f.read()
+    lines = lines.split('\n')
+
+    phones = []
+    for line in lines:
+        line_split = line.split()
+        if len(line_split) > 1:
+            phones.append(line_split[2])
+
+    return ' '.join(phones)
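For reference, a minimal usage sketch of the two helpers added above. The word 'douane', the variant strings, and the file names are made-up examples; read_fileFA assumes HVite has already written its alignment to the given file:

    import numpy as np
    import acoustic_model_functions as am_func

    # hypothetical pronunciation variants observed for one word
    pronvar = np.array(['d u a n @', 'd u a n @', 'd w a n @'])

    # output_type=3 keeps the (up to) three most frequent variants,
    # written as 'DOUANE<tab>variant' lines usable as an HTK dictionary.
    am_func.make_dic('douane', pronvar, 'douane.dic', 3)

    # once HVite has written a forced alignment to douane.txt,
    # read_fileFA collects the third column (the phone) of every
    # alignment line and returns the phones space-separated.
    print(am_func.read_fileFA('douane.txt'))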

acoustic_model/convert_xsampa2ipa.py

@@ -7,122 +7,155 @@ import json
 import sys
 import os

-#sys.path.append(ipa_xsampa_converter_dir)
-#import converter
+import defaultfiles as default
+
+sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
+from forced_alignment import convert_phone_set


 def load_converter(source, sink, ipa_xsampa_converter_dir):
     """load the converter.
     source and sink are either of "ipa", "xsampa" or "sassc".
     """
     choices = ["ipa", "xsampa", "sassc"]

     # Validate params
     try:
         choice1 = choices.index(source)
         choice2 = choices.index(sink)
         if choice1 == choice2:
             print("source and destination format are the same.")
     except ValueError:
         print("source and destination should be one of [ipa xsampa sassc].")
         exit(1)

     # Mappings from disk
     # some may not be used if source or sink is already IPA
     source_to_ipa = {}
     ipa_to_sink = {}
     ipa_xsampa = []
     sassc_ipa = []
     # The IPAs that actually occur within SASSC
     sassc_active_ipa = {}

     script_dir = os.path.dirname(os.path.realpath(__file__))

     with open(os.path.join(ipa_xsampa_converter_dir, "ipa_xsampa_map.json"), encoding="utf-8") as f:
         ipa_xsampa = json.load(f)

     sassc_active = source == "sassc" or sink == "sassc"
     if sassc_active:
         with open(os.path.join(script_dir, "./sassc_ipa.json")) as f:
             sassc_ipa = json.load(f)
         for pair in sassc_ipa:
             for char in pair[1]:
                 sassc_active_ipa[char] = 1

     if source == "xsampa":
         for pair in ipa_xsampa:
             source_to_ipa[pair[1]] = pair[0]
     elif source == "sassc":
         for pair in sassc_ipa:
             source_to_ipa[pair[0]] = pair[1]

     if sink == "xsampa":
         for pair in ipa_xsampa:
             ipa_to_sink[pair[0]] = pair[1]
     elif sink == "sassc":
         for pair in sassc_ipa:
             ipa_to_sink[pair[1]] = pair[0]

     # Combine them into a single mapping
     mapping = {}
     if source == "ipa":
         mapping = ipa_to_sink
     elif sink == "ipa":
         mapping = source_to_ipa
     else:
         for k, ipas in source_to_ipa.iteritems():
             map_out = ""
             failed = False
             for ipa in ipas:
                 val = ipa_to_sink.get(ipa)
                 if not val:
                     failed = True
                     break
                 map_out += val
             mapping[k] = map_out if not failed else None

     return mapping


 def conversion(source, sink, mapping, line):
     """
     conversion.
     Args:
         mapping: can be obtained with load_converter().
         line: must be seperated, by default the seperator is whitespace.
     """
     # Edit this to change the seperator
     SEPERATOR = " "

     line = line.strip()
     output = []

     #if sassc_active:
     #    tokens = line.split(SEPERATOR)
     #else:
     tokens = line

     for token in tokens:
         if token.isspace():
             output.append(token)
             continue
         # Remove extraneous chars that IPA does not accept
         if sink == "sassc":
             cleaned_token = u""
             for char in token:
                 if sassc_active_ipa.get(char):
                     cleaned_token += char
             token = cleaned_token
         mapped = mapping.get(token)
         if not mapped:
             print("WARNING: could not map token ", token, file=sys.stderr)
         else:
             output.append(mapped)
     #if sassc_active:
     #    output = SEPERATOR.join(output)
     #else:
     output = "".join(output)
     return output
+
+
+def xsampa2ipa(mapping, xsampa):
+    """
+    conversion from xsampa to ipa.
+    Args:
+        mapping: can be obtained with load_converter().
+        xsampa: a line written in xsampa.
+    Notes:
+        function conversion does not work when:
+        - the input is a word.
+        - when the line includes '\'.
+        - 'ɡ' and 'g' are considered to be different.
+    """
+    # make a multi_character_list to split 'xsampa'.
+    multi_character_list = []
+    for i in list(mapping):
+        if len(i) > 1:
+            multi_character_list.append(i)
+
+    # conversion
+    ipa = []
+    for phone in convert_phone_set.multi_character_tokenize(xsampa, multi_character_list):
+        ipa.append(mapping.get(phone, phone))
+    ipa = ''.join(ipa)
+
+    # strange conversion.
+    ipa = ipa.replace('ɡ', 'g')
+
+    return ipa
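A minimal sketch of how the new xsampa2ipa is meant to be called; the X-SAMPA string here is an arbitrary example, and the converter directory comes from defaultfiles:

    import defaultfiles as default
    import convert_xsampa2ipa

    # build the X-SAMPA -> IPA mapping from the ipa-xsama-converter tables
    mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa',
                                                default.ipa_xsampa_converter_dir)

    # xsampa2ipa tokenizes on multi-character X-SAMPA symbols first,
    # then maps each token to IPA (unknown tokens pass through unchanged).
    print(convert_xsampa2ipa.xsampa2ipa(mapping, 'dua:n@'))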

acoustic_model/defaultfiles.py Normal file

@@ -0,0 +1,35 @@
+import os
+
+#default_hvite_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', 'htk', 'config.HVite')
+
+cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
+#config_hcopy = os.path.join(cygwin_dir, 'config', 'config.HCopy')
+#config_train = os.path.join(cygwin_dir, 'config', 'config.train')
+config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
+#mkhmmdefs_pl = os.path.join(cygwin_dir, 'src', 'acoustic_model', 'mkhmmdefs.pl')
+
+#dbLexicon = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\lexicon.accdb
+#scriptBarbara = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\pronvars_barbara.perl
+#exeG2P = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\string2phon.exe
+
+#[pyHTK]
+#configHVite = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\config.HVite
+#filePhoneList = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\phonelist_barbara.txt
+#AcousticModel = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\hmmdefs_16-2_barbara.compo
+
+#dbLexicon = config['cLexicon']['dbLexicon']
+#scriptBarbara = config['cLexicon']['scriptBarbara']
+#exeG2P = config['cLexicon']['exeG2P']
+
+#configHVite = config['pyHTK']['configHVite']
+#filePhoneList = config['pyHTK']['filePhoneList']
+#AcousticModel = config['pyHTK']['AcousticModel']
+
+repo_dir = r'C:\Users\Aki\source\repos'
+ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
+forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
+
+fame_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus'
+
+experiments_dir = r'c:\OneDrive\Research\rug\experiments'
+phonelist = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
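defaultfiles.py centralizes the machine-specific paths that were previously hard-coded in each script; a typical consumer looks like this (sketch):

    import os
    import sys

    import defaultfiles as default

    # modules living in sibling repositories are resolved via repo_dir
    sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))

    print(default.config_hvite)  # HVite config used for forced alignment
    print(default.phonelist)     # phone list of the Frisian acoustic model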

acoustic_model/fa_test.py Normal file (16 lines)

@@ -0,0 +1,16 @@
+import os
+import sys
+
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+import defaultfiles as default
+
+sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
+from forced_alignment import forced_alignment
+
+wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav'
+
+forced_alignment(
+    wav_file,
+    #'Australië'
+    'BUFFETCOUPON COULISSEN DOUANE'
+    )

acoustic_model/performance_check.py

@@ -1,255 +1,176 @@
 import os
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
 import sys
 import csv
 import subprocess
-import configparser
 from collections import Counter
 import re

 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
-from sklearn.metrics import confusion_matrix
+#from sklearn.metrics import confusion_matrix

+import acoustic_model_functions as am_func
+import convert_xsampa2ipa
+import defaultfiles as default

-## ======================= functions =======================
-def read_fileFA(fileFA):
-    """
-    read the result file of HTK forced alignment.
-    this function only works when input is one word.
-    """
-    with open(fileFA, 'r') as f:
-        lines = f.read()
-    lines = lines.split('\n')
-
-    phones = []
-    for line in lines:
-        line_split = line.split()
-        if len(line_split) > 1:
-            phones.append(line_split[2])
-
-    return ' '.join(phones)
-
-
-def make_dic(word, pronvar_, fileDic, output_type):
-    """
-    make dict files which can be used for HTK.
-    param word: target word.
-    param pronvar_: pronunciation variant. nx2 (WORD /t pronunciation) ndarray.
-    param fileDic: output dic file.
-    param output_type: 0:full, 1:statistics, 2:frequency <2% entries are removed. 3:top 3.
-    """
-    #assert(output_type < 4 and output_type >= 0, 'output_type should be an integer between 0 and 3.')
-    if output_type == 0:  # full
-        pronvar = np.unique(pronvar_)
-
-        with open(fileDic, 'w') as f:
-            for pvar in pronvar:
-                f.write('{0}\t{1}\n'.format(WORD, pvar))
-    else:
-        c = Counter(pronvar_)
-        total_num = sum(c.values())
-
-        with open(fileDic, 'w') as f:
-            if output_type == 3:
-                for key, value in c.most_common(3):
-                    f.write('{0}\t{1}\n'.format(WORD, key))
-            else:
-                for key, value in c.items():
-                    percentage = value/total_num*100
-
-                    if output_type == 1:  # all
-                        f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
-                    elif output_type == 2:  # less than 2 percent
-                        if percentage < 2:
-                            f.write('{0}\t{1}\n'.format(WORD, key))


 ## ======================= user define =======================
-curr_dir = r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model'
-config_ini = curr_dir + '\\config.ini'
-forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment'
-forced_alignment_module_old = r'C:\OneDrive\Research\rug\code\forced_alignment\forced_alignment'
-ipa_xsampa_converter_dir = r'C:\Users\Aki\source\repos\ipa-xsama-converter'
-accent_classification_dir = r'C:\Users\Aki\source\repos\accent_classification\accent_classification'
-
-experiments_dir = r'C:\OneDrive\Research\rug\experiments'
-data_dir = experiments_dir + '\\stimmen\\data'
-csvfile = data_dir + '\\Frisian Variants Picture Task Stimmen.csv'
-
-cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
+#curr_dir = r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model'
+#config_ini = 'config.ini'
+#repo_dir = r'C:\Users\Aki\source\repos'
+#forced_alignment_module = repo_dir + '\\forced_alignment'
+#forced_alignment_module_old = repo_dir + '\\aki_tools'
+#ipa_xsampa_converter_dir = repo_dir + '\\ipa-xsama-converter'
+#accent_classification_dir = repo_dir + '\\accent_classification\accent_classification'
+
+excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
+#experiments_dir = r'C:\OneDrive\Research\rug\experiments'
+data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data')
+#csvfile = data_dir + '\\Frisian Variants Picture Task Stimmen.csv'
+wav_dir = os.path.join(default.experiments_dir, 'stimmen', 'wav')
+acoustic_model_dir = os.path.join(default.experiments_dir, 'friesian', 'acoustic_model', 'model')
+htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short')
+fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA')
+
+#cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
+
+#lex_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
+#lex_asr_htk = os.path.join(default.fame_dir, 'lexicon', 'lex.asr_htk')

 # procedure
-convert_phones = 0
 make_dic_files = 0
-make_dic_files_short = 0
-do_forced_alignment_htk = 0
+do_forced_alignment_htk = 1
 make_kaldi_data_files = 0
 make_kaldi_lexicon_txt = 0
-load_forced_alignment_kaldi = 1
+load_forced_alignment_kaldi = 0
 eval_forced_alignment = 0


 ## ======================= add paths =======================
-sys.path.append(forced_alignment_module)
+sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
 from forced_alignment import convert_phone_set
+from forced_alignment import pyhtk

-# for interactive window
-sys.path.append(curr_dir)
-import convert_xsampa2ipa
-import acoustic_model_functions as am_func
-
-# for forced-alignment
-sys.path.append(forced_alignment_module_old)
-import pyHTK
-
-# to output confusion matrix
-sys.path.append(accent_classification_dir)
-from output_confusion_matrix import plot_confusion_matrix
+sys.path.append(os.path.join(default.repo_dir, 'toolbox'))
+#import pyHTK
+from evaluation import plot_confusion_matrix


-## ======================= load variables =======================
-config = configparser.ConfigParser()
-config.sections()
-config.read(config_ini)
-
-FAME_dir = config['Settings']['FAME_dir']
-
-lex_asr = FAME_dir + '\\lexicon\\lex.asr'
-lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'
-
-
 ## ======================= convert phones ======================
-if convert_phones:
-    mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir)
+mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)

+xls = pd.ExcelFile(excel_file)
+
+## check conversion
+#df = pd.read_excel(xls, 'frequency')
+#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
+#    #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_)
+#    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
+#    if not ipa_converted == ipa:
+#        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))

-    ## check phones included in FAME!
-    # the phones used in the lexicon.
-    #phonelist = am_func.get_phonelist(lex_htk)
-    # the lines which include a specific phone.
-    #lines = am_func.find_phone(lex_asr, 'x')
+## check phones included in FAME!
+# the phones used in the lexicon.
+#phonelist = am_func.get_phonelist(lex_asr)
+# the lines which include a specific phone.
+#lines = am_func.find_phone(lex_asr, 'x')

-    with open(csvfile, encoding="utf-8") as fin:
-        lines = csv.reader(fin, delimiter=';', lineterminator="\n", skipinitialspace=True)
-        next(lines, None)  # skip the headers
-
-        filenames = []
-        words = []
-        pronunciations = []
-        for line in lines:
-            if line[1] is not '' and len(line) > 5:
-                filenames.append(line[0])
-                words.append(line[1])
-                pron_xsampa = line[3]
-                pron_ipa = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, pron_xsampa)
-                pron_ipa = pron_ipa.replace('ː', ':')
-                pron_famehtk = convert_phone_set.ipa2famehtk(pron_ipa)
-
-                # adjust to phones used in the acoustic model.
-                pron_famehtk = pron_famehtk.replace('sp', 'sil')
-                pron_famehtk = pron_famehtk.replace('ce :', 'ce')  # because ceh is ignored.
-                pron_famehtk = pron_famehtk.replace('w :', 'wh')
-                pron_famehtk = pron_famehtk.replace('e :', 'eh')
-                pron_famehtk = pron_famehtk.replace('eh :', 'eh')
-                pron_famehtk = pron_famehtk.replace('ih :', 'ih')
-
-                #translation_key = {'sp': 'sil', 'ce :': 'ceh', 'w :': 'wh'}
-                #pron = []
-                #for phoneme in pron_famehtk.split(' '):
-                #    pron.append(translation_key.get(phoneme, phoneme))
-                #pronunciations.append(' '.join(pron_famehtk))
-                pronunciations.append(pron_famehtk)
-
-    # check if all phones are in the phonelist of the acoustic model.
-    #phonelist = ' '.join(pronunciations)
-    #np.unique(phonelist.split(' '))
-    #phonelist.find(':')
-
-    filenames = np.array(filenames)
-    words = np.array(words)
-    pronunciations = np.array(pronunciations)
-
-    del line, lines
-    del pron_xsampa, pron_ipa, pron_famehtk
-
-    np.save(data_dir + '\\filenames.npy', filenames)
-    np.save(data_dir + '\\words.npy', words)
-    np.save(data_dir + '\\pronunciations.npy', pronunciations)
-else:
-    filenames = np.load(data_dir + '\\filenames.npy')
-    words = np.load(data_dir + '\\words.npy')
-    pronunciations = np.load(data_dir + '\\pronunciations.npy')
-word_list = np.unique(words)
+# Filename, Word, Self Xsampa
+df = pd.read_excel(xls, 'original')
+
+ipas = []
+famehtks = []
+for xsampa in df['Self Xsampa']:
+    if not isinstance(xsampa, float):  # 'NaN'
+        # typo?
+        xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t')
+        xsampa = xsampa.replace(';', ':')
+
+        ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
+        ipa = ipa.replace('ː', ':')
+        ipa = ipa.replace(' ', '')
+        ipas.append(ipa)
+        famehtk = convert_phone_set.ipa2famehtk(ipa)
+        famehtks.append(famehtk)
+    else:
+        ipas.append('')
+        famehtks.append('')
+
+# extract interesting cols.
+df = pd.DataFrame({'filename': df['Filename'],
+                   'word': df['Word'],
+                   'xsampa': df['Self Xsampa'],
+                   'ipa': pd.Series(ipas),
+                   'famehtk': pd.Series(famehtks)})
+
+# cleansing.
+df = df[~df['famehtk'].isin(['/', ''])]


 ## ======================= make dict files used for HTK. ======================
 if make_dic_files:
-    output_type = 2
-    output_dir = experiments_dir + r'\stimmen\dic_short'
+    word_list = np.unique(df['word'])
+    output_type = 3

     for word in word_list:
-        WORD = word.upper()
-        fileDic = output_dir + '\\' + word + '.dic'
+        htk_dict_file = htk_dict_dir + '\\' + word + '.dic'

         # pronunciation variant of the target word.
-        pronvar_ = pronunciations[words == word]
-        # remove ''
-        pronvar_ = np.delete(pronvar_, np.where(pronvar_==''))
+        pronvar_ = df['famehtk'][df['word'].str.match(word)]

         # make dic file.
-        make_dic(word, pronvar_, fileDic, output_type)
+        am_func.make_dic(word, pronvar_, htk_dict_file, output_type)


 ## ======================= forced alignment using HTK =======================
 if do_forced_alignment_htk:
-    configHVite = cygwin_dir + r'\config\config.HVite'
-    filePhoneList = experiments_dir + r'\friesian\acoustic_model\config\phonelist_friesian.txt'
-    wav_dir = experiments_dir + r'\stimmen\wav'
-
-    #hmm_num = 128
-    for hmm_num in [1, 2, 4, 8, 16, 32, 64, 128, 256]:
+    #hmm_num = 2
+    for hmm_num in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]:
         hmm_num_str = str(hmm_num)
-        AcousticModel = experiments_dir + r'\friesian\acoustic_model\model\hmm' + hmm_num_str + r'-2\hmmdefs'
+        acoustic_model = os.path.join(acoustic_model_dir, 'hmm' + hmm_num_str + r'-2\hmmdefs')

         predictions = []
-        file_num_max = len(filenames)
-        for i in range(0, file_num_max):
-            #for i in range(500, 502):
-            print('=== {0}/{1} ==='.format(i, file_num_max))
-            filename = filenames[i]
-            fileWav = wav_dir + '\\' + filename
+        for i, filename in enumerate(df['filename']):
+            print('=== {0}/{1} ==='.format(i, len(df)))
+            wav_file = os.path.join(wav_dir, filename)

-            if os.path.exists(fileWav):
-                word = words[i]
+            if os.path.exists(wav_file) and i in df['filename'].keys():
+                word = df['word'][i]
                 WORD = word.upper()

                 # make label file.
-                fileLab = wav_dir + '\\' + filename.replace('.wav', '.lab')
-                with open(fileLab, 'w') as f:
+                label_file = os.path.join(wav_dir, filename.replace('.wav', '.lab'))
+                with open(label_file, 'w') as f:
                     lines = f.write(WORD)

-                fileDic = experiments_dir + r'\stimmen\dic_top3' + '\\' + word + '.dic'
-                fileFA = experiments_dir + r'\stimmen\FA' + '\\' + filename.replace('.wav', '.txt') + hmm_num_str
+                htk_dict_file = os.path.join(htk_dict_dir, word + '.dic')
+                fa_file = os.path.join(fa_dir, filename.replace('.wav', '.txt') + hmm_num_str)

-                pyHTK.doHVite(fileWav, fileLab, fileDic, fileFA, configHVite, filePhoneList, AcousticModel)
-                prediction = read_fileFA(fileFA)
+                pyhtk.doHVite(wav_file, label_file, htk_dict_file, fa_file, default.config_hvite, default.phonelist, acoustic_model)
+                prediction = am_func.read_fileFA(fa_file)
                 predictions.append(prediction)

-                os.remove(fileLab)
-                print('{0}: {1} -> {2}'.format(WORD, pronunciations[i], prediction))
+                os.remove(label_file)
+                print('{0}: {1} -> {2}'.format(WORD, df['famehtk'][i], prediction))
             else:
                 predictions.append('')
                 print('!!!!! file not found.')

         predictions = np.array(predictions)
-        match = np.c_[words[predictions != ''], pronunciations[predictions != ''], predictions[predictions != '']]
-        np.save(data_dir + '\\match_hmm' + hmm_num_str + '.npy', match)
+        #match = np.c_[words[predictions != ''], pronunciations[predictions != ''], predictions[predictions != '']]
+        np.save(os.path.join(data_dir, 'predictions_hmm' + hmm_num_str + '.npy'), predictions)


 ## ======================= make files which is used for forced alignment by Kaldi =======================
@@ -392,7 +313,7 @@ if make_kaldi_lexicon_txt:
             pronvar_list = np.unique(pronvar_list_)

             for pronvar_ in pronvar_list:
-                split_ipa = convert_phone_set.split_ipa_fame(pronvar_)
+                split_ipa = convert_phone_set.split_fame_ipa(pronvar_)
                 pronvar_out = ' '.join(split_ipa)
                 pronvar_list_all.append([word, pronvar_out])

@@ -456,13 +377,12 @@ if load_forced_alignment_kaldi:
                 filename_ = filename

     # correct or not.
-    for filename, fa_pronunciation in zip(fa_filenames, fa_pronunciations):
+    #for filename, fa_pronunciation in zip(fa_filenames, fa_pronunciations):


 ## ======================= evaluate the result of forced alignment =======================
 if eval_forced_alignment:
     match_num = []
     for hmm_num in [1, 2, 4, 8, 16, 32, 64, 128, 256]:
         #hmm_num = 256
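Distilled from the HTK loop above, the per-file alignment step reduces to the following sketch. All file names are illustrative, and doHVite is called with the same argument order as in the script:

    import os

    import defaultfiles as default
    import acoustic_model_functions as am_func
    from forced_alignment import pyhtk

    wav_file = 'example.wav'              # utterance to align (illustrative)
    label_file = 'example.lab'            # HTK label file: the word to align
    htk_dict_file = 'example.dic'         # pronunciation variants of that word
    fa_file = 'example.txt'               # HVite writes the alignment here
    acoustic_model = r'hmm256-2\hmmdefs'  # one of the trained hmm<n>-2 models

    # HTK label files contain the uppercased orthographic word.
    with open(label_file, 'w') as f:
        f.write('EXAMPLE')

    pyhtk.doHVite(wav_file, label_file, htk_dict_file, fa_file,
                  default.config_hvite, default.phonelist, acoustic_model)
    print(am_func.read_fileFA(fa_file))  # aligned phones, space-separated
    os.remove(label_file)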

acoustic_model/phone_conversion_check.py (deleted)

@@ -1,54 +0,0 @@
-import os
-os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
-import sys
-
-import pandas as pd
-
-
-## ======================= user define =======================
-forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment'
-ipa_xsampa_converter_dir = r'C:\Users\Aki\source\repos\ipa-xsama-converter'
-experiments_dir = r'c:\OneDrive\Research\rug\experiments'
-excel_file = experiments_dir + '\\stimmen\\data\\Frisian Variants Picture Task Stimmen.xlsx'
-
-
-## ======================= add paths =======================
-sys.path.append(forced_alignment_module)
-from forced_alignment import convert_phone_set
-
-import convert_xsampa2ipa
-
-
-xls = pd.ExcelFile(excel_file)
-df = pd.read_excel(xls, 'frequency')
-
-mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir)
-
-
-def xsampa2ipa(mapping, xsampa):
-    # make a multi_character_list to split 'xsampa'.
-    multi_character_list = []
-    for i in list(mapping):
-        if len(i) > 1:
-            multi_character_list.append(i)
-
-    # conversion
-    ipa = []
-    for phone in convert_phone_set.multi_character_tokenize(xsampa, multi_character_list):
-        ipa.append(mapping.get(phone, phone))
-    ipa = ''.join(ipa)
-
-    # strange conversion.
-    ipa = ipa.replace('ɡ', 'g')
-
-    return ipa
-
-
-for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
-    #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_)
-    ipa_converted = xsampa2ipa(mapping, xsampa)
-    if not ipa_converted == ipa:
-        print('{0}: {1} - {2}'.format(xsampa_, ipa_converted, ipa))