acoustic_model/acoustic_model/acoustic_model_functions.py

import os
import sys
from collections import Counter
import numpy as np
import pandas as pd
import defaultfiles as default

# make the forced_alignment package importable before importing from it
sys.path.append(default.forced_alignment_module_dir)
from forced_alignment import convert_phone_set


def make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp):
    """ Make a script file for HCopy using the filelist in FAME! corpus. """
    filelist_txt = FAME_dir + '\\fame\\filelists\\' + dataset + 'list.txt'
    with open(filelist_txt) as fin:
        filelist = fin.read()
        filelist = filelist.split('\n')

    with open(hcopy_scp, 'w') as fout:
        for filename_ in filelist:
            filename = filename_.replace('.TextGrid', '')

            if len(filename) > 3:  # remove '.', '..' and ''
                wav_file = FAME_dir + '\\fame\\wav\\' + dataset + '\\' + filename + '.wav'
                mfc_file = feature_dir + '\\' + filename + '.mfc'
                fout.write(wav_file + '\t' + mfc_file + '\n')
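
# Example (hypothetical paths): write an HCopy scp file for the 'train' set.
# make_hcopy_scp_from_filelist_in_fame(
#     'd:\\corpora\\fame', 'train', 'd:\\features\\train', 'd:\\htk\\hcopy_train.scp')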


def make_filelist(input_dir, output_txt):
    """ Make a list of files in the input_dir. """
    filenames = os.listdir(input_dir)

    with open(output_txt, 'w') as fout:
        for filename in filenames:
            fout.write(input_dir + '\\' + filename + '\n')
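
# Example (hypothetical paths): list all extracted feature files.
# make_filelist('d:\\features\\train', 'd:\\htk\\filelist_train.txt')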


def make_htk_dict(word, pronvar_, fileDic, output_type):
    """
    Make dict files which can be used for HTK.
    :param word: target word.
    :param pronvar_: pronunciation variants of the word (a sequence of pronunciation strings).
    :param fileDic: output dic file.
    :param output_type: 0: full, 1: statistics, 2: variants with a frequency below 2% are removed, 3: top 3.
    """
    assert 0 <= output_type <= 3, 'output_type should be an integer between 0 and 3.'
    WORD = word.upper()

    if output_type == 0:  # full
        pronvar = np.unique(pronvar_)

        with open(fileDic, 'w') as f:
            for pvar in pronvar:
                f.write('{0}\t{1}\n'.format(WORD, pvar))
    else:
        c = Counter(pronvar_)
        total_num = sum(c.values())

        with open(fileDic, 'w') as f:
            if output_type == 3:  # top 3
                for key, value in c.most_common(3):
                    f.write('{0}\t{1}\n'.format(WORD, key))
            else:
                for key, value in c.items():
                    percentage = value / total_num * 100

                    if output_type == 1:  # statistics
                        f.write('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
                    elif output_type == 2:  # remove variants below 2 percent
                        if percentage >= 2:
                            f.write('{0}\t{1}\n'.format(WORD, key))
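
# Example (hypothetical values): keep only the three most frequent variants of 'wer'.
# make_htk_dict('wer', ['w e r', 'w e r', 'w E r', 'v e r'], 'wer.dic', output_type=3)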


def get_phonelist(lexicon_file):
    """ Make a set of the phones which appear in the lexicon. """
    with open(lexicon_file, "rt", encoding="utf-8") as fin:
        lines = fin.read()
        lines = lines.split('\n')

    phonelist = set([])
    for line in lines:
        line = line.split('\t')

        if len(line) > 1:
            pronunciation = set(line[1].split())
            phonelist = phonelist | pronunciation

    return phonelist
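
# Example (hypothetical path): collect the phone inventory used in a lexicon.
# phonelist = get_phonelist('d:\\lexicon\\lex.htk')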


def find_phone(lexicon_file, phone):
    """ Search where the phone is used in the lexicon. """
    with open(lexicon_file, "rt", encoding="utf-8") as fin:
        lines = fin.read()
        lines = lines.split('\n')

    extracted = []
    for line in lines:
        line = line.split('\t')

        if len(line) > 1:
            pronunciation = line[1]
            if phone in pronunciation:
                extracted.append(line)

    return extracted
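
# Example (hypothetical path): find all entries whose pronunciation contains 'x'.
# entries = find_phone('d:\\lexicon\\lex.ipa', 'x')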


def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
    """ Convert a lexicon file from IPA to HTK format for the FAME! corpus. """
    lexicon_in = pd.read_table(lexicon_file_in, names=['word', 'pronunciation'])

    with open(lexicon_file_out, "w", encoding="utf-8") as fout:
        for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
            pronunciation_no_space = pronunciation.replace(' ', '')
            pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)

            # skip entries whose converted pronunciation contains 'ceh' or 'sh'
            if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk:
                fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))
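
# Example (hypothetical paths): convert an IPA lexicon into an HTK-format lexicon.
# ipa2famehtk_lexicon('d:\\lexicon\\lex.ipa', 'd:\\lexicon\\lex.htk')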


def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
    """ Combine two lexicon files and sort by words. """
    lex1 = pd.read_table(lexicon_file1, names=['word', 'pronunciation'])
    lex2 = pd.read_table(lexicon_file2, names=['word', 'pronunciation'])
    lex = pd.concat([lex1, lex2])
    lex = lex.sort_values(by='word', ascending=True)
    lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')
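
# Example (hypothetical paths): merge two lexicons into one word-sorted file.
# combine_lexicon('d:\\lexicon\\lex1.htk', 'd:\\lexicon\\lex2.htk', 'd:\\lexicon\\lex_combined.htk')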


def read_fileFA(fileFA):
    """
    Read the result file of HTK forced alignment.
    This function only works when the input is a single word.
    """
    with open(fileFA, 'r') as f:
        lines = f.read()
        lines = lines.split('\n')

    phones = []
    for line in lines:
        line_split = line.split()
        # alignment lines contain at least start time, end time and phone label
        if len(line_split) > 2:
            phones.append(line_split[2])

    return ' '.join(phones)
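
# Example (hypothetical path): read the aligned phone sequence of a single word.
# phones = read_fileFA('d:\\htk\\result\\word.fa')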


def fame_pronunciation_variant(ipa):
    """ Generate pronunciation variants of an IPA transcription for the FAME! phone set. """
    # normalise phones that are treated as equivalent
    ipa = ipa.replace('æ', 'ɛ')
    ipa = ipa.replace('ɐ', 'a')
    ipa = ipa.replace('ɑ', 'a')
    ipa = ipa.replace('ɾ', 'r')
    ipa = ipa.replace('ɹ', 'r')  # ???
    ipa = ipa.replace('ʁ', 'r')
    ipa = ipa.replace('ʀ', 'r')  # ???
    ipa = ipa.replace('ʊ', 'u')
    ipa = ipa.replace('χ', 'x')

    # expand ambiguous vowels into multiple variants
    pronvar_list = [ipa]
    while 'ø:' in ' '.join(pronvar_list) or 'œ' in ' '.join(pronvar_list) or 'ɒ' in ' '.join(pronvar_list):
        pronvar_list_ = []
        for p in pronvar_list:
            if 'ø:' in p:
                pronvar_list_.append(p.replace('ø:', 'ö'))
                pronvar_list_.append(p.replace('ø:', 'ö:'))
            if 'œ' in p:
                pronvar_list_.append(p.replace('œ', 'ɔ̈'))
                pronvar_list_.append(p.replace('œ', 'ɔ̈:'))
            if 'ɒ' in p:
                pronvar_list_.append(p.replace('ɒ', 'ɔ̈'))
                pronvar_list_.append(p.replace('ɒ', 'ɔ̈:'))
        pronvar_list = np.unique(pronvar_list_)

    return pronvar_list
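
# Example: 'rø:s' is expanded into the variants 'rö:s' and 'rös'.
# variants = fame_pronunciation_variant('rø:s')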


def make_fame2ipa_variants(fame):
    """ Generate IPA candidates for a FAME! transcription by reversing the phone substitutions. """
    ipa = [fame]
    ipa.append(fame.replace('ɛ', 'æ'))
    ipa.append(fame.replace('a', 'ɐ'))
    ipa.append(fame.replace('a', 'ɑ'))
    ipa.append(fame.replace('r', 'ɾ'))
    ipa.append(fame.replace('r', 'ɹ'))
    ipa.append(fame.replace('r', 'ʁ'))
    ipa.append(fame.replace('r', 'ʀ'))
    ipa.append(fame.replace('u', 'ʊ'))
    ipa.append(fame.replace('x', 'χ'))
    ipa.append(fame.replace('ö', 'ø:'))
    ipa.append(fame.replace('ö:', 'ø:'))
    ipa.append(fame.replace('ɔ̈', 'œ'))
    ipa.append(fame.replace('ɔ̈:', 'œ'))
    ipa.append(fame.replace('ɔ̈', 'ɒ'))
    ipa.append(fame.replace('ɔ̈:', 'ɒ'))

    return ipa
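
# Example: generate IPA candidates for the FAME! transcription 'rɛös'.
# ipa_variants = make_fame2ipa_variants('rɛös')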