You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

202 lines
6.6 KiB

import os
import sys
from collections import Counter
import numpy as np
import pandas as pd
import defaultfiles as default
sys.path.append(default.forced_alignment_module_dir)
from forced_alignment import convert_phone_set
def make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp):
    """
    Make a script file for HCopy using the filelist in FAME! corpus.

    The filelist <FAME_dir>/fame/filelists/<dataset>list.txt is read and, for
    every usable entry, one line "<wav_file> TAB <mfc_file>" is written to
    hcopy_scp.

    param FAME_dir: root directory of the FAME! corpus.
    param dataset: dataset name; prefix of the filelist name and name of the wav sub-directory.
    param feature_dir: directory used to build the .mfc paths.
    param hcopy_scp: path of the script file to be written.
    """
    # os.path.join instead of hard-coded '\\' so that the paths are valid on
    # every platform (on Windows the result is the same as before).
    filelist_txt = os.path.join(FAME_dir, 'fame', 'filelists', dataset + 'list.txt')
    wav_dir = os.path.join(FAME_dir, 'fame', 'wav', dataset)

    with open(filelist_txt) as fin:
        entries = fin.read().split('\n')

    with open(hcopy_scp, 'w') as fout:
        for entry in entries:
            filename = entry.replace('.TextGrid', '')
            if len(filename) > 3:  # remove '.', '..' and ''
                wav_file = os.path.join(wav_dir, filename + '.wav')
                mfc_file = os.path.join(feature_dir, filename + '.mfc')
                fout.write(wav_file + '\t' + mfc_file + '\n')
def make_filelist(input_dir, output_txt):
    """
    Make a list of files in the input_dir.

    Every entry returned by os.listdir(input_dir) is joined onto input_dir and
    written to output_txt, one path per line, in sorted order.

    param input_dir: directory whose entries are listed.
    param output_txt: path of the text file to be written.
    """
    # List before opening the output so that a new output file placed inside
    # input_dir is not listed itself; sort because os.listdir order is arbitrary.
    filenames = sorted(os.listdir(input_dir))
    with open(output_txt, 'w') as fout:
        for filename in filenames:
            # os.path.join instead of a hard-coded '\\' keeps the paths valid
            # on every platform.
            fout.write(os.path.join(input_dir, filename) + '\n')
def make_htk_dict(word, pronvar_, fileDic, output_type):
    """
    make dict files which can be used for HTK.

    param word: target word. It is written in upper case.
    param pronvar_: pronunciation variants of the word, one item per observation
        (repeated items are counted for output_type 1-3).
        NOTE(review): previously documented as an nx2 ndarray, but Counter needs
        hashable items, so a 1-D sequence of strings is assumed - confirm against callers.
    param fileDic: output dic file.
    param output_type:
        0: full (every unique variant),
        1: statistics (count, percentage, WORD, variant),
        2: variants whose frequency is < 2% are removed,
        3: top 3 most frequent variants.
    raises ValueError: when output_type is not one of 0, 1, 2, 3.
    """
    if output_type not in (0, 1, 2, 3):
        raise ValueError('output_type should be an integer between 0 and 3.')
    WORD = word.upper()

    if output_type == 0:  # full
        entries = ['{0}\t{1}\n'.format(WORD, pvar) for pvar in np.unique(pronvar_)]
    else:
        c = Counter(pronvar_)
        total_num = sum(c.values())
        if output_type == 3:  # top 3
            entries = ['{0}\t{1}\n'.format(WORD, key) for key, _ in c.most_common(3)]
        else:
            entries = []
            for key, value in c.items():
                percentage = value / total_num * 100
                if output_type == 1:  # all, with statistics
                    entries.append(
                        '{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
                elif percentage >= 2:
                    # output_type 2: keep the variant only when it is frequent
                    # enough, i.e. entries below 2 percent are removed.
                    entries.append('{0}\t{1}\n'.format(WORD, key))

    with open(fileDic, 'w') as f:
        f.writelines(entries)
def get_phonelist(lexicon_file):
    """
    Make a list of phones which appears in the lexicon.

    Each line of the lexicon is split on tabs; the second column is taken as
    the pronunciation and split on whitespace into phones. Lines without a
    tab are ignored.

    param lexicon_file: path of a tab-separated, UTF-8 encoded lexicon file.
    return: set of the phones found.
    """
    phonelist = set()
    with open(lexicon_file, "rt", encoding="utf-8") as fin:
        for line in fin:
            columns = line.rstrip('\n').split('\t')
            if len(columns) > 1:
                # update in place rather than building a new set per line.
                phonelist.update(columns[1].split())
    return phonelist
def find_phone(lexicon_file, phone, whole_phone=False):
    """
    Search where the phone is used in the lexicon.

    Each line of the lexicon is split on tabs and the second column is taken
    as the pronunciation. Lines without a tab are ignored.

    param lexicon_file: path of a tab-separated, UTF-8 encoded lexicon file.
    param phone: phone (string) to search for.
    param whole_phone: when False (default) the phone is searched as a
        substring of the pronunciation, so e.g. 'a' also matches 'ai' or 'a:'.
        When True the pronunciation is split on whitespace and the phone has
        to be equal to one of the resulting items.
    return: list of the matching lines, each one as the list of its tab-separated columns.
    """
    extracted = []
    with open(lexicon_file, "rt", encoding="utf-8") as fin:
        for line in fin:
            columns = line.rstrip('\n').split('\t')
            if len(columns) < 2:
                continue
            pronunciation = columns[1]
            searched = pronunciation.split() if whole_phone else pronunciation
            if phone in searched:
                extracted.append(columns)
    return extracted
def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
    """
    Convert a lexicon file from IPA to HTK format for FAME! corpus.

    The input is read as a tab-separated table with the columns word and
    pronunciation. Spaces are removed from the pronunciation before it is
    passed to convert_phone_set.ipa2famehtk. The word is written in upper
    case together with the converted pronunciation.

    Entries whose converted pronunciation contains 'ceh' or 'sh' are not
    written, and rows without a pronunciation are skipped.

    param lexicon_file_in: path of the IPA lexicon (word TAB pronunciation).
    param lexicon_file_out: path of the lexicon to be written (UTF-8).
    """
    # dtype=str / keep_default_na=False: otherwise pandas turns words such as
    # 'nan', 'null' or 'NA' into float NaN and digit-only words into numbers,
    # and the string methods below fail on them.
    lexicon_in = pd.read_table(
        lexicon_file_in,
        names=['word', 'pronunciation'],
        dtype=str,
        keep_default_na=False)

    with open(lexicon_file_out, "w", encoding="utf-8") as fout:
        for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
            if not isinstance(pronunciation, str) or not pronunciation:
                continue  # row without a pronunciation column.
            pronunciation_no_space = pronunciation.replace(' ', '')
            pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)
            if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk:
                fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))
def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
    """
    Combine two lexicon files and sort by words.

    Both inputs are read as tab-separated tables with the columns word and
    pronunciation, concatenated, sorted by word in ascending order and written
    to lexicon_out as a tab-separated UTF-8 file without header and index.

    param lexicon_file1: path of the first lexicon.
    param lexicon_file2: path of the second lexicon.
    param lexicon_out: path of the combined lexicon to be written.
    """
    # dtype=str / keep_default_na=False: keep every field exactly as written.
    # With the pandas defaults words such as 'nan' or 'null' are read as NaN
    # (and written back as an empty field) and digit-only words become numbers.
    read_options = dict(names=['word', 'pronunciation'], dtype=str, keep_default_na=False)
    lex1 = pd.read_table(lexicon_file1, **read_options)
    lex2 = pd.read_table(lexicon_file2, **read_options)

    lex = pd.concat([lex1, lex2]).sort_values(by='word', ascending=True)
    lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')
def read_fileFA(fileFA):
    """
    read the result file of HTK forced alignment.
    this function only works when input is one word.

    Every line is split on whitespace and its third field is taken as the
    phone. Lines with fewer than three fields are ignored.

    param fileFA: path of the forced alignment result file.
    return: the phones joined by a single space.
    """
    phones = []
    with open(fileFA, 'r') as f:
        for line in f:
            fields = line.split()
            # index 2 is read below, so at least three fields are required
            # (a two-field line used to raise IndexError).
            if len(fields) > 2:
                phones.append(fields[2])
    return ' '.join(phones)
def fame_pronunciation_variant(ipa):
    """
    Make the pronunciation variants of an IPA string.

    First a fixed set of symbols is replaced one-to-one. After that, every
    'ø:', 'œ' and 'ɒ' is expanded into two alternatives (a short and a long
    one), and the expansion is repeated until none of them is left.

    param ipa: pronunciation string.
    return: [ipa] (a list) when nothing had to be expanded, otherwise the
        sorted unique variants as returned by np.unique (an ndarray).
    """
    # one-to-one replacements, applied in this order.
    one_to_one = (
        ('æ', 'ɛ'),
        ('ɐ', 'a'),
        ('ɑ', 'a'),
        ('ɾ', 'r'),
        ('ɹ', 'r'),  # ???
        ('ʁ', 'r'),
        ('ʀ', 'r'),  # ???
        ('ʊ', 'u'),
        ('χ', 'x'),
    )
    for source, target in one_to_one:
        ipa = ipa.replace(source, target)

    # symbols which are expanded into two alternatives each.
    one_to_two = (
        ('ø:', ('ö', 'ö:')),
        ('œ', ('ɔ̈', 'ɔ̈:')),
        ('ɒ', ('ɔ̈', 'ɔ̈:')),
    )

    pronvar_list = [ipa]
    while True:
        joined = ' '.join(pronvar_list)
        if not any(source in joined for source, _ in one_to_two):
            break
        expanded = []
        for variant in pronvar_list:
            for source, targets in one_to_two:
                if source in variant:
                    for target in targets:
                        expanded.append(variant.replace(source, target))
        pronvar_list = np.unique(expanded)
    return pronvar_list
def make_fame2ipa_variants(fame):
    """
    Make IPA variants of a pronunciation written with the FAME! symbols.

    The first item of the result is the input itself. Each following item is
    the input with one substitution applied (all occurrences replaced). A
    substitution which does not apply leaves the input unchanged, so the
    result can contain duplicates.

    param fame: pronunciation string.
    return: list of 16 strings (the input followed by one item per substitution).
    """
    # The argument is used as given; it used to be overwritten by a
    # hard-coded test string.
    substitutions = (
        ('ɛ', 'æ'),
        ('a', 'ɐ'),
        ('a', 'ɑ'),
        ('r', 'ɾ'),
        ('r', 'ɹ'),
        ('r', 'ʁ'),
        ('r', 'ʀ'),
        ('u', 'ʊ'),
        ('x', 'χ'),
        ('ö', 'ø:'),
        ('ö:', 'ø:'),
        ('ɔ̈', 'œ'),
        ('ɔ̈:', 'œ'),
        ('ɔ̈', 'ɒ'),
        ('ɔ̈:', 'ɒ'),
    )
    ipa = [fame]
    ipa.extend(fame.replace(source, target) for source, target in substitutions)
    return ipa