# acoustic_model/acoustic_model/acoustic_model_function.py

import os
import sys
from collections import Counter

import numpy as np
import pandas as pd

import defaultfiles as default

sys.path.append(default.forced_alignment_module_dir)
from forced_alignment import convert_phone_set


def make_hcopy_scp_from_filelist_in_fame(FAME_dir, dataset, feature_dir, hcopy_scp):
    """ Write an HCopy script file from a filelist of the FAME! corpus.

    The filelist `<FAME_dir>\\fame\\filelists\\<dataset>list.txt` is read and,
    for every usable entry, one line `<wav file><TAB><mfc file>` is written
    to `hcopy_scp`.
    """
    list_path = FAME_dir + '\\fame\\filelists\\' + dataset + 'list.txt'
    with open(list_path) as fin:
        entries = fin.read().split('\n')

    wav_prefix = FAME_dir + '\\fame\\wav\\' + dataset + '\\'
    mfc_prefix = feature_dir + '\\'

    with open(hcopy_scp, 'w') as fout:
        for entry in entries:
            stem = entry.replace('.TextGrid', '')

            # names of three characters or fewer ('.', '..', '') are skipped.
            if len(stem) <= 3:
                continue

            fout.write(wav_prefix + stem + '.wav' + '\t' + mfc_prefix + stem + '.mfc' + '\n')


def make_filelist(input_dir, output_txt):
    """ Write the entries of input_dir to output_txt, one per line.

    Every entry is written as `<input_dir>\\<name>`, in the order returned by
    os.listdir.
    """
    entries = [input_dir + '\\' + name + '\n' for name in os.listdir(input_dir)]

    with open(output_txt, 'w') as fout:
        fout.writelines(entries)


def make_htk_dict(word, pronvar_, fileDic, output_type):
    """
    make dict files which can be used for HTK.
    param word: target word. it is written in upper case.
    param pronvar_: observed pronunciation variants, one entry per observation.
        repeated entries are what the frequency based output types count.
    param fileDic: output dic file (written as UTF-8).
    param output_type:
        0: full. every unique variant, in the sorted order of np.unique.
        1: statistics. 'count<TAB>percentage<TAB>WORD<TAB>variant' per variant.
        2: variants whose relative frequency is below 2% are removed.
        3: top 3. the three most frequent variants.
    raises ValueError: when output_type is not 0, 1, 2 or 3.
    """
    if output_type not in (0, 1, 2, 3):
        raise ValueError('output_type should be an integer between 0 and 3.')
    WORD = word.upper()

    if output_type == 0:  # full
        lines = ['{0}\t{1}\n'.format(WORD, pvar) for pvar in np.unique(pronvar_)]
    else:
        counts = Counter(pronvar_)
        total_num = sum(counts.values())

        if output_type == 3:  # top 3
            lines = ['{0}\t{1}\n'.format(WORD, key) for key, _ in counts.most_common(3)]
        else:
            lines = []
            for key, value in counts.items():
                percentage = value / total_num * 100

                if output_type == 1:  # statistics of all variants
                    lines.append('{0}\t{1:.2f}\t{2}\t{3}\n'.format(value, percentage, WORD, key))
                elif percentage >= 2:  # output_type 2: rare variants are dropped
                    lines.append('{0}\t{1}\n'.format(WORD, key))

    # pronunciations may contain non-ASCII phone symbols, so the encoding is
    # fixed instead of depending on the platform default.
    with open(fileDic, 'w', encoding='utf-8') as f:
        f.writelines(lines)


def get_phonelist(lexicon_file):
    """ Collect the set of phones which appear in the lexicon.

    Each line is split on tabs; the second field is taken as the
    pronunciation and split on whitespace into phones. Lines without a tab
    are ignored.
    """
    with open(lexicon_file, "rt", encoding="utf-8") as fin:
        rows = fin.read().split('\n')

    phones = set()
    for row in rows:
        fields = row.split('\t')
        if len(fields) < 2:
            continue
        phones.update(fields[1].split())
    return phones


def find_phone(lexicon_file, phone):
    """ Return the lexicon entries whose pronunciation contains the phone.

    Each line is split on tabs and the second field is the pronunciation.
    The check is a plain substring test on that field. Matching entries are
    returned as lists of their tab-separated fields, in file order.
    """
    with open(lexicon_file, "rt", encoding="utf-8") as fin:
        rows = fin.read().split('\n')

    entries = (row.split('\t') for row in rows)
    return [fields for fields in entries if len(fields) > 1 and phone in fields[1]]


def ipa2famehtk_lexicon(lexicon_file_in, lexicon_file_out):
    """ Convert a lexicon file from IPA to HTK format for FAME! corpus.

    The input is a tab-separated file with a word and its pronunciation on
    each line. Spaces are removed from the pronunciation before it is passed
    to convert_phone_set.ipa2famehtk. Entries whose converted pronunciation
    contains 'ceh' or 'sh' are left out; the others are written to
    lexicon_file_out (UTF-8) as `<WORD in upper case><TAB><pronunciation>`.
    """
    # dtype=str / keep_default_na=False: every field has to stay a string.
    # With the pandas defaults a word such as "nan", "null" or "NA" becomes a
    # float NaN (and a numeric looking field a number), on which .upper() and
    # .replace() below fail.
    lexicon_in = pd.read_table(
        lexicon_file_in,
        names=['word', 'pronunciation'],
        dtype=str,
        keep_default_na=False)

    with open(lexicon_file_out, "w", encoding="utf-8") as fout:
        for word, pronunciation in zip(lexicon_in['word'], lexicon_in['pronunciation']):
            pronunciation_no_space = pronunciation.replace(' ', '')
            pronunciation_famehtk = convert_phone_set.ipa2famehtk(pronunciation_no_space)
            if 'ceh' not in pronunciation_famehtk and 'sh' not in pronunciation_famehtk:
                fout.write("{0}\t{1}\n".format(word.upper(), pronunciation_famehtk))


def combine_lexicon(lexicon_file1, lexicon_file2, lexicon_out):
    """ Combine two lexicon files and sort by words.

    Both inputs are tab-separated files with a word and its pronunciation on
    each line. The rows of both files are concatenated, sorted by word in
    ascending order and written to lexicon_out as a tab-separated UTF-8 file
    without header. Rows with the same word keep their input order.
    """
    # dtype=str / keep_default_na=False: keep every field exactly as written.
    # With the pandas defaults words such as "NA" or "NULL" are read as NaN
    # and would be written back as an empty word.
    read_options = dict(names=['word', 'pronunciation'], dtype=str, keep_default_na=False)
    lex1 = pd.read_table(lexicon_file1, **read_options)
    lex2 = pd.read_table(lexicon_file2, **read_options)

    lex = pd.concat([lex1, lex2])
    # a stable sort keeps the several pronunciations of one word in a
    # reproducible order.
    lex = lex.sort_values(by='word', ascending=True, kind='mergesort')
    lex.to_csv(lexicon_out, index=False, header=False, encoding="utf-8", sep='\t')


def read_fileFA(fileFA):
    """
    read the result file of HTK forced alignment.
    this function only works when input is one word.
    param fileFA: path to the forced alignment result file.
    return: the third whitespace-separated field of every line which has
        at least three fields, joined by single spaces.
    """
    with open(fileFA, 'r') as f:
        lines = f.read().split('\n')

    # the phone is read from index 2, so a line needs at least three fields;
    # shorter lines are skipped.
    phones = [fields[2] for fields in (line.split() for line in lines) if len(fields) > 2]

    return ' '.join(phones)


def fame_pronunciation_variant(ipa):
    """
    make the pronunciation variants of an IPA string after symbol replacement.
    first a fixed set of symbols is replaced one-to-one. then each of the
    symbols 'ø:', 'œ' and 'ɒ' is expanded into its two alternatives, so that
    every combination of alternatives is generated.
    param ipa: pronunciation as a string.
    return: a list holding the single replaced string when none of
        'ø:', 'œ', 'ɒ' is present; otherwise the sorted numpy array of
        unique variants produced by np.unique.
    """
    one_to_one = (
        ('æ', 'ɛ'),
        ('ɐ', 'a'),
        ('ɑ', 'a'),
        ('ɾ', 'r'),
        ('ɹ', 'r'),  # ???
        ('ʁ', 'r'),
        ('ʀ', 'r'),  # ???
        ('ʊ', 'u'),
        ('χ', 'x'),
    )
    for source, target in one_to_one:
        ipa = ipa.replace(source, target)

    # symbols with two possible targets each.
    one_to_many = (
        ('ø:', ('ö', 'ö:')),
        ('œ', ('ɔ̈', 'ɔ̈:')),
        ('ɒ', ('ɔ̈', 'ɔ̈:')),
    )

    variants = [ipa]
    # every pass resolves one kind of symbol per candidate; repeat until no
    # candidate holds an unresolved symbol.
    while any(symbol in ' '.join(variants) for symbol, _ in one_to_many):
        expanded = []
        for candidate in variants:
            for symbol, options in one_to_many:
                if symbol in candidate:
                    expanded.extend(candidate.replace(symbol, option) for option in options)
        variants = np.unique(expanded)
    return variants


def make_fame2ipa_variants(fame):
    """
    make candidate IPA strings from a pronunciation written in FAME! symbols.
    param fame: pronunciation as a string.
    return: a list whose first element is `fame` itself, followed by one
        string per substitution below, each made by applying that single
        substitution to `fame`. a substitution whose symbol does not occur
        gives `fame` unchanged, so the list may contain duplicates.
    """
    # (FAME! symbol, IPA symbol); each pair is applied on its own.
    substitutions = (
        ('ɛ', 'æ'),
        ('a', 'ɐ'),
        ('a', 'ɑ'),
        ('r', 'ɾ'),
        ('r', 'ɹ'),
        ('r', 'ʁ'),
        ('r', 'ʀ'),
        ('u', 'ʊ'),
        ('x', 'χ'),
        ('ö', 'ø:'),
        ('ö:', 'ø:'),
        ('ɔ̈', 'œ'),
        ('ɔ̈:', 'œ'),
        ('ɔ̈', 'ɒ'),
        ('ɔ̈:', 'ɒ'),
    )

    ipa = [fame]
    ipa.extend(fame.replace(source, target) for source, target in substitutions)
    return ipa