import os
import sys
import csv
import subprocess
import configparser

import numpy as np
import pandas as pd


## ======================= user define =======================
curr_dir = r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model'
config_ini = curr_dir + '\\config.ini'
forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment'
ipa_xsampa_converter_dir = r'C:\Users\Aki\source\repos\ipa-xsama-converter'
csvfile = r"C:\OneDrive\Research\rug\stimmen\Frisian Variants Picture Task Stimmen.csv"

# procedure


## ======================= add paths =======================
sys.path.append(forced_alignment_module)
from forced_alignment import convert_phone_set

# for interactive window
sys.path.append(curr_dir)
import convert_xsampa2ipa
import acoustic_model_functions as am_func


## ======================= load variables =======================
config = configparser.ConfigParser()
config.sections()
config.read(config_ini)

FAME_dir = config['Settings']['FAME_dir']

lex_asr = FAME_dir + '\\lexicon\\lex.asr'
lex_asr_htk = FAME_dir + '\\lexicon\\lex.asr_htk'


## ======================= check phones included in FAME! =======================
# the phones used in the lexicon.
#phonelist = am_func.get_phonelist(lex_htk)

# the lines which include a specific phone.
#lines = am_func.find_phone(lex_asr, 'x')


## ======================= convert phones ======================
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir)

with open(csvfile, encoding="utf-8") as fin:
    lines = csv.reader(fin, delimiter=';', lineterminator="\n", skipinitialspace=True)
    next(lines, None)  # skip the headers

    filenames = []
    words = []
    pronunciations = []
    for line in lines:
        if line[1] != '' and len(line) > 5:
            filenames.append(line[0])
            words.append(line[1])
            pron_xsampa = line[3]
            pron_ipa = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, pron_xsampa)
            pron_ipa = pron_ipa.replace('ː', ':')
            pron_famehtk = convert_phone_set.ipa2famehtk(pron_ipa)

            # adjust to phones used in the acoustic model.
            pron_famehtk = pron_famehtk.replace('sp', 'sil')
            pron_famehtk = pron_famehtk.replace('ce :', 'ce')  # because ceh is ignored.
            pron_famehtk = pron_famehtk.replace('w :', 'wh')
            pron_famehtk = pron_famehtk.replace('e :', 'eh')
            pron_famehtk = pron_famehtk.replace('eh :', 'eh')
            pron_famehtk = pron_famehtk.replace('ih :', 'ih')

            #translation_key = {'sp': 'sil', 'ce :': 'ceh', 'w :': 'wh'}
            #pron = []
            #for phoneme in pron_famehtk.split(' '):
            #    pron.append(translation_key.get(phoneme, phoneme))
            #pronunciations.append(' '.join(pron))
            pronunciations.append(pron_famehtk)

filenames = np.array(filenames)
words = np.array(words)
pronunciations = np.array(pronunciations)

del line, lines
del pron_xsampa, pron_ipa, pron_famehtk

# check if all phones are in the phonelist of the acoustic model.
#phonelist = ' '.join(pronunciations)
#np.unique(phonelist.split(' '))
#phonelist.find(':')

# make dict files.
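# A minimal sketch of the "make dict files" step, assuming the goal is an
# HTK-style pronunciation dictionary with one "WORD<TAB>phone phone ..." line
# per unique (word, pronunciation) pair. The output path `htk_dict_file` below
# is a hypothetical name, not taken from config.ini; kept commented out, like
# the other optional blocks in this script.
#htk_dict_file = curr_dir + '\\stimmen.dic'
#entries = sorted(set(zip(words, pronunciations)))
#with open(htk_dict_file, 'w', encoding='utf-8') as fout:
#    for word_, pron_ in entries:
#        fout.write('{0}\t{1}\n'.format(word_, pron_))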
word_list = np.unique(words)

word_id = 1
word = word_list[word_id]


## ======================= forced alignment =======================
#if forced_alignment:
#    try:
#        scripts.run_command([
#            'HVite', '-T', '1', '-a', '-C', configHVite,
#            '-H', AcousticModel, '-m', '-I',
#            mlf_file, '-i', fa_file, '-S',
#            script_file, htk_dict_file, filePhoneList
#        ])
#    except:
#        print("\033[91mHVite command failed with these input files:\033[0m")
#        print(_debug_show_file('HVite config', configHVite))
#        print(_debug_show_file('Acoustic model', AcousticModel))
#        print(_debug_show_file('Master Label file', mlf_file))
#        print(_debug_show_file('Output', fa_file))
#        print(_debug_show_file('Script file', script_file))
#        print(_debug_show_file('HTK dictionary', htk_dict_file))
#        print(_debug_show_file('Phoneme list', filePhoneList))
#        raise

##os.remove(hcopy_scp.name)
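# A minimal sketch of how the HVite call above could be run with the
# `subprocess` module imported at the top, since `scripts.run_command` and
# `_debug_show_file` are not defined in this script. The file-path variables
# (configHVite, AcousticModel, mlf_file, fa_file, script_file, htk_dict_file,
# filePhoneList) are assumed to be set elsewhere; the HVite flags are the ones
# listed in the commented block above.
#try:
#    subprocess.check_output([
#        'HVite', '-T', '1', '-a', '-C', configHVite,
#        '-H', AcousticModel, '-m', '-I', mlf_file,
#        '-i', fa_file, '-S', script_file,
#        htk_dict_file, filePhoneList
#    ], stderr=subprocess.STDOUT)
#except subprocess.CalledProcessError as error:
#    print('HVite failed with exit code {0}:'.format(error.returncode))
#    print(error.output)
#    raise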