# Modules used by this script (one import per line).
import sys
import os
import pandas
import datetime

# Local checkout of the forced-alignment repository.
forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced-alignment'

# Add the parent directory and the checkout above to the module search path.
# The checkout entry is not necessary once forced-alignment is installed as a package.
sys.path.extend(['..', forced_alignment_module])

from forced_alignment import pronunciations
from forced_alignment.htk_dict import variances_table


# ---------------------------------------------------------------------------
# Everything below is disabled (commented-out) code.
# ---------------------------------------------------------------------------

#pronunciations.delete_word('kunikoshi')
#pronunciations.delete_all_g2p_entries()
#existing_pronunciations = set(pronunciations.get_all())


## only focus on word

## missing pronunciations
## (1) pronunciation is written in IPA.
## (2) pronunciation variants are made based on (1).
## (3) they are converted into HTK format.
#missing_pronunciations_file = 'D:\\OneDrive\\Research\\rug\\experiments\\same_utterance\\missing_words_in_barbara_dic\\missing_words_pronvarsHTK.txt'
#with open(missing_pronunciations_file) as fin:
#    lines = fin.read()
#    lines = lines.split('\n')
#source = 'generated using ipa transcription by Marita Everhardt.'
#inserts = []
#for line in lines:
#    line = line.split('\t')
#    word = line[0].strip().lower()
#    pronounciation = line[1].strip().split()
#    # surely not in the table
#    #if (word, pronounciation) not in existing_pronunciations:
#    inserts.append("('{}', '{}', '{}', '{}', 0)".format(
#        word,
#        ' '.join(pronounciation),
#        source,
#        datetime.datetime.now(), ))
#sql = """INSERT INTO pronunciations (word, pronunciation, collection, added, automatic) VALUES\n {};""".format(
#    ',\n '.join(inserts)