48 lines
1.5 KiB
Python
48 lines
1.5 KiB
Python
import sys
|
|
import os
|
|
import pandas
|
|
import datetime
|
|
# Add the parent directory to the module search path. This is a relative
# entry, so it is resolved against the current working directory at import time.
sys.path.append('..')
|
|
|
|
# these lines are not necessary once forced-alignment is installed as a package.
|
|
# Absolute path to a local checkout of the forced-alignment repository.
# NOTE(review): this is a machine-specific Windows path; it must be adjusted
# on any other machine.
forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced-alignment'
|
|
# Put the checkout on the module search path so that the `forced_alignment`
# imports that follow can be resolved.
sys.path.append(forced_alignment_module)
|
|
from forced_alignment import pronunciations
|
|
from forced_alignment.htk_dict import variances_table
|
|
|
|
|
|
#pronunciations.delete_word('kunikoshi')
|
|
#pronunciations.delete_all_g2p_entries()
|
|
|
|
|
|
#existing_pronunciations = set(pronunciations.get_all())
|
|
## only focus on word
|
|
|
|
|
|
## missing pronunciations
|
|
## (1) pronunciation is written in IPA.
|
|
## (2) pronunciation variants are made based on (1).
|
|
## (3) they are converted into HTK format.
|
|
#missing_pronunciations_file = 'D:\\OneDrive\\Research\\rug\\experiments\\same_utterance\\missing_words_in_barbara_dic\\missing_words_pronvarsHTK.txt'
|
|
|
|
#with open(missing_pronunciations_file) as fin:
|
|
# lines = fin.read()
|
|
# lines = lines.split('\n')
|
|
|
|
#source = 'generated using ipa transcription by Marita Everhardt.'
|
|
#inserts = []
|
|
#for line in lines:
|
|
# line = line.split('\t')
|
|
# word = line[0].strip().lower()
|
|
# pronounciation = line[1].strip().split()
|
|
|
|
# # surely not in the table
|
|
# #if (word, pronounciation) not in existing_pronunciations:
|
|
# inserts.append("('{}', '{}', '{}', '{}', 0)".format(
|
|
# word,
|
|
# ' '.join(pronounciation),
|
|
# source,
|
|
# datetime.datetime.now(), ))
|
|
|
|
#sql = """INSERT INTO pronunciations (word, pronunciation, collection, added, automatic) VALUES\n {};""".format(
|
|
# ',\n '.join(inserts) |