accent_classification/accent_classification/manipulate_db.py

import sys
import os
import pandas
import datetime
sys.path.append('..')
# These lines are not necessary once forced-alignment is installed as a package.
forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced-alignment'
sys.path.append(forced_alignment_module)
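# (Assumption: once forced-alignment is pip-installed, e.g. with `pip install -e <path-to-forced-alignment>`,
# the two lines above can be dropped and the imports below resolve on their own.)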
from forced_alignment import pronunciations
from forced_alignment.htk_dict import variances_table
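# One-off database maintenance helpers; uncomment as needed.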
#pronunciations.delete_word('kunikoshi')
#pronunciations.delete_all_g2p_entries()
#existing_pronunciations = set(pronunciations.get_all())
## Only focus on words with missing pronunciations:
## (1) the pronunciation is written in IPA,
## (2) pronunciation variants are made based on (1),
## (3) the variants are converted into HTK format.
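## The input file below is assumed to contain one word per line, tab-separated
## from its space-delimited HTK pronunciation, e.g. (hypothetical example):
##   kunikoshi<TAB>k u n i k o s i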
#missing_pronunciations_file = 'D:\\OneDrive\\Research\\rug\\experiments\\same_utterance\\missing_words_in_barbara_dic\\missing_words_pronvarsHTK.txt'
#with open(missing_pronunciations_file) as fin:
#    lines = fin.read()
#    lines = lines.split('\n')
#source = 'generated using ipa transcription by Marita Everhardt.'
#inserts = []
#for line in lines:
#    line = line.split('\t')
#    word = line[0].strip().lower()
#    pronunciation = line[1].strip().split()
#    # these words are surely not in the table yet, so the membership check is skipped
#    #if (word, pronunciation) not in existing_pronunciations:
#    inserts.append("('{}', '{}', '{}', '{}', 0)".format(
#        word,
#        ' '.join(pronunciation),
#        source,
#        datetime.datetime.now(), ))
#sql = """INSERT INTO pronunciations (word, pronunciation, collection, added, automatic) VALUES\n {};""".format(
#    ',\n '.join(inserts)