accent_classification/accent_classification/audio2db.py

84 lines
2.5 KiB
Python

import os
import sys
import numpy as np
import pypyodbc
## user define
# Checkout of the forced-alignment repository and root folder of the
# "same utterance" experiment; the other paths are derived from the root.
forced_alignment_module = 'C:\\Users\\Aki\\source\\repos\\forced-alignment'
dir_same_utterance = 'd:\\OneDrive\\Research\\rug\\experiments\\same_utterance'
wav_dir = '{}\\wav_with_cities'.format(dir_same_utterance)
script_dir = '{}\\script'.format(dir_same_utterance)
fileMDB = '{}\\feature\\DialectClassification.accdb'.format(dir_same_utterance)

# Target table of the Access database and the region sub-folder names.
table = 'ForcedAlignmentResult'
regionLabels = ['Groningen_and_Drenthe', 'Limburg', 'Oost_Overijsel-Gelderland']

# These lines are not necessary once forced-alignment is installed as a package.
sys.path.append(forced_alignment_module)
from forced_alignment import forced_alignment

## delete all automatically generated pronunciations
#from forced_alignment import pronunciations
#pronunciations.delete_all_g2p_entries()

## make database connection
# ODBC connection string for the Access driver, pointing at fileMDB.
param = ''.join([r"Driver={Microsoft Access Driver (*.mdb, *.accdb)};dbq=", fileMDB, ";"])
conn = pypyodbc.connect(param)
cursor = conn.cursor()

# Column list of the INSERT statement; the VALUES clause is appended per row.
SQLstring1 = 'INSERT INTO {} (filename, region, word_id, pronunciation) '.format(table)
## forced-alignment to all the wav files in dir_same_utterance
# For each of the 10 sentences the script text is read once, then every wav
# file of every region is force-aligned against it and one row per word
# (filename, region, word_id, pronunciation) is inserted into the database.
#
# word_id is a running id over all sentences: word_id_start is the id of the
# first word of the current sentence and is advanced by the number of words
# in the script (spaces + 1) once the sentence is finished.

# Parameterized statement: the driver quotes the values, so an apostrophe in a
# filename or pronunciation can no longer break (or alter) the INSERT.
insert_sql = SQLstring1 + 'VALUES (?, ?, ?, ?)'

word_id_start = 1
try:
    for sentenceID in range(1, 11):
        sentenceIDstr = format(sentenceID, '02')

        # get script (only the first line of the script file is used)
        script_file = script_dir + '\\script' + sentenceIDstr + '.txt'
        with open(script_file, 'r') as fin:
            script = fin.readline()

        # loop over three regions
        for region in regionLabels:
            # loop over the wav_subdir
            wav_subdir = wav_dir + '\\' + sentenceIDstr + '\\' + region
            wav_files = os.listdir(wav_subdir)
            for file_nr, wav_file in enumerate(wav_files, start=1):
                filename = wav_file.replace('.wav', '')
                wav_file_fullpath = wav_subdir + '\\' + wav_file

                # forced-alignment
                print('{0} {1}: {2} ({3}/{4})'.format(sentenceIDstr, region, wav_file, file_nr, len(wav_files)))
                fa = forced_alignment(wav_file_fullpath, script)

                # send pronunciation variant to database
                for word_id, row in enumerate(fa, start=word_id_start):
                    # row[1] is indexed as a 2-D table whose third column is
                    # the phoneme label (presumably start/end/label per
                    # phoneme -- TODO confirm against forced_alignment).
                    phonemes = np.array(row[1])

                    ## get pronunciation variant: labels joined, 'ssil' dropped
                    pronvar = ''.join(p for p in phonemes[:, 2] if p != 'ssil')

                    ## insert the result into the database.
                    # word_id is sent as text, matching the quoted literal the
                    # statement used before it was parameterized.
                    cursor.execute(insert_sql, [filename, region, str(word_id), pronvar])
                    conn.commit()

        word_id_start += script.count(' ')+1
finally:
    # Release the database connection even if alignment or an insert fails.
    conn.close()