functions are added to perform forced_alignment using novoapi. results can be written in novo70 or IPA.
This commit is contained in:
parent
d6d5543d03
commit
1622655542
1
.gitignore
vendored
1
.gitignore
vendored
@ -3,6 +3,7 @@
|
||||
|
||||
## important ##
|
||||
.acoustic_model/forced_alignment_novo.py
|
||||
.acoustic_model/novoapi_functions.py
|
||||
|
||||
# User-specific files
|
||||
*.suo
|
||||
|
Binary file not shown.
Binary file not shown.
@ -36,8 +36,14 @@ fame_s5_dir = os.path.join(fame_dir, 's5')
|
||||
fame_corpus_dir = os.path.join(fame_dir, 'corpus')
|
||||
|
||||
experiments_dir = r'c:\OneDrive\Research\rug\experiments'
|
||||
stimmen_transcription_xlsx = os.path.join(experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
|
||||
stimmen_data_dir = os.path.join(experiments_dir, 'stimmen', 'data')
|
||||
stimmen_dir = os.path.join(experiments_dir, 'stimmen')
|
||||
stimmen_data_dir = os.path.join(stimmen_dir, 'data')
|
||||
# 44.1 kHz
|
||||
#stimmen_wav_dir = os.path.join(stimmen_dir, 'wav')
|
||||
# 16 kHz
|
||||
stimmen_wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen'
|
||||
|
||||
stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx')
|
||||
phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
|
||||
|
||||
novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi')
|
||||
|
@ -256,7 +256,7 @@ if make_kaldi_lexicon_txt:
|
||||
# f.write("{0},{1}\n".format(key,c[key]))
|
||||
|
||||
for key, value in c.most_common(option_num):
|
||||
# make possible pronounciation variant list.
|
||||
# make possible pronunciation variant list.
|
||||
pronvar_list = am_func.fame_pronunciation_variant(key)
|
||||
|
||||
for pronvar_ in pronvar_list:
|
||||
|
@ -37,11 +37,15 @@
|
||||
# Aki Kunikoshi
|
||||
# 428968@gmail.com
|
||||
#
|
||||
import os
|
||||
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||
|
||||
import argparse
|
||||
import json
|
||||
|
||||
from novoapi.backend import session
|
||||
import novoapi_functions
|
||||
import defaultfiles as default
|
||||
|
||||
# username / password cannot be passed as arguments...
|
||||
p = argparse.ArgumentParser()
|
||||
@ -51,68 +55,11 @@ p.add_argument("--user", default='martijn.wieling')
|
||||
p.add_argument("--password", default='fa0Thaic')
|
||||
args = p.parse_args()
|
||||
|
||||
wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'
|
||||
|
||||
rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)
|
||||
grammar = {
|
||||
"type": "confusion_network",
|
||||
"version": "1.0",
|
||||
"data": {
|
||||
"kind": "sequence",
|
||||
"elements": [{
|
||||
"kind": "word",
|
||||
"pronunciation": [{
|
||||
"phones": ["wv",
|
||||
"a1",
|
||||
"n"],
|
||||
"id": 0
|
||||
},
|
||||
{
|
||||
"phones": ["wv",
|
||||
"uh1",
|
||||
"n"],
|
||||
"id": 1
|
||||
}],
|
||||
"label": "one"
|
||||
},
|
||||
{
|
||||
"kind": "word",
|
||||
"pronunciation": [{
|
||||
"phones": ["t",
|
||||
"uw1"],
|
||||
"id": 0
|
||||
}],
|
||||
"label": "two"
|
||||
},
|
||||
{
|
||||
"kind": "word",
|
||||
"pronunciation": [{
|
||||
"phones": ["t",
|
||||
"r",
|
||||
"iy1"],
|
||||
"id": 0
|
||||
},
|
||||
{
|
||||
"phones": ["s",
|
||||
"r",
|
||||
"iy1"],
|
||||
"id": 1
|
||||
}],
|
||||
"label": "three"
|
||||
}]
|
||||
},
|
||||
"return_objects": ["grammar"],
|
||||
"phoneset": "novo70"
|
||||
}
|
||||
|
||||
res = rec.setgrammar(grammar)
|
||||
#print "Set grammar result", res
|
||||
|
||||
#res = rec.recognize_wav("test/onetwothree.wav")
|
||||
res = rec.recognize_wav(wav_file)
|
||||
#print "Recognition result:", json.dumps(res.export(), indent=4)
|
||||
|
||||
#wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'
|
||||
wav_file = os.path.join(default.stimmen_wav_dir, 'pg_pauw_2206_0fjd8.wav')
|
||||
# list of the pronunciations for each word
|
||||
word = 'pauw'
|
||||
pronunciation_ipa = ['pau', 'pɑu']
|
||||
grammar = novoapi_functions.make_grammar(word, pronunciation_ipa)
|
||||
|
||||
result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa)
|
||||
pronunciation_ipa, pronunciation_novo70, llh = novoapi_functions.result2pronunciation(result, word)
|
@ -1,7 +1,14 @@
|
||||
## this script should be used only by Aki Kunikoshi.
|
||||
|
||||
import numpy as np
|
||||
import argparse
|
||||
import json
|
||||
|
||||
from novoapi.backend import session
|
||||
|
||||
import defaultfiles as default
|
||||
|
||||
|
||||
def load_phonset():
|
||||
translation_key_ipa2novo70 = dict()
|
||||
translation_key_novo702ipa = dict()
|
||||
@ -112,7 +119,7 @@ def make_grammar(word, pronunciation_ipa):
|
||||
|
||||
grammer_data_elements0_pronunciation = []
|
||||
for id, ipa in enumerate(pronunciation_ipa):
|
||||
novo70 = novoapi_functions.ipa2novo70(ipa)
|
||||
novo70 = ipa2novo70(ipa)
|
||||
grammer_data_elements0_pronunciation.append({
|
||||
"phones": novo70.split(),
|
||||
"id": id
|
||||
@ -136,3 +143,31 @@ def make_grammar(word, pronunciation_ipa):
|
||||
}
|
||||
|
||||
return grammar
|
||||
|
||||
|
||||
def forced_alignment(wav_file, word, pronunciation_ipa):
    """Run forced alignment of one word on a wav file using novoapi.

    A grammar for `word` is built with make_grammar() from the candidate
    IPA pronunciations, set on a newly opened recognizer session, and the
    wav file is then recognized against that grammar.

    Args:
        wav_file: wav file handed to Recognizer.recognize_wav().
        word: the word to be aligned; passed to make_grammar().
        pronunciation_ipa: list of candidate pronunciations in IPA;
            passed to make_grammar().

    Returns:
        The exported recognition result, i.e. `result.export()`.
    """
    ### IMPORTANT ###
    # because of this function, this script should not be uploaded / shared.
    # NOTE(review): the account name and password are hard-coded below as
    # fallback defaults. They should be removed from the source (and the
    # password rotated); until then NOVOAPI_USER / NOVOAPI_PASSWORD in the
    # environment take precedence over the literals.
    import os  # local import: only needed for the credential lookup here.

    # username / password cannot be passed as arguments...
    p = argparse.ArgumentParser()
    p.add_argument("--user", default=os.environ.get('NOVOAPI_USER', 'martijn.wieling'))
    p.add_argument("--password", default=os.environ.get('NOVOAPI_PASSWORD', 'fa0Thaic'))
    # parse_known_args() instead of parse_args(): this function runs inside
    # whatever script imported it, so sys.argv may carry options that belong
    # to that script. parse_args() would abort the process on them.
    args, _ = p.parse_known_args()

    rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True)  # , modeldir=modeldir)

    grammar = make_grammar(word, pronunciation_ipa)
    # the return value of setgrammar() is not needed for the alignment.
    rec.setgrammar(grammar)
    result = rec.recognize_wav(wav_file)
    return result.export()
|
||||
|
||||
|
||||
def result2pronunciation(result, word):
    """Extract the recognized pronunciation of `word` from an alignment result.

    Args:
        result: sequence of dicts, each providing the keys 'label', 'llh'
            and 'phones'; every element of 'phones' is a dict with a
            'label' key (the phone in novo70).
        word: label of the entry to look up.

    Returns:
        A tuple (pronunciation_ipa, pronunciation_novo70, llh) taken from
        the first entry whose 'label' equals `word`. The two pronunciations
        are lists with one item per phone; the IPA list is obtained by
        passing each novo70 phone through novo702ipa().

    Raises:
        IndexError: if no entry of `result` is labelled `word`.
    """
    matches = [entry for entry in result if entry['label'] == word]
    if not matches:
        # same exception type as indexing an empty list, but with a message
        # that says which word was missing.
        raise IndexError(
            "no entry labelled {!r} in the recognition result".format(word))

    first = matches[0]
    llh = first['llh']
    pronunciation_novo70 = [phone['label'] for phone in first['phones']]
    pronunciation_ipa = [novo702ipa(phone) for phone in pronunciation_novo70]
    return pronunciation_ipa, pronunciation_novo70, llh
|
Loading…
Reference in New Issue
Block a user