Functions are added to perform forced alignment using novoapi. Results can be returned in novo70 or IPA.
This commit is contained in:
parent
d6d5543d03
commit
1622655542
1
.gitignore
vendored
1
.gitignore
vendored
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
## important ##
|
## important ##
|
||||||
.acoustic_model/forced_alignment_novo.py
|
.acoustic_model/forced_alignment_novo.py
|
||||||
|
.acoustic_model/novoapi_functions.py
|
||||||
|
|
||||||
# User-specific files
|
# User-specific files
|
||||||
*.suo
|
*.suo
|
||||||
|
Binary file not shown.
Binary file not shown.
@ -36,8 +36,14 @@ fame_s5_dir = os.path.join(fame_dir, 's5')
|
|||||||
fame_corpus_dir = os.path.join(fame_dir, 'corpus')
|
fame_corpus_dir = os.path.join(fame_dir, 'corpus')
|
||||||
|
|
||||||
experiments_dir = r'c:\OneDrive\Research\rug\experiments'
|
experiments_dir = r'c:\OneDrive\Research\rug\experiments'
|
||||||
stimmen_transcription_xlsx = os.path.join(experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
|
stimmen_dir = os.path.join(experiments_dir, 'stimmen')
|
||||||
stimmen_data_dir = os.path.join(experiments_dir, 'stimmen', 'data')
|
stimmen_data_dir = os.path.join(stimmen_dir, 'data')
|
||||||
|
# 44.1 kHz
|
||||||
|
#stimmen_wav_dir = os.path.join(stimmen_dir, 'wav')
|
||||||
|
# 16 kHz
|
||||||
|
stimmen_wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen'
|
||||||
|
|
||||||
|
stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx')
|
||||||
phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
|
phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
|
||||||
|
|
||||||
novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi')
|
novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi')
|
||||||
|
@ -256,7 +256,7 @@ if make_kaldi_lexicon_txt:
|
|||||||
# f.write("{0},{1}\n".format(key,c[key]))
|
# f.write("{0},{1}\n".format(key,c[key]))
|
||||||
|
|
||||||
for key, value in c.most_common(option_num):
|
for key, value in c.most_common(option_num):
|
||||||
# make possible pronounciation variant list.
|
# make possible pronunciation variant list.
|
||||||
pronvar_list = am_func.fame_pronunciation_variant(key)
|
pronvar_list = am_func.fame_pronunciation_variant(key)
|
||||||
|
|
||||||
for pronvar_ in pronvar_list:
|
for pronvar_ in pronvar_list:
|
||||||
|
@ -37,11 +37,15 @@
|
|||||||
# Aki Kunikoshi
|
# Aki Kunikoshi
|
||||||
# 428968@gmail.com
|
# 428968@gmail.com
|
||||||
#
|
#
|
||||||
|
import os
|
||||||
|
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from novoapi.backend import session
|
from novoapi.backend import session
|
||||||
import novoapi_functions
|
import novoapi_functions
|
||||||
|
import defaultfiles as default
|
||||||
|
|
||||||
# username / password cannot be passed as arguments...
|
# username / password cannot be passed as arguments...
|
||||||
p = argparse.ArgumentParser()
|
p = argparse.ArgumentParser()
|
||||||
@ -51,68 +55,11 @@ p.add_argument("--user", default='martijn.wieling')
|
|||||||
p.add_argument("--password", default='fa0Thaic')
|
p.add_argument("--password", default='fa0Thaic')
|
||||||
args = p.parse_args()
|
args = p.parse_args()
|
||||||
|
|
||||||
wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'
|
#wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'
|
||||||
|
wav_file = os.path.join(default.stimmen_wav_dir, 'pg_pauw_2206_0fjd8.wav')
|
||||||
rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)
|
|
||||||
grammar = {
|
|
||||||
"type": "confusion_network",
|
|
||||||
"version": "1.0",
|
|
||||||
"data": {
|
|
||||||
"kind": "sequence",
|
|
||||||
"elements": [{
|
|
||||||
"kind": "word",
|
|
||||||
"pronunciation": [{
|
|
||||||
"phones": ["wv",
|
|
||||||
"a1",
|
|
||||||
"n"],
|
|
||||||
"id": 0
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"phones": ["wv",
|
|
||||||
"uh1",
|
|
||||||
"n"],
|
|
||||||
"id": 1
|
|
||||||
}],
|
|
||||||
"label": "one"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"kind": "word",
|
|
||||||
"pronunciation": [{
|
|
||||||
"phones": ["t",
|
|
||||||
"uw1"],
|
|
||||||
"id": 0
|
|
||||||
}],
|
|
||||||
"label": "two"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"kind": "word",
|
|
||||||
"pronunciation": [{
|
|
||||||
"phones": ["t",
|
|
||||||
"r",
|
|
||||||
"iy1"],
|
|
||||||
"id": 0
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"phones": ["s",
|
|
||||||
"r",
|
|
||||||
"iy1"],
|
|
||||||
"id": 1
|
|
||||||
}],
|
|
||||||
"label": "three"
|
|
||||||
}]
|
|
||||||
},
|
|
||||||
"return_objects": ["grammar"],
|
|
||||||
"phoneset": "novo70"
|
|
||||||
}
|
|
||||||
|
|
||||||
res = rec.setgrammar(grammar)
|
|
||||||
#print "Set grammar result", res
|
|
||||||
|
|
||||||
#res = rec.recognize_wav("test/onetwothree.wav")
|
|
||||||
res = rec.recognize_wav(wav_file)
|
|
||||||
#print "Recognition result:", json.dumps(res.export(), indent=4)
|
|
||||||
|
|
||||||
# list of the pronunciations for each word
|
# list of the pronunciations for each word
|
||||||
word = 'pauw'
|
word = 'pauw'
|
||||||
pronunciation_ipa = ['pau', 'pɑu']
|
pronunciation_ipa = ['pau', 'pɑu']
|
||||||
grammar = novoapi_functions.make_grammar(word, pronunciation_ipa)
|
|
||||||
|
result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa)
|
||||||
|
pronunciation_ipa, pronunciation_novo70, llh = novoapi_functions.result2pronunciation(result, word)
|
@ -1,7 +1,14 @@
|
|||||||
|
## this script should be used only by Aki Kunikoshi.
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
|
||||||
|
from novoapi.backend import session
|
||||||
|
|
||||||
import defaultfiles as default
|
import defaultfiles as default
|
||||||
|
|
||||||
|
|
||||||
def load_phonset():
|
def load_phonset():
|
||||||
translation_key_ipa2novo70 = dict()
|
translation_key_ipa2novo70 = dict()
|
||||||
translation_key_novo702ipa = dict()
|
translation_key_novo702ipa = dict()
|
||||||
@ -112,7 +119,7 @@ def make_grammar(word, pronunciation_ipa):
|
|||||||
|
|
||||||
grammer_data_elements0_pronunciation = []
|
grammer_data_elements0_pronunciation = []
|
||||||
for id, ipa in enumerate(pronunciation_ipa):
|
for id, ipa in enumerate(pronunciation_ipa):
|
||||||
novo70 = novoapi_functions.ipa2novo70(ipa)
|
novo70 = ipa2novo70(ipa)
|
||||||
grammer_data_elements0_pronunciation.append({
|
grammer_data_elements0_pronunciation.append({
|
||||||
"phones": novo70.split(),
|
"phones": novo70.split(),
|
||||||
"id": id
|
"id": id
|
||||||
@ -135,4 +142,32 @@ def make_grammar(word, pronunciation_ipa):
|
|||||||
"phoneset": "novo70"
|
"phoneset": "novo70"
|
||||||
}
|
}
|
||||||
|
|
||||||
return grammar
|
return grammar
|
||||||
|
|
||||||
|
|
||||||
|
def forced_alignment(wav_file, word, pronunciation_ipa):
    """Run forced alignment of a single word on a wav file using novoapi.

    A grammar is built from ``word`` and ``pronunciation_ipa`` with
    ``make_grammar``, set on a ``session.Recognizer``, and the wav file is
    then recognized against that grammar.

    Args:
        wav_file: wav file handed to ``Recognizer.recognize_wav``.
        word: word label used to build the grammar.
        pronunciation_ipa: candidate pronunciations of ``word`` in IPA,
            passed on to ``make_grammar``.

    Returns:
        The exported recognition result, i.e. ``result.export()``.
    """
    ### IMPORTANT ###
    # because of this function, this script should not be uploaded / shared.
    # NOTE(review): the credentials below are hard-coded as argparse
    # defaults. They are kept unchanged so existing callers keep working,
    # but they should be moved out of the source tree.

    # username / password cannot be passed as arguments...
    p = argparse.ArgumentParser()
    p.add_argument("--user", default='martijn.wieling')
    p.add_argument("--password", default='fa0Thaic')
    # parse_known_args() instead of parse_args(): this function runs inside
    # other scripts, and parse_args() would terminate the process whenever
    # the calling script was started with an option unknown to this parser.
    args, _ = p.parse_known_args()

    rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)

    grammar = make_grammar(word, pronunciation_ipa)
    # The return value of setgrammar() is not needed here.
    rec.setgrammar(grammar)
    result = rec.recognize_wav(wav_file)
    return result.export()
|
||||||
|
|
||||||
|
|
||||||
|
def result2pronunciation(result, word):
    """Extract the recognized pronunciation of ``word`` from a result.

    Args:
        result: sequence of dict entries, each with the keys ``'label'``,
            ``'llh'`` and ``'phones'``; every item in ``'phones'`` is a dict
            with a ``'label'`` key (a novo70 phone).
        word: label of the entry to look up. If several entries carry this
            label, the first one is used.

    Returns:
        A tuple ``(pronunciation_ipa, pronunciation_novo70, llh)``:
        the phones converted with ``novo702ipa``, the phone labels as found
        in the result, and the ``'llh'`` value of the matched entry
        (presumably a log-likelihood -- confirm against the novoapi docs).

    Raises:
        IndexError: if no entry in ``result`` is labelled ``word``.
    """
    matches = [entry for entry in result if entry['label'] == word]
    if not matches:
        # Same exception type as the original bare `result_[0]`, but with a
        # message that names the missing word.
        raise IndexError(
            "no entry labelled {!r} in the recognition result".format(word))

    entry = matches[0]
    llh = entry['llh']
    pronunciation_novo70 = [phone['label'] for phone in entry['phones']]
    pronunciation_ipa = [novo702ipa(phone) for phone in pronunciation_novo70]
    return pronunciation_ipa, pronunciation_novo70, llh
|
Loading…
Reference in New Issue
Block a user