acoustic_model/acoustic_model/novoapi_functions.py

192 lines
5.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

## this script should be used only by Aki Kunikoshi.
import numpy as np
import argparse
import json
from novoapi.backend import session
import defaultfiles as default
def load_phonset():
    """Load the novo70 phone set and build the IPA <-> novo70 lookup tables.

    The tab-separated file at ``default.novo70_phoneset`` is read as UTF-8.
    Every line holding at least two fields contributes one pair: the first
    field is the novo70 symbol, the second its IPA counterpart.  A fixed list
    of extra pairs is registered afterwards.

    Returns:
        tuple: ``(phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70,
        translation_key_novo702ipa)``.  The two phone sets are the results of
        ``np.unique`` over all registered symbols; the two translation keys
        are plain dicts.
    """
    # (novo70, ipa) pairs, kept in registration order so that later pairs
    # overwrite earlier ones in the dictionaries.
    pairs = []
    with open(default.novo70_phoneset, "rt", encoding="utf-8") as fin:
        for row in fin.read().split('\n'):
            fields = row.split('\t')
            if len(fields) > 1:
                pairs.append((fields[0], fields[1]))

    # As per "Nederlandse phoneset_aki.xlsx" received from David:
    # [ɔː] oh / ohr  # from ipa->novo70, only oh is used.
    # [ɪː] ih / ihr  # from ipa->novo70, only ih is used.
    # [iː] iy
    # [œː] uh
    # [ɛː] eh
    # [w] wv, in IPA written as ʋ.
    pairs.extend([
        ('oh', 'ɔː'),
        ('ih', 'ɪː'),
        ('iy', 'iː'),
        ('uh', 'œː'),
        ('eh', 'ɛː'),
        ('wv', 'ʋ'),
    ])

    translation_key_ipa2novo70 = {}
    translation_key_novo702ipa = {}
    for novo70, ipa in pairs:
        translation_key_ipa2novo70[ipa] = novo70
        translation_key_novo702ipa[novo70] = ipa

    # novo70 -> IPA only: these two never appear as a target of ipa -> novo70.
    translation_key_novo702ipa['ohr'] = 'ɔː'
    translation_key_novo702ipa['ihr'] = 'ɪː'

    phoneset_ipa = np.unique([ipa for _, ipa in pairs])
    phoneset_novo70 = np.unique([novo70 for novo70, _ in pairs])

    return (phoneset_ipa, phoneset_novo70,
            translation_key_ipa2novo70, translation_key_novo702ipa)
def multi_character_tokenize(line, multi_character_tokens):
    """Yield the tokens of ``line`` one by one, scanning left to right.

    At each position the entries of ``multi_character_tokens`` are tried in
    the order given; the first non-empty entry that matches at that position
    is emitted and consumed.  When no entry matches, a single character is
    emitted instead.

    Copied from forced_alignment.convert_phone_set.py

    Args:
        line: the string to tokenize.
        multi_character_tokens: candidate tokens, in priority order.

    Yields:
        The next token (a matched entry or a single character).
    """
    cursor = 0
    end = len(line)
    while cursor < end:
        # First candidate matching at the cursor; empty candidates are
        # ignored because they would never advance the cursor.
        hit = next(
            (candidate for candidate in multi_character_tokens
             if line.startswith(candidate, cursor) and len(candidate) > 0),
            None,
        )
        if hit is None:
            yield line[cursor]
            cursor += 1
        else:
            yield hit
            cursor += len(hit)
def split_ipa(line):
    """Split a line written in IPA into a list of phones.

    Leading and trailing whitespace is stripped first.  The multi-character
    phones listed below are kept together; every other character becomes a
    token of its own.

    NOTE(review): an earlier note on this function warned that nasalized
    sounds (such as ɛ̃ː) "will give error"; nothing in this function raises
    for them -- confirm whether the problem occurs further downstream.

    Args:
        line (str): one line written in IPA.

    Returns:
        list: the phones of ``line`` in order.
    """
    # Multi-character IPA phones occurring in CGN.
    cgn_multi_character_phones = [
        'ʌu', 'ɛi', 'œy', 'aː', 'eː', 'iː', 'oː', 'øː', 'ɛː', 'œː', 'ɔː', 'ɛ̃ː', 'ɑ̃ː', 'ɔ̃ː', 'œ̃', 'ɪː'
    ]
    stripped = line.strip()
    return list(multi_character_tokenize(stripped, cgn_multi_character_phones))
def split_novo70(line):
    """Split a line written in novo70 into a list of phones.

    The novo70 phone set is loaded via ``load_phonset()``; its multi-character
    symbols are tried longest first.  A single space in the (stripped) line is
    reported as the phone ``'sp'``.

    Args:
        line (str): one line written in novo70.

    Returns:
        list: the novo70 phones of ``line`` in order.
    """
    phoneset_novo70 = load_phonset()[1]
    # Longest symbols first so that a short symbol cannot shadow a longer one
    # that begins with it.
    longest_first = sorted(
        (symbol for symbol in phoneset_novo70 if len(symbol) > 1),
        key=len,
        reverse=True,
    )
    phones = []
    for token in multi_character_tokenize(line.strip(), longest_first):
        phones.append('sp' if token == ' ' else token)
    return phones
def novo702ipa(tokens):
    """Translate a novo70 string into space-separated IPA phones.

    Args:
        tokens (str): a pronunciation written in novo70.

    Returns:
        str: the phones joined by single spaces; a phone without an entry in
        the novo70 -> IPA table is passed through unchanged.
    """
    novo70_to_ipa = load_phonset()[3]
    return ' '.join(
        novo70_to_ipa.get(phone, phone) for phone in split_novo70(tokens)
    )
# numbering of novo70 should be checked.
def ipa2novo70(tokens):
    """Translate an IPA string into space-separated novo70 phones.

    Args:
        tokens (str): a pronunciation written in IPA.

    Returns:
        str: the phones joined by single spaces; a phone without an entry in
        the IPA -> novo70 table is passed through unchanged.
    """
    ipa_to_novo70 = load_phonset()[2]
    return ' '.join(
        ipa_to_novo70.get(phone, phone) for phone in split_ipa(tokens)
    )
def make_grammar(word, pronunciation_ipa):
    """Build the grammar dict for one word and its pronunciation variants.

    Each IPA variant is converted with ``ipa2novo70`` and split on whitespace
    into its novo70 phones; the variant's position in ``pronunciation_ipa``
    becomes its ``id``.

    Args:
        word: label of the word, e.g. 'pauw'.
        pronunciation_ipa: list of pronunciation variants in IPA,
            e.g. ['pau', 'pɑu'].

    Returns:
        dict: a "confusion_network" grammar (version "1.0", phoneset
        "novo70") whose data is a sequence holding this single word.
    """
    variants = [
        {
            "phones": ipa2novo70(ipa).split(),
            "id": index
        }
        for index, ipa in enumerate(pronunciation_ipa)
    ]
    word_element = {
        "kind": "word",
        "pronunciation": variants,
        "label": word
    }
    return {
        "type": "confusion_network",
        "version": "1.0",
        "data": {
            "kind": 'sequence',
            "elements": [word_element]
        },
        "return_objects": ["grammar"],
        "phoneset": "novo70"
    }
def forced_alignment(wav_file, word, pronunciation_ipa, *, user=None, password=None):
    """Run forced alignment of one word on a wav file through novoapi.

    A ``session.Recognizer`` is created (grammar version "1.0", language
    "nl", snodeid 101, ``keepopen=True``), the grammar built by
    ``make_grammar`` is set on it, and the wav file is recognized.

    Credentials are resolved in this order: the ``user`` / ``password``
    keyword arguments, then the ``--user`` / ``--password`` command line
    options, then the legacy defaults embedded below.

    Args:
        wav_file: the wav file handed to ``Recognizer.recognize_wav``.
        word: label of the word to align.
        pronunciation_ipa: list of pronunciation variants in IPA.
        user: optional user name overriding command line / default.
        password: optional password overriding command line / default.

    Returns:
        Whatever ``export()`` of the recognition result returns.
    """
    ### IMPORTANT ###
    # SECURITY NOTE(review): the defaults below are real credentials embedded
    # in source, which is why this script should not be uploaded / shared.
    # They are kept only so existing callers keep working; prefer passing
    # ``user`` / ``password`` explicitly, and rotate and remove these defaults.
    p = argparse.ArgumentParser()
    p.add_argument("--user", default='martijn.wieling')
    p.add_argument("--password", default='fa0Thaic')
    # parse_known_args() instead of parse_args(): this function runs inside
    # arbitrary host scripts, and parse_args() would abort the whole process
    # with SystemExit on any command line option it does not know about.
    args, _unknown = p.parse_known_args()

    if user is None:
        user = args.user
    if password is None:
        password = args.password

    rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=user, password=password, keepopen=True)  # , modeldir=modeldir)
    grammar = make_grammar(word, pronunciation_ipa)
    # The return value of setgrammar() was never used; only the call matters.
    rec.setgrammar(grammar)
    result = rec.recognize_wav(wav_file)
    return result.export()
def result2pronunciation(result, word):
    """Extract the recognized pronunciation of ``word`` from ``result``.

    The first entry of ``result`` whose ``'label'`` equals ``word`` is used.

    Args:
        result: sequence of entries, each providing 'label', 'llh' and
            'phones' (presumably the exported recognition result -- verify
            against the caller).
        word: label of the word to look up.

    Returns:
        tuple: ``(pronunciation_ipa, pronunciation_novo70, llh)`` -- the phone
        labels converted with ``novo702ipa``, the phone labels as found in
        the entry, and the entry's 'llh' value.

    Raises:
        IndexError: if no entry of ``result`` is labelled ``word``.
    """
    matches = [entry for entry in result if entry['label'] == word]
    first_match = matches[0]
    llh = first_match['llh']
    segments = first_match['phones']
    pronunciation_novo70 = [segment['label'] for segment in segments]
    pronunciation_ipa = [novo702ipa(label) for label in pronunciation_novo70]
    return pronunciation_ipa, pronunciation_novo70, llh