# acoustic_model/acoustic_model/novoapi_functions.py

## this script should be used only by Aki Kunikoshi.

import numpy as np
import argparse
import json

from novoapi.backend import session

import os
#os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import defaultfiles as default


def load_phonset():
	"""
	Load the novo70 phone set and build the IPA <-> novo70 lookup tables.

	The file at default.novo70_phoneset is read as UTF-8 text. Every line that
	has at least two tab-separated columns contributes one pair: the first
	column is taken as the novo70 phone, the second as the IPA phone.
	A fixed list of extra pairs is registered afterwards.

	:return: tuple (phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70,
		translation_key_novo702ipa). The two phone sets are the results of
		np.unique on the collected phones, the two translation keys are dicts.
	"""
	ipa_phones = []
	novo70_phones = []
	ipa_to_novo70 = {}
	novo70_to_ipa = {}

	def register(ipa, novo70):
		# a later registration overwrites an earlier dictionary entry.
		ipa_phones.append(ipa)
		novo70_phones.append(novo70)
		ipa_to_novo70[ipa] = novo70
		novo70_to_ipa[novo70] = ipa

	with open(default.novo70_phoneset, "rt", encoding="utf-8") as fin:
		for row in fin.read().split('\n'):
			columns = row.split('\t')
			if len(columns) > 1:
				register(columns[1], columns[0])

	# As per Nederlandse phoneset_aki.xlsx received from David
	# [ɔː] oh / ohr # from ipa->novo70, only oh is used.
	# [ɪː] ih / ihr # from ipa->novo70, only ih is used.
	# [iː] iy
	# [œː] uh
	# [ɛː] eh
	# [w] wv in IPA written as ʋ.
	extra_pairs = [
		('ɔː', 'oh'),
		('ɪː', 'ih'),
		('iː', 'iy'),
		('œː', 'uh'),
		('ɛː', 'eh'),
		('ʋ', 'wv'),
		]
	for ipa, novo70 in extra_pairs:
		register(ipa, novo70)

	# ohr / ihr only exist in the novo70 -> ipa direction.
	novo70_to_ipa['ohr'] = 'ɔː'
	novo70_to_ipa['ihr'] = 'ɪː'

	return np.unique(ipa_phones), np.unique(novo70_phones), ipa_to_novo70, novo70_to_ipa


def multi_character_tokenize(line, multi_character_tokens):
	"""
	Cut line into tokens, scanning from left to right.

	At each position the entries of multi_character_tokens are tried in the
	given order; the first non-empty entry that line starts with at that
	position is emitted and skipped over. When no entry matches, the single
	character at that position is emitted instead.

	Copied from forced_alignment.convert_phone_set.py

	:param string line: the text to tokenize.
	:param multi_character_tokens: candidate tokens, tried in the given order.
	:return: generator yielding the tokens one by one.
	"""
	position = 0
	while position < len(line):
		matched = None
		for candidate in multi_character_tokens:
			if len(candidate) > 0 and line.startswith(candidate, position):
				matched = candidate
				break
		if matched is None:
			# no multi-character token fits here: fall back to one character.
			matched = line[position]
		yield matched
		position += len(matched)


def split_ipa(line):
	"""
	Split a line written in IPA into a list of phones.

	Leading and trailing whitespace is stripped first. The multi-character
	phones listed below are kept together as one phone; every other character
	(inner whitespace included) becomes an element of its own.

	NOTE(review): an earlier version of this docstring said that nasalized
	sounds (such as ɛ̃ː) give an error; nasalized entries are present in the
	list below, so confirm whether that still applies.

	:param string line: one line written in IPA.
	:return list: the phones of the line, in order.
	"""
	# IPAs in CGN that are written with more than one character.
	cgn_multi_character_phones = [
		'ʌu', 'ɛi', 'œy',
		'aː', 'eː', 'iː', 'oː', 'øː', 'ɛː', 'œː', 'ɔː',
		'ɛ̃ː', 'ɑ̃ː', 'ɔ̃ː', 'œ̃',
		'ɪː',
		]

	stripped = line.strip()
	return list(multi_character_tokenize(stripped, cgn_multi_character_phones))


def split_novo70(line):
	"""
	Split a line written in novo70 into a list of phones.

	The novo70 phones longer than one character are taken from load_phonset
	and tried longest first. A space in the line is returned as 'sp'.

	:param string line: one line written in novo70.
	:return list: the novo70 phones of the line, in order.
	"""
	phoneset_novo70 = load_phonset()[1]
	# longest tokens first, so that a short phone cannot pre-empt a longer one.
	longest_first = sorted(
		(phone for phone in phoneset_novo70 if len(phone) > 1),
		key=len, reverse=True)

	phones = []
	for token in multi_character_tokenize(line.strip(), longest_first):
		if token == ' ':
			token = 'sp'
		phones.append(token)
	return phones


def novo702ipa(tokens):
	"""
	Convert a pronunciation written in novo70 into IPA.

	:param string tokens: pronunciation written in novo70 phones.
	:return string: the converted phones joined by single spaces. A phone
		without an entry in the novo70 -> ipa translation key is kept as is.
	"""
	novo70_to_ipa = load_phonset()[3]
	return ' '.join(
		novo70_to_ipa.get(phone, phone) for phone in split_novo70(tokens))


# numbering of novo70 should be checked.
def ipa2novo70(tokens):
	"""
	Convert a pronunciation written in IPA into novo70.

	:param string tokens: pronunciation written in IPA.
	:return string: the converted phones joined by single spaces. A phone
		without an entry in the ipa -> novo70 translation key is kept as is.
	"""
	ipa_to_novo70 = load_phonset()[2]
	return ' '.join(
		ipa_to_novo70.get(phone, phone) for phone in split_ipa(tokens))
	

def make_grammar(word, pronunciation_ipa):
	"""
	Build the grammar dictionary for one word and its pronunciation variants.

	Args:
		word: label of the word, e.g. 'pauw'.
		pronunciation_ipa: list of pronunciation variants written in IPA,
			e.g. ['pau', 'pɑu'].

	Returns:
		dict: a grammar of type "confusion_network" whose data is a sequence
		holding a single word element. Each variant is converted with
		ipa2novo70 and stored as a list of novo70 phones, together with its
		index in pronunciation_ipa as "id".
	"""
	variants = [
		{"phones": ipa2novo70(variant_ipa).split(), "id": variant_id}
		for variant_id, variant_ipa in enumerate(pronunciation_ipa)
		]

	word_element = {
		"kind": "word",
		"pronunciation": variants,
		"label": word,
		}

	return {
		"type": "confusion_network",
		"version": "1.0",
		"data": {
			"kind": 'sequence',
			"elements": [word_element],
			},
		"return_objects": ["grammar"],
		"phoneset": "novo70",
		}


def forced_alignment(wav_file, word, pronunciation_ipa):
	"""
	Recognize a wav file against the pronunciation variants of one word.

	A grammar is built from word and pronunciation_ipa with make_grammar and
	set on a novoapi Recognizer session; the wav file is then recognized
	with that grammar.

	Args:
		wav_file: the wav file handed to the recognizer's recognize_wav.
		word: label of the word, passed on to make_grammar.
		pronunciation_ipa: list of pronunciation variants written in IPA.

	Returns:
		the value returned by export() of the recognition result.
	"""
	### IMPORTANT ###
	# because of this function, this script should not be uploaded / shared.

	# username / password cannot be passed as arguments...
	# They are taken from --user / --password on the command line of the
	# running script, falling back to the defaults below.
	p = argparse.ArgumentParser()
	p.add_argument("--user", default='martijn.wieling')
	p.add_argument("--password", default='xxxxxx')
	# parse_known_args instead of parse_args: command line options that are
	# not --user / --password (e.g. those of the calling script) are ignored
	# here instead of making argparse exit the whole process.
	args, _ = p.parse_known_args()

	rec = session.Recognizer(
		grammar_version="1.0", lang="nl", snodeid=101,
		user=args.user, password=args.password, keepopen=True)

	grammar = make_grammar(word, pronunciation_ipa)
	# the return value of setgrammar is not used.
	rec.setgrammar(grammar)
	result = rec.recognize_wav(wav_file)
	return result.export()


def result2pronunciation(result, word):
	"""
	Extract the recognized pronunciation of word from a recognition result.

	Args:
		result: iterable of dicts, each with the keys 'label', 'llh' and
			'phones'; 'phones' is a list of dicts whose 'label' holds one
			novo70 phone.
		word: label of the entry to extract. When several entries carry this
			label, the first one is used.

	Returns:
		tuple (pronunciation_ipa, pronunciation_novo70, llh): the phones of
		the entry converted to IPA with novo702ipa, the same phones in
		novo70, and the 'llh' value of the entry.

	Raises:
		IndexError: if no entry of result is labelled word.
	"""
	matches = [entry for entry in result if entry['label'] == word]
	if not matches:
		# same exception type as indexing the empty list, but it names the word.
		raise IndexError(
			"no entry labelled '{0}' in the recognition result".format(word))

	first = matches[0]
	llh = first['llh']
	pronunciation_novo70 = [phone['label'] for phone in first['phones']]
	pronunciation_ipa = [novo702ipa(phone) for phone in pronunciation_novo70]
	return pronunciation_ipa, pronunciation_novo70, llh


# The guard has to compare against '__main__'; with 'main' it was never true,
# so the lines below never ran when this file was executed as a script.
if __name__ == '__main__':
	# manual check: build the grammar for one word with three variants.
	pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']
	grammar = make_grammar('reus', pronunciation_ipa)