acoustic_model/acoustic_model/convert_phone_set.py

"""Module to convert phonemes."""

def multi_character_tokenize(line, multi_character_tokens):
	"""Split ``line`` into tokens, trying the given multi-character tokens first.

	Scanning starts at position 0. At each position the candidates in
	``multi_character_tokens`` are tried in the order given; the first one
	that matches is yielded and consumed (first match wins, not longest
	match). When no candidate matches, a single character is yielded instead.

	Args:
		line (str): the text to tokenize.
		multi_character_tokens (iterable of str): candidate tokens in
			priority order. Empty strings are ignored. May be a one-shot
			iterable (e.g. a generator); it is consumed exactly once.

	Yields:
		str: the next token of ``line``.
	"""
	# Materialize the candidates once. Iterating the caller's object again at
	# every position would find a one-shot iterable already exhausted after
	# the first position, silently degrading to single-character output.
	# Empty tokens are dropped here because they could never consume input.
	candidates = tuple(token for token in multi_character_tokens if len(token) > 0)

	# Walk the line with a cursor instead of re-slicing the remainder after
	# every token, which would copy the tail of the string each time.
	position = 0
	end = len(line)
	while position < end:
		for token in candidates:
			if line.startswith(token, position):
				yield token
				position += len(token)
				break
		else:
			# No candidate matched here: fall back to a single character.
			yield line[position]
			position += 1


def split_word(word, multi_character_phones):
	"""
	Break a word into its phones according to the given phoneset.

	Leading and trailing whitespace is stripped from the word before it is
	tokenized with multi_character_tokenize.

	Args:
		word (str): one word written in the given phoneset.
		multi_character_phones: the phones that are longer than one
			character; passed through to multi_character_tokenize.

	Returns:
		list: the phones of the word, in order of appearance.
	"""
	trimmed = word.strip()
	return list(multi_character_tokenize(trimmed, multi_character_phones))
The phoneset is given in fame_phoneset.py. The translation key is obtained based on that information. 2019-01-27 01:34:04 +01:00			`"""Module to convert phonemes."""`

			`def multi_character_tokenize(line, multi_character_tokens):`
			`"""Tries to match one of the tokens in multi_character_tokens at each position of line, starting at position 0,`
			`if so tokenizes and eats that token. Otherwise tokenizes a single character"""`
			`while line != '':`
			`for token in multi_character_tokens:`
			`if line.startswith(token) and len(token) > 0:`
			`yield token`
			`line = line[len(token):]`
			`break`
			`else:`
			`yield line[:1]`
			`line = line[1:]`


			`def split_word(word, multi_character_phones):`
			`"""`
			`Split a line by given phoneset.`

			`Args:`
			`word (str): one word written in given phoneset.`
			`multi_character_phones:`

			`Returns:`
			`word_seperated (str): the word splitted in given phoneset.`
			`"""`

			`return [phone for phone in multi_character_tokenize(word.strip(), multi_character_phones)]`