# acoustic_model/acoustic_model/convert_phone_set.py

"""Module to convert phonemes."""

def multi_character_tokenize(line, multi_character_tokens):
	"""Split ``line`` into tokens, preferring the given multi-character tokens.

	Scanning starts at position 0. At each position the candidate tokens are
	tried in the order they were given and the first one that matches is
	emitted (first match wins, not necessarily the longest). If no candidate
	matches, the single character at that position is emitted instead.

	Args:
		line (str): the text to tokenize.
		multi_character_tokens (iterable of str): candidate tokens in priority
			order. Empty strings are ignored. Any iterable is accepted,
			including one-shot iterables such as generators.

	Yields:
		str: the next token of ``line``, in order.
	"""
	# Snapshot the candidates once: this lets a one-shot iterable be reused at
	# every position and drops empty tokens, which could never advance the scan.
	candidates = tuple(token for token in multi_character_tokens if token)
	position = 0
	end = len(line)
	# Walk an index instead of re-slicing the remainder of the line after
	# every token, which would copy the tail of the string each time.
	while position < end:
		for token in candidates:
			if line.startswith(token, position):
				yield token
				position += len(token)
				break
		else:
			# No candidate matched here: fall back to a single character.
			yield line[position:position + 1]
			position += 1


def split_word(word, multi_character_phones):
	"""
	Split a word into its phones according to the given phone set.

	Leading and trailing whitespace is stripped from ``word`` before it is
	split. The splitting itself is delegated to ``multi_character_tokenize``.

	Args:
		word (str): one word written in the given phone set.
		multi_character_phones (iterable of str): the phones that are written
			with more than one character. They are passed unchanged to
			``multi_character_tokenize``.

	Returns:
		list of str: the phones of ``word``, in order of appearance.
	"""
	return list(multi_character_tokenize(word.strip(), multi_character_phones))