You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

29 lines
836 B

"""Module to convert phonemes."""
def multi_character_tokenize(line, multi_character_tokens):
    """Lazily split ``line`` into multi-character tokens and single characters.

    At each position, starting at position 0, the tokens in
    ``multi_character_tokens`` are tried in the order given. The first one
    that ``line`` starts with is emitted and consumed. If none matches, a
    single character is emitted and consumed instead.

    Matching is first-in-order, not longest-match: when one token is a
    prefix of another, list the longer token first.

    Args:
        line (str): the text to tokenize.
        multi_character_tokens (iterable of str): candidate tokens; empty
            strings are ignored because they can never consume input.

    Yields:
        str: the next token, or a single character when no token matches.
    """
    # Drop empty tokens once up front instead of re-checking on every
    # position; this also makes a one-shot iterable safe to reuse.
    candidates = [token for token in multi_character_tokens if token]
    while line:
        for token in candidates:
            if line.startswith(token):
                yield token
                line = line[len(token):]
                # Restart matching at the new position; without this a
                # matched token would be followed by a spurious fallback.
                break
        else:
            # No candidate matched here: fall back to one character.
            yield line[:1]
            line = line[1:]
def split_word(word, multi_character_phones):
    """Split a word into its phones according to the given phoneset.

    Args:
        word (str): one word written in the given phoneset; leading and
            trailing whitespace is stripped before splitting.
        multi_character_phones: the phones that are longer than one
            character, passed through to ``multi_character_tokenize``.

    Returns:
        list: the phones of the word, in order.
    """
    stripped_word = word.strip()
    return list(multi_character_tokenize(stripped_word, multi_character_phones))