"""Module to convert phonemes.""" def multi_character_tokenize(line, multi_character_tokens): """Tries to match one of the tokens in multi_character_tokens at each position of line, starting at position 0, if so tokenizes and eats that token. Otherwise tokenizes a single character""" while line != '': for token in multi_character_tokens: if line.startswith(token) and len(token) > 0: yield token line = line[len(token):] break else: yield line[:1] line = line[1:] def split_word(word, multi_character_phones): """ split a line by given phoneset. Args: word (str): a word written in given phoneset. multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_phoneset.py. Returns: (word_seperated) (list): the word splitted in given phoneset. """ return [phone for phone in multi_character_tokenize(word.strip(), multi_character_phones)]