acoustic_model/acoustic_model/phoneset/fame_ipa.py

138 lines
2.2 KiB
Python
Raw Normal View History

""" definition of the phones to be used. """
phoneset = [
# vowels
'',
'i̯ⁿ',
'y',
'y:', # not included in lex.ipa, but in stimmen.
'i',
'i.',
'iⁿ',
'i:',
'i:ⁿ',
'ɪ',
'ɪⁿ',
'ɪ.',
'ɪ:', # not included in lex.ipa, but in stimmen.
'ɪ:ⁿ',
'e',
'e:',
'e:ⁿ',
'ə',
'əⁿ',
'ə:',
'ɛ',
'ɛ.',
'ɛⁿ',
'ɛ:',
'ɛ:ⁿ',
'a',
'aⁿ',
'a.',
'a:',
'a:ⁿ',
'',
'ṷ.',
'ṷⁿ',
2019-01-29 21:52:11 +01:00
#'ú', # only appears in word 'feeste'(út) and 'gaste'(út) which are 'f e: s t ə' and 'yn' in lex_asr. The pronunciation in Fries may be mistakes so I removed this phone.
'u',
'uⁿ',
'u.',
'u:',
'u:ⁿ',
'ü',
'ü.',
'üⁿ',
'ü:',
'ü:ⁿ',
'o',
'oⁿ',
'o.',
'o:',
'o:ⁿ',
'ö',
'ö.',
'öⁿ',
'ö:',
'ö:ⁿ',
'ɔ',
'ɔ.',
'ɔⁿ',
'ɔ:',
'ɔ:ⁿ',
'ɔ̈', # not included in lex.ipa
'ɔ̈.',
'ɔ̈:',
# plosives
'p',
'b',
't',
'tⁿ',
'd',
'k',
'g',
'ɡ', # = 'g'
# nasals
'm',
'n',
'ŋ',
# fricatives
'f',
'v',
's',
's:',
'z',
'zⁿ',
'x',
'h',
# tap and flip
'r',
'r.', # only appears in word 'mearpartijestelsel'(does not exist in lex_asr) and 'tenoarpartij'.
'r:', # only appears in word 'mûsearflearmûs' and 'sjochdêr'.
# approximant
'j',
'j.',
'l'
]
## reduce the number of phones.
# the phones which are used in stimmen transcription but not in FAME corpus.
# replacements are based on the advice from Jelske Dijkstra on 2018/06/21.
stimmen_replacement = {
'æ': 'ɛ',
'ø': 'ö', # or 'ö:'
'ø:': 'ö:', # Aki added.
'œ': 'ɔ̈', # or 'ɔ̈:'
'œ:': 'ɔ̈:', # Aki added.
'ɐ': 'a', # or 'a:'
'ɐ:': 'a:', # Aki added.
'ɑ': 'a', # or 'a:'
'ɑ:': 'a:', # Aki added
'ɒ': 'ɔ', # or 'ɔ:'
'ɒ:': 'ɔ:', # Aki added.
'ɾ': 'r',
'ʁ': 'r',
'ʊ': 'u',
'χ': 'x',
2019-01-29 21:52:11 +01:00
# aki guessed.
'ʀ': 'r',
'ɹ': 'r',
'w': 'ö'
}
phoneset.extend(list(stimmen_replacement.keys()))
def phone_reduction(phones):
return [stimmen_replacement.get(i, i) for i in phones]
## the list of multi character phones.
# for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter.
multi_character_phones = [i for i in phoneset if len(i) > 1]
multi_character_phones.sort(key=len, reverse=True)