"""Definition of the phones to be used."""

# Inventory of phone symbols, grouped by manner of articulation.
# Suffix conventions used by the entries below: ':' , '.' and 'ⁿ' are part of
# the phone symbol itself, so e.g. 'i:ⁿ' is a single phone.
phoneset = [
    # vowels
    'i̯',
    'i̯ⁿ',
    'y',
    'y:',  # not included in lex.ipa, but in stimmen.
    'i',
    'i.',
    'iⁿ',
    'i:',
    'i:ⁿ',
    'ɪ',
    'ɪⁿ',
    'ɪ.',
    'ɪ:',  # not included in lex.ipa, but in stimmen.
    'ɪ:ⁿ',
    'e',
    'e:',
    'e:ⁿ',
    'ə',
    'əⁿ',
    'ə:',
    'ɛ',
    'ɛ.',
    'ɛⁿ',
    'ɛ:',
    'ɛ:ⁿ',
    'a',
    'aⁿ',
    'a.',
    'a:',
    'a:ⁿ',
    'ṷ',
    'ṷ.',
    'ṷⁿ',
    # 'ú' is deliberately left out: it only appears in the words
    # 'feeste'(út) and 'gaste'(út), which are 'f e: s t ə' and 'yn' in
    # lex_asr. The pronunciation in Fries may be a mistake, so this phone
    # was removed.
    'u',
    'uⁿ',
    'u.',
    'u:',
    'u:ⁿ',
    'ü',
    'ü.',
    'üⁿ',
    'ü:',
    'ü:ⁿ',
    'o',
    'oⁿ',
    'o.',
    'o:',
    'o:ⁿ',
    'ö',
    'ö.',
    'öⁿ',
    'ö:',
    'ö:ⁿ',
    'ɔ',
    'ɔ.',
    'ɔⁿ',
    'ɔ:',
    'ɔ:ⁿ',
    # 'ɔ̈' is deliberately left out: not included in lex.ipa.
    'ɔ̈.',
    'ɔ̈:',

    # plosives
    'p',
    'b',
    't',
    'tⁿ',
    'd',
    'k',
    'g',
    'ɡ',  # = 'g' (different code point, same phone)

    # nasals
    'm',
    'n',
    'ŋ',

    # fricatives
    'f',
    'v',
    's',
    's:',
    'z',
    'zⁿ',
    'x',
    'h',

    # tap and flap
    'r',
    'r.',  # only appears in the words 'mearpartijestelsel' (does not exist in lex_asr) and 'tenoarpartij'.
    'r:',  # only appears in the words 'mûsearflearmûs' and 'sjochdêr'.

    # approximant
    'j',
    'j.',
    'l',
]

## reduce the number of phones.
# Phones which are used in the stimmen transcription but not in the FAME
# corpus, mapped to the phone that should be used instead.
# Replacements are based on the advice from Jelske Dijkstra on 2018/06/21.
# NOTE(review): 'œ' maps to 'ɔ̈', which is commented out of phoneset
# ("not included in lex.ipa") -- confirm this target is intended.
stimmen_replacement = {
    'æ': 'ɛ',
    'ø': 'ö',  # or 'ö:'
    'ø:': 'ö:',  # Aki added.
    'œ': 'ɔ̈',  # or 'ɔ̈:'
    'œ:': 'ɔ̈:',  # Aki added.
    'ɐ': 'a',  # or 'a:'
    'ɐ:': 'a:',  # Aki added.
    'ɑ': 'a',  # or 'a:'
    'ɑ:': 'a:',  # Aki added.
    'ɒ': 'ɔ',  # or 'ɔ:'
    'ɒ:': 'ɔ:',  # Aki added.
    'ɾ': 'r',
    'ʁ': 'r',
    'ʊ': 'u',
    'χ': 'x',

    # Aki guessed.
    'ʀ': 'r',
    'ɹ': 'r',
    'w': 'ö',
}
# Append the stimmen-only phones to phoneset. Iterating a dict yields its
# keys in insertion order, so no intermediate list is needed.
phoneset.extend(stimmen_replacement)

def phone_reduction(phones):
    """Replace stimmen-only phones by their counterparts.

    Args:
        phones: iterable of phone strings.

    Returns:
        A new list in which every phone that is a key of
        ``stimmen_replacement`` is substituted by the mapped value; all
        other phones are passed through unchanged.
    """
    return [stimmen_replacement.get(phone, phone) for phone in phones]

## the list of multi character phones.
# For example, the length of 'i̯ⁿ' is 3, but in the code it is treated as
# one letter.
# Sorted longest first (stable, so equally long phones keep their phoneset
# order) -- presumably so longer phones are matched before shorter ones that
# are their prefixes; confirm against the callers.
multi_character_phones = sorted(
    (phone for phone in phoneset if len(phone) > 1),
    key=len,
    reverse=True,
)