""" definition of the phones to be used. """ phoneset = [ # vowels 'i̯', 'i̯ⁿ', 'y', 'y:', # not included in lex.ipa, but in stimmen. 'i', 'i.', 'iⁿ', 'i:', 'i:ⁿ', 'ɪ', 'ɪⁿ', 'ɪ.', 'ɪ:', # not included in lex.ipa, but in stimmen. 'ɪ:ⁿ', 'e', 'e:', 'e:ⁿ', 'ə', 'əⁿ', 'ə:', 'ɛ', 'ɛ.', 'ɛⁿ', 'ɛ:', 'ɛ:ⁿ', 'a', 'aⁿ', 'a.', 'a:', 'a:ⁿ', 'ṷ', 'ṷ.', 'ṷⁿ', #'ú', # only appears in word 'feeste'(út) and 'gaste'(út) which are 'f e: s t ə' and 'yn' in lex_asr. The pronunciation in Fries may be mistakes so I removed this phone. 'u', 'uⁿ', 'u.', 'u:', 'u:ⁿ', 'ü', 'ü.', 'üⁿ', 'ü:', 'ü:ⁿ', 'o', 'oⁿ', 'o.', 'o:', 'o:ⁿ', 'ö', 'ö.', 'öⁿ', 'ö:', 'ö:ⁿ', 'ɔ', 'ɔ.', 'ɔⁿ', 'ɔ:', 'ɔ:ⁿ', 'ɔ̈', # not included in lex.ipa 'ɔ̈.', 'ɔ̈:', # plosives 'p', 'b', 't', 'tⁿ', 'd', 'k', 'g', 'ɡ', # = 'g' # nasals 'm', 'n', 'ŋ', # fricatives 'f', 'v', 's', 's:', 'z', 'zⁿ', 'x', 'h', # tap and flip 'r', 'r.', # only appears in word 'mearpartijestelsel'(does not exist in lex_asr) and 'tenoarpartij'. 'r:', # only appears in word 'mûsearflearmûs' and 'sjochdêr'. # approximant 'j', 'j.', 'l' ] ## reduce the number of phones. # the phones which are used in stimmen transcription but not in FAME corpus. # replacements are based on the advice from Jelske Dijkstra on 2018/06/21. stimmen_replacement = { 'æ': 'ɛ', 'ø': 'ö', # or 'ö:' 'ø:': 'ö:', # Aki added. 'œ': 'ɔ̈', # or 'ɔ̈:' 'œ:': 'ɔ̈:', # Aki added. 'ɐ': 'a', # or 'a:' 'ɐ:': 'a:', # Aki added. 'ɑ': 'a', # or 'a:' 'ɑ:': 'a:', # Aki added 'ɒ': 'ɔ', # or 'ɔ:' 'ɒ:': 'ɔ:', # Aki added. 'ɾ': 'r', 'ʁ': 'r', 'ʊ': 'u', 'χ': 'x', # aki guessed. 'ʀ': 'r', 'ɹ': 'r', 'w': 'ö' } phoneset.extend(list(stimmen_replacement.keys())) def phone_reduction(phones): return [stimmen_replacement.get(i, i) for i in phones] ## the list of multi character phones. # for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter. multi_character_phones = [i for i in phoneset if len(i) > 1] multi_character_phones.sort(key=len, reverse=True)