2019-01-27 23:52:33 +01:00
""" definition of the phones to be used. """
2019-01-28 12:34:20 +01:00
phoneset = [
2019-01-27 01:34:04 +01:00
# vowels
' i̯ ' ,
2019-01-27 23:52:33 +01:00
' i̯ⁿ ' ,
2019-01-27 01:34:04 +01:00
' y ' ,
2019-02-06 00:00:14 +01:00
' y: ' , # not included in lex.ipa, but in stimmen.
2019-01-27 01:34:04 +01:00
' i ' ,
2019-01-27 23:52:33 +01:00
' i. ' ,
' iⁿ ' ,
2019-01-27 01:34:04 +01:00
' i: ' ,
2019-01-27 23:52:33 +01:00
' i:ⁿ ' ,
2019-01-27 01:34:04 +01:00
' ɪ ' ,
2019-01-27 23:52:33 +01:00
' ɪⁿ ' ,
' ɪ .' ,
2019-02-06 00:00:14 +01:00
' ɪ :' , # not included in lex.ipa, but in stimmen.
2019-01-27 23:52:33 +01:00
' ɪ :ⁿ' ,
2019-01-27 01:34:04 +01:00
' e ' ,
' e: ' ,
2019-01-27 23:52:33 +01:00
' e:ⁿ ' ,
2019-01-27 01:34:04 +01:00
' ə ' ,
2019-01-27 23:52:33 +01:00
' əⁿ ' ,
2019-01-27 01:34:04 +01:00
' ə: ' ,
' ɛ ' ,
2019-01-27 23:52:33 +01:00
' ɛ. ' ,
' ɛⁿ ' ,
2019-01-27 01:34:04 +01:00
' ɛ: ' ,
2019-01-27 23:52:33 +01:00
' ɛ:ⁿ ' ,
2019-01-27 01:34:04 +01:00
' a ' ,
2019-01-27 23:52:33 +01:00
' aⁿ ' ,
' a. ' ,
2019-01-27 01:34:04 +01:00
' a: ' ,
2019-01-27 23:52:33 +01:00
' a:ⁿ ' ,
2019-01-27 01:34:04 +01:00
' ṷ ' ,
2019-01-27 23:52:33 +01:00
' ṷ. ' ,
' ṷⁿ ' ,
2019-01-29 21:52:11 +01:00
#'ú', # only appears in the words 'feeste'(út) and 'gaste'(út), which are 'f e: s t ə' and 'yn' in lex_asr. The pronunciations in Fries may be mistakes, so I removed this phone.
2019-01-27 01:34:04 +01:00
' u ' ,
2019-01-27 23:52:33 +01:00
' uⁿ ' ,
' u. ' ,
2019-01-27 01:34:04 +01:00
' u: ' ,
2019-01-27 23:52:33 +01:00
' u:ⁿ ' ,
2019-01-27 01:34:04 +01:00
' ü ' ,
2019-01-27 23:52:33 +01:00
' ü. ' ,
' üⁿ ' ,
2019-01-27 01:34:04 +01:00
' ü: ' ,
2019-01-27 23:52:33 +01:00
' ü:ⁿ ' ,
2019-01-27 01:34:04 +01:00
' o ' ,
2019-01-27 23:52:33 +01:00
' oⁿ ' ,
' o. ' ,
2019-01-27 01:34:04 +01:00
' o: ' ,
2019-01-27 23:52:33 +01:00
' o:ⁿ ' ,
2019-01-27 01:34:04 +01:00
' ö ' ,
2019-01-27 23:52:33 +01:00
' ö. ' ,
' öⁿ ' ,
2019-01-27 01:34:04 +01:00
' ö: ' ,
2019-01-27 23:52:33 +01:00
' ö:ⁿ ' ,
2019-01-27 01:34:04 +01:00
' ɔ ' ,
2019-01-27 23:52:33 +01:00
' ɔ. ' ,
' ɔⁿ ' ,
2019-01-27 01:34:04 +01:00
' ɔ: ' ,
2019-01-27 23:52:33 +01:00
' ɔ:ⁿ ' ,
2019-03-25 00:06:53 +01:00
' ɔ̈ ' , # not included in lex.ipa
2019-01-27 23:52:33 +01:00
' ɔ̈. ' ,
2019-01-27 01:34:04 +01:00
' ɔ̈: ' ,
# plosives
' p ' ,
' b ' ,
2019-01-27 23:52:33 +01:00
' t ' ,
' tⁿ ' ,
2019-01-27 01:34:04 +01:00
' d ' ,
' k ' ,
' g ' ,
2019-01-27 23:52:33 +01:00
' ɡ ' , # = 'g'
2019-01-27 01:34:04 +01:00
# nasals
' m ' ,
' n ' ,
' ŋ ' ,
# fricatives
' f ' ,
' v ' ,
' s ' ,
' s: ' ,
' z ' ,
2019-01-27 23:52:33 +01:00
' zⁿ ' ,
2019-01-27 01:34:04 +01:00
' x ' ,
' h ' ,
2019-01-27 23:52:33 +01:00
# tap and flap
' r ' ,
' r. ' , # only appears in word 'mearpartijestelsel'(does not exist in lex_asr) and 'tenoarpartij'.
' r: ' , # only appears in word 'mûsearflearmûs' and 'sjochdêr'.
# approximant
' j ' ,
' j. ' ,
' l '
2019-01-27 01:34:04 +01:00
]
2019-02-06 00:00:14 +01:00
## reduce the number of phones.
# these phones are used in the stimmen transcription but not in the FAME corpus.
# the replacements are based on advice from Jelske Dijkstra on 2018/06/21.
stimmen_replacement = {
' æ ' : ' ɛ ' ,
' ø ' : ' ö ' , # or 'ö:'
' ø: ' : ' ö: ' , # Aki added.
' œ ' : ' ɔ̈ ' , # or 'ɔ̈:'
' œ: ' : ' ɔ̈: ' , # Aki added.
' ɐ ' : ' a ' , # or 'a:'
' ɐ: ' : ' a: ' , # Aki added.
' ɑ ' : ' a ' , # or 'a:'
' ɑ :' : ' a: ' , # Aki added
' ɒ ' : ' ɔ ' , # or 'ɔ:'
' ɒ: ' : ' ɔ: ' , # Aki added.
' ɾ ' : ' r ' ,
' ʁ ' : ' r ' ,
' ʊ ' : ' u ' ,
' χ ' : ' x ' ,
2019-01-29 21:52:11 +01:00
2019-02-06 00:00:14 +01:00
# the following replacements were guessed by Aki.
' ʀ ' : ' r ' ,
' ɹ ' : ' r ' ,
' w ' : ' ö '
}
# Also register every phone that has a replacement (the keys of
# stimmen_replacement), so they are part of phoneset as well.
phoneset += [source_phone for source_phone in stimmen_replacement]
def phone_reduction(phones):
    """Replace phones according to ``stimmen_replacement``.

    Args:
        phones: iterable of phones (hashable values, e.g. strings).

    Returns:
        A new list of the same length and order, where every phone that is
        a key of ``stimmen_replacement`` is swapped for its mapped value and
        every other phone is kept as it is.
    """
    reduced = []
    for phone in phones:
        # Fall back to the phone itself when no replacement is defined.
        reduced.append(stimmen_replacement.get(phone, phone))
    return reduced
2019-01-27 23:52:33 +01:00
## the list of multi-character phones.
# for example, the length of 'i̯ⁿ' is 3, but in the code it is treated as one letter.
2019-01-28 12:34:20 +01:00
# Every phone whose string is longer than one character, longest first.
# NOTE(review): presumably ordered this way so that longer phones are tried
# before shorter ones that they contain -- confirm against the callers.
multi_character_phones = sorted(
    (phone for phone in phoneset if len(phone) > 1),
    key=len,
    reverse=True,
)