acoustic_model/acoustic_model/phoneset/fame_ipa.py

""" definition of the phones to be used. """

# Inventory of the IPA phone symbols, grouped by manner of articulation.
# One line per base phone; the order of the entries is significant and kept as is.
phoneset = [
	# --- vowels ---
	'i̯', 'i̯ⁿ',
	'y',
	'y:',  # absent from lex.ipa, but present in stimmen.
	'i', 'i.', 'iⁿ', 'i:', 'i:ⁿ',
	'ɪ', 'ɪⁿ', 'ɪ.',
	'ɪ:',  # absent from lex.ipa, but present in stimmen.
	'ɪ:ⁿ',
	'e', 'e:', 'e:ⁿ',
	'ə', 'əⁿ', 'ə:',
	'ɛ', 'ɛ.', 'ɛⁿ', 'ɛ:', 'ɛ:ⁿ',
	'a', 'aⁿ', 'a.', 'a:', 'a:ⁿ',
	'ṷ', 'ṷ.', 'ṷⁿ',
	# NOTE: 'ú' is intentionally left out. It only appears in the words
	# 'feeste'(út) and 'gaste'(út), which are 'f e: s t ə' and 'yn' in lex_asr.
	# Those Frisian pronunciations may be mistakes, so the phone was removed.
	'u', 'uⁿ', 'u.', 'u:', 'u:ⁿ',
	'ü', 'ü.', 'üⁿ', 'ü:', 'ü:ⁿ',
	'o', 'oⁿ', 'o.', 'o:', 'o:ⁿ',
	'ö', 'ö.', 'öⁿ', 'ö:', 'ö:ⁿ',
	'ɔ', 'ɔ.', 'ɔⁿ', 'ɔ:', 'ɔ:ⁿ',
	'ɔ̈',  # absent from lex.ipa.
	'ɔ̈.', 'ɔ̈:',

	# --- plosives ---
	'p', 'b',
	't', 'tⁿ',
	'd',
	'k',
	'g',
	'ɡ',  # a different code point that stands for the same phone as 'g'.

	# --- nasals ---
	'm', 'n', 'ŋ',

	# --- fricatives ---
	'f', 'v',
	's', 's:',
	'z', 'zⁿ',
	'x', 'h',

	# --- tap and flap ---
	'r',
	'r.',  # only in 'mearpartijestelsel' (which does not exist in lex_asr) and 'tenoarpartij'.
	'r:',  # only in 'mûsearflearmûs' and 'sjochdêr'.

	# --- approximants ---
	'j', 'j.',
	'l',
	]

## phone reduction table.
# Keys are phones that occur in the stimmen transcription but not in the FAME corpus;
# values are the FAME phones they are replaced with.
# The replacements follow the advice given by Jelske Dijkstra on 2018/06/21,
# except where a comment says otherwise.
stimmen_replacement = {
	'æ': 'ɛ',
	'ø': 'ö',    # alternative target: 'ö:'
	'ø:': 'ö:',  # added by Aki.
	'œ': 'ɔ̈',    # alternative target: 'ɔ̈:'
	'œ:': 'ɔ̈:',  # added by Aki.
	'ɐ': 'a',    # alternative target: 'a:'
	'ɐ:': 'a:',  # added by Aki.
	'ɑ': 'a',    # alternative target: 'a:'
	'ɑ:': 'a:',  # added by Aki.
	'ɒ': 'ɔ',    # alternative target: 'ɔ:'
	'ɒ:': 'ɔ:',  # added by Aki.
	'ɾ': 'r',
	'ʁ': 'r',
	'ʊ': 'u',
	'χ': 'x',

	# the remaining entries are guesses by Aki.
	'ʀ': 'r',
	'ɹ': 'r',
	'w': 'ö',
	}

# The replaceable phones are appended to the phone set as well,
# in the order in which they are listed in the table.
phoneset.extend(stimmen_replacement.keys())

def phone_reduction(phones):
	"""Replace the stimmen-only phones in *phones* with their FAME counterparts.

	Every item that is a key of ``stimmen_replacement`` is swapped for the
	mapped value; every other item is passed through unchanged.

	Args:
		phones: an iterable of phone symbols.

	Returns:
		A new list with one entry per item of *phones*, in the same order.
	"""
	return [stimmen_replacement.get(phone, phone) for phone in phones]

	
## multi character phones.
# Phones whose string is longer than one character. For example, len('i̯ⁿ') is 3,
# although the symbol is meant to be handled as a single phone.
# The list is ordered from the longest to the shortest string; phones of equal
# length keep their relative order from phoneset (the sort is stable).
multi_character_phones = sorted(
	(phone for phone in phoneset if len(phone) > 1),
	key=len,
	reverse=True,
)
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+								""" definition of the phones to be used. """
-												fame_asr phoneset is added including reduced version and htk compatible version.

											
										
										
											2019-01-28 12:34:20 +01:00
+								phoneset = [
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									# vowels
 									'i̯',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'i̯ⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'y',
-												make sure all the phones in stimmen transcription can be treated correctly.

											
										
										
											2019-02-06 00:00:14 +01:00
+									'y:', # not included in lex.ipa, but in stimmen.
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'i',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'i.',
 									'iⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'i:',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'i:ⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ɪ',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ɪⁿ',
 									'ɪ.',
-												make sure all the phones in stimmen transcription can be treated correctly.

											
										
										
											2019-02-06 00:00:14 +01:00
+									'ɪ:', # not included in lex.ipa, but in stimmen.
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ɪ:ⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'e',
 									'e:',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'e:ⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ə',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'əⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ə:',
 									'ɛ',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ɛ.',
 									'ɛⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ɛ:',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ɛ:ⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'a',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'aⁿ',
 									'a.',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'a:',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'a:ⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ṷ',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ṷ.',
 									'ṷⁿ',
-												lexicon is made.

											
										
										
											2019-01-29 21:52:11 +01:00
+									#'ú', # only appears in word 'feeste'(út) and 'gaste'(út) which are 'f e: s t ə' and 'yn' in lex_asr. The pronunciation in Fries may be mistakes so I removed this phone.
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'u',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'uⁿ',
 									'u.',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'u:',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'u:ⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ü',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ü.',
 									'üⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ü:',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ü:ⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'o',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'oⁿ',
 									'o.',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'o:',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'o:ⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ö',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ö.',
 									'öⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ö:',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ö:ⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ɔ',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ɔ.',
 									'ɔⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ɔ:',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ɔ:ⁿ',
-												fame_phonetics.py and functions to make quests.hed to tie triphone are added.

											
										
										
											2019-03-25 00:06:53 +01:00
+									'ɔ̈', # not included in lex.ipa
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ɔ̈.',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'ɔ̈:',
 									# plosives
 									'p',
 									'b',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									't',
 									'tⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'd',
 									'k',
 									'g',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'ɡ', # = 'g'
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
 									# nasals
 									'm',
 									'n',
 									'ŋ',
 									# fricatives
 									'f',
 									'v',
 									's',
 									's:',
 									'z',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+									'zⁿ',
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									'x',
 									'h',
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
 									# tap and flip
 									'r',
 									'r.', # only appears in word 'mearpartijestelsel'(does not exist in lex_asr) and 'tenoarpartij'.
 									'r:', # only appears in word 'mûsearflearmûs' and 'sjochdêr'.
 									# approximant
 									'j',
 									'j.',
 									'l'
-												phonset is given as fame_phoneset.py. translation key is obtained based on the information.

											
										
										
											2019-01-27 01:34:04 +01:00
+									]
-												make sure all the phones in stimmen transcription can be treated correctly.

											
										
										
											2019-02-06 00:00:14 +01:00
+								## reduce the number of phones.
 								# the phones which are used in stimmen transcription but not in FAME corpus.
 								# replacements are based on the advice from Jelske Dijkstra on 2018/06/21.
 								stimmen_replacement = {
 									'æ': 'ɛ',
 									'ø': 'ö', # or 'ö:'
 									'ø:': 'ö:', # Aki added.
 									'œ': 'ɔ̈', # or 'ɔ̈:'
 									'œ:': 'ɔ̈:', # Aki added.
 									'ɐ': 'a', # or 'a:'
 									'ɐ:': 'a:', # Aki added.
 									'ɑ': 'a', # or 'a:'
 									'ɑ:': 'a:', # Aki added
 									'ɒ': 'ɔ', # or 'ɔ:'
 									'ɒ:': 'ɔ:', # Aki added.
 									'ɾ': 'r',
 									'ʁ': 'r',
 									'ʊ': 'u',
 									'χ': 'x',
-												lexicon is made.

											
										
										
											2019-01-29 21:52:11 +01:00
-												make sure all the phones in stimmen transcription can be treated correctly.

											
										
										
											2019-02-06 00:00:14 +01:00
+									# aki guessed.
 									'ʀ': 'r',
 									'ɹ': 'r',
 									'w': 'ö'
 									}
 								phoneset.extend(list(stimmen_replacement.keys()))
 								def phone_reduction(phones):
 									return [stimmen_replacement.get(i, i) for i in phones]
-												correspondence between lex.asr and lex.ipa is automatically obtained. header is added to the functions in fame_functions.py.

											
										
										
											2019-01-27 23:52:33 +01:00
+								## the list of multi character phones.
 								# for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter.
-												fame_asr phoneset is added including reduced version and htk compatible version.

											
										
										
											2019-01-28 12:34:20 +01:00
+								multi_character_phones = [i for i in phoneset if len(i) > 1]
 								multi_character_phones.sort(key=len, reverse=True)