# acoustic_model/acoustic_model/phoneset/fame_asr.py
# (155 lines, 3.3 KiB, Python)
""" definition of the phones to be used. """
# Phones used in {FAME}/lexicon/lex.asr.
# Every entry is one phone; a trailing ':' marks the long variant.
phoneset = [
    # vowels
    'a',
    'a:',
    'e',
    'e:',
    'i',
    'i:',
    'i̯',  # translated to 'i_' for HTK
    'o',
    'o:',
    'ö',
    'ö:',
    'u',
    'u:',
    'ü',
    'ü:',
    # 'ú' is left out on purpose: it only appears in the words 'feeste' (út)
    # and 'gaste' (út), which are 'f e: s t ə' and 'yn' in lex_asr. These
    # pronunciations may be mistakes, so the phone was removed.
    'ṷ',  # translated to 'u_' for HTK
    'y',
    'ɔ',
    'ɔ:',
    'ɔ̈',
    'ɔ̈:',
    'ə',
    'ɛ',
    'ɛ:',
    'ɪ',
    'ɪ:',
    # plosives
    'p',
    'b',
    't',
    'd',
    'k',
    'g',
    'ɡ',  # script-g variant, same phone as 'g'
    # nasals
    'm',
    'n',
    'ŋ',
    # fricatives
    'f',
    'v',
    's',
    's:',
    'z',
    'x',
    'h',
    # tap and flap
    'r',
    'r:',
    # approximants
    'j',
    'l',
]
## reduce the number of phones.
# Phones which seldom occur are replaced with other, more frequent phones.
# The replacements are based on advice from Martijn Wieling.
reduction_key = {
    'y': 'i:', 'e': 'e:', 'ə:': 'ɛ:', 'r:': 'r', 'ɡ': 'g',
    # aki added this one because it is used in stimmen_project.
    'ɔ̈:': 'ɔ:',
}

# Phones that phone_reduction() drops entirely.
# 'ú' is already commented out of phoneset and is listed only to be sure;
# 's:' is still listed in phoneset and is filtered out here.
phones_to_be_removed = ['ú', 's:']


def phone_reduction(phones):
    """Replace rare phones by frequent ones and drop the removed phones.

    A two-line notice is printed when the input contains at least one phone
    from phones_to_be_removed.

    Args:
        phones (list): list of phones.

    Returns:
        list: the phones in their original order, with every phone listed in
            phones_to_be_removed left out and every phone that is a key of
            reduction_key replaced by its value. Other phones are unchanged.
    """
    if any(phone in phones for phone in phones_to_be_removed):
        print('input includes phone(s) which is not defined in fame_asr.')
        print('those phone(s) are removed.')
    return [reduction_key.get(phone, phone) for phone in phones
            if phone not in phones_to_be_removed]
# Reduced phone set: the unique phones that remain after phone_reduction()
# has been applied to phoneset, in sorted order.
phoneset_short = sorted(set(phone_reduction(phoneset)))
## translation_key to htk format (ascii).
# Phones which give a UnicodeEncodeError on phone.encode("ascii")
# are replaced with other characters.
translation_key_asr2htk = {
    # glides are marked with a trailing underscore.
    'i̯': 'i_',
    'ṷ': 'u_',
    # on the analogy of German umlaut, 'e' is used.
    'ö': 'oe', 'ö:': 'oe:',
    'ü': 'ue', 'ü:': 'ue:',
    # on the analogy of Chinese...
    'ŋ': 'ng',
    # refer to Xsampa.
    'ɔ': 'O', 'ɔ:': 'O:', 'ɔ̈': 'Oe',
    #'ɔ̈:': 'O:', # does not appear in FAME, but used in stimmen.
    'ɛ': 'E', 'ɛ:': 'E:',
    'ɪ': 'I', 'ɪ:': 'I:',
    # it is @ in Xsampa, but that is not handy on HTK.
    'ə': 'A',
}
# HTK-compatible counterpart of phoneset_short: each phone is looked up in
# translation_key_asr2htk, and phones without an entry are kept as they are.
phoneset_htk = [
    translation_key_asr2htk.get(phone, phone) for phone in phoneset_short
]
#not_in_ascii = [
# '\'',
# 'â', 'ê', 'ô', 'û', 'č',
# 'à', 'í', 'é', 'è', 'ú', 'ć',
# 'ä', 'ë', 'ï', 'ö', 'ü'
#]
# ASCII replacements for accented letters (presumably applied to the
# orthographic words before they are handed to HTK -- confirm with callers).
# Letters with the same accent share a suffix digit; letters with a
# diaeresis become a two-letter sequence instead.
translation_key_word2htk = {
    #'\'': '\\\'',
    # acute accent -> 1
    'í': 'i1',
    'é': 'e1',
    'ú': 'u1',
    'ć': 'c1',
    # grave accent -> 2
    'à': 'a2',
    'è': 'e2',
    # circumflex -> 3
    'â': 'a3',
    'ê': 'e3',
    'ô': 'o3',
    'û': 'u3',
    # caron -> 4
    'č': 'c4',
    # diaeresis -> two letters
    'ä': 'ao',
    'ë': 'ee',
    'ï': 'ie',
    'ö': 'oe',
    'ü': 'ue',
}
#[translation_key_word2htk.get(i, i) for i in not_in_ascii]
# Phone classes, written in HTK notation with the original symbol in
# parentheses where the two differ.
#Stop: p, b, t, d, k, g
#Nasal: m, n, ng(ŋ)
#Fricative: s, z, f, v, h, x
#Liquid: l, r
#Vowel: a, a:, e:, i, i:, i_(i̯), o, o:, u, u:, u_(ṷ), oe(ö), oe:(ö:), ue(ü), ue:(ü:), O(ɔ), O:(ɔ:), Oe(ɔ̈), A(ə), E(ɛ), E:(ɛ:), I(ɪ), I:(ɪ:)

## the lists of multi character phones.
# For example, 'a:' is two characters long, but in the code it is treated
# as one letter. Each list is ordered from the longest phone to the shortest;
# phones of equal length keep the order of the list they were taken from.
# original.
multi_character_phones = sorted(
    (phone for phone in phoneset if len(phone) > 1),
    key=len, reverse=True)
# phoneset reduced.
multi_character_phones_short = sorted(
    (phone for phone in phoneset_short if len(phone) > 1),
    key=len, reverse=True)
# htk compatible.
multi_character_phones_htk = sorted(
    (phone for phone in phoneset_htk if len(phone) > 1),
    key=len, reverse=True)