@ -16,7 +16,7 @@ import acoustic_model_functions as am_func
@@ -16,7 +16,7 @@ import acoustic_model_functions as am_func
import convert_xsampa2ipa
import defaultfiles as default
from forced_alignment import pyhtk
from forced_alignment import pyhtk , convert_phone_set
import novoapi
@ -35,7 +35,7 @@ translation_key = dict()
@@ -35,7 +35,7 @@ translation_key = dict()
phoneset_ipa = [ ]
phoneset_novo70 = [ ]
with open ( default . cmu69 _phoneset, " rt " , encoding = " utf-8 " ) as fin :
with open ( default . novo70 _phoneset, " rt " , encoding = " utf-8 " ) as fin :
lines = fin . read ( )
lines = lines . split ( ' \n ' )
for line in lines :
@ -49,6 +49,14 @@ with open(default.cmu69_phoneset, "rt", encoding="utf-8") as fin:
@@ -49,6 +49,14 @@ with open(default.cmu69_phoneset, "rt", encoding="utf-8") as fin:
# Collapse duplicate entries in both phone lists. np.unique returns a sorted
# numpy array, so from here on these names hold arrays rather than plain lists.
phoneset_ipa = np . unique ( phoneset_ipa )
phoneset_novo70 = np . unique ( phoneset_novo70 )
# As per Nederlandse phoneset_aki.xlsx received from David
# [ɔː] oh / ohr
# [ɪː] ih / ihr
# [iː] iy
# [œː] uh
# [ɛː] eh
# Extra long-vowel phones taken from the spreadsheet note above. The
# not-in-novo70 check later in this script treats them as known even when
# they are missing from phoneset_ipa.
david_suggestion = [ ' ɔː ' , ' ɪː ' , ' iː ' , ' œː ' , ' ɛː ' ]
## ======================= convert phones ======================
# Converter data for the X-SAMPA -> IPA direction, loaded from the configured
# converter directory. Its structure is defined in convert_xsampa2ipa; it is
# the object the commented-out xsampa2ipa check below would consume.
mapping = convert_xsampa2ipa . load_converter ( ' xsampa ' , ' ipa ' , default . ipa_xsampa_converter_dir )
@ -56,7 +64,38 @@ mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_
@@ -56,7 +64,38 @@ mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_
# Open the Stimmen transcription workbook and read one sheet of it into df.
stimmen_transcription_ = pd . ExcelFile ( default . stimmen_transcription_xlsx )
df = pd . read_excel ( stimmen_transcription_ , ' check ' )
# Disabled sanity check: convert every X-SAMPA entry with `mapping` and print
# the rows whose converted form differs from the IPA column.
#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
# #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_)
# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
# if not ipa_converted == ipa:
# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
# Work on the IPA column as a plain Python list.
transcription_ipa = list ( df [ ' IPA ' ] )
# transcription mistake?
# Skip the one excluded transcription and any missing (null) cells, and swap
# the semicolon pattern for the colon pattern in every remaining entry.
transcription_ipa = [ ipa . replace ( ' ; ' , ' : ' ) for ipa in transcription_ipa if not ipa == ' pypɪl ' and not pd . isnull ( ipa ) ]
# Replace the half-long mark (ˑ) in every entry.
transcription_ipa = [ ipa . replace ( ' ˑ ' , ' ' ) for ipa in transcription_ipa ] # only one case.
# Gather every phone that occurs in the transcriptions but is neither in
# phoneset_ipa nor in david_suggestion.
not_in_novo70 = [ ]
for ipa in transcription_ipa :
# Rebind ipa to the result of split_ipa, which is iterated phone by phone
# below (presumably a list of phone strings -- confirm in convert_phone_set).
ipa = convert_phone_set . split_ipa ( ipa )
# Phones of this transcription that are unknown to both reference lists.
not_in_novo70_ = [ phone for phone in ipa
if not phone in phoneset_ipa and not phone in david_suggestion ]
# Apply three replacements to the leftovers: the sp marker, the ASCII colon
# and the IPA length mark (ː).
not_in_novo70_ = [ phone . replace ( ' sp ' , ' ' ) for phone in not_in_novo70_ ]
not_in_novo70_ = [ phone . replace ( ' : ' , ' ' ) for phone in not_in_novo70_ ]
not_in_novo70_ = [ phone . replace ( ' ː ' , ' ' ) for phone in not_in_novo70_ ]
#translation_key.get(phone, phone)
# Accumulate across transcriptions; duplicates are removed afterwards.
not_in_novo70 . extend ( not_in_novo70_ )
# Unique leftovers; going through set() means the order is not defined.
not_in_novo70_list = list ( set ( not_in_novo70 ) )
def search_phone_ipa(x, phone_list):
    """Return the entries of phone_list whose split form contains x.

    Every entry is passed through convert_phone_set.split_ipa and is kept,
    in its original order, when x is one of the resulting elements.
    """
    matches = []
    for candidate in phone_list:
        pieces = convert_phone_set.split_ipa(candidate)
        if x in pieces:
            matches.append(candidate)
    return matches
# Notes from inspecting the results (presumably the phones reported in
# not_in_novo70_list and example transcriptions with counts -- confirm):
# 'ɐ', 'ɒ', 'w', 'æ', 'ʀ', 'ʁ',
# 'œː', 'ɾ',
# 'o', 'a'
# [e] 'nyːver mɑntsjə' (1)
# [ɹ] 'iːjəɹ' (2)
# Ad-hoc lookup of the transcriptions that contain the given phone. The return
# value is not stored, so it is only visible in an interactive session.
# NOTE(review): this mark is already replaced in transcription_ipa earlier in
# the script, so this presumably returns an empty list -- confirm.
search_phone_ipa ( ' ˑ ' , transcription_ipa )