# Exploratory script: read the words of an HTK pronunciation lexicon, run
# them through the grapheme-to-phoneme converter of the `forced_alignment`
# package, and collect the set of phone symbols that appear in the output.

import os
import sys

# The working directory is switched first; `defaultfiles` is imported after
# this call, so the import presumably relies on it being found from here.
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import numpy as np

import defaultfiles as default

# Make the sibling `forced_alignment` repository importable.
sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
from forced_alignment import forced_alignment, lexicon, convert_phone_set


# Example call of the forced aligner (kept for reference, disabled):
# wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav'
# forced_alignment(
#     wav_file,
#     'Australië'
#     #'BUFFETCOUPON COULISSEN DOUANE'
#     )

# Phone inventory according to the CGN phonetic transcription protocol:
# http://lands.let.ru.nl/cgn/doc_Dutch/topics/version_1.0/annot/phonetics/fon_prot.pdf
phone_list_cgn = [
    'p', 'b', 't', 'd', 'k', 'g',                   # plosives
    'f', 'v', 's', 'z', 'S', 'Z', 'x', 'G', 'h',    # fricatives
    'N', 'm', 'n', 'J', 'l', 'r', 'w', 'j',         # sonorants
    'I', 'E', 'A', 'O', 'Y',                        # short vowels
    'i', 'y', 'e', '2', 'a', 'o', 'u',              # long vowels
    '@',                                            # schwa
    'E+', 'Y+', 'A+',                               # diphthongs
    'E:', 'Y:', 'O:',                               # loan vowels
    'E~', 'A~', 'O~', 'Y~',                         # nasal vowels
]

# Load the words listed in the lexicon.
lexicon_file = r'C:\cygwin64\home\Aki\acoustic_model\material\barbara\2010_2510_lexicon_pronvars_HTK.txt'
with open(lexicon_file, 'r') as f:
    lines = f.readlines()

# The first whitespace-separated field of every non-blank line is the word.
words = []
for line in lines:
    line_split = line.split()
    if not line_split:
        continue  # blank line
    # str.replace returns a new string, so the result has to be re-bound.
    # NOTE(review): '+s' is presumably a suffix marker in this lexicon -- confirm.
    word = line_split[0].replace('+s', '')
    # Hyphenated entries contribute each of their parts as a separate word;
    # extend() keeps `words` a flat list of strings.
    words.extend(word.split('-'))
# np.unique returns the sorted, de-duplicated entries.
words = list(np.unique(words))

# Convert the words and gather every phone symbol that occurs.
# `_grapheme_to_phoneme` is used here as a mapping from word to pronunciation.
pronunciations = lexicon._grapheme_to_phoneme(words)
htks = []
phone_list = set()
for word, ipa in pronunciations.items():
    # NOTE(review): split_ipa presumably returns the pronunciation as a
    # sequence of individual phone symbols -- confirm against convert_phone_set.
    htk = convert_phone_set.split_ipa(ipa)
    htks.append(htk)
    phone_list.update(htk)