acoustic_model/acoustic_model/fa_test.py

import os
import sys
# NOTE(review): the working directory is switched to a machine-specific
# checkout before the remaining imports -- presumably so that the local
# module `defaultfiles` can be resolved; confirm before running elsewhere.
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import numpy as np

import defaultfiles as default

# Add the 'forced_alignment' directory under default.repo_dir to the module
# search path; this has to happen before the import on the next line.
sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
from forced_alignment import forced_alignment, lexicon, convert_phone_set

#wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav'
#forced_alignment(
#    wav_file,
#    'Australië'
#    #'BUFFETCOUPON COULISSEN DOUANE'
#    )

# CGN phone set, according to:
# http://lands.let.ru.nl/cgn/doc_Dutch/topics/version_1.0/annot/phonetics/fon_prot.pdf
# Every symbol is listed exactly once (the plosive row previously listed
# 'd' twice and lacked 'b').
phone_list_cgn = ['p', 'b', 't', 'd', 'k', 'g', # plosives
                 'f', 'v', 's', 'z', 'S', 'Z', 'x', 'G', 'h', # fricatives
                 'N', 'm', 'n', 'J', 'l', 'r', 'w', 'j', # sonorants
                 'I', 'E', 'A', 'O', 'Y', # short vowels
                 'i', 'y', 'e', '2', 'a', 'o', 'u', # long vowels
                 '@', # schwa
                 'E+', 'Y+', 'A+', # diphthongs
                 'E:', 'Y:', 'O:', # loan vowels
                 'E~', 'A~', 'O~', 'Y~' # nasal vowels
                 ]

# Load the words from the lexicon: the first whitespace-separated field of
# every non-empty line is taken as the entry.
lexicon_file = r'C:\cygwin64\home\Aki\acoustic_model\material\barbara\2010_2510_lexicon_pronvars_HTK.txt'
with open(lexicon_file, 'r') as f:
    lines = f.readlines()

words = []
for line in lines:
    line_split = line.split()
    if line_split:
        # str.replace returns a new string, so the result must be kept;
        # otherwise the '+s' marker is never actually removed.
        word = line_split[0].replace('+s', '')
        # A hyphenated entry contributes each of its parts as a separate
        # word. extend() keeps `words` a flat list of strings, so that
        # np.unique() below never receives ragged nested lists; empty
        # fragments (e.g. from a trailing '-') are skipped.
        words.extend(part for part in word.split('-') if part)
# Sorted list of the distinct words.
words = list(np.unique(words))

# Convert the lexicon words with the grapheme-to-phoneme helper, split every
# returned pronunciation with split_ipa, and collect both the per-word
# results (htks) and the set of all distinct units seen (phone_list).
pronunciations = lexicon._grapheme_to_phoneme(words)
htks = []
phone_list = set()
for word, ipa in pronunciations.items():
    htk = convert_phone_set.split_ipa(ipa)
    htks.append(htk)
    phone_list.update(htk)
with bug-fixed xsampa->ipa conversion, FA is performed. 2018-09-02 12:16:37 +02:00			`import os`
			`import sys`
			`os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')`

the script 'forced_alignment_novo.py' which is to run novo_api on Python 3.6 environment is added. 2018-12-26 23:49:28 +01:00			`import numpy as np`

with bug-fixed xsampa->ipa conversion, FA is performed. 2018-09-02 12:16:37 +02:00			`import defaultfiles as default`

			`sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))`
the script 'forced_alignment_novo.py' which is to run novo_api on Python 3.6 environment is added. 2018-12-26 23:49:28 +01:00			`from forced_alignment import forced_alignment, lexicon, convert_phone_set`

			`#wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav'`
			`#forced_alignment(`
			`# wav_file,`
			`# 'Australië'`
			`# #'BUFFETCOUPON COULISSEN DOUANE'`
			`# )`

			`# according to: http://lands.let.ru.nl/cgn/doc_Dutch/topics/version_1.0/annot/phonetics/fon_prot.pdf`
			`phone_list_cgn = ['p', 'd', 't', 'd', 'k', 'g', # plosives`
			`'f', 'v', 's', 'z', 'S', 'Z', 'x', 'G', 'h', # fricatives`
			`'N', 'm', 'n', 'J', 'l', 'r', 'w', 'j', # sonorant`
			`'I', 'E', 'A', 'O', 'Y', # short vowels`
			`'i', 'y', 'e', '2', 'a', 'o', 'u', # long vowels`
			`'@', # schwa`
			`'E+', 'Y+', 'A+', # Diftongen`
			`'E:', 'Y:', 'O:', # Leenvocalen`
			`'E~', 'A~', 'O~', 'Y~' # Nasale vocalen`
			`]`

			`# load word in the lexicon.`
			`lexicon_file = r'C:\cygwin64\home\Aki\acoustic_model\material\barbara\2010_2510_lexicon_pronvars_HTK.txt'`
			`with open(lexicon_file, 'r') as f:`
			`lines = f.readlines()`
with bug-fixed xsampa->ipa conversion, FA is performed. 2018-09-02 12:16:37 +02:00
the script 'forced_alignment_novo.py' which is to run novo_api on Python 3.6 environment is added. 2018-12-26 23:49:28 +01:00			`words = []`
			`for line in lines:`
			`line_split = line.split()`
			`if len(line_split) > 0:`
			`word = line_split[0]`
			`word.replace('+s', '')`
			`word = word.split('-')`
			`words.append(word)`
			`words = list(np.unique(words))`
with bug-fixed xsampa->ipa conversion, FA is performed. 2018-09-02 12:16:37 +02:00
the script 'forced_alignment_novo.py' which is to run novo_api on Python 3.6 environment is added. 2018-12-26 23:49:28 +01:00			`pronunciations = lexicon._grapheme_to_phoneme(words)`
			`htks = []`
			`phone_list = set()`
			`for word in pronunciations.keys():`
			`ipa = pronunciations[word]`
			`htk = convert_phone_set.split_ipa(ipa)`
			`htks.append(htk)`
			`phone_list = phone_list \| set(htk)`