diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index 88f58eb..7f32711 100644 Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj index 0ec4c9b..f4800ce 100644 --- a/acoustic_model/acoustic_model.pyproj +++ b/acoustic_model/acoustic_model.pyproj @@ -35,12 +35,15 @@ Code - + Code Code + + Code + diff --git a/acoustic_model/check_novoapi.py b/acoustic_model/check_novoapi.py index cf6e7c6..3fd4601 100644 --- a/acoustic_model/check_novoapi.py +++ b/acoustic_model/check_novoapi.py @@ -19,35 +19,10 @@ import defaultfiles as default from forced_alignment import pyhtk, convert_phone_set import novoapi - +import novoapi_functions ## ======================= novo phoneset ====================== -translation_key = dict() - -#phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx) -#df = pd.read_excel(phonelist_novo70_, 'list') -## *_simple includes columns which has only one phone in. -#for ipa, novo70 in zip(df['IPA_simple'], df['novo70_simple']): -# if not pd.isnull(ipa): -# print('{0}:{1}'.format(ipa, novo70)) -# translation_key[ipa] = novo70 -#phonelist_novo70 = np.unique(list(df['novo70_simple'])) - -phoneset_ipa = [] -phoneset_novo70 = [] -with open(default.novo70_phoneset, "rt", encoding="utf-8") as fin: - lines = fin.read() - lines = lines.split('\n') - for line in lines: - words = line.split('\t') - if len(words) > 1: - novo70 = words[0] - ipa = words[1] - phoneset_ipa.append(ipa) - phoneset_novo70.append(novo70) - translation_key[ipa] = novo70 -phoneset_ipa = np.unique(phoneset_ipa) -phoneset_novo70 = np.unique(phoneset_novo70) +phoneset_ipa, phoneset_novo70, translation_key = novoapi_functions.load_phonset() # As per Nederlandse phoneset_aki.xlsx recieved from David # [ɔː] oh / ohr diff --git a/acoustic_model/forced_alignment_novo.py b/acoustic_model/novoapi_forced_alignment.py similarity index 93% rename from acoustic_model/forced_alignment_novo.py rename to acoustic_model/novoapi_forced_alignment.py index 243f275..93b6a73 100644 --- a/acoustic_model/forced_alignment_novo.py +++ b/acoustic_model/novoapi_forced_alignment.py @@ -41,6 +41,7 @@ import argparse import json from novoapi.backend import session +import novoapi_functions # username / password cannot be passed as artuments... p = argparse.ArgumentParser() @@ -110,3 +111,8 @@ res = rec.setgrammar(grammar) #res = rec.recognize_wav("test/onetwothree.wav") res = rec.recognize_wav(wav_file) #print "Recognition result:", json.dumps(res.export(), indent=4) + +# list of the pronunciation for each words +word = 'pauw' +pronunciation_ipa = ['pau', 'pɑu'] +grammar = novoapi_functions.make_grammar(word, pronunciation_ipa) \ No newline at end of file diff --git a/acoustic_model/novoapi_functions.py b/acoustic_model/novoapi_functions.py new file mode 100644 index 0000000..0bdb324 --- /dev/null +++ b/acoustic_model/novoapi_functions.py @@ -0,0 +1,138 @@ +import numpy as np + +import defaultfiles as default + +def load_phonset(): + translation_key_ipa2novo70 = dict() + translation_key_novo702ipa = dict() + + #phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx) + #df = pd.read_excel(phonelist_novo70_, 'list') + ## *_simple includes columns which has only one phone in. + #for ipa, novo70 in zip(df['IPA_simple'], df['novo70_simple']): + # if not pd.isnull(ipa): + # print('{0}:{1}'.format(ipa, novo70)) + # translation_key[ipa] = novo70 + #phonelist_novo70 = np.unique(list(df['novo70_simple'])) + + phoneset_ipa = [] + phoneset_novo70 = [] + with open(default.novo70_phoneset, "rt", encoding="utf-8") as fin: + lines = fin.read() + lines = lines.split('\n') + for line in lines: + words = line.split('\t') + if len(words) > 1: + novo70 = words[0] + ipa = words[1] + phoneset_ipa.append(ipa) + phoneset_novo70.append(novo70) + translation_key_ipa2novo70[ipa] = novo70 + translation_key_novo702ipa[novo70] = ipa + phoneset_ipa = np.unique(phoneset_ipa) + phoneset_novo70 = np.unique(phoneset_novo70) + + return phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa + + +def multi_character_tokenize(line, multi_character_tokens): + """ + Tries to match one of the tokens in multi_character_tokens at each position of line, + starting at position 0, + if so tokenizes and eats that token. Otherwise tokenizes a single character. + + Copied from forced_alignment.convert_phone_set.py + """ + while line != '': + for token in multi_character_tokens: + if line.startswith(token) and len(token) > 0: + yield token + line = line[len(token):] + break + else: + yield line[:1] + line = line[1:] + + +def split_ipa(line): + """ + Split a line by IPA phones. + If nasalized sound (such as ɛ̃ː) is included, it will give error. + :param string line: one line written in IPA. + :return string lineSeperated: the line splitted in IPA phone. + """ + + multi_character_phones = [ + # IPAs in CGN. + u'ʌu', u'ɛi', u'œy', u'aː', u'eː', u'iː', u'oː', u'øː', u'ɛː', u'œː', u'ɔː', u'ɛ̃ː', u'ɑ̃ː', u'ɔ̃ː', u'œ̃', u'ɪː' + ] + + return [phone for phone in multi_character_tokenize(line.strip(), multi_character_phones)] + + +def split_novo70(line): + """ + Split a line by novo70 phones. + :param string line: one line written in novo70. + :return string lineSeperated: the line splitted by novo70 phones. + """ + _, phoneset_novo70, _, _ = load_phonset() + multi_character_phones = [p for p in phoneset_novo70 if len(p) > 1] + multi_character_phones = sorted(multi_character_phones, key=len, reverse=True) + + return ['sp' if phone == ' ' else phone + for phone in multi_character_tokenize(line.strip(), multi_character_phones)] + + +def novo702ipa(tokens): + pronunciation = [] + _, _, _, translation_key = load_phonset() + for phone in split_novo70(tokens): + pronunciation.append(translation_key.get(phone, phone)) + return ' '.join(pronunciation) + + +# numbering of novo70 should be checked. +def ipa2novo70(tokens): + pronunciation = [] + _, _, translation_key, _ = load_phonset() + for phone in split_ipa(tokens): + pronunciation.append(translation_key.get(phone, phone)) + return ' '.join(pronunciation) + + +def make_grammar(word, pronunciation_ipa): + """ + Args: + words + pronunciation_ipa: list of pronunciation variants. + """ + #word = 'pauw' + #pronunciation_ipa = ['pau', 'pɑu'] + + grammer_data_elements0_pronunciation = [] + for id, ipa in enumerate(pronunciation_ipa): + novo70 = novoapi_functions.ipa2novo70(ipa) + grammer_data_elements0_pronunciation.append({ + "phones": novo70.split(), + "id": id + }) + + grammar_data = { + "kind": 'sequence', + "elements": [{ + "kind": "word", + "pronunciation": grammer_data_elements0_pronunciation, + "label": word + }] + } + + grammar = { + "type": "confusion_network", + "version": "1.0", + "data": grammar_data, + "return_objects": ["grammar"], + "phoneset": "novo70" + } + + return grammar \ No newline at end of file