## this script should be used only by Aki Kunikoshi.
#
# Helpers to convert pronunciations between IPA and the novo70 phone set and
# to run forced alignment through the novoapi recognizer.

import numpy as np

import argparse
import functools
import json

from novoapi.backend import session

import os
# NOTE: the working directory is changed at import time so that the
# project-local `defaultfiles` module below can be imported.
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import defaultfiles as default


def load_phonset():
    """Load the novo70 phone set and the IPA <-> novo70 translation tables.

    Reads the tab-separated file ``default.novo70_phoneset`` (first column:
    novo70 phone, second column: IPA phone; lines with fewer than two columns
    are skipped) and then adds a few hand-written extra mappings.

    Returns:
        A tuple ``(phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70,
        translation_key_novo702ipa)``. The two phone sets are the
        ``np.unique`` of the collected phones; the two translation keys are
        dicts mapping IPA -> novo70 and novo70 -> IPA respectively.
    """
    translation_key_ipa2novo70 = dict()
    translation_key_novo702ipa = dict()

    # An earlier version read the mapping with pandas from the 'list' sheet
    # of default.phonelist_novo70_xlsx (columns 'IPA_simple' and
    # 'novo70_simple', i.e. the columns which have only one phone in them).

    phoneset_ipa = []
    phoneset_novo70 = []
    with open(default.novo70_phoneset, "rt", encoding="utf-8") as fin:
        for line in fin:
            words = line.rstrip('\n').split('\t')
            if len(words) > 1:
                novo70 = words[0]
                ipa = words[1]
                phoneset_ipa.append(ipa)
                phoneset_novo70.append(novo70)
                translation_key_ipa2novo70[ipa] = novo70
                translation_key_novo702ipa[novo70] = ipa

    # As per Nederlandse phoneset_aki.xlsx received from David
    # [ɔː] oh / ohr # from ipa->novo70, only oh is used.
    # [ɪː] ih / ihr # from ipa->novo70, only ih is used.
    # [iː] iy
    # [œː] uh
    # [ɛː] eh
    # [w] wv in IPA written as ʋ.
    extra_ipa = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'ʋ']
    extra_novo70 = ['oh', 'ih', 'iy', 'uh', 'eh', 'wv']
    for ipa, novo70 in zip(extra_ipa, extra_novo70):
        phoneset_ipa.append(ipa)
        phoneset_novo70.append(novo70)
        translation_key_ipa2novo70[ipa] = novo70
        translation_key_novo702ipa[novo70] = ipa

    # 'ohr' / 'ihr' are only translated in the novo70 -> IPA direction.
    translation_key_novo702ipa['ohr'] = 'ɔː'
    translation_key_novo702ipa['ihr'] = 'ɪː'

    phoneset_ipa = np.unique(phoneset_ipa)
    phoneset_novo70 = np.unique(phoneset_novo70)

    return phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa


@functools.lru_cache(maxsize=None)
def _cached_phonset():
    """Return the result of load_phonset(), reading the file only once.

    The conversion helpers below are called once per phone / per variant, so
    re-reading the phone-set file on every call is wasteful. The returned
    containers are shared between calls and must be treated as read-only.
    """
    return load_phonset()


def multi_character_tokenize(line, multi_character_tokens):
    """
    Tries to match one of the tokens in multi_character_tokens at each position of line,
    starting at position 0,
    if so tokenizes and eats that token. Otherwise tokenizes a single character.

    Tokens are tried in the order given, so the first matching token wins.

    Copied from forced_alignment.convert_phone_set.py
    """
    while line != '':
        for token in multi_character_tokens:
            # empty tokens are ignored, otherwise the loop would never advance.
            if line.startswith(token) and len(token) > 0:
                yield token
                line = line[len(token):]
                break
        else:
            # no multi-character token matched: emit a single character.
            yield line[:1]
            line = line[1:]


def split_ipa(line):
    """
    Split a line by IPA phones.
    If nasalized sound (such as ɛ̃ː) is included, it will give error.
    :param string line: one line written in IPA.
    :return list: the phones of the line (leading / trailing whitespace of the line is stripped first).
    """
    multi_character_phones = [
        # IPAs in CGN.
        'ʌu', 'ɛi', 'œy', 'aː', 'eː', 'iː', 'oː', 'øː', 'ɛː', 'œː', 'ɔː', 'ɛ̃ː', 'ɑ̃ː', 'ɔ̃ː', 'œ̃', 'ɪː'
    ]

    return list(multi_character_tokenize(line.strip(), multi_character_phones))


def split_novo70(line):
    """
    Split a line by novo70 phones.
    :param string line: one line written in novo70.
    :return list: the novo70 phones of the line; a space is returned as 'sp'.
    """
    _, phoneset_novo70, _, _ = _cached_phonset()
    # longest phones first, so that e.g. a three-letter phone is not split
    # into a shorter phone plus a remainder.
    multi_character_phones = sorted(
        (p for p in phoneset_novo70 if len(p) > 1), key=len, reverse=True)

    return ['sp' if phone == ' ' else phone
            for phone in multi_character_tokenize(line.strip(), multi_character_phones)]


def novo702ipa(tokens):
    """Convert a novo70 string into a space-separated IPA string.

    Phones without an entry in the translation table are passed through
    unchanged.
    """
    _, _, _, translation_key = _cached_phonset()
    return ' '.join(translation_key.get(phone, phone)
                    for phone in split_novo70(tokens))


# numbering of novo70 should be checked.
def ipa2novo70(tokens):
    """Convert an IPA string into a space-separated novo70 string.

    Phones without an entry in the translation table are passed through
    unchanged.
    """
    _, _, translation_key, _ = _cached_phonset()
    return ' '.join(translation_key.get(phone, phone)
                    for phone in split_ipa(tokens))


def make_grammar(word, pronunciation_ipa):
    """Build a novo70 'confusion_network' grammar for a single word.

    Args:
        word: label of the word, e.g. 'pauw'.
        pronunciation_ipa: list of pronunciation variants written in IPA,
            e.g. ['pau', 'pɑu']. Each variant gets its index in this list
            as its "id".

    Returns:
        dict: the grammar description.
    """
    pronunciation_variants = []
    for variant_id, ipa in enumerate(pronunciation_ipa):
        novo70 = ipa2novo70(ipa)
        pronunciation_variants.append({
            "phones": novo70.split(),
            "id": variant_id
        })

    grammar_data = {
        "kind": 'sequence',
        "elements": [{
            "kind": "word",
            "pronunciation": pronunciation_variants,
            "label": word
        }]
    }

    grammar = {
        "type": "confusion_network",
        "version": "1.0",
        "data": grammar_data,
        "return_objects": ["grammar"],
        "phoneset": "novo70"
    }

    return grammar


def forced_alignment(wav_file, word, pronunciation_ipa, user=None, password=None):
    """Run the novoapi recognizer on wav_file with a one-word grammar.

    Args:
        wav_file: wav file handed to ``Recognizer.recognize_wav``.
        word: label of the word, see make_grammar().
        pronunciation_ipa: list of IPA pronunciation variants, see
            make_grammar().
        user: optional user name. When None, ``--user`` from the command
            line or the built-in default is used.
        password: optional password. When None, ``--password`` from the
            command line or the built-in default is used.

    Returns:
        Whatever ``export()`` of the recognition result returns.
    """
    ### IMPORTANT ###
    # because of this function, this script should not be uploaded / shared.
    # SECURITY: the defaults below are real credentials kept only for
    # backward compatibility; prefer passing `user` / `password` explicitly
    # and remove the hard-coded defaults once all callers do so.
    if user is None or password is None:
        p = argparse.ArgumentParser()
        p.add_argument("--user", default='martijn.wieling')
        p.add_argument("--password", default='fa0Thaic')
        # parse_known_args: this function may be called from a script with
        # its own command line options, which must not abort the alignment.
        args, _ = p.parse_known_args()
        if user is None:
            user = args.user
        if password is None:
            password = args.password

    rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=user, password=password, keepopen=True)  # , modeldir=modeldir)

    grammar = make_grammar(word, pronunciation_ipa)
    rec.setgrammar(grammar)

    result = rec.recognize_wav(wav_file)
    return result.export()


def result2pronunciation(result, word):
    """Extract the recognized pronunciation of word from an alignment result.

    Args:
        result: sequence of dicts with at least the keys 'label', 'llh' and
            'phones' (presumably the output of forced_alignment() -- confirm).
        word: label to look for; the first entry with this label is used.

    Returns:
        tuple: ``(pronunciation_ipa, pronunciation_novo70, llh)`` where the
        two pronunciations are lists with one item per phone.

    Raises:
        IndexError: if no entry in result is labelled word.
    """
    matches = [item for item in result if item['label'] == word]
    first_match = matches[0]
    llh = first_match['llh']
    pronunciation_novo70 = [phone['label'] for phone in first_match['phones']]
    pronunciation_ipa = [novo702ipa(phone) for phone in pronunciation_novo70]
    return pronunciation_ipa, pronunciation_novo70, llh


if __name__ == '__main__':
    # NOTE(review): 'rø:s' uses an ASCII ':' whereas split_ipa() lists 'øː'
    # with the IPA length mark 'ː' -- confirm which one is intended.
    pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']
    grammar = make_grammar('reus', pronunciation_ipa)