diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo index 4e9f3ed..88f58eb 100644 Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ diff --git a/acoustic_model/__pycache__/acoustic_model_functions.cpython-36.pyc b/acoustic_model/__pycache__/acoustic_model_functions.cpython-36.pyc index 067e789..8219429 100644 Binary files a/acoustic_model/__pycache__/acoustic_model_functions.cpython-36.pyc and b/acoustic_model/__pycache__/acoustic_model_functions.cpython-36.pyc differ diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj index 27625c9..0ec4c9b 100644 --- a/acoustic_model/acoustic_model.pyproj +++ b/acoustic_model/acoustic_model.pyproj @@ -35,6 +35,9 @@ Code + + Code + Code diff --git a/acoustic_model/acoustic_model_functions.py b/acoustic_model/acoustic_model_functions.py index 662c82b..4fced38 100644 --- a/acoustic_model/acoustic_model_functions.py +++ b/acoustic_model/acoustic_model_functions.py @@ -199,4 +199,4 @@ def make_fame2ipa_variants(fame): ipa.append(fame.replace('ɔ̈', 'ɒ')) ipa.append(fame.replace('ɔ̈:', 'ɒ')) - return ipa \ No newline at end of file + return ipa diff --git a/acoustic_model/check_novoapi.py b/acoustic_model/check_novoapi.py index 4af8368..cf6e7c6 100644 --- a/acoustic_model/check_novoapi.py +++ b/acoustic_model/check_novoapi.py @@ -55,14 +55,15 @@ phoneset_novo70 = np.unique(phoneset_novo70) # [iː] iy # [œː] uh # [ɛː] eh -david_suggestion = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː'] +# [w] wv in IPA written as ʋ. 
+david_suggestion = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'w'] -## ======================= convert phones ====================== +## ======================= extract words which is written only with novo70 ====================== mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx) -df = pd.read_excel(stimmen_transcription_, 'check') +df = pd.read_excel(stimmen_transcription_, 'frequency') #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): # ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) # if not ipa_converted == ipa: @@ -70,11 +71,13 @@ df = pd.read_excel(stimmen_transcription_, 'check') transcription_ipa = list(df['IPA']) # transcription mistake? -transcription_ipa = [ipa.replace(';', ':') for ipa in transcription_ipa if not ipa=='pypɪl' and not pd.isnull(ipa)] +transcription_ipa = [ipa.replace(';', 'ː') for ipa in transcription_ipa if not ipa=='pypɪl' and not pd.isnull(ipa)] transcription_ipa = [ipa.replace('ˑ', '') for ipa in transcription_ipa] # only one case. not_in_novo70 = [] +all_in_novo70 = [] for ipa in transcription_ipa: + ipa = ipa.replace(':', 'ː') ipa = convert_phone_set.split_ipa(ipa) not_in_novo70_ = [phone for phone in ipa @@ -83,19 +86,76 @@ for ipa in transcription_ipa: not_in_novo70_ = [phone.replace(':', '') for phone in not_in_novo70_] not_in_novo70_ = [phone.replace('ː', '') for phone in not_in_novo70_] + if len(not_in_novo70_) == 0: + all_in_novo70.append(''.join(ipa)) + #translation_key.get(phone, phone) not_in_novo70.extend(not_in_novo70_) not_in_novo70_list = list(set(not_in_novo70)) +## check which phone is used in stimmen but not in novo70 +# 'ʀ', 'ʁ', +# 'ɒ', 'ɐ', +# 'o', 'a' (o:, a:?) +# [e] 'nyːver mɑntsjə' (1) +# [ɾ] 'ɪːɾ'(1) +# [ɹ] 'iːjəɹ' (1), 'ɪ:ɹ' (1) +# [ø] 'gʀøtəpi:r'(1), 'grøtəpi:r'(1) +# [æ] 'røːzəʀæt'(2), 'røːzəræt'(1) +# [ʊ] 'ʊ'(1) --> can be ʏ (uh)?? +# [χ] --> can be x?? 
def search_phone_ipa(x, phone_list):
-    return [phone for phone in phone_list if x in convert_phone_set.split_ipa(phone)]
+    """Return the items of phone_list that use phone x but not its long variant.
+
+    Every item has its ASCII colons replaced by the IPA length mark and is
+    then passed through convert_phone_set.split_ipa().  An item is kept when
+    x is found in that result and x followed by a length mark is not.
+    The kept items are returned unmodified, in their original order.
+    """
+    # ':' has just been normalised to the length mark, so the long variant
+    # must be looked up with the length mark; the ASCII spelling is still
+    # checked in case split_ipa() hands it back.
+    # NOTE(review): presumably split_ipa() returns a list of phones - confirm.
+    long_variants = (x + 'ː', x + ':')
+    x_in_item = []
+    for ipa_original in phone_list:
+        phones = convert_phone_set.split_ipa(ipa_original.replace(':', 'ː'))
+        if x in phones and not any(variant in phones for variant in long_variants):
+            x_in_item.append(ipa_original)
+    return x_in_item
+#search_phone_ipa('ø', transcription_ipa)
-# 'ɐ', 'ɒ', 'w', 'æ', 'ʀ', 'ʁ',
-# 'œː', 'ɾ',
-# 'o', 'a'
-# [e] 'nyːver mɑntsjə' (1)
-# [ɹ] 'iːjəɹ' (2)
-search_phone_ipa('ˑ', transcription_ipa)
\ No newline at end of file
+
+df = pd.read_excel(stimmen_transcription_, 'original')
+
+# Convert every X-SAMPA transcription to IPA; rows without a transcription
+# (read back as float NaN) get an empty string so that ipas stays aligned
+# with the rows of df.
+ipas = []
+famehtks = []
+for xsampa in df['Self Xsampa']:
+    if not isinstance(xsampa, float): # 'NaN'
+        # typo?
+        xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t')
+        xsampa = xsampa.replace(';', ':')
+
+        ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
+        # the 'ipa' column stores the length mark as an ASCII colon and
+        # contains no spaces.
+        ipa = ipa.replace('ː', ':')
+        ipa = ipa.replace(' ', '')
+        ipas.append(ipa)
+    else:
+        ipas.append('')
+
+# extract interesting cols.
+df = pd.DataFrame({'filename': df['Filename'],
+                   'word': df['Word'],
+                   'xsampa': df['Self Xsampa'],
+                   'ipa': pd.Series(ipas)})
+
+# find options which all phones are in novo70.
+#word_list = list(set(df['word']))
+#word_list = [word for word in word_list if not pd.isnull(word)]
+#word = word_list[1]
+
+## pronunciation variants of 'word'
+#df_ = df[df['word'] == word]['xsampa']
+##pronunciation_variant = list(set(df_))
+
+# Count, for every pronunciation written only with novo70 phones, how many
+# rows of df carry it.  The rows are collected in a list and the frame is
+# built once: DataFrame.append() copied the whole frame on every call and
+# no longer exists in pandas 2.0.
+cols = ['word', 'ipa', 'frequency']
+rows = []
+for ipa in all_in_novo70:
+    # df['ipa'] stores the length mark as an ASCII colon.
+    ipa = ipa.replace('ː', ':')
+    samples = df[df['ipa'] == ipa]
+    if samples.empty:
+        # no row of df has this pronunciation, so there is no word to report.
+        continue
+    # take the first matching word (deterministic, unlike indexing into a set).
+    word = samples['word'].iloc[0]
+    rows.append([word, ipa, len(samples)])
+df_samples = pd.DataFrame(rows, columns=cols)
\ No newline at end of file
diff --git a/acoustic_model/forced_alignment_novo.py b/acoustic_model/forced_alignment_novo.py
new file mode 100644
index 0000000..243f275
--- /dev/null
+++ b/acoustic_model/forced_alignment_novo.py
@@ -0,0 +1,112 @@
+#
+# forced alignment using novo-api.
+#
+# *** IMPORTANT ***
+# This file should be treated as confidential.
+# This file should not be copied or uploaded to public sites.
+#
+# NOTES:
+# The usage of novo api: https://bitbucket.org/novolanguage/python-novo-api
+# I couldn't make it work as I described in the mail to Martijn Bartelds on
+# 2018/12/03.
+# As per the advice from him, I modified testgrammer.py and made it a function.
+#
+# In order to run on Python 3.6, the following points are changed in novo-api.
+# (1) backend/__init__.py
+#    - #import session
+#      from . import session
+# (2) backend/session.py
+#    - #except Exception, e:
+#      except Exception as e:
+#    - #print self.last_message
+#      print(self.last_message)
+# (3) asr/segment/praat.py
+#    - def print_tier(output, title, begin, end, segs, (format, formatter))
+#      def print_tier(output, title, begin, end, segs, format, formatter):
+# (4) asr/spraaklab/__init.py
+#    - #import session
+#      from .
import session
+# (5) asr/spraaklab/schema.py
+#    - #print data, "validated not OK", e.message
+#      print("{0} validated not OK {1}".format(data, e.message))
+#    - #print data, "validated OK"
+#      print("{} validated OK".format(data))
+#    - #if isinstance(object, basestring):
+#      if isinstance(object, str)
+#
+# Aki Kunikoshi
+# 428968@gmail.com
+#
+import argparse
+import json
+import os
+
+from novoapi.backend import session
+
+# Credentials must never be committed.  They are taken from --user/--password
+# or, when those are not given, from the NOVO_USER / NOVO_PASSWORD environment
+# variables.
+# NOTE(review): the account that used to be hard-coded as the default here has
+# been exposed in version control; its password should be rotated.
+p = argparse.ArgumentParser()
+p.add_argument("--user", default=os.environ.get('NOVO_USER'))
+p.add_argument("--password", default=os.environ.get('NOVO_PASSWORD'))
+args = p.parse_args()
+if not args.user or not args.password:
+    p.error("credentials missing: pass --user/--password or set NOVO_USER/NOVO_PASSWORD")
+
+wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'
+
+# Open a recognizer session with the given account (lang="nl", grammar 1.0).
+rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)
+
+# Grammar: the word sequence "one two three"; each word lists its allowed
+# pronunciation variants as novo70 phones.
+grammar = {
+    "type": "confusion_network",
+    "version": "1.0",
+    "data": {
+        "kind": "sequence",
+        "elements": [{
+            "kind": "word",
+            "pronunciation": [{
+                "phones": ["wv", "a1", "n"],
+                "id": 0
+            },
+            {
+                "phones": ["wv", "uh1", "n"],
+                "id": 1
+            }],
+            "label": "one"
+        },
+        {
+            "kind": "word",
+            "pronunciation": [{
+                "phones": ["t", "uw1"],
+                "id": 0
+            }],
+            "label": "two"
+        },
+        {
+            "kind": "word",
+            "pronunciation": [{
+                "phones": ["t", "r", "iy1"],
+                "id": 0
+            },
+            {
+                "phones": ["s", "r", "iy1"],
+                "id": 1
+            }],
+            "label": "three"
+        }]
+    },
+    "return_objects": ["grammar"],
+    "phoneset": "novo70"
+}
+
+res = rec.setgrammar(grammar)
+#print "Set grammar result", res
+
+# Run the recognizer on the recording with the grammar set above.
+#res = rec.recognize_wav("test/onetwothree.wav")
+res = rec.recognize_wav(wav_file)
+#print "Recognition result:", json.dumps(res.export(), indent=4)