diff --git a/acoustic_model.sln b/acoustic_model.sln index 5c7f4e7..37a1335 100644 --- a/acoustic_model.sln +++ b/acoustic_model.sln @@ -18,8 +18,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution ..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py ..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py ..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py + ..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py ..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py - ..\..\..\..\..\OneDrive\WSL\python-novo-api\test\testgrammar.py = ..\..\..\..\..\OneDrive\WSL\python-novo-api\test\testgrammar.py EndProjectSection EndProject Global diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc index 1057f0f..4fac91d 100644 Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj index a2b8e35..7a2f4b5 100644 --- a/acoustic_model/acoustic_model.pyproj +++ b/acoustic_model/acoustic_model.pyproj @@ -4,7 +4,7 @@ 2.0 4d8c8573-32f0-4a62-9e62-3ce5cc680390 . - check_novoapi.py + performance_check.py . @@ -25,9 +25,6 @@ Code - - Code - Code @@ -37,10 +34,7 @@ Code - - Code - - + Code diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py index ae510fc..f66ae65 100644 --- a/acoustic_model/defaultfiles.py +++ b/acoustic_model/defaultfiles.py @@ -27,9 +27,10 @@ config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite') #AcousticModel = config['pyHTK']['AcousticModel'] repo_dir = r'C:\Users\Aki\source\repos' -ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter') +ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter') forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment') +<<<<<<< HEAD WSL_dir = r'C:\OneDrive\WSL' fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame') fame_s5_dir = os.path.join(fame_dir, 's5') @@ -42,4 +43,9 @@ phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic novo_api_dir = os.path.join(WSL_dir, 'python-novo-api') cmu69_phoneset = os.path.join(novo_api_dir, 'novoapi', 'asr', 'phoneset', 'en', 'cmu69.phoneset') +======= +fame_dir = r'C:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus' +experiments_dir = r'c:\OneDrive\Research\rug\experiments' +>>>>>>> parent of b87a81e... the script 'forced_alignment_novo.py' which is to run novo_api on Python 3.6 environment is added. 
+phonelist = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') \ No newline at end of file diff --git a/acoustic_model/fa_test.py b/acoustic_model/fa_test.py index 1907949..3a1bb08 100644 --- a/acoustic_model/fa_test.py +++ b/acoustic_model/fa_test.py @@ -2,52 +2,15 @@ import os import sys os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') -import numpy as np - import defaultfiles as default sys.path.append(os.path.join(default.repo_dir, 'forced_alignment')) -from forced_alignment import forced_alignment, lexicon, convert_phone_set +from forced_alignment import forced_alignment -#wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav' -#forced_alignment( -# wav_file, -# 'Australië' -# #'BUFFETCOUPON COULISSEN DOUANE' -# ) +wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav' +forced_alignment( + wav_file, + #'Australië' + 'BUFFETCOUPON COULISSEN DOUANE' + ) -# according to: http://lands.let.ru.nl/cgn/doc_Dutch/topics/version_1.0/annot/phonetics/fon_prot.pdf -phone_list_cgn = ['p', 'b', 't', 'd', 'k', 'g', # plosives - 'f', 'v', 's', 'z', 'S', 'Z', 'x', 'G', 'h', # fricatives - 'N', 'm', 'n', 'J', 'l', 'r', 'w', 'j', # sonorants - 'I', 'E', 'A', 'O', 'Y', # short vowels - 'i', 'y', 'e', '2', 'a', 'o', 'u', # long vowels - '@', # schwa - 'E+', 'Y+', 'A+', # diphthongs - 'E:', 'Y:', 'O:', # loan vowels - 'E~', 'A~', 'O~', 'Y~' # nasal vowels - ] - -# load words from the lexicon. -lexicon_file = r'C:\cygwin64\home\Aki\acoustic_model\material\barbara\2010_2510_lexicon_pronvars_HTK.txt' -with open(lexicon_file, 'r') as f: - lines = f.readlines() - -words = [] -for line in lines: - line_split = line.split() - if len(line_split) > 0: - word = line_split[0] - word.replace('+s', '') - word = word.split('-') - words.append(word) -words = list(np.unique(words)) - -pronunciations = lexicon._grapheme_to_phoneme(words) -htks = [] -phone_list = set() -for word in pronunciations.keys(): - ipa = pronunciations[word] - htk = convert_phone_set.split_ipa(ipa) - htks.append(htk) - phone_list = phone_list | set(htk) \ No newline at end of file diff --git a/acoustic_model/forced_alignment_novo.py b/acoustic_model/forced_alignment_novo.py deleted file mode 100644 index dc83dfc..0000000 --- a/acoustic_model/forced_alignment_novo.py +++ /dev/null @@ -1,133 +0,0 @@ -# -# forced alignment using novo-api. -# -# *** IMPORTANT *** -# This file should be treated as confidential. -# This file should not be copied or uploaded to public sites. -# -# NOTES: -# The usage of novo api: https://bitbucket.org/novolanguage/python-novo-api -# I couldn't make it work as I described in the mail to Martijn Bartelds on 2018/12/03. -# Following his advice, I modified testgrammar.py and made it a function. -# -# In order to run on Python 3.6, the following points were changed in novo-api. -# (1) backend/__init__.py -# - #import session -# from . import session -# (2) backend/session.py -# - #except Exception, e: -# except Exception as e: -# - #print self.last_message -# print(self.last_message) -# (3) asr/segments/praat.py -# - def print_tier(output, title, begin, end, segs, (format, formatter)) -# def print_tier(output, title, begin, end, segs, format, formatter): -# (4) asr/spraaklab/__init__.py -# - #import schema -# from . 
import schema -# (5) asr/spraaklab/schema.py -# - #print data, "validated not OK", e.message -# print("{0} validated not OK {1}".format(data, e.message)) -# - #print data, "validated OK" -# print("{} validated OK".format(data)) -# - #if isinstance(object, basestring): -# if isinstance(object, str): -# -# Aki Kunikoshi -# 428968@gmail.com -# - -import argparse -import json - -from novoapi.backend import session - -# username / password cannot be passed as arguments... -p = argparse.ArgumentParser() -#p.add_argument("--user", default=None) -#p.add_argument("--password", default=None) -p.add_argument("--user", default='martijn.wieling') -p.add_argument("--password", default='fa0Thaic') -args = p.parse_args() - -wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav' - -rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir) - -grammar = { - "type": "confusion_network", - "version": "1.0", - "data": { - "kind": "sequence", - "elements": [ - { - "kind": "word", - "pronunciation": [ - { - "phones": [ - "wv", - "a1", - "n" - ], - "id": 0 - }, - { - "phones": [ - "wv", - "uh1", - "n" - ], - "id": 1 - } - ], - "label": "one" - }, - { - "kind": "word", - "pronunciation": [ - { - "phones": [ - "t", - "uw1" - ], - "id": 0 - } - ], - "label": "two" - }, - { - "kind": "word", - "pronunciation": [ - { - "phones": [ - "t", - "r", - "iy1" - ], - "id": 0 - }, - { - "phones": [ - "s", - "r", - "iy1" - ], - "id": 1 - } - ], - "label": "three" - } - ] - }, - "return_objects": [ - "grammar" - ], - "phoneset": "novo70" -} - -res = rec.setgrammar(grammar) -#print "Set grammar result", res - -#res = rec.recognize_wav("test/onetwothree.wav") -res = rec.recognize_wav(wav_file) -#print "Recognition result:", json.dumps(res.export(), indent=4) diff --git a/acoustic_model/htk_vs_kaldi.py b/acoustic_model/performance_check.py similarity index 95% rename from acoustic_model/htk_vs_kaldi.py rename to acoustic_model/performance_check.py index f9985f1..25e6b17 100644 --- a/acoustic_model/htk_vs_kaldi.py +++ b/acoustic_model/performance_check.py @@ -3,7 +3,7 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model') import sys import csv -#import subprocess +import subprocess from collections import Counter import re @@ -20,6 +20,8 @@ from forced_alignment import pyhtk ## ======================= user define ======================= +excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx') +data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data') wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k @@ -46,6 +48,8 @@ load_forced_alignment_kaldi = 1 eval_forced_alignment_kaldi = 1 + + ## ======================= add paths ======================= sys.path.append(os.path.join(default.repo_dir, 'forced_alignment')) from forced_alignment import convert_phone_set @@ -58,15 +62,15 @@ from evaluation import plot_confusion_matrix ## ======================= convert phones ====================== mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir) -xls = pd.ExcelFile(default.stimmen_transcription_xlsx) +xls = pd.ExcelFile(excel_file) ## check conversion -#df = pd.read_excel(xls, 'check') +#df = pd.read_excel(xls, 'frequency') #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']): -# if xsampa is not '/': -# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) -# if not ipa_converted == ipa: -# 
print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) +# #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_) +# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa) +# if not ipa_converted == ipa: +# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa)) ## check phones included in FAME! @@ -156,7 +160,7 @@ if do_forced_alignment_htk: htk_dict_file = os.path.join(htk_dict_dir, word + '.dic') pyhtk.doHVite(wav_file, label_file, htk_dict_file, fa_file, default.config_hvite, - default.phonelist_friesian_txt, acoustic_model) + default.phonelist, acoustic_model) os.remove(label_file) prediction = am_func.read_fileFA(fa_file) @@ -227,7 +231,7 @@ if make_kaldi_data_files: ## ======================= make lexicon txt which is used by Kaldi ======================= if make_kaldi_lexicon_txt: - option_num = 7 + option_num = 6 # remove previous file. if os.path.exists(lexicon_txt): @@ -277,10 +281,10 @@ if load_forced_alignment_kaldi: phones_txt = os.path.join(default.fame_s5_dir, 'data', 'lang', 'phones.txt') merged_alignment_txt = os.path.join(default.fame_s5_dir, 'exp', 'tri1_alignme', 'merged_alignment.txt') - #filenames = np.load(stimmen_data_dir + '\\filenames.npy') - #words = np.load(stimmen_data_dir + '\\words.npy') - #pronunciations = np.load(stimmen_data_dir + '\\pronunciations_ipa.npy') - #pronvar_list_all = np.load(stimmen_data_dir + '\\pronvar_list_all.npy') + #filenames = np.load(data_dir + '\\filenames.npy') + #words = np.load(data_dir + '\\words.npy') + #pronunciations = np.load(data_dir + '\\pronunciations_ipa.npy') + #pronvar_list_all = np.load(data_dir + '\\pronvar_list_all.npy') #word_list = np.unique(words) # load the mapping between phones and ids. @@ -365,7 +369,7 @@ if eval_forced_alignment_htk: if compare_hmm_num: f_result.write("{},".format(hmm_num_str)) - #match = np.load(stimmen_data_dir + '\\match_hmm' + hmm_num_str + '.npy') + #match = np.load(data_dir + '\\match_hmm' + hmm_num_str + '.npy') #prediction = np.load(os.path.join(result_dir, 'htk', 'predictions_hmm' + hmm_num_str + '.npy')) #prediction = pd.Series(prediction, index=df.index, name='prediction') #result = pd.concat([df, prediction], axis=1) diff --git a/novoapi_for_python3x/__init__.py b/novoapi_for_python3x/__init__.py deleted file mode 100644 index 9ff2f76..0000000 --- a/novoapi_for_python3x/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env python - -__version__ = "0.2" - -import backend diff --git a/novoapi_for_python3x/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index b5e000d..0000000 Binary files a/novoapi_for_python3x/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/novoapi_for_python3x/asr/__init__.py b/novoapi_for_python3x/asr/__init__.py deleted file mode 100644 index 2832e82..0000000 --- a/novoapi_for_python3x/asr/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env python - -#import segments -#import spraaklab -from . import segments -from . 
import spraaklab \ No newline at end of file diff --git a/novoapi_for_python3x/asr/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/asr/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 608781a..0000000 Binary files a/novoapi_for_python3x/asr/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/novoapi_for_python3x/asr/segments/__init__.py b/novoapi_for_python3x/asr/segments/__init__.py deleted file mode 100644 index 737e432..0000000 --- a/novoapi_for_python3x/asr/segments/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env python - -from .segments import Segmentation -from .praat import seg2tg diff --git a/novoapi_for_python3x/asr/segments/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/asr/segments/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 6e69b7e..0000000 Binary files a/novoapi_for_python3x/asr/segments/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/novoapi_for_python3x/asr/segments/__pycache__/praat.cpython-36.pyc b/novoapi_for_python3x/asr/segments/__pycache__/praat.cpython-36.pyc deleted file mode 100644 index 7235caa..0000000 Binary files a/novoapi_for_python3x/asr/segments/__pycache__/praat.cpython-36.pyc and /dev/null differ diff --git a/novoapi_for_python3x/asr/segments/__pycache__/segments.cpython-36.pyc b/novoapi_for_python3x/asr/segments/__pycache__/segments.cpython-36.pyc deleted file mode 100644 index eab7f26..0000000 Binary files a/novoapi_for_python3x/asr/segments/__pycache__/segments.cpython-36.pyc and /dev/null differ diff --git a/novoapi_for_python3x/asr/segments/praat.py b/novoapi_for_python3x/asr/segments/praat.py deleted file mode 100644 index fbc9e4c..0000000 --- a/novoapi_for_python3x/asr/segments/praat.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python -# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen - -import codecs - -def print_header(output, begin, end, nr_tiers): - print >> output, 'File type = "ooTextFile"' - print >> output, 'Object class = "TextGrid"' - print >> output, '' - print >> output, 'xmin = %s' % begin - print >> output, 'xmax = %s' % end - print >> output, 'tiers? <exists>
' - print >> output, 'size = %d' % nr_tiers - print >> output, 'item []:' - - -def print_info_tier(output, title, begin, end, label): - print >> output, '\titem [%d]:' % 0 - print >> output, '\t\tclass = "IntervalTier"' - print >> output, '\t\tname = "%s"' % title - print >> output, '\t\txmin = %s' % begin - print >> output, '\t\txmax = %s' % end - print >> output, '\t\tintervals: size = %d' % 1 - - print >> output, '\t\tintervals [1]:' - print >> output, '\t\t\txmin = %s' % begin - print >> output, '\t\t\txmax = %s' % end - print >> output, '\t\t\ttext = "%s"' % label - - -#def print_tier(output, title, begin, end, segs, (format, formatter)): -def print_tier(output, title, begin, end, segs, format, formatter): - print >> output, '\titem [%d]:' % 0 - print >> output, '\t\tclass = "IntervalTier"' - print >> output, '\t\tname = "%s"' % title - print >> output, '\t\txmin = %s' % begin - print >> output, '\t\txmax = %s' % end - print >> output, '\t\tintervals: size = %d' % len(segs) - - count = 1 - for seg in segs: - #print seg - print >> output, '\t\tintervals [%d]:' % count - print >> output, '\t\t\txmin = %s' % repr(int(seg['begin']) / 100.0) - print >> output, '\t\t\txmax = %s' % repr(int(seg['end']) / 100.0) - string = '\t\t\ttext = "' + format + '"' - print >> output, string % formatter(seg['label']) - count += 1 - - -def seg2tg(fname, segments): - if not segments: - return - output = codecs.open(fname, "w", encoding="utf-8") - - confidences = [] - word_labels = [] - phones = [] - - for s in segments: - conf = s.llh if hasattr(s, "llh") else s.score - confidences.append({'begin': s.begin, 'end': s.end, 'label': conf}) - word_labels.append({'begin': s.begin, 'end': s.end, 'label': s.label}) - for p in s.phones: - phones.append({'begin': p.begin, 'end': p.end, 'label': p.label}) - - - begin = repr(int(segments[0].begin) / 100.0) - end = repr(int(segments[-1].end) / 100.0) - - nr_tiers = 3 - print_header(output, begin, end, nr_tiers) - print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x)) - print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x)) - print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x)) - - output.close() diff --git a/novoapi_for_python3x/asr/segments/segments.py b/novoapi_for_python3x/asr/segments/segments.py deleted file mode 100644 index ee5dbcc..0000000 --- a/novoapi_for_python3x/asr/segments/segments.py +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python -# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen - -## These classes can be initialized with dictionaries, as they are returned by the python spraaklab recognition system. 
- -class Segment(object): - def __init__(self, segment): - self.begin = segment["begin"] - self.end = segment["end"] - self.begintime = segment.get("beginTime", self.begin / 100.0) - self.endtime = segment.get("endTime", self.end / 100.0) - self.label = segment["label"] - self.score = segment["score"] - if "llh" in segment: - self.llh = segment["llh"] - if "phones" in segment: - self.type = "word" - self.phones = Segmentation(segment["phones"], ["sil"]) - if hasattr(self.phones[0], "llh"): - self.minllh = min([s.llh for s in self.phones]) ## the current word llh for error detection - else: - self.type = "phone" - - def __repr__(self): - res = "%8.3f -- %8.3f score %8.3f " % (self.begintime, self.endtime, self.score) - if hasattr(self, "llh"): - res += "llh %8.3f " % self.llh - res += self.label.encode("utf8") - return res - - def export(self): - r = {"begin": self.begin, "end": self.end, "label": self.label, "score": self.score, "type": self.type} - if hasattr(self, "llh"): - r["llh"] = self.llh - if hasattr(self, "phones"): - r["phones"] = self.phones.export() - return r - -class Segmentation(object): - def __init__(self, segments, sils=["", "", "!sil"]): - """Create a segmentation from a spraaklab recognition structure. - segments: an array of words (or phones), represented by a dict with - "begin", "end", "label", "score", and "llh" keys. Words can also have - "phones" which is another array of segments.""" - self.segments = [Segment(s) for s in segments] - if self.segments: - self.type = self.segments[0].type - else: - self.type = None - self.sils = sils - self.orig = segments ## in case we want to have access to the original recognition structure - - def __getitem__(self, item): - return self.segments[item] - - def __repr__(self): - ns = len(self.segments) - res = "Segmentation with %d %s%s" % (ns, self.type, "" if ns==1 else "s") - for seg in self.segments: - res += "\n " + repr(seg) - return res - - def __len__(self): - return len(self.segments) - - def score(self, skip=None): - if not skip: - skip = self.sils - s = 0.0 - for seg in self.segments: - if seg.label not in skip: - s += seg.score - return s - - def llhs(self, skip=None): - if not skip: - skip = self.sils - return [seg.llh for seg in self.segments if hasattr(seg, "llh") and seg.label not in skip] - - def llh(self, skip=None): - return sum(self.llhs(skip)) - - def minllh(self, skip=None): - llhs = self.llhs(skip) - if llhs: - return min(llhs) - else: - return None - - def labels(self, skip=None): - if not skip: - skip = self.sils - return [seg.label for seg in self.segments if seg.label not in skip] - - def sentence(self, skip=None): - return " ".join(self.labels(skip)) - - def export(self): - return [seg.export() for seg in self.segments] \ No newline at end of file diff --git a/novoapi_for_python3x/asr/spraaklab/__init__.py b/novoapi_for_python3x/asr/spraaklab/__init__.py deleted file mode 100644 index 2c5f2fd..0000000 --- a/novoapi_for_python3x/asr/spraaklab/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env python - -#import schema -from . 
import schema \ No newline at end of file diff --git a/novoapi_for_python3x/asr/spraaklab/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/asr/spraaklab/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 5a6f6ad..0000000 Binary files a/novoapi_for_python3x/asr/spraaklab/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/novoapi_for_python3x/asr/spraaklab/__pycache__/schema.cpython-36.pyc b/novoapi_for_python3x/asr/spraaklab/__pycache__/schema.cpython-36.pyc deleted file mode 100644 index aebdbf5..0000000 Binary files a/novoapi_for_python3x/asr/spraaklab/__pycache__/schema.cpython-36.pyc and /dev/null differ diff --git a/novoapi_for_python3x/asr/spraaklab/schema.py b/novoapi_for_python3x/asr/spraaklab/schema.py deleted file mode 100644 index 8efc49f..0000000 --- a/novoapi_for_python3x/asr/spraaklab/schema.py +++ /dev/null @@ -1,273 +0,0 @@ -#!/usr/bin/env python -## (c) 2017 NovoLanguage, author: David A. van Leeuwen - -## The purpose of this to define the grammar structure in a json schema, so that it can be validated, -## (de)serialized, and perhaps even automatically converted to a Python class structure. - -import json -import jsonschema - -grammar_schema_v10 = { - "$schema": "http://json-schema.org/schema#", - "title": "NovoLanguage grammar", - "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition", - "$ref": "#/definitions/group", - "definitions": { - "phones": { - "type": "array", - "items": { - "type": "string" - }, - "minItems": 1 - }, - "pronunciation": { - "type": "object", - "properties": { - "phones": { - "$ref": "#/definitions/phones" - }, - "syllables": { - "type": "array", - "items": { - "$ref": "#/definitions/syllable" - }, - "minItems": 1 - }, - "id": { - "type": "integer", - "description": "ID to distinguish this pronunciation from other variants" - }, - "meta": { - "type": "object" - } - }, - "required": ["phones"] - }, - "syllable": { - "type": "object", - "properties": { - "begin": { - "type": "integer", - "minimum": 0 - }, - "end": { - "type": "integer", - "minimum": 0 - }, - "stress": { - "type": "integer", - "minimum": 0 - }, - "tone": { - "type": "integer", - "minimum": 0 - } - }, - "required": ["begin", "end"] - }, - "word": { - "type": "object", - "properties": { - "kind": { - "type": "string", - "enum": ["word"] - }, - "label": { - "type": "string" - }, - "pronunciation": { - "anyOf": [ - { - "$ref": "#/definitions/pronunciation" - }, - { - "type": "array", - "items": { - "anyOf": [ - { - "$ref": "#/definitions/pronunciation" - }, - { - "$ref": "#/definitions/phones" - } - ] - }, - "minItems": 1 - }, - { - "$ref": "#/definitions/phones" - } - - ] - }, - "syllables": { - "type": "array", - "items": { - "$ref": "#/definitions/syllable" - } - }, - "graphemes": { - "type": "array", - "items": { - "type": "string" - } - }, - "id": { - "type": "integer", - "description": "ID to distinguish this word from other words (with possibly the same label)" - }, - "meta": { - "type": "object" - } - }, - "required": ["label"] - }, - "element": { - "title": "element", - "oneOf": [ - { - "$ref": "#/definitions/word" - }, - { - "$ref": "#/definitions/group" - }, - { - "type": ["string", "null"] - } - ] - }, - "group": { - "title": "element group", - "type": "object", - "properties": { - "kind": { - "type": "string", - "enum": ["sequence", "alternatives", "order"] - }, - "elements": { - "type": "array", - "items": { - "$ref": "#/definitions/element" - }, - "minItems": 1, - }, - "meta": { - "type": 
"object" - } - }, - "required": ["kind", "elements"] - } - } -} - -grammar_schema_v01 = { - "$schema": "http://json-schema.org/schema#", - "title": "NovoLanguage grammar v0.1", - "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": ["multiple_choice", "word_order"] - }, - "parts": { - "type": "array", - "minItems": 1, - "maxItems": 5, - "items": { - "type": ["string", "array"], - "items": { - "type": ["string"] - } - } - } - } -} - -grammar_rpc_schema = { - "$schema": "http://json-schema.org/schema#", - "title": "NovoLanguage RPC grammar", - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": ["confusion_network"] - }, - "version": { - "type": "string", - "default": "v0.1" - }, - "data": { - "type": "object" - }, - "return_dict": { - "type": "boolean" - }, - "return_objects": { - "type": "array", - "items": { - "type": "string", - "enum": ["dict", "grammar"] - } - }, - "phoneset": { - "type": "string", - "enum": ["cmu69", "novo70", "mdbg115"] - }, - "parallel_silence": { - "type": "boolean" - } - }, - "required": ["type", "data"] -} - -def validate(object, schema=grammar_schema_v10): - #if isinstance(object, basestring): - if isinstance(object, str): - object = json.loads(object) - if not isinstance(object, dict): - raise TypeError("Expected dict or json string") - try: - jsonschema.validate(object, schema) - except jsonschema.ValidationError: - return False - except Exception: - raise - else: - return True - -def validate_rpc_grammar(message): - """validate an rpc grammar message""" - if not validate(message, grammar_rpc_schema): - raise ValueError("Not a valid RPC grammar") - version = message.get("version", "0.1") - data = message["data"] - if version == "0.1": - if not validate(data, grammar_schema_v01): - raise ValueError("Not a valid grammar v0.1") - elif version == "1.0": - if not validate(data, grammar_schema_v10): - raise ValueError("Not a valid grammar v1.0") - else: - raise ValueError("Unsupported schema version") - - -## test -def test(data=None): - if not data: - data = {"kind": "sequence", "elements": [ - {"kind": "alternatives", "elements": ["a plain string", "an alternative string"]}, - {"kind": "word", "label": "a word", "pronunciation": {"phones": ["ah", "w", "er", "d"]}}, - {"kind": "order", "elements": [{"kind": "word", "label": "another word", "visible": False}, "last word"]}]} - try: - jsonschema.validate(data, schema) - except jsonschema.ValidationError as e: - #print data, "validated not OK", e.message - print("{0} validated not OK {1}".format(data, e.message)) - else: - #print data, "validated OK" - print("{} validated OK".format(data)) - - -if __name__ == "__main__": - test() diff --git a/novoapi_for_python3x/backend/__init__.py b/novoapi_for_python3x/backend/__init__.py deleted file mode 100644 index c52d472..0000000 --- a/novoapi_for_python3x/backend/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env python - -#import session -from . 
import session \ No newline at end of file diff --git a/novoapi_for_python3x/backend/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/backend/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 109cfba..0000000 Binary files a/novoapi_for_python3x/backend/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/novoapi_for_python3x/backend/__pycache__/session.cpython-36.pyc b/novoapi_for_python3x/backend/__pycache__/session.cpython-36.pyc deleted file mode 100644 index 856150c..0000000 Binary files a/novoapi_for_python3x/backend/__pycache__/session.cpython-36.pyc and /dev/null differ diff --git a/novoapi_for_python3x/backend/session.py b/novoapi_for_python3x/backend/session.py deleted file mode 100644 index b08a096..0000000 --- a/novoapi_for_python3x/backend/session.py +++ /dev/null @@ -1,254 +0,0 @@ -#!/usr/bin/env python -# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen - -## Recognition interface for actual backend. Adapted from player.asr.debug. - -import json -import sys -import wave -import requests -import websocket -import logging -import collections - -import time - -from .. import asr - -logger = logging.getLogger(__name__) - -## turn off annoying warnings -requests.packages.urllib3.disable_warnings() -logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(logging.WARN) - -buffer_size = 4096 -gm = "gm.novolanguage.com" ## dev -protocol = "https" -port = 443 -apiversion = 0 - -sessions = collections.Counter() - -def segmentation(result): - """converts a raw backend recognition result to a segment of novo.asr.segments class Segmentation""" - for w in result: - w["score"] = w["confidence"]["prob"] - w["llh"] = w["confidence"]["llr"] - w["label"] = w["label"]["raw"] - w["begin"] /= 10 - w["end"] /= 10 - for p in w["phones"]: - p["score"] = p["confidence"]["prob"] - p["llh"] = p["confidence"]["llr"] - p["begin"] /= 10 - p["end"] /= 10 - return asr.segments.Segmentation(result) - -class rpcid: - id = 0 - @staticmethod - def next(): - rpcid.id += 1 - return rpcid.id - -class Recognizer(object): - def __init__(self, lang="en", gm=gm, grammar_version="0.1", user=None, password=None, snodeid=None, keepopen=False): - self.lang = lang - self.keepopen = keepopen - self.api_url = "%s://%s:%d/v%d" % (protocol, gm, port, apiversion) - self.verify = False - self.headers = {"Content-Type": "application/json"} - self.login_user(user, password) - data = {"l2": lang, "local": False, "skipupload": True} - if snodeid: - data["snodeid"] = snodeid - self.conn = None - self.init_session(data) - self.grammar_version = grammar_version - self.last_message = None - - def login_user(self, username, password): - # obtain authentication token of user - logger.info('obtain auth token at %s', self.api_url) - data = { - 'username': username, - 'password': password - } - try: - r = requests.post(self.api_url + '/publishers/1/login', headers=self.headers, data=json.dumps(data), verify=self.verify) - except Exception as e: - logger.error("Cannot post request to GM API for user login: %s", e.message) - sys.exit(-1) - assert r.ok, r.reason - result = r.json() - if "errors" in result["response"]: - logger.info("Error in logging in: %s", result["response"]["errors"]) - sys.exit(-1) - - user_auth_token = result['response']['user']['authentication_token'] - logger.info("User auth token is: %s", user_auth_token) - - # set auth token in header - self.headers['Authentication-Token'] = user_auth_token - - def init_session(self, data, direct=False, use_ip=False): - 
logger.info('Request new session: %s', data) - r = requests.post(self.api_url + '/sessions', headers=self.headers, data=json.dumps(data), verify=self.verify) - if not r.ok: - logger.error("New session request failed: %s", r.text) - return - - status_url = r.headers.get("location") - if status_url: - ## we got a redirect - status = {} - while True: - logger.debug("Checking %s", status_url) - s = requests.get(status_url, verify=self.verify) - if not s.ok: - logger.error('Checking Failed: %s', s.text) - return - - status = s.json() - if status['status'] == 'PENDING': - logger.debug("Status: %s", status['status']) - time.sleep(1) - else: - break - session = status['result'][0] ## [1] is another status code... - if "error" in session: - logger.error("Error in getting a snode: %s", session["error"]) - raise Exception - else: - session = r.json() - - try: - logger.info("Session: %r", session) - if direct: - snode_ip = session["snode"]["ip"] - proxy_url = snode_ip - snode_port = session["port"] - ws_url = "%s://%s:%d/" % ("ws", snode_ip, snode_port) - else: - field = "ip" if use_ip else "hostname" - proxy_url = session['snode']['datacentre']['proxy'][field] - ws_url = 'wss://' + proxy_url + '/' + session['uuid'] - logger.info("Connecting to websocket: %s", ws_url) - conn = websocket.create_connection(ws_url, sslopt={"check_hostname": self.verify}) - logger.info("Connected.") - #except Exception, e: - except Exception as e: - logger.error("Unable to connect to websocket: %s", e.message) - raise e - - self.session_id = session['id'] - self.proxy_url = proxy_url - self.conn = conn - self.session = session - sessions[session["uuid"]] += 1 - - def setgrammar(self, grammar): ## backend grammar object: {"data": {...}, "type": "confusion_network"} - data = {"jsonrpc": "2.0", - 'type': 'jsonrpc', - 'method': 'set_grammar', - 'params': grammar, - "id": rpcid.next()} - asr.spraaklab.schema.validate_rpc_grammar(grammar) - self.conn.send(json.dumps(data)) - result = json.loads(self.conn.recv()) - if result.get("error"): - logger.error("Exercise validation error: %s", result) - return result - - def set_alternatives_grammar(self, *args, **kwargs): - if not "version" in kwargs: - kwargs["version"] = self.grammar_version - return self.setgrammar(alternatives_grammar(*args, **kwargs)) - - def recognize_wav(self, wavf): - w = wave.open(wavf, 'r') - nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams() - if nchannels > 1: - logging.error("Please use .wav with only 1 channel, found %d channels in %s", nchannels, wavf) - return - if (sampwidth != 2): - logging.error("Please use .wav with 2-byte PCM data, found %d bytes in %s", sampwidth, wavf) - return - if (framerate != 16000.0): - logging.error("Please use .wav sampled at 16000 Hz, found %1.0f in %s", framerate, wavf) - return - if (comptype != 'NONE'): - logging.error("Please use .wav with uncompressed data, found %s in %s", compname, wavf) - return - buf = w.readframes(nframes) - w.close() - return self.recognize_data(buf) - - def recognize_data(self, buf): - nbytes_sent = 0 - start = time.time() - for j in range(0, len(buf), buffer_size): - audio_packet = str(buf[j:j + buffer_size]) - nbytes_sent += len(audio_packet) - self.conn.send_binary(audio_packet) - self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()})) - logger.info("Waiting for recognition result...") - self.last_message = self.conn.recv() ## keep result for the interested applications - message = json.loads(self.last_message) - dur = time.time() 
- start - logger.info("Recognition took %5.3f seconds", dur) - if "error" in message: - raise RuntimeError("Error from recognition backend: %r" % message.get("error")) - return segmentation(message["result"]["words"]) - - def recognize_url(self, url): - start = time.time() - data = json.dumps({"jsonrpc": "2.0", "method": "send_audio", "id": rpcid.next(), "params": {"type": "url", "data": url, "details": ["word", "utterance"]}}) - self.conn.send(data) - logger.info("Waiting for recognition result...") - self.last_message = self.conn.recv() ## keep result for the interested applications - #print self.last_message - print(self.last_message) - message = json.loads(self.last_message) - dur = time.time() - start - logger.info("Recognition took %5.3f seconds", dur) - if "error" in message: - raise RuntimeError("Error from recognition backend: %r" % message.get("error")) - return segmentation(message["result"]["words"]) - - def __del__(self): - sessions[self.session["uuid"]] -= 1 - if self.conn and sessions[self.session["uuid"]] <= 0: - self.conn.close() - url = self.api_url + '/sessions/%d' % self.session_id - if self.keepopen: - logger.info("Keeping session open...") - else: - logger.info("Closing session: %s", url) - r = requests.delete(url, headers=self.headers, verify=self.verify) - assert r.ok, r.reason - -def alternatives_grammar(parts, version="0.1", ret=None): - """Make a grammar of alternatives, as array(sequence)-of-array(alternatives)-of-strings""" - r = {"type": "confusion_network", "version": version} - if version=="0.1": - r["data"] = {"type": "multiple_choice", "parts": parts} - if isinstance(ret, list) and "dict" in ret: - r["return_dict"] = True - elif version=="1.0": - seqels = [] - for part in parts: - altels = [] - for alt in part: - words = alt.split(" ") - if len(words) > 1: - alt = {"kind": "sequence", "elements": words} - altels.append(alt) - seqels.append({"kind": "alternatives", "elements": altels}) - r["data"] = {"kind": "sequence", "elements": seqels} - if isinstance(ret, list): - r["return_objects"] = ret - else: - raise ValueError("Unsupported version: %s" % version) - asr.spraaklab.schema.validate_rpc_grammar(r) - return r diff --git a/novoapi_for_python3x/utils/json/__init__.py b/novoapi_for_python3x/utils/json/__init__.py deleted file mode 100644 index 75d0b5f..0000000 --- a/novoapi_for_python3x/utils/json/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python - -## from https://stackoverflow.com/questions/1447287/format-floats-with-standard-json-module -class PrettyFloat(float): - def __repr__(self): - return '%.15g' % self - -def pretty_floats(obj): - if isinstance(obj, float): - return PrettyFloat(obj) - elif isinstance(obj, dict): - return dict((k, pretty_floats(v)) for k, v in obj.items()) - elif isinstance(obj, (list, tuple)): - return map(pretty_floats, obj) - return obj - -def rounded_floats(obj, ndigits=15): - if isinstance(obj, float): - return PrettyFloat(round(obj, ndigits)) - elif isinstance(obj, dict): - return dict((k, rounded_floats(v, ndigits)) for k, v in obj.items()) - elif isinstance(obj, (list, tuple)): - return map(lambda o: rounded_floats(o, ndigits), obj) - return obj - diff --git a/novoapi_for_python3x/utils/json/__pycache__/__init__.cpython-36.pyc b/novoapi_for_python3x/utils/json/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index e2c786a..0000000 Binary files a/novoapi_for_python3x/utils/json/__pycache__/__init__.cpython-36.pyc and /dev/null differ
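
Note: forced_alignment_novo.py and the novoapi_for_python3x package are both removed by this diff. For reference, the call sequence they implemented survives in the deleted sources above (session.Recognizer, setgrammar, recognize_wav); a minimal sketch of that flow follows, assuming a Python 3 port of the upstream novoapi package (https://bitbucket.org/novolanguage/python-novo-api) is importable. The credentials, grammar, and wav path are placeholders, not working values.

    # Minimal sketch, not a working script: reconstructs the deleted
    # forced_alignment_novo.py flow against the novoapi backend.
    import json

    from novoapi.backend import session

    # A one-word confusion-network grammar in the "1.0" format that
    # setgrammar() validates against grammar_schema_v10 above.
    grammar = {
        "type": "confusion_network",
        "version": "1.0",
        "data": {
            "kind": "sequence",
            "elements": [{
                "kind": "word",
                "label": "one",
                "pronunciation": [{"phones": ["wv", "a1", "n"], "id": 0}],
            }],
        },
        "return_objects": ["grammar"],
        "phoneset": "novo70",
    }

    rec = session.Recognizer(grammar_version="1.0", lang="nl",
                             user="<username>", password="<password>",  # placeholders
                             keepopen=True)
    rec.setgrammar(grammar)
    # recognize_wav() expects mono 16-bit PCM at 16 kHz (checked in session.py
    # above) and returns an asr.segments.Segmentation.
    result = rec.recognize_wav("onetwothree.wav")  # placeholder path
    print(result.sentence())
    print(json.dumps(result.export(), indent=4))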