Compare commits


No commits in common. "de5c9cecb94ce64206dc409b5d7773f5f529524b" and "6edde06a4fdcf239bae7b01f6e389e00ee270647" have entirely different histories.

25 changed files with 14775 additions and 345 deletions

Binary file not shown.

HCompV.scp (new file, 13,152 lines; diff suppressed because it is too large)


@@ -10,6 +10,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
..\toolbox\evaluation.py = ..\toolbox\evaluation.py
..\forced_alignment\forced_alignment\forced_alignment.pyproj = ..\forced_alignment\forced_alignment\forced_alignment.pyproj
..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
..\forced_alignment\forced_alignment\lexicon.py = ..\forced_alignment\forced_alignment\lexicon.py
..\forced_alignment\forced_alignment\mlf.py = ..\forced_alignment\forced_alignment\mlf.py
@@ -17,7 +18,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
..\forced_alignment\forced_alignment\test_environment.py = ..\forced_alignment\forced_alignment\test_environment.py
EndProjectSection


@@ -4,8 +4,7 @@
<SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
<ProjectHome>.</ProjectHome>
<StartupFile>
</StartupFile>
<StartupFile>check_novoapi.py</StartupFile>
<SearchPath>
</SearchPath>
<WorkingDirectory>.</WorkingDirectory>


@@ -3,31 +3,23 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import sys
import csv
from collections import Counter
import random
import shutil
#import subprocess
#from collections import Counter
#import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import matplotlib.pyplot as plt
#from sklearn.metrics import confusion_matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import novoapi
import defaultfiles as default
sys.path.append(default.forced_alignment_module_dir)
from forced_alignment import pyhtk, convert_phone_set
#import acoustic_model_functions as am_func
import acoustic_model_functions as am_func
import convert_xsampa2ipa
import defaultfiles as default
from forced_alignment import pyhtk, convert_phone_set
import novoapi
import novoapi_functions
sys.path.append(default.accent_classification_dir)
import output_confusion_matrix
## procedure
forced_alignment_novo70 = True
balance_sample_numbers = False
## ===== load novo phoneset =====
phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa = novoapi_functions.load_phonset()
@@ -147,106 +139,6 @@ df_per_word = pd.DataFrame(index=[], columns=df_samples.keys())
for word in word_list:
df_samples_ = df_samples[df_samples['word']==word]
df_samples_ = df_samples_[df_samples_['frequency']>2]
df_samples_ = df_samples_[df_samples_['frequency']>1]
df_per_word = df_per_word.append(df_samples_, ignore_index=True)
#df_per_word.to_excel(os.path.join(default.stimmen_dir, 'pronunciation_variants_novo70.xlsx'), encoding="utf-8")
## ===== forced alignment =====
reus_dir = r'C:\OneDrive\Desktop\Reus'
if forced_alignment_novo70:
Results = pd.DataFrame(index=[],
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
#for word in word_list:
for word in ['Reus']:
# pronunciation variants top 3
df_per_word_ = df_per_word[df_per_word['word']==word]
df_per_word_ = df_per_word_.sort_values('frequency', ascending=False)
if len(df_per_word_) < 3: # pauw, rozen
pronunciation_ipa = list(df_per_word_['ipa'])
elif word=='Reuzenrad':
pronunciation_ipa = [
df_per_word_.iloc[0]['ipa'],
df_per_word_.iloc[1]['ipa'],
df_per_word_.iloc[2]['ipa'],
df_per_word_.iloc[3]['ipa']]
else:
# oog, oor, reus, roeiboot
pronunciation_ipa = [
df_per_word_.iloc[0]['ipa'],
df_per_word_.iloc[1]['ipa'],
df_per_word_.iloc[2]['ipa']]
#print("{0}: {1}".format(word, pronunciation_ipa))
# samples for the word
df_ = df[df['word']==word]
# samples in which all pronunciations are written in novo70.
samples = df_.query("ipa in @pronunciation_ipa")
## ===== balance sample numbers =====
if balance_sample_numbers:
c = Counter(samples['ipa'])
sample_num_list = [c[key] for key in c.keys()]
sample_num = np.min(sample_num_list)
samples_balanced = pd.DataFrame(index=[], columns=list(samples.keys()))
for key in c.keys():
samples_ = samples[samples['ipa'] == key]
samples_balanced = samples_balanced.append(samples_.sample(sample_num), ignore_index = True)
samples = samples_balanced
results = pd.DataFrame(index=[],
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
for i in range(0, len(samples)):
sample = samples.iloc[i]
filename = sample['filename']
wav_file = os.path.join(default.stimmen_wav_dir, filename)
if os.path.exists(wav_file):
# for Martijn
#shutil.copy(wav_file, os.path.join(reus_dir, filename))
pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
result_ = pd.Series([
sample['filename'],
sample['word'],
sample['xsampa'],
sample['ipa'],
' '.join(result_ipa),
' '.join(result_novo70),
llh
], index=results.columns)
results = results.append(result_, ignore_index = True)
print('{0}/{1}: answer {2} - prediction {3}'.format(
i+1, len(samples), result_['ipa'], result_['result_ipa']))
results.to_excel(os.path.join(reus_dir, 'results.xlsx'), encoding="utf-8")
if len(results) > 0:
Results = Results.append(results, ignore_index = True)
Results.to_excel(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
else:
Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
Results = pd.read_excel(Results_xlsx, 'Sheet1')
## ===== analysis =====
#result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi')
#for word in word_list:
# if not word == 'Oog':
# Results_ = Results[Results['word'] == word]
# y_true = list(Results_['ipa'])
# y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])]
# y_pred = [ipa.replace('ː', ':') for ipa in y_pred_]
# pronunciation_variants = list(set(y_true))
# cm = confusion_matrix(y_true, y_pred, labels=pronunciation_variants)
# plt.figure()
# output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False)
# #plt.show()
# plt.savefig(os.path.join(result_novoapi_dir, word + '.png'))
df_per_word.to_excel(os.path.join(default.stimmen_dir, 'pronunciation_variants_novo70.xlsx'), encoding="utf-8")


@@ -29,7 +29,6 @@ config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
repo_dir = r'C:\Users\Aki\source\repos'
ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
WSL_dir = r'C:\OneDrive\WSL'
fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')


@@ -6,8 +6,6 @@ import json
from novoapi.backend import session
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import defaultfiles as default
@@ -38,29 +36,10 @@ def load_phonset():
phoneset_novo70.append(novo70)
translation_key_ipa2novo70[ipa] = novo70
translation_key_novo702ipa[novo70] = ipa
# As per Nederlandse phoneset_aki.xlsx received from David
# [ɔː] oh / ohr # from ipa->novo70, only oh is used.
# [ɪː] ih / ihr # from ipa->novo70, only ih is used.
# [iː] iy
# [œː] uh
# [ɛː] eh
# [w] wv # in IPA written as ʋ.
extra_ipa = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'ʋ']
extra_novo70 = ['oh', 'ih', 'iy', 'uh', 'eh', 'wv']
for ipa, novo70 in zip(extra_ipa, extra_novo70):
phoneset_ipa.append(ipa)
phoneset_novo70.append(novo70)
translation_key_ipa2novo70[ipa] = novo70
translation_key_novo702ipa[novo70] = ipa
translation_key_novo702ipa['ohr'] = 'ɔː'
translation_key_novo702ipa['ihr'] = 'ɪː'
phoneset_ipa = np.unique(phoneset_ipa)
phoneset_novo70 = np.unique(phoneset_novo70)
return phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa
return
def multi_character_tokenize(line, multi_character_tokens):
@@ -191,9 +170,4 @@ def result2pronunciation(result, word):
phones = result_[0]['phones']
pronunciation_novo70 = [phone['label'] for phone in phones]
pronunciation_ipa = [novo702ipa(phone) for phone in pronunciation_novo70]
return pronunciation_ipa, pronunciation_novo70, llh
if __name__ == '__main__':
pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']
grammar = make_grammar('reus', pronunciation_ipa)
return pronunciation_ipa, pronunciation_novo70, llh
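For context, a minimal sketch of how the two translation dictionaries returned by load_phonset above can be used; the helper ipa_phones2novo70 is hypothetical, not part of the repository:

# Editor's sketch (hypothetical usage, assuming the four return values shown above).
import novoapi_functions

phoneset_ipa, phoneset_novo70, key_ipa2novo70, key_novo702ipa = novoapi_functions.load_phonset()

def ipa_phones2novo70(ipa_phones):
    # Look up each IPA phone in the translation table; keep unknown phones as-is.
    return [key_ipa2novo70.get(phone, phone) for phone in ipa_phones]

print(ipa_phones2novo70(['ɔː', 'ʋ']))  # ['oh', 'wv'], per the extra_ipa table above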

acoustic_model/script.txt (new file, 1,600 lines; diff suppressed because it is too large)


@@ -28,6 +28,7 @@ def print_info_tier(output, title, begin, end, label):
print >> output, '\t\t\ttext = "%s"' % label
#def print_tier(output, title, begin, end, segs, (format, formatter)):
def print_tier(output, title, begin, end, segs, format, formatter):
print >> output, '\titem [%d]:' % 0
print >> output, '\t\tclass = "IntervalTier"'
@@ -69,11 +70,8 @@ def seg2tg(fname, segments):
nr_tiers = 3
print_header(output, begin, end, nr_tiers)
#print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
#print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
#print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
output.close()
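For context: Python 3 removed tuple unpacking in function parameter lists (PEP 3113), which is why the (format, formatter) pair above had to become two plain parameters. A minimal standalone sketch of the rewrite (print_one is a made-up name for illustration):

import sys

# Python 2 only -- the tuple parameter was unpacked automatically:
#   def print_one(output, (fmt, formatter), value):
#       print >> output, fmt % formatter(value)

# Python 3 -- pass the two values as separate parameters:
def print_one(output, fmt, formatter, value):
    # Apply the formatter callback, then the printf-style format string.
    output.write((fmt % formatter(value)) + '\n')

print_one(sys.stdout, '%.3f', lambda x: x, 0.512)  # prints 0.512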


@@ -266,7 +266,7 @@ def test(data=None):
print("{0} validated not OK {1}".format(data, e.message))
else:
#print data, "validated OK"
print("{0} validated OK".format(data))
print("{} validated OK".format(data))
if __name__ == "__main__":


@@ -188,8 +188,7 @@ class Recognizer(object):
nbytes_sent = 0
start = time.time()
for j in range(0, len(buf), buffer_size):
#audio_packet = str(buf[j:j + buffer_size])
audio_packet = buf[j:j + buffer_size]
audio_packet = str(buf[j:j + buffer_size])
nbytes_sent += len(audio_packet)
self.conn.send_binary(audio_packet)
self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
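For context, the loop above streams the WAV payload to the server in fixed-size chunks; on Python 3 a slice of a bytes object is already bytes, so the old str(...) wrapper has to be dropped (see the porting notes below). A standalone sketch of the chunking logic, with the network call stubbed out:

buf = b'\x00' * 10000     # stand-in for the frames returned by wave.readframes()
buffer_size = 4096
nbytes_sent = 0
for j in range(0, len(buf), buffer_size):
    audio_packet = buf[j:j + buffer_size]  # bytes slice; no str() on Python 3
    nbytes_sent += len(audio_packet)
    # self.conn.send_binary(audio_packet)  # the actual websocket call in Recognizer
assert nbytes_sent == len(buf)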


@@ -1,64 +0,0 @@
novoapi (https://bitbucket.org/novolanguage/python-novo-api) is written in Python 2.7.
To install it under Python 3.x, the following points need to be modified.
- basestring --> str
- print xxx --> print('{}'.format(xxx)).
- import xxx --> from . import xxx
- except Exception, e --> except Exception as e
- remove tuple unpacking from function parameter lists.
Concretely...
=== novoapi\backend\__init__.py
#import session
from . import session
=== novoapi\backend\session.py
#except Exception, e:
except Exception as e:
#print self.last_message
print(self.last_message)
=== novoapi\asr\__init__.py
#import segments
#import spraaklab
from . import segments
from . import spraaklab
=== novoapi\asr\segments\praat.py
#print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))
#print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))
#print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))
print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)
=== novoapi\asr\spraaklab\__init__.py ===
#import schema
from . import schema
=== novoapi\asr\spraaklab\schema.py ===
#if isinstance(object, basestring):
if isinstance(object, str):
except jsonschema.ValidationError as e:
#print data, "validated not OK", e.message
print("{0} validated not OK {1}".format(data, e.message))
else:
#print data, "validated OK"
print("{0} validated OK".format(data))
Then, to make it work correctly, a few more modifications are needed.
When a wav file is read with the wave module, the output (named buf) is a string of bytes on Python 2.7, while on Python 3.6 it is a bytes object.
Therefore...
=== novoapi\backend\session.py
#audio_packet = str(buf[j:j + buffer_size])
audio_packet = buf[j:j + buffer_size]
Also, because of this difference, Segment.__repr__ (novoapi\asr\segments\segments.py) does not work.
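A minimal sketch of the difference ('test.wav' is a placeholder path):

import wave

w = wave.open('test.wav', 'r')
buf = w.readframes(w.getnframes())
w.close()
print(type(buf))  # <type 'str'> on Python 2.7, <class 'bytes'> on Python 3.6

# On Python 3, str() on a bytes slice yields the literal text "b'...'",
# which corrupts the audio stream, so packets must stay as bytes:
audio_packet = buf[0:4096]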


@@ -1,119 +0,0 @@
#!/usr/bin/env python
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import argparse
import json
from novoapi.backend import session
p = argparse.ArgumentParser()
p.add_argument("--user", default='martijn.wieling')
p.add_argument("--password", default='xxxxx')
args = p.parse_args()
rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True)
grammar = {
"type": "confusion_network",
"version": "1.0",
"data": {
"kind": "sequence",
"elements": [
{
"kind": "word",
"pronunciation": [
{
"phones": [
"r",
"eu0",
"s"
],
"id": 0
}
,
{
"phones": [
"m",
"a0",
"n"
],
"id": 1
}
,
{
"phones": [
"m",
"a0",
"n",
"t",
"s",
"y",
"ax"
],
"id": 2
}
],
"label": "reus"
}
]
},
"return_objects": [
"grammar"
],
"phoneset": "novo70"
}
res = rec.setgrammar(grammar)
#print "Set grammar result", res
## === novoapi/backend/session.py ===
#import wave
#import time
#from novoapi.backend.session import rpcid, segmentation
#wavf = "reus1008-reus.wav"
#w = wave.open(wavf, 'r')
#nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
#buf = w.readframes(nframes)
#w.close()
#buffer_size = 4096
#nbytes_sent = 0
#start = time.time()
#for j in range(0, len(buf), buffer_size):
# audio_packet = buf[j:j + buffer_size]
# nbytes_sent += len(audio_packet)
# rec.conn.send_binary(audio_packet)
#rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
#print(rpcid.next())
#rec.last_message = rec.conn.recv()
#message = json.loads(rec.last_message)
#result = session.segmentation(message["result"]["words"])
#result.export()
## ====================================
def result2pronunciation(result, word):
#result_ = res.export()[1]
result_ = [result[i] for i in range(len(result)) if result[i]['label'] == word]
llh = result_[0]['llh']
phones = result_[0]['phones']
pronunciation = [phone['label'] for phone in phones]
return pronunciation, llh
res = rec.recognize_wav("reus1008-reus.wav")
#print "\n\n\nThe pronounced word in reus1008-reus.wav is: REUS\n\n"
#print "Recognition result:", json.dumps(res.export(), indent=4)
result2pronunciation(res.export(), 'reus')
#print "\n\n\nThe pronounced word in reus1167-man.wav is: MAN\n\n"
res2 = rec.recognize_wav("reus1167-man.wav")
#print "Recognition result:", json.dumps(res2.export(), indent=4)
result2pronunciation(res2.export(), 'reus')
#print "\n\n\nThe pronounced word in reus3768-mantsje.wav is: MANTSJE\n\n"
res3 = rec.recognize_wav("reus3768-mantsje.wav")
#print "Recognition result:", json.dumps(res3.export(), indent=4)
result2pronunciation(res3.export(), 'reus')
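For context, the confusion-network grammar above has a regular shape (a sequence of word elements, each carrying numbered pronunciation variants), so it can be generated instead of written out by hand. A minimal sketch; make_confusion_network_grammar is a hypothetical helper (the repository's own equivalent appears to be make_grammar in novoapi_functions.py):

def make_confusion_network_grammar(word, pronunciations):
    # pronunciations: list of novo70 phone lists, e.g. [['r', 'eu0', 's'], ...]
    return {
        "type": "confusion_network",
        "version": "1.0",
        "data": {
            "kind": "sequence",
            "elements": [{
                "kind": "word",
                "pronunciation": [{"phones": phones, "id": i}
                                  for i, phones in enumerate(pronunciations)],
                "label": word,
            }],
        },
        "return_objects": ["grammar"],
        "phoneset": "novo70",
    }

# Reproduces the literal grammar above:
grammar = make_confusion_network_grammar(
    'reus',
    [['r', 'eu0', 's'], ['m', 'a0', 'n'], ['m', 'a0', 'n', 't', 's', 'y', 'ax']])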

Binary file not shown.

Binary file not shown.

Binary file not shown.