diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo
index 36c4ef1..a64dccd 100644
Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ
diff --git a/acoustic_model.sln b/acoustic_model.sln
index 7d8fcbe..406d9e5 100644
--- a/acoustic_model.sln
+++ b/acoustic_model.sln
@@ -16,6 +16,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
+ reus-test\reus-test.py = reus-test\reus-test.py
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc
index 8898cb1..ef367cd 100644
Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ
diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj
index c24e77d..17163f2 100644
--- a/acoustic_model/acoustic_model.pyproj
+++ b/acoustic_model/acoustic_model.pyproj
@@ -4,8 +4,7 @@
2.0
4d8c8573-32f0-4a62-9e62-3ce5cc680390
.
-
-
+ forced_aligner_comparison.py
.
@@ -36,6 +35,9 @@
Code
+
+ Code
+
Code
diff --git a/acoustic_model/acoustic_model_functions.py b/acoustic_model/acoustic_model_function.py
similarity index 100%
rename from acoustic_model/acoustic_model_functions.py
rename to acoustic_model/acoustic_model_function.py
diff --git a/acoustic_model/check_novoapi.py b/acoustic_model/check_novoapi.py
index e70d754..96d8e8f 100644
--- a/acoustic_model/check_novoapi.py
+++ b/acoustic_model/check_novoapi.py
@@ -10,14 +10,13 @@ import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
-
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import novoapi
import defaultfiles as default
sys.path.append(default.forced_alignment_module_dir)
-from forced_alignment import pyhtk, convert_phone_set
+from forced_alignment import convert_phone_set
#import acoustic_model_functions as am_func
import convert_xsampa2ipa
import novoapi_functions
@@ -47,10 +46,6 @@ david_suggestion = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'w']
## read pronunciation variants.
stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
df = pd.read_excel(stimmen_transcription_, 'frequency')
-#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
-# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
-# if not ipa_converted == ipa:
-# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
transcription_ipa = list(df['IPA'])
# transcription mistake?
@@ -63,6 +58,7 @@ for ipa in transcription_ipa:
ipa = ipa.replace(':', 'ː')
ipa = convert_phone_set.split_ipa(ipa)
+ # list of phones not in novo70 phoneset.
not_in_novo70_ = [phone for phone in ipa
if not phone in phoneset_ipa and not phone in david_suggestion]
not_in_novo70_ = [phone.replace('sp', '') for phone in not_in_novo70_]
@@ -106,6 +102,10 @@ df = pd.read_excel(stimmen_transcription_, 'original')
# mapping from ipa to xsampa
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
+#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
+# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
+# if not ipa_converted == ipa:
+# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
ipas = []
famehtks = []
@@ -153,12 +153,12 @@ for word in word_list:
## ===== forced alignment =====
-reus_dir = r'C:\OneDrive\Desktop\Reus'
+rozen_dir = r'c:\Users\Aki\source\repos\acoustic_model\rozen-test'
if forced_alignment_novo70:
Results = pd.DataFrame(index=[],
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
#for word in word_list:
- for word in ['Reus']:
+ for word in ['Rozen']:
# pronunciation variants top 3
df_per_word_ = df_per_word[df_per_word['word']==word]
df_per_word_ = df_per_word_.sort_values('frequency', ascending=False)
@@ -208,37 +208,35 @@ if forced_alignment_novo70:
wav_file = os.path.join(default.stimmen_wav_dir, filename)
if os.path.exists(wav_file):
# for Martijn
- #shutil.copy(wav_file, os.path.join(reus_dir, filename))
+ shutil.copy(wav_file, os.path.join(rozen_dir, filename))
- pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
- result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
- result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
- result_ = pd.Series([
- sample['filename'],
- sample['word'],
- sample['xsampa'],
- sample['ipa'],
- ' '.join(result_ipa),
- ' '.join(result_novo70),
- llh
- ], index=results.columns)
- results = results.append(result_, ignore_index = True)
- print('{0}/{1}: answer {2} - prediction {3}'.format(
- i+1, len(samples), result_['ipa'], result_['result_ipa']))
- results.to_excel(os.path.join(reus_dir, 'results.xlsx'), encoding="utf-8")
- if len(results) > 0:
- Results = Results.append(results, ignore_index = True)
- Results.to_excel(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
+ # pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
+ # result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
+ # result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
+ # result_ = pd.Series([
+ # sample['filename'],
+ # sample['word'],
+ # sample['xsampa'],
+ # sample['ipa'],
+ # ' '.join(result_ipa),
+ # ' '.join(result_novo70),
+ # llh
+ # ], index=results.columns)
+ # results = results.append(result_, ignore_index = True)
+ # print('{0}/{1}: answer {2} - prediction {3}'.format(
+ # i+1, len(samples), result_['ipa'], result_['result_ipa']))
+ # #results.to_excel(os.path.join(default.stimmen_dir, 'results.xlsx'), encoding="utf-8")
+ #if len(results) > 0:
+ # Results = Results.append(results, ignore_index = True)
+ #Results.to_excel(os.path.join(default.stimmen_result_novoapi_dir, 'Results.xlsx'), encoding="utf-8")
else:
- Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
+ Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_result_novoapi_dir, 'Results.xlsx'), encoding="utf-8")
Results = pd.read_excel(Results_xlsx, 'Sheet1')
## ===== analysis =====
-#result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi')
#for word in word_list:
# if not word == 'Oog':
-
# Results_ = Results[Results['word'] == word]
# y_true = list(Results_['ipa'])
# y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])]
@@ -249,4 +247,4 @@ else:
# plt.figure()
# output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False)
# #plt.show()
-# plt.savefig(os.path.join(result_novoapi_dir, word + '.png'))
\ No newline at end of file
+# plt.savefig(os.path.join(default.stimmen_result_novoapi_dir, word + '.png'))
\ No newline at end of file
diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py
index b9ab0ab..f53100f 100644
--- a/acoustic_model/defaultfiles.py
+++ b/acoustic_model/defaultfiles.py
@@ -31,6 +31,12 @@ ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
+htk_config_dir = r'c:\Users\Aki\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
+config_hvite = os.path.join(htk_config_dir, 'config.HVite')
+#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
+acoustic_model = r'c:\cygwin64\home\Aki\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
+phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
+
WSL_dir = r'C:\OneDrive\WSL'
fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
fame_s5_dir = os.path.join(fame_dir, 's5')
@@ -43,6 +49,7 @@ stimmen_data_dir = os.path.join(stimmen_dir, 'data')
#stimmen_wav_dir = os.path.join(stimmen_dir, 'wav')
# 16 kHz
stimmen_wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen'
+stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi')
stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx')
phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
diff --git a/acoustic_model/forced_aligner_comparison.py b/acoustic_model/forced_aligner_comparison.py
new file mode 100644
index 0000000..d9d34a4
--- /dev/null
+++ b/acoustic_model/forced_aligner_comparison.py
@@ -0,0 +1,42 @@
+import os
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+import sys
+
+import defaultfiles as default
+sys.path.append(default.forced_alignment_module_dir)
+from forced_alignment import pyhtk, convert_phone_set, scripts
+
+reus_dir = r'c:\Users\Aki\source\repos\acoustic_model\reus-test'
+wav_dir = reus_dir
+wav_files = ['reus1008-reus.wav',
+             'reus1167-man.wav',
+             'reus3768-mantsje.wav']
+
+word = 'reus'
+pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']
+# For every recording: write a dictionary and a label file next to it, then run HVite.
+for wav_file in wav_files:
+    file_lab = os.path.join(reus_dir, wav_file.replace('.wav', '.lab'))
+    file_dic = os.path.join(reus_dir, wav_file.replace('.wav', '.dic'))
+    file_txt = os.path.join(reus_dir, wav_file.replace('.wav', '.txt'))
+
+    # output htk dict file: one line per pronunciation variant, all keyed by `word`.
+    with open(file_dic, 'w', encoding="utf-8") as f:
+        for ipa in pronunciation_ipa:
+            cgn = convert_phone_set.ipa2cgn([ipa.replace(':', 'ː')])
+            barbara = convert_phone_set.cgn2barbara(cgn)
+            f.write(word.upper() + '\t' + barbara + '\n')
+
+    # output htk label file.
+    pyhtk._create_label_file(word, file_lab)
+    # run HVite on this recording; file_txt is passed as the -i argument.
+    scripts.run_command([
+        'HVite','-T', '1',
+        '-a',
+        '-C', default.config_hvite,
+        '-H', default.acoustic_model,
+        '-m',
+        '-i', file_txt,
+        #'-S', script_file,
+        file_dic, default.phonelist_txt, os.path.join(wav_dir, wav_file)
+    ])
\ No newline at end of file
diff --git a/acoustic_model/novoapi_functions.py b/acoustic_model/novoapi_functions.py
index 5c76f6a..0c72b45 100644
--- a/acoustic_model/novoapi_functions.py
+++ b/acoustic_model/novoapi_functions.py
@@ -7,7 +7,7 @@ import json
from novoapi.backend import session
import os
-os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+#os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import defaultfiles as default
diff --git a/acoustic_model/acoustic_model.py b/acoustic_model/train_hmm_fame.py
similarity index 100%
rename from acoustic_model/acoustic_model.py
rename to acoustic_model/train_hmm_fame.py
diff --git a/reus-test/check_novoapi.zip b/reus-test/check_novoapi.zip
new file mode 100644
index 0000000..4cc1a68
Binary files /dev/null and b/reus-test/check_novoapi.zip differ
diff --git a/reus-test/reus1008-reus.dic b/reus-test/reus1008-reus.dic
new file mode 100644
index 0000000..4d22a33
--- /dev/null
+++ b/reus-test/reus1008-reus.dic
@@ -0,0 +1,3 @@
+REUS r eu s
+REUS m ac n
+REUS m ac n t s j @
diff --git a/reus-test/reus1008-reus.lab b/reus-test/reus1008-reus.lab
new file mode 100644
index 0000000..0475f18
--- /dev/null
+++ b/reus-test/reus1008-reus.lab
@@ -0,0 +1 @@
+REUS
diff --git a/reus-test/reus1008-reus.txt b/reus-test/reus1008-reus.txt
new file mode 100644
index 0000000..9726c94
--- /dev/null
+++ b/reus-test/reus1008-reus.txt
@@ -0,0 +1,6 @@
+#!MLF!#
+"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1008-reus.rec"
+0 9700000 r -12463.852539 REUS
+9700000 12800000 eu -3622.108887
+12800000 26250001 s -17303.216797
+.
diff --git a/reus-test/reus1167-man.dic b/reus-test/reus1167-man.dic
new file mode 100644
index 0000000..4d22a33
--- /dev/null
+++ b/reus-test/reus1167-man.dic
@@ -0,0 +1,3 @@
+REUS r eu s
+REUS m ac n
+REUS m ac n t s j @
diff --git a/reus-test/reus1167-man.lab b/reus-test/reus1167-man.lab
new file mode 100644
index 0000000..0475f18
--- /dev/null
+++ b/reus-test/reus1167-man.lab
@@ -0,0 +1 @@
+REUS
diff --git a/reus-test/reus1167-man.txt b/reus-test/reus1167-man.txt
new file mode 100644
index 0000000..06ad7b8
--- /dev/null
+++ b/reus-test/reus1167-man.txt
@@ -0,0 +1,10 @@
+#!MLF!#
+"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1167-man.rec"
+0 150000 m -230.057571 REUS
+150000 300000 ac -250.994858
+300000 450000 n -202.377716
+450000 4600000 t -5128.984375
+4600000 5050000 s -711.338501
+5050000 5450000 j -564.730591
+5450000 16049999 @ -13249.787109
+.
diff --git a/reus-test/reus3768-mantsje.dic b/reus-test/reus3768-mantsje.dic
new file mode 100644
index 0000000..4d22a33
--- /dev/null
+++ b/reus-test/reus3768-mantsje.dic
@@ -0,0 +1,3 @@
+REUS r eu s
+REUS m ac n
+REUS m ac n t s j @
diff --git a/reus-test/reus3768-mantsje.lab b/reus-test/reus3768-mantsje.lab
new file mode 100644
index 0000000..0475f18
--- /dev/null
+++ b/reus-test/reus3768-mantsje.lab
@@ -0,0 +1 @@
+REUS
diff --git a/reus-test/reus3768-mantsje.txt b/reus-test/reus3768-mantsje.txt
new file mode 100644
index 0000000..8e2bc08
--- /dev/null
+++ b/reus-test/reus3768-mantsje.txt
@@ -0,0 +1,10 @@
+#!MLF!#
+"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus3768-mantsje.rec"
+0 150000 m -217.347229 REUS
+150000 1150000 ac -1266.293579
+1150000 1650000 n -583.382568
+1650000 11100000 t -11259.270508
+11100000 11250000 s -247.939255
+11250000 11550000 j -445.511444
+11550000 24150000 @ -16769.048828
+.
diff --git a/rozen-test/pg_rozen_100_jko5r.wav b/rozen-test/pg_rozen_100_jko5r.wav
new file mode 100644
index 0000000..02027e9
Binary files /dev/null and b/rozen-test/pg_rozen_100_jko5r.wav differ
diff --git a/rozen-test/pg_rozen_113_o9kzs.wav b/rozen-test/pg_rozen_113_o9kzs.wav
new file mode 100644
index 0000000..7127250
Binary files /dev/null and b/rozen-test/pg_rozen_113_o9kzs.wav differ
diff --git a/rozen-test/pg_rozen_1296_zbve2.wav b/rozen-test/pg_rozen_1296_zbve2.wav
new file mode 100644
index 0000000..a6bcbb3
Binary files /dev/null and b/rozen-test/pg_rozen_1296_zbve2.wav differ
diff --git a/rozen-test/pg_rozen_1709_kq9xr.wav b/rozen-test/pg_rozen_1709_kq9xr.wav
new file mode 100644
index 0000000..c457bdd
Binary files /dev/null and b/rozen-test/pg_rozen_1709_kq9xr.wav differ
diff --git a/rozen-test/pg_rozen_241_bahqi.wav b/rozen-test/pg_rozen_241_bahqi.wav
new file mode 100644
index 0000000..0a3ec97
Binary files /dev/null and b/rozen-test/pg_rozen_241_bahqi.wav differ
diff --git a/rozen-test/pg_rozen_5502_q79fd.wav b/rozen-test/pg_rozen_5502_q79fd.wav
new file mode 100644
index 0000000..26e050c
Binary files /dev/null and b/rozen-test/pg_rozen_5502_q79fd.wav differ
diff --git a/rozen-test/pg_rozen_632_2m04y.wav b/rozen-test/pg_rozen_632_2m04y.wav
new file mode 100644
index 0000000..e4497e0
Binary files /dev/null and b/rozen-test/pg_rozen_632_2m04y.wav differ
diff --git a/rozen-test/pg_rozen_911_1zvda.wav b/rozen-test/pg_rozen_911_1zvda.wav
new file mode 100644
index 0000000..a739fac
Binary files /dev/null and b/rozen-test/pg_rozen_911_1zvda.wav differ
diff --git a/rozen-test/rozen-test.py b/rozen-test/rozen-test.py
new file mode 100644
index 0000000..379acd4
--- /dev/null
+++ b/rozen-test/rozen-test.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+import os
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+
+import argparse
+import json
+
+from novoapi.backend import session
+
+p = argparse.ArgumentParser()
+p.add_argument("--user", default='martijn.wieling')
+p.add_argument("--password", default='xxxxx')
+args = p.parse_args()
+# NOTE(review): the 'xxxxx' password default looks like a placeholder - pass --password explicitly.
+rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True)
+
+# Grammar handed to the recognizer: one word labelled "reus" with three
+# candidate pronunciations (ids 0-2), each a list of novo70 phone symbols.
+grammar = {
+    "type": "confusion_network",
+    "version": "1.0",
+    "data": {
+        "kind": "sequence",
+        "elements": [
+            {
+                "kind": "word",
+                "pronunciation": [
+                    {
+                        "phones": [
+                            "r",
+                            "eu0",
+                            "s"
+                        ],
+                        "id": 0
+                    },
+                    {
+                        "phones": [
+                            "m",
+                            "a0",
+                            "n"
+                        ],
+                        "id": 1
+                    },
+                    {
+                        "phones": [
+                            "m",
+                            "a0",
+                            "n",
+                            "t",
+                            "s",
+                            "y",
+                            "ax"
+                        ],
+                        "id": 2
+                    }
+                ],
+                "label": "reus"
+            }
+        ]
+    },
+    "return_objects": [
+        "grammar"
+    ],
+    "phoneset": "novo70"
+}
+
+res = rec.setgrammar(grammar)
+#print "Set grammar result", res
+
+
+## === novoapi/backend/session.py ===
+#import wave
+#import time
+#from novoapi.backend.session import rpcid, segmentation
+
+#wavf = "reus1008-reus.wav"
+#w = wave.open(wavf, 'r')
+#nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
+#buf = w.readframes(nframes)
+#w.close()
+
+#buffer_size = 4096
+#nbytes_sent = 0
+#start = time.time()
+#for j in range(0, len(buf), buffer_size):
+# audio_packet = buf[j:j + buffer_size]
+# nbytes_sent += len(audio_packet)
+# rec.conn.send_binary(audio_packet)
+#rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
+#print(rpcid.next())
+#rec.last_message = rec.conn.recv()
+#message = json.loads(rec.last_message)
+#result = session.segmentation(message["result"]["words"])
+#result.export()
+## ====================================
+
+def result2pronunciation(result, word):
+    """Return (phone labels, llh) of the first entry of `result` whose 'label' equals `word`."""
+    matches = [entry for entry in result if entry['label'] == word]
+    if not matches:
+        raise IndexError('no entry labelled {!r} in result'.format(word))
+    llh, phones = matches[0]['llh'], matches[0]['phones']
+    return [phone['label'] for phone in phones], llh
+
+# NOTE(review): the (pronunciation, llh) tuples returned by result2pronunciation below are discarded.
+res = rec.recognize_wav("reus1008-reus.wav")
+#print "\n\n\nThe pronounced word in reus1008-reus.wav is: REUS\n\n"
+#print "Recognition result:", json.dumps(res.export(), indent=4)
+result2pronunciation(res.export(), 'reus')
+
+#print "\n\n\nThe pronounced word in reus1167-man.wav is: MAN\n\n"
+res2 = rec.recognize_wav("reus1167-man.wav")
+#print "Recognition result:", json.dumps(res2.export(), indent=4)
+result2pronunciation(res2.export(), 'reus')
+
+#print "\n\n\nThe pronounced word in reus3768-mantsje.wav is: MANTSJE\n\n"
+res3 = rec.recognize_wav("reus3768-mantsje.wav")
+#print "Recognition result:", json.dumps(res3.export(), indent=4)
+result2pronunciation(res3.export(), 'reus')