Merge branch 'master' of https://git.webhosting.rug.nl/p280427/acoustic_model

2019-01-21 21:57:46 +01:00
parent de5c9cecb9 24ac56ac0e
commit 04a862b2fd
29 changed files with 242 additions and 35 deletions
--- a/.vs/acoustic_model/v15/.suo
+++ b/.vs/acoustic_model/v15/.suo
--- a/acoustic_model.sln
+++ b/acoustic_model.sln
@@ -16,6 +16,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 		..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py
 		..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
 		..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
 		reus-test\reus-test.py = reus-test\reus-test.py
 		..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
 		..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
 		..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
--- a/acoustic_model/pycache/defaultfiles.cpython-36.pyc
+++ b/acoustic_model/pycache/defaultfiles.cpython-36.pyc
--- a/acoustic_model/acoustic_model.pyproj
+++ b/acoustic_model/acoustic_model.pyproj
@@ -4,8 +4,7 @@
    <SchemaVersion>2.0</SchemaVersion>
    <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
    <ProjectHome>.</ProjectHome>
-    <StartupFile>
+    <StartupFile>forced_aligner_comparison.py</StartupFile>
-    </StartupFile>
    <SearchPath>
    </SearchPath>
    <WorkingDirectory>.</WorkingDirectory>
@@ -36,6 +35,9 @@
    <Compile Include="fa_test.py">
      <SubType>Code</SubType>
    </Compile>
    <Compile Include="forced_aligner_comparison.py">
      <SubType>Code</SubType>
    </Compile>
    <Compile Include="novoapi_forced_alignment.py">
      <SubType>Code</SubType>
    </Compile>
--- a/acoustic_model/acoustic_model_functions.py
+++ b/acoustic_model/acoustic_model_functions.py
--- a/acoustic_model/check_novoapi.py
+++ b/acoustic_model/check_novoapi.py
@@ -10,14 +10,13 @@ import shutil
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 from sklearn.metrics import confusion_matrix
 from sklearn.metrics import accuracy_score
 import novoapi 
 import defaultfiles as default
 sys.path.append(default.forced_alignment_module_dir)
-from forced_alignment import pyhtk, convert_phone_set
+from forced_alignment import convert_phone_set
 #import acoustic_model_functions as am_func
 import convert_xsampa2ipa
 import novoapi_functions
@@ -47,10 +46,6 @@ david_suggestion = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'w']
 ## read pronunciation variants.
 stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
 df = pd.read_excel(stimmen_transcription_, 'frequency')
 #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
 #    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
 #    if not ipa_converted == ipa:
 #        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
 transcription_ipa = list(df['IPA'])
 # transcription mistake?
@@ -63,6 +58,7 @@ for ipa in transcription_ipa:
 	ipa = ipa.replace(':', 'ː')
 	ipa = convert_phone_set.split_ipa(ipa)
 	# list of phones not in novo70 phoneset.
 	not_in_novo70_ = [phone for phone in ipa 
 				   if not phone in phoneset_ipa and not phone in david_suggestion]
 	not_in_novo70_ = [phone.replace('sp', '') for phone in not_in_novo70_]
@@ -106,6 +102,10 @@ df = pd.read_excel(stimmen_transcription_, 'original')
 # mapping from xsampa to ipa
 mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
 #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
 #    ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
 #    if not ipa_converted == ipa:
 #        print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
 ipas     = []
 famehtks = []
@@ -153,12 +153,12 @@ for word in word_list:
 ## ===== forced alignment =====
-reus_dir = r'C:\OneDrive\Desktop\Reus'
+rozen_dir = r'c:\Users\Aki\source\repos\acoustic_model\rozen-test'
 if forced_alignment_novo70:
 	Results = pd.DataFrame(index=[],
 		columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
 	#for word in word_list:
-	for word in ['Reus']:
+	for word in ['Rozen']:
 		# pronunciation variants top 3
 		df_per_word_ = df_per_word[df_per_word['word']==word]
 		df_per_word_ = df_per_word_.sort_values('frequency', ascending=False)
@@ -208,37 +208,35 @@ if forced_alignment_novo70:
 			wav_file = os.path.join(default.stimmen_wav_dir, filename)
 			if os.path.exists(wav_file):
 				# for Martijn
-				#shutil.copy(wav_file, os.path.join(reus_dir, filename))
+				shutil.copy(wav_file, os.path.join(rozen_dir, filename))
-				pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
+		#		pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
-				result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
+		#		result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
-				result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
+		#		result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
-				result_ = pd.Series([
+		#		result_ = pd.Series([
-					sample['filename'],
+		#			sample['filename'],
-					sample['word'],
+		#			sample['word'],
-					sample['xsampa'],
+		#			sample['xsampa'],
-					sample['ipa'],
+		#			sample['ipa'],
-					' '.join(result_ipa),
+		#			' '.join(result_ipa),
-					' '.join(result_novo70),
+		#			' '.join(result_novo70),
-					llh
+		#			llh
-					], index=results.columns)
+		#			], index=results.columns)
-				results = results.append(result_, ignore_index = True)
+		#		results = results.append(result_, ignore_index = True)
-				print('{0}/{1}: answer {2} - prediction {3}'.format( 
+		#		print('{0}/{1}: answer {2} - prediction {3}'.format( 
-			 i+1, len(samples), result_['ipa'], result_['result_ipa']))
+		#	 i+1, len(samples), result_['ipa'], result_['result_ipa']))
-			results.to_excel(os.path.join(reus_dir, 'results.xlsx'), encoding="utf-8")
+		#	#results.to_excel(os.path.join(default.stimmen_dir, 'results.xlsx'), encoding="utf-8")
-		if len(results) > 0:
+		#if len(results) > 0:
-			Results = Results.append(results, ignore_index = True)
+		#	Results = Results.append(results, ignore_index = True)
-		Results.to_excel(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
+		#Results.to_excel(os.path.join(default.stimmen_result_novoapi_dir, 'Results.xlsx'), encoding="utf-8")
 else:
-	Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
+	Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_result_novoapi_dir, 'Results.xlsx'), encoding="utf-8")
 	Results = pd.read_excel(Results_xlsx, 'Sheet1')
 ## ===== analysis =====
 #result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi')
 #for word in word_list:
 #	if not word == 'Oog':
 #		Results_ = Results[Results['word'] == word]
 #		y_true  = list(Results_['ipa'])
 #		y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])]
@@ -249,4 +247,4 @@ else:
 #		plt.figure()
 #		output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False)
 #		#plt.show()
-#		plt.savefig(os.path.join(result_novoapi_dir, word + '.png'))
+#		plt.savefig(os.path.join(default.stimmen_result_novoapi_dir, word + '.png'))
--- a/acoustic_model/defaultfiles.py
+++ b/acoustic_model/defaultfiles.py
@@ -31,6 +31,12 @@ ipa_xsampa_converter_dir    = os.path.join(repo_dir, 'ipa-xsama-converter')
 forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
 accent_classification_dir   = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
 htk_config_dir = r'c:\Users\Aki\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
 config_hvite = os.path.join(htk_config_dir, 'config.HVite')
 #acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
 acoustic_model = r'c:\cygwin64\home\Aki\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
 phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
 WSL_dir   = r'C:\OneDrive\WSL'
 fame_dir        = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
 fame_s5_dir     = os.path.join(fame_dir, 's5')
@@ -43,6 +49,7 @@ stimmen_data_dir = os.path.join(stimmen_dir, 'data')
 #stimmen_wav_dir  = os.path.join(stimmen_dir, 'wav')
 # 16 kHz
 stimmen_wav_dir  = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen'
 stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi')
 stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx')
 phonelist_friesian_txt     = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
--- a/acoustic_model/forced_aligner_comparison.py
+++ b/acoustic_model/forced_aligner_comparison.py
@@ -0,0 +1,42 @@
 import os
 os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
 import sys
 import defaultfiles as default
 sys.path.append(default.forced_alignment_module_dir)
 from forced_alignment import pyhtk, convert_phone_set, scripts
 # Forced-alignment check with HTK's HVite: every recording listed below is
 # aligned against the same word entry, whose dictionary lists all candidate
 # pronunciations.
 # The recordings and the generated .lab/.dic/.txt files share this directory.
 reus_dir  = r'c:\Users\Aki\source\repos\acoustic_model\reus-test'
 wav_dir   = reus_dir
 # NOTE(review): the part after '-' in each file name presumably names the
 # variant that was actually spoken -- confirm against the corpus.
 wav_files = ['reus1008-reus.wav', 
 			 'reus1167-man.wav',
 			 'reus3768-mantsje.wav']
 # Target word and its candidate pronunciations (IPA, with ':' as length mark).
 word = 'reus'
 pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']
 for wav_file in wav_files:
 	# Per-recording file names, derived by swapping the '.wav' extension.
 	file_lab = os.path.join(reus_dir, wav_file.replace('.wav', '.lab'))
 	file_dic = os.path.join(reus_dir, wav_file.replace('.wav', '.dic'))
 	file_txt = os.path.join(reus_dir, wav_file.replace('.wav', '.txt'))
 	# output htk dict file
 	# One line per candidate: upper-cased word, a tab, then the phone string.
 	# The same candidate list is written for every recording.
 	with open(file_dic, 'w', encoding="utf-8") as f:
 		for ipa in pronunciation_ipa:
 			# ASCII ':' is replaced by the IPA length mark before the
 			# IPA -> CGN -> "barbara" phone-set conversion.
 			cgn = convert_phone_set.ipa2cgn([ipa.replace(':', 'ː')])
 			barbara = convert_phone_set.cgn2barbara(cgn)
 			f.write(word.upper() + '\t' + barbara + '\n')
 	# output htk label file.
 	pyhtk._create_label_file(word, file_lab)
 	# Run HVite on this recording. Flag meanings per the HTK Book:
 	#   -T 1  basic tracing        -a  align against the label file
 	#   -C    configuration file   -H  HMM definitions
 	#   -m    keep model-level boundaries in the output
 	#   -i    write the result to file_txt as a master label file
 	# Positional arguments: dictionary, phone list, then the wav file.
 	scripts.run_command([
 					'HVite','-T', '1', 
 					'-a', 
 					'-C', default.config_hvite,
 					'-H', default.acoustic_model, 
 					'-m', 
 					'-i', file_txt, 
 					#'-S', script_file, 
 					file_dic, default.phonelist_txt, os.path.join(wav_dir, wav_file)
 				])
--- a/acoustic_model/novoapi_functions.py
+++ b/acoustic_model/novoapi_functions.py
@@ -7,7 +7,7 @@ import json
 from novoapi.backend import session
 import os
-os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+#os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
 import defaultfiles as default
--- a/acoustic_model/train_hmm_fame.py
+++ b/acoustic_model/train_hmm_fame.py
--- a/reus-test/check_novoapi.zip
+++ b/reus-test/check_novoapi.zip
--- a/reus-test/reus1008-reus.dic
+++ b/reus-test/reus1008-reus.dic
@@ -0,0 +1,3 @@
 REUS	r eu s
 REUS	m ac n
 REUS	m ac n t s j @
--- a/reus-test/reus1008-reus.lab
+++ b/reus-test/reus1008-reus.lab
@@ -0,0 +1 @@
 REUS
--- a/reus-test/reus1008-reus.txt
+++ b/reus-test/reus1008-reus.txt
@@ -0,0 +1,6 @@
 #!MLF!#
 "c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1008-reus.rec"
 0 9700000 r -12463.852539 REUS
 9700000 12800000 eu -3622.108887
 12800000 26250001 s -17303.216797
 .
--- a/reus-test/reus1167-man.dic
+++ b/reus-test/reus1167-man.dic
@@ -0,0 +1,3 @@
 REUS	r eu s
 REUS	m ac n
 REUS	m ac n t s j @
--- a/reus-test/reus1167-man.lab
+++ b/reus-test/reus1167-man.lab
@@ -0,0 +1 @@
 REUS
--- a/reus-test/reus1167-man.txt
+++ b/reus-test/reus1167-man.txt
@@ -0,0 +1,10 @@
 #!MLF!#
 "c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1167-man.rec"
 0 150000 m -230.057571 REUS
 150000 300000 ac -250.994858
 300000 450000 n -202.377716
 450000 4600000 t -5128.984375
 4600000 5050000 s -711.338501
 5050000 5450000 j -564.730591
 5450000 16049999 @ -13249.787109
 .
--- a/reus-test/reus3768-mantsje.dic
+++ b/reus-test/reus3768-mantsje.dic
@@ -0,0 +1,3 @@
 REUS	r eu s
 REUS	m ac n
 REUS	m ac n t s j @
--- a/reus-test/reus3768-mantsje.lab
+++ b/reus-test/reus3768-mantsje.lab
@@ -0,0 +1 @@
 REUS
--- a/reus-test/reus3768-mantsje.txt
+++ b/reus-test/reus3768-mantsje.txt
@@ -0,0 +1,10 @@
 #!MLF!#
 "c:/Users/Aki/source/repos/acoustic_model/reus-test/reus3768-mantsje.rec"
 0 150000 m -217.347229 REUS
 150000 1150000 ac -1266.293579
 1150000 1650000 n -583.382568
 1650000 11100000 t -11259.270508
 11100000 11250000 s -247.939255
 11250000 11550000 j -445.511444
 11550000 24150000 @ -16769.048828
 .
--- a/rozen-test/pg_rozen_100_jko5r.wav
+++ b/rozen-test/pg_rozen_100_jko5r.wav
--- a/rozen-test/pg_rozen_113_o9kzs.wav
+++ b/rozen-test/pg_rozen_113_o9kzs.wav
--- a/rozen-test/pg_rozen_1296_zbve2.wav
+++ b/rozen-test/pg_rozen_1296_zbve2.wav
--- a/rozen-test/pg_rozen_1709_kq9xr.wav
+++ b/rozen-test/pg_rozen_1709_kq9xr.wav
--- a/rozen-test/pg_rozen_241_bahqi.wav
+++ b/rozen-test/pg_rozen_241_bahqi.wav
--- a/rozen-test/pg_rozen_5502_q79fd.wav
+++ b/rozen-test/pg_rozen_5502_q79fd.wav
--- a/rozen-test/pg_rozen_632_2m04y.wav
+++ b/rozen-test/pg_rozen_632_2m04y.wav
--- a/rozen-test/pg_rozen_911_1zvda.wav
+++ b/rozen-test/pg_rozen_911_1zvda.wav
--- a/rozen-test/rozen-test.py
+++ b/rozen-test/rozen-test.py
@@ -0,0 +1,119 @@
 #!/usr/bin/env python
 import os
 os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
 import argparse
 import json
 from novoapi.backend import session
 # Command-line options: credentials for the recognizer session.
 # NOTE(review): a user name and a (placeholder) password are committed here
 # as defaults; real credentials should be passed via --user/--password and
 # never checked in.
 p = argparse.ArgumentParser()
 p.add_argument("--user", default='martijn.wieling')
 p.add_argument("--password", default='xxxxx')
 args = p.parse_args()
 # Open the recognizer session used by every call below. keepopen=True
 # presumably keeps the connection alive across calls -- confirm in
 # novoapi.backend.session.
 rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True)
 # Grammar handed to the recognizer: a sequence containing a single word
 # element labelled "reus" with three alternative phone sequences (ids 0-2),
 # declared to be in the "novo70" phone set:
 #   0: r eu0 s    1: m a0 n    2: m a0 n t s y ax
 # NOTE(review): this file is rozen-test.py, but the grammar and the wav
 # files used further down are the "reus" ones -- confirm this is intended.
 grammar = {
  "type": "confusion_network",
  "version": "1.0",
  "data": {
    "kind": "sequence",
    "elements": [
      {
        "kind": "word",
        "pronunciation": [
          {
            "phones": [
              "r",
              "eu0",
              "s"
            ],
            "id": 0
          }
          ,
          {
            "phones": [
              "m",
              "a0",
              "n"
            ],
            "id": 1
          }
 		   ,
          {
            "phones": [
              "m",
              "a0",
              "n",
              "t",
              "s",
              "y",
              "ax"
            ],
            "id": 2
          }
        ],
        "label": "reus"
      }
    ]
  },
  "return_objects": [
    "grammar"
  ],
  "phoneset": "novo70"
 }
 # Register the grammar with the session; the returned value is kept in
 # `res` but only the commented-out print below ever looked at it.
 res = rec.setgrammar(grammar)
 #print "Set grammar result", res
 ## === novoapi/backend/session.py ===
 #import wave
 #import time
 #from novoapi.backend.session import rpcid, segmentation
 #wavf = "reus1008-reus.wav"
 #w = wave.open(wavf, 'r')
 #nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
 #buf = w.readframes(nframes)
 #w.close()
 #buffer_size = 4096
 #nbytes_sent = 0
 #start = time.time()
 #for j in range(0, len(buf), buffer_size):
 #    audio_packet = buf[j:j + buffer_size]
 #    nbytes_sent += len(audio_packet)
 #    rec.conn.send_binary(audio_packet)
 #rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
 #print(rpcid.next())
 #rec.last_message = rec.conn.recv() 
 #message = json.loads(rec.last_message)
 #result = session.segmentation(message["result"]["words"])
 #result.export()
 ## ====================================
 def result2pronunciation(result, word):
 	"""Return the phone labels and 'llh' value recognized for *word*.

 	Args:
 		result: sequence of word entries (in this script, what
 			``res.export()`` returns). Each entry is read through its
 			'label', 'llh' and 'phones' keys, and each phone through
 			its 'label' key.
 		word: label of the entry to look up.

 	Returns:
 		A ``(pronunciation, llh)`` tuple: the list of phone labels of the
 		first entry whose label equals *word*, and that entry's 'llh'
 		value (presumably a log-likelihood -- confirm against novoapi).

 	Raises:
 		IndexError: if no entry in *result* is labelled *word*.
 	"""
 	matches = [entry for entry in result if entry['label'] == word]
 	if not matches:
 		# Same exception type as the former bare ``[0]`` lookup, but with a
 		# message that says what was actually missing.
 		raise IndexError('no entry labelled {0!r} in the result'.format(word))
 	# Only the first matching entry is reported.
 	first = matches[0]
 	llh = first['llh']
 	pronunciation = [phone['label'] for phone in first['phones']]
 	return pronunciation, llh
 # Recognize each sample recording against the grammar set above and pull
 # out the phones chosen for the word 'reus'.
 # The wav names are relative, so they are resolved against the current
 # working directory.
 # NOTE(review): the tuples returned by result2pronunciation() are discarded,
 # so running this as a script shows nothing; they are only visible when the
 # lines are evaluated interactively.
 res = rec.recognize_wav("reus1008-reus.wav")
 #print "\n\n\nThe pronounced word in reus1008-reus.wav is: REUS\n\n"
 #print "Recognition result:", json.dumps(res.export(), indent=4)
 result2pronunciation(res.export(), 'reus')
 #print "\n\n\nThe pronounced word in reus1167-man.wav is: MAN\n\n"
 res2 = rec.recognize_wav("reus1167-man.wav")
 #print "Recognition result:", json.dumps(res2.export(), indent=4)
 result2pronunciation(res2.export(), 'reus')
 #print "\n\n\nThe pronounced word in reus3768-mantsje.wav is: MANTSJE\n\n"
 res3 = rec.recognize_wav("reus3768-mantsje.wav")
 #print "Recognition result:", json.dumps(res3.export(), indent=4)
 result2pronunciation(res3.export(), 'reus')