Compare commits
No commits in common. "24ac56ac0e903ffa9a4353e3b9ed7ac2c7f79855" and "de5c9cecb94ce64206dc409b5d7773f5f529524b" have entirely different histories.
24ac56ac0e
...
de5c9cecb9
Binary file not shown.
@ -16,7 +16,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
|
||||
..\forced_alignment\forced_alignment\pronunciations.py = ..\forced_alignment\forced_alignment\pronunciations.py
|
||||
..\toolbox\pyHTK.py = ..\toolbox\pyHTK.py
|
||||
..\forced_alignment\forced_alignment\pyhtk.py = ..\forced_alignment\forced_alignment\pyhtk.py
|
||||
reus-test\reus-test.py = reus-test\reus-test.py
|
||||
..\forced_alignment\forced_alignment\scripts.py = ..\forced_alignment\forced_alignment\scripts.py
|
||||
..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py = ..\..\..\..\..\Python36-32\Lib\site-packages\novoapi\backend\session.py
|
||||
..\forced_alignment\forced_alignment\tempfilename.py = ..\forced_alignment\forced_alignment\tempfilename.py
|
||||
|
Binary file not shown.
@ -4,7 +4,8 @@
|
||||
<SchemaVersion>2.0</SchemaVersion>
|
||||
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
||||
<ProjectHome>.</ProjectHome>
|
||||
<StartupFile>forced_aligner_comparison.py</StartupFile>
|
||||
<StartupFile>
|
||||
</StartupFile>
|
||||
<SearchPath>
|
||||
</SearchPath>
|
||||
<WorkingDirectory>.</WorkingDirectory>
|
||||
@ -35,9 +36,6 @@
|
||||
<Compile Include="fa_test.py">
|
||||
<SubType>Code</SubType>
|
||||
</Compile>
|
||||
<Compile Include="forced_aligner_comparison.py">
|
||||
<SubType>Code</SubType>
|
||||
</Compile>
|
||||
<Compile Include="novoapi_forced_alignment.py">
|
||||
<SubType>Code</SubType>
|
||||
</Compile>
|
||||
|
@ -10,13 +10,14 @@ import shutil
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from sklearn.metrics import confusion_matrix
|
||||
from sklearn.metrics import accuracy_score
|
||||
import novoapi
|
||||
|
||||
import defaultfiles as default
|
||||
sys.path.append(default.forced_alignment_module_dir)
|
||||
from forced_alignment import convert_phone_set
|
||||
from forced_alignment import pyhtk, convert_phone_set
|
||||
#import acoustic_model_functions as am_func
|
||||
import convert_xsampa2ipa
|
||||
import novoapi_functions
|
||||
@ -46,6 +47,10 @@ david_suggestion = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'w']
|
||||
## read pronunciation variants.
|
||||
stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
|
||||
df = pd.read_excel(stimmen_transcription_, 'frequency')
|
||||
#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
|
||||
# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
|
||||
# if not ipa_converted == ipa:
|
||||
# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
|
||||
transcription_ipa = list(df['IPA'])
|
||||
|
||||
# transcription mistake?
|
||||
@ -58,7 +63,6 @@ for ipa in transcription_ipa:
|
||||
ipa = ipa.replace(':', 'ː')
|
||||
ipa = convert_phone_set.split_ipa(ipa)
|
||||
|
||||
# list of phones not in novo70 phoneset.
|
||||
not_in_novo70_ = [phone for phone in ipa
|
||||
if not phone in phoneset_ipa and not phone in david_suggestion]
|
||||
not_in_novo70_ = [phone.replace('sp', '') for phone in not_in_novo70_]
|
||||
@ -102,10 +106,6 @@ df = pd.read_excel(stimmen_transcription_, 'original')
|
||||
|
||||
# mapping from ipa to xsampa
|
||||
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
|
||||
#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
|
||||
# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
|
||||
# if not ipa_converted == ipa:
|
||||
# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
|
||||
|
||||
ipas = []
|
||||
famehtks = []
|
||||
@ -153,12 +153,12 @@ for word in word_list:
|
||||
|
||||
|
||||
## ===== forced alignment =====
|
||||
rozen_dir = r'c:\Users\Aki\source\repos\acoustic_model\rozen-test'
|
||||
reus_dir = r'C:\OneDrive\Desktop\Reus'
|
||||
if forced_alignment_novo70:
|
||||
Results = pd.DataFrame(index=[],
|
||||
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
|
||||
#for word in word_list:
|
||||
for word in ['Rozen']:
|
||||
for word in ['Reus']:
|
||||
# pronunciation variants top 3
|
||||
df_per_word_ = df_per_word[df_per_word['word']==word]
|
||||
df_per_word_ = df_per_word_.sort_values('frequency', ascending=False)
|
||||
@ -208,35 +208,37 @@ if forced_alignment_novo70:
|
||||
wav_file = os.path.join(default.stimmen_wav_dir, filename)
|
||||
if os.path.exists(wav_file):
|
||||
# for Martijn
|
||||
shutil.copy(wav_file, os.path.join(rozen_dir, filename))
|
||||
#shutil.copy(wav_file, os.path.join(reus_dir, filename))
|
||||
|
||||
# pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
|
||||
# result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
|
||||
# result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
|
||||
# result_ = pd.Series([
|
||||
# sample['filename'],
|
||||
# sample['word'],
|
||||
# sample['xsampa'],
|
||||
# sample['ipa'],
|
||||
# ' '.join(result_ipa),
|
||||
# ' '.join(result_novo70),
|
||||
# llh
|
||||
# ], index=results.columns)
|
||||
# results = results.append(result_, ignore_index = True)
|
||||
# print('{0}/{1}: answer {2} - prediction {3}'.format(
|
||||
# i+1, len(samples), result_['ipa'], result_['result_ipa']))
|
||||
# #results.to_excel(os.path.join(default.stimmen_dir, 'results.xlsx'), encoding="utf-8")
|
||||
#if len(results) > 0:
|
||||
# Results = Results.append(results, ignore_index = True)
|
||||
#Results.to_excel(os.path.join(default.stimmen_result_novoapi_dir, 'Results.xlsx'), encoding="utf-8")
|
||||
pronunciation_ipa_ = [ipa.replace(':', 'ː') for ipa in pronunciation_ipa]
|
||||
result = novoapi_functions.forced_alignment(wav_file, word, pronunciation_ipa_)
|
||||
result_ipa, result_novo70, llh = novoapi_functions.result2pronunciation(result, word)
|
||||
result_ = pd.Series([
|
||||
sample['filename'],
|
||||
sample['word'],
|
||||
sample['xsampa'],
|
||||
sample['ipa'],
|
||||
' '.join(result_ipa),
|
||||
' '.join(result_novo70),
|
||||
llh
|
||||
], index=results.columns)
|
||||
results = results.append(result_, ignore_index = True)
|
||||
print('{0}/{1}: answer {2} - prediction {3}'.format(
|
||||
i+1, len(samples), result_['ipa'], result_['result_ipa']))
|
||||
results.to_excel(os.path.join(reus_dir, 'results.xlsx'), encoding="utf-8")
|
||||
if len(results) > 0:
|
||||
Results = Results.append(results, ignore_index = True)
|
||||
Results.to_excel(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
|
||||
else:
|
||||
Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_result_novoapi_dir, 'Results.xlsx'), encoding="utf-8")
|
||||
Results_xlsx = pd.ExcelFile(os.path.join(default.stimmen_dir, 'Results.xlsx'), encoding="utf-8")
|
||||
Results = pd.read_excel(Results_xlsx, 'Sheet1')
|
||||
|
||||
|
||||
## ===== analysis =====
|
||||
#result_novoapi_dir = os.path.join(default.stimmen_dir, 'result', 'novoapi')
|
||||
#for word in word_list:
|
||||
# if not word == 'Oog':
|
||||
|
||||
# Results_ = Results[Results['word'] == word]
|
||||
# y_true = list(Results_['ipa'])
|
||||
# y_pred_ = [ipa.replace(' ', '') for ipa in list(Results_['result_ipa'])]
|
||||
@ -247,4 +249,4 @@ else:
|
||||
# plt.figure()
|
||||
# output_confusion_matrix.plot_confusion_matrix(cm, pronunciation_variants, normalize=False)
|
||||
# #plt.show()
|
||||
# plt.savefig(os.path.join(default.stimmen_result_novoapi_dir, word + '.png'))
|
||||
# plt.savefig(os.path.join(result_novoapi_dir, word + '.png'))
|
@ -31,12 +31,6 @@ ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
|
||||
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
|
||||
accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
|
||||
|
||||
htk_config_dir = r'c:\Users\Aki\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
|
||||
config_hvite = os.path.join(htk_config_dir, 'config.HVite')
|
||||
#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
|
||||
acoustic_model = r'c:\cygwin64\home\Aki\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
|
||||
phonelist_txt = os.path.join(htk_config_dir, 'phonelist.txt')
|
||||
|
||||
WSL_dir = r'C:\OneDrive\WSL'
|
||||
fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
|
||||
fame_s5_dir = os.path.join(fame_dir, 's5')
|
||||
@ -49,7 +43,6 @@ stimmen_data_dir = os.path.join(stimmen_dir, 'data')
|
||||
#stimmen_wav_dir = os.path.join(stimmen_dir, 'wav')
|
||||
# 16 kHz
|
||||
stimmen_wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen'
|
||||
stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi')
|
||||
|
||||
stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx')
|
||||
phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
|
||||
|
@ -1,42 +0,0 @@
|
||||
import os
# The working directory is switched before the project imports below,
# exactly as in the original statement order.
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import sys

import defaultfiles as default
sys.path.append(default.forced_alignment_module_dir)
from forced_alignment import pyhtk, convert_phone_set, scripts

# Folder holding the test recordings; the generated files are written
# next to them.
reus_dir = r'c:\Users\Aki\source\repos\acoustic_model\reus-test'
wav_dir = reus_dir
wav_files = [
    'reus1008-reus.wav',
    'reus1167-man.wav',
    'reus3768-mantsje.wav',
]

# Every pronunciation variant is registered under this single word.
word = 'reus'
pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']

for wav_file in wav_files:
    # Per-recording output files: label, dictionary and HVite result.
    file_lab, file_dic, file_txt = (
        os.path.join(reus_dir, wav_file.replace('.wav', extension))
        for extension in ('.lab', '.dic', '.txt')
    )

    # HTK dictionary: one "<WORD>\t<phones>" line per variant.
    with open(file_dic, 'w', encoding="utf-8") as f:
        for ipa in pronunciation_ipa:
            # ASCII ':' is replaced by the IPA length mark before conversion.
            cgn = convert_phone_set.ipa2cgn([ipa.replace(':', 'ː')])
            barbara = convert_phone_set.cgn2barbara(cgn)
            f.write(word.upper() + '\t' + barbara + '\n')

    # HTK label file for the word.
    pyhtk._create_label_file(word, file_lab)

    # HVite call; results are written to file_txt via '-i'.
    # NOTE(review): '-a'/'-m' presumably request alignment with model-level
    # output -- confirm against the HTK Book. A '-S' script file was
    # considered in the original but left disabled.
    hvite_command = [
        'HVite', '-T', '1',
        '-a',
        '-C', default.config_hvite,
        '-H', default.acoustic_model,
        '-m',
        '-i', file_txt,
        file_dic,
        default.phonelist_txt,
        os.path.join(wav_dir, wav_file),
    ]
    scripts.run_command(hvite_command)
|
@ -7,7 +7,7 @@ import json
|
||||
from novoapi.backend import session
|
||||
|
||||
import os
|
||||
#os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
|
||||
import defaultfiles as default
|
||||
|
||||
|
||||
|
Binary file not shown.
@ -1,3 +0,0 @@
|
||||
REUS r eu s
|
||||
REUS m ac n
|
||||
REUS m ac n t s j @
|
@ -1 +0,0 @@
|
||||
REUS
|
@ -1,6 +0,0 @@
|
||||
#!MLF!#
|
||||
"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1008-reus.rec"
|
||||
0 9700000 r -12463.852539 REUS
|
||||
9700000 12800000 eu -3622.108887
|
||||
12800000 26250001 s -17303.216797
|
||||
.
|
@ -1,3 +0,0 @@
|
||||
REUS r eu s
|
||||
REUS m ac n
|
||||
REUS m ac n t s j @
|
@ -1 +0,0 @@
|
||||
REUS
|
@ -1,10 +0,0 @@
|
||||
#!MLF!#
|
||||
"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus1167-man.rec"
|
||||
0 150000 m -230.057571 REUS
|
||||
150000 300000 ac -250.994858
|
||||
300000 450000 n -202.377716
|
||||
450000 4600000 t -5128.984375
|
||||
4600000 5050000 s -711.338501
|
||||
5050000 5450000 j -564.730591
|
||||
5450000 16049999 @ -13249.787109
|
||||
.
|
@ -1,3 +0,0 @@
|
||||
REUS r eu s
|
||||
REUS m ac n
|
||||
REUS m ac n t s j @
|
@ -1 +0,0 @@
|
||||
REUS
|
@ -1,10 +0,0 @@
|
||||
#!MLF!#
|
||||
"c:/Users/Aki/source/repos/acoustic_model/reus-test/reus3768-mantsje.rec"
|
||||
0 150000 m -217.347229 REUS
|
||||
150000 1150000 ac -1266.293579
|
||||
1150000 1650000 n -583.382568
|
||||
1650000 11100000 t -11259.270508
|
||||
11100000 11250000 s -247.939255
|
||||
11250000 11550000 j -445.511444
|
||||
11550000 24150000 @ -16769.048828
|
||||
.
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,119 +0,0 @@
|
||||
#!/usr/bin/env python
import os
# Working directory is changed before the remaining imports, as in the
# original statement order.
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import argparse
import json

from novoapi.backend import session

# Credentials can be overridden on the command line.
parser = argparse.ArgumentParser()
parser.add_argument("--user", default='martijn.wieling')
parser.add_argument("--password", default='xxxxx')
args = parser.parse_args()

rec = session.Recognizer(
    grammar_version="1.0",
    lang="nl",
    snodeid=101,
    user=args.user,
    password=args.password,
    keepopen=True,
)

# Phone sequences of the competing pronunciation variants; the position in
# this list becomes the variant "id" inside the grammar.
_variant_phones = [
    ["r", "eu0", "s"],
    ["m", "a0", "n"],
    ["m", "a0", "n", "t", "s", "y", "ax"],
]

# Grammar with a single word labelled "reus" carrying all variants above.
grammar = {
    "type": "confusion_network",
    "version": "1.0",
    "data": {
        "kind": "sequence",
        "elements": [
            {
                "kind": "word",
                "pronunciation": [
                    {"phones": phones, "id": variant_id}
                    for variant_id, phones in enumerate(_variant_phones)
                ],
                "label": "reus",
            },
        ],
    },
    "return_objects": ["grammar"],
    "phoneset": "novo70",
}

res = rec.setgrammar(grammar)
|
||||
#print "Set grammar result", res
|
||||
|
||||
|
||||
## === novoapi/backend/session.py ===
|
||||
#import wave
|
||||
#import time
|
||||
#from novoapi.backend.session import rpcid, segmentation
|
||||
|
||||
#wavf = "reus1008-reus.wav"
|
||||
#w = wave.open(wavf, 'r')
|
||||
#nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
|
||||
#buf = w.readframes(nframes)
|
||||
#w.close()
|
||||
|
||||
#buffer_size = 4096
|
||||
#nbytes_sent = 0
|
||||
#start = time.time()
|
||||
#for j in range(0, len(buf), buffer_size):
|
||||
# audio_packet = buf[j:j + buffer_size]
|
||||
# nbytes_sent += len(audio_packet)
|
||||
# rec.conn.send_binary(audio_packet)
|
||||
#rec.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
|
||||
#print(rpcid.next())
|
||||
#rec.last_message = rec.conn.recv()
|
||||
#message = json.loads(rec.last_message)
|
||||
#result = session.segmentation(message["result"]["words"])
|
||||
#result.export()
|
||||
## ====================================
|
||||
|
||||
def result2pronunciation(result, word):
    """Return the phone labels and log-likelihood of the entry labelled *word*.

    Only the first entry of *result* whose ``'label'`` equals *word* is used.

    Args:
        result: iterable of word entries. Each entry is read through
            ``entry['label']``; the selected entry is additionally read
            through ``entry['llh']`` and ``entry['phones']`` (an iterable of
            items that each provide a ``'label'`` key).
        word: label to look for; compared with ``==``, so case-sensitive.

    Returns:
        A ``(pronunciation, llh)`` tuple, where ``pronunciation`` is the list
        of phone labels of the selected entry.

    Raises:
        IndexError: if no entry in *result* is labelled *word*. The exception
            type matches what the previous list-indexing code raised, but the
            message now names the missing word.
    """
    # Stop at the first match rather than collecting every match by index.
    match = next((entry for entry in result if entry['label'] == word), None)
    if match is None:
        raise IndexError(
            'no entry labelled {0!r} in recognition result'.format(word))

    llh = match['llh']
    pronunciation = [phone['label'] for phone in match['phones']]
    return pronunciation, llh
|
||||
|
||||
|
||||
def _recognize_and_extract(wav_name):
    """Recognize *wav_name* and run result2pronunciation for the word 'reus'.

    The extracted pronunciation is not stored (the original script discarded
    it as well); the recognition object is returned so it stays inspectable.
    """
    recognition = rec.recognize_wav(wav_name)
    result2pronunciation(recognition.export(), 'reus')
    return recognition


# One call per test recording; the spoken word in each file is REUS, MAN and
# MANTSJE respectively, all matched against the 'reus' grammar entry.
res = _recognize_and_extract("reus1008-reus.wav")
res2 = _recognize_and_extract("reus1167-man.wav")
res3 = _recognize_and_extract("reus3768-mantsje.wav")
|
Loading…
Reference in New Issue
Block a user