resolve the conflicts.
@@ -4,7 +4,7 @@
    <SchemaVersion>2.0</SchemaVersion>
    <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
    <ProjectHome>.</ProjectHome>
-   <StartupFile>check_novoapi.py</StartupFile>
+   <StartupFile>performance_check.py</StartupFile>
    <SearchPath>
    </SearchPath>
    <WorkingDirectory>.</WorkingDirectory>
@@ -25,9 +25,6 @@
    <Compile Include="acoustic_model_functions.py">
      <SubType>Code</SubType>
    </Compile>
-   <Compile Include="check_novoapi.py">
-     <SubType>Code</SubType>
-   </Compile>
    <Compile Include="convert_xsampa2ipa.py">
      <SubType>Code</SubType>
    </Compile>
@@ -37,10 +34,7 @@
    <Compile Include="fa_test.py">
      <SubType>Code</SubType>
    </Compile>
-   <Compile Include="forced_alignment_novo.py">
-     <SubType>Code</SubType>
-   </Compile>
-   <Compile Include="htk_vs_kaldi.py">
+   <Compile Include="performance_check.py">
      <SubType>Code</SubType>
    </Compile>
  </ItemGroup>

@@ -2,52 +2,15 @@ import os
import sys
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import numpy as np

import defaultfiles as default

sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
-from forced_alignment import forced_alignment, lexicon, convert_phone_set
+from forced_alignment import forced_alignment

#wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav'
#forced_alignment(
#    wav_file,
#    'Australië'
#    #'BUFFETCOUPON COULISSEN DOUANE'
#    )
wav_file = r'C:\Users\Aki\source\repos\forced_alignment\notebooks\sample\10147-1464186409-1917281.wav'
forced_alignment(
    wav_file,
    #'Australië'
    'BUFFETCOUPON COULISSEN DOUANE'
    )

# according to: http://lands.let.ru.nl/cgn/doc_Dutch/topics/version_1.0/annot/phonetics/fon_prot.pdf
phone_list_cgn = ['p', 'b', 't', 'd', 'k', 'g', # plosives
                  'f', 'v', 's', 'z', 'S', 'Z', 'x', 'G', 'h', # fricatives
                  'N', 'm', 'n', 'J', 'l', 'r', 'w', 'j', # sonorants
                  'I', 'E', 'A', 'O', 'Y', # short vowels
                  'i', 'y', 'e', '2', 'a', 'o', 'u', # long vowels
                  '@', # schwa
                  'E+', 'Y+', 'A+', # diphthongs
                  'E:', 'Y:', 'O:', # loan vowels
                  'E~', 'A~', 'O~', 'Y~' # nasal vowels
                  ]

# load words in the lexicon.
lexicon_file = r'C:\cygwin64\home\Aki\acoustic_model\material\barbara\2010_2510_lexicon_pronvars_HTK.txt'
with open(lexicon_file, 'r') as f:
    lines = f.readlines()

words = []
for line in lines:
    line_split = line.split()
    if len(line_split) > 0:
        word = line_split[0]
        word = word.replace('+s', '')  # str.replace returns a new string; assign it back.
        words.extend(word.split('-'))  # collect compound parts as separate words.
words = list(np.unique(words))
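
# Worked example (hypothetical entry): a first column of "buffet+s-coupon"
# becomes "buffet-coupon" after the '+s' strip, then contributes
# ['buffet', 'coupon'] to words.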

pronunciations = lexicon._grapheme_to_phoneme(words)
htks = []
phone_list = set()
for word in pronunciations.keys():
    ipa = pronunciations[word]
    htk = convert_phone_set.split_ipa(ipa)
    htks.append(htk)
    phone_list = phone_list | set(htk)
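
# Hedged follow-up sketch (not in the original commit): once phone_list is
# collected, it can be compared against the CGN protocol list defined above.
unknown_phones = phone_list - set(phone_list_cgn)
print('phones outside the CGN set:', sorted(unknown_phones))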

@@ -1,133 +0,0 @@
#
# forced alignment using novo-api.
#
# *** IMPORTANT ***
# This file should be treated as confidential.
# It should not be copied or uploaded to public sites.
#
# NOTES:
# Usage of the novo api: https://bitbucket.org/novolanguage/python-novo-api
# I couldn't make it work, as I described in the mail to Martijn Bartelds on 2018/12/03.
# Following his advice, I modified testgrammer.py and made it a function.
#
# To run on Python 3.6, the following changes were made in novo-api:
# (1) backend/__init__.py
#     - #import session
#       from . import session
# (2) backend/session.py
#     - #except Exception, e:
#       except Exception as e:
#     - #print self.last_message
#       print(self.last_message)
# (3) asr/segment/praat.py
#     - #def print_tier(output, title, begin, end, segs, (format, formatter)):
#       def print_tier(output, title, begin, end, segs, format, formatter):
# (4) asr/spraaklab/__init__.py
#     - #import session
#       from . import session
# (5) asr/spraaklab/schema.py
#     - #print data, "validated not OK", e.message
#       print("{0} validated not OK {1}".format(data, e.message))
#     - #print data, "validated OK"
#       print("{} validated OK".format(data))
#     - #if isinstance(object, basestring):
#       if isinstance(object, str):
#
# Aki Kunikoshi
# 428968@gmail.com
#
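
# A minimal, hedged illustration of the Python 3 idioms applied in the notes
# above (PEP 3110 exception syntax and print() as a function); the RuntimeError
# merely stands in for a novo-api failure:
try:
    raise RuntimeError('demo failure')
except Exception as e:   # Python 2 wrote: except Exception, e:
    print(e)             # Python 2 wrote: print e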

import argparse
import json

from novoapi.backend import session

# username / password cannot be passed as arguments...
p = argparse.ArgumentParser()
#p.add_argument("--user", default=None)
#p.add_argument("--password", default=None)
p.add_argument("--user", default='martijn.wieling')
p.add_argument("--password", default='fa0Thaic')
args = p.parse_args()

wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'

rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)

grammar = {
    "type": "confusion_network",
    "version": "1.0",
    "data": {
        "kind": "sequence",
        "elements": [
            {
                "kind": "word",
                "pronunciation": [
                    {"phones": ["wv", "a1", "n"], "id": 0},
                    {"phones": ["wv", "uh1", "n"], "id": 1}
                ],
                "label": "one"
            },
            {
                "kind": "word",
                "pronunciation": [
                    {"phones": ["t", "uw1"], "id": 0}
                ],
                "label": "two"
            },
            {
                "kind": "word",
                "pronunciation": [
                    {"phones": ["t", "r", "iy1"], "id": 0},
                    {"phones": ["s", "r", "iy1"], "id": 1}
                ],
                "label": "three"
            }
        ]
    },
    "return_objects": ["grammar"],
    "phoneset": "novo70"
}
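
# Hedged sketch (not in the original script): each element above follows the
# same shape, so a small helper could build them; make_word is a hypothetical
# name, and the phones are novo70 symbols exactly as used above.
def make_word(label, *variants):
    return {"kind": "word",
            "label": label,
            "pronunciation": [{"phones": list(phones), "id": i}
                              for i, phones in enumerate(variants)]}

# e.g. make_word("two", ["t", "uw1"]) reproduces the "two" element above.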

res = rec.setgrammar(grammar)
#print "Set grammar result", res

#res = rec.recognize_wav("test/onetwothree.wav")
res = rec.recognize_wav(wav_file)
#print "Recognition result:", json.dumps(res.export(), indent=4)
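# Python 3 form of the commented py2 print above (a sketch, assuming
# res.export() returns JSON-serializable data as the original line implies):
print("Recognition result:", json.dumps(res.export(), indent=4))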

@@ -3,7 +3,7 @@ os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')

import sys
import csv
-#import subprocess
+import subprocess
from collections import Counter
import re

@@ -20,6 +20,8 @@ from forced_alignment import pyhtk

## ======================= user define =======================
+excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
+data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data')

wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k

@@ -46,6 +48,8 @@ load_forced_alignment_kaldi = 1
eval_forced_alignment_kaldi = 1

## ======================= add paths =======================
sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
from forced_alignment import convert_phone_set

@@ -58,15 +62,15 @@ from evaluation import plot_confusion_matrix
## ======================= convert phones ======================
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)

-xls = pd.ExcelFile(default.stimmen_transcription_xlsx)
+xls = pd.ExcelFile(excel_file)

## check conversion
-#df = pd.read_excel(xls, 'check')
+#df = pd.read_excel(xls, 'frequency')
#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
#    if xsampa is not '/':
#        ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
#        if not ipa_converted == ipa:
#            print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
#        #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_)
#        ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
#        if not ipa_converted == ipa:
#            print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))


## check phones included in FAME!

@@ -156,7 +160,7 @@ if do_forced_alignment_htk:
            htk_dict_file = os.path.join(htk_dict_dir, word + '.dic')

            pyhtk.doHVite(wav_file, label_file, htk_dict_file, fa_file, default.config_hvite,
-                          default.phonelist_friesian_txt, acoustic_model)
+                          default.phonelist, acoustic_model)
            os.remove(label_file)

            prediction = am_func.read_fileFA(fa_file)

@@ -227,7 +231,7 @@ if make_kaldi_data_files:

## ======================= make lexicon txt which is used by Kaldi =======================
if make_kaldi_lexicon_txt:
-    option_num = 7
+    option_num = 6

    # remove previous file.
    if os.path.exists(lexicon_txt):

@@ -277,10 +281,10 @@ if load_forced_alignment_kaldi:
    phones_txt = os.path.join(default.kaldi_dir, 'data', 'lang', 'phones.txt')
    merged_alignment_txt = os.path.join(default.kaldi_dir, 'exp', 'tri1_alignme', 'merged_alignment.txt')

-    #filenames = np.load(stimmen_data_dir + '\\filenames.npy')
-    #words = np.load(stimmen_data_dir + '\\words.npy')
-    #pronunciations = np.load(stimmen_data_dir + '\\pronunciations_ipa.npy')
-    #pronvar_list_all = np.load(stimmen_data_dir + '\\pronvar_list_all.npy')
+    #filenames = np.load(data_dir + '\\filenames.npy')
+    #words = np.load(data_dir + '\\words.npy')
+    #pronunciations = np.load(data_dir + '\\pronunciations_ipa.npy')
+    #pronvar_list_all = np.load(data_dir + '\\pronvar_list_all.npy')
    #word_list = np.unique(words)

    # load the mapping between phones and ids.
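    # Hedged sketch of that load (not in the original diff), assuming the
    # standard Kaldi phones.txt layout of one "<symbol> <integer-id>" per line:
    phone2id = {}
    with open(phones_txt, 'r', encoding='utf-8') as f:
        for line in f:
            symbol, phone_id = line.split()
            phone2id[symbol] = int(phone_id)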

@@ -365,7 +369,7 @@ if eval_forced_alignment_htk:
    if compare_hmm_num:
        f_result.write("{},".format(hmm_num_str))

-        #match = np.load(stimmen_data_dir + '\\match_hmm' + hmm_num_str + '.npy')
+        #match = np.load(data_dir + '\\match_hmm' + hmm_num_str + '.npy')
        #prediction = np.load(os.path.join(result_dir, 'htk', 'predictions_hmm' + hmm_num_str + '.npy'))
        #prediction = pd.Series(prediction, index=df.index, name='prediction')
        #result = pd.concat([df, prediction], axis=1)