diff --git a/.vs/acoustic_model/v15/.suo b/.vs/acoustic_model/v15/.suo
index ce90954..ef753d5 100644
Binary files a/.vs/acoustic_model/v15/.suo and b/.vs/acoustic_model/v15/.suo differ
diff --git a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc
index ef57dca..545949d 100644
Binary files a/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc and b/acoustic_model/__pycache__/defaultfiles.cpython-36.pyc differ
diff --git a/acoustic_model/acoustic_model.pyproj b/acoustic_model/acoustic_model.pyproj
index 5319301..11ca83b 100644
--- a/acoustic_model/acoustic_model.pyproj
+++ b/acoustic_model/acoustic_model.pyproj
@@ -51,6 +51,7 @@
+
diff --git a/acoustic_model/check_novoapi.py b/acoustic_model/check_novoapi.py
index 96d8e8f..2a3ae59 100644
--- a/acoustic_model/check_novoapi.py
+++ b/acoustic_model/check_novoapi.py
@@ -20,12 +20,12 @@ from forced_alignment import convert_phone_set
#import acoustic_model_functions as am_func
import convert_xsampa2ipa
import novoapi_functions
+import stimmen_functions
sys.path.append(default.accent_classification_dir)
import output_confusion_matrix
## procedure
forced_alignment_novo70 = True
-balance_sample_numbers = False
## ===== load novo phoneset =====
@@ -98,36 +98,7 @@ def search_phone_ipa(x, phone_list):
## ===== load all transcriptions (df) =====
-df = pd.read_excel(stimmen_transcription_, 'original')
-
-# mapping from ipa to xsampa
-mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
-#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
-# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
-# if not ipa_converted == ipa:
-# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
-
-ipas = []
-famehtks = []
-for xsampa in df['Self Xsampa']:
- if not isinstance(xsampa, float): # 'NaN'
- # typo?
- xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t')
- xsampa = xsampa.replace(';', ':')
-
- ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
- ipa = ipa.replace('ː', ':')
- ipa = ipa.replace(' ', '')
- ipas.append(ipa)
- else:
- ipas.append('')
-
-# extract interesting cols.
-df = pd.DataFrame({'filename': df['Filename'],
- 'word': df['Word'],
- 'xsampa': df['Self Xsampa'],
- 'ipa': pd.Series(ipas)})
-
+df = stimmen_functions.load_transcriptions()
word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)]
word_list = sorted(word_list)
@@ -183,21 +154,6 @@ if forced_alignment_novo70:
# samples in which all pronunciations are written in novo70.
samples = df_.query("ipa in @pronunciation_ipa")
-
-
- ## ===== balance sample numbers =====
- if balance_sample_numbers:
- c = Counter(samples['ipa'])
- sample_num_list = [c[key] for key in c.keys()]
- sample_num = np.min(sample_num_list)
-
- samples_balanced = pd.DataFrame(index=[], columns=list(samples.keys()))
- for key in c.keys():
- samples_ = samples[samples['ipa'] == key]
- samples_balanced = samples_balanced.append(samples_.sample(sample_num), ignore_index = True)
-
- samples = samples_balanced
-
results = pd.DataFrame(index=[],
columns=['filename', 'word', 'xsampa', 'ipa', 'result_ipa', 'result_novo70', 'llh'])
diff --git a/acoustic_model/defaultfiles.py b/acoustic_model/defaultfiles.py
index 2188c97..ef0dfd4 100644
--- a/acoustic_model/defaultfiles.py
+++ b/acoustic_model/defaultfiles.py
@@ -2,63 +2,40 @@ import os
# add path of the parent directory
#os.path.dirname(os.path.realpath(__file__))
-#cygwin_dir = r'C:\cygwin64\home\Aki\acoustic_model'
-
-#htk_dir = r'C:\Aki\htk_fame'
-htk_dir = r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk'
-
-
-#config_hvite = os.path.join(cygwin_dir, 'config', 'config.HVite')
-#mkhmmdefs_pl = os.path.join(cygwin_dir, 'src', 'acoustic_model', 'mkhmmdefs.pl')
-
-#dbLexicon = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\lexicon.accdb
-#scriptBarbara = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\pronvars_barbara.perl
-#exeG2P = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\string2phon.exe
-
-#[pyHTK]
-#configHVite = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\config.HVite
-#filePhoneList = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\phonelist_barbara.txt
-#AcousticModel = C:\\Users\\Aki\\source\\repos\\rug_VS\\forced_alignment\\config\\hmmdefs_16-2_barbara.compo
-
-#dbLexicon = config['cLexicon']['dbLexicon']
-#scriptBarbara = config['cLexicon']['scriptBarbara']
-#exeG2P = config['cLexicon']['exeG2P']
-
-#configHVite = config['pyHTK']['configHVite']
-#filePhoneList = config['pyHTK']['filePhoneList']
-#AcousticModel = config['pyHTK']['AcousticModel']
-
+# repos
repo_dir = r'C:\Users\Aki\source\repos'
ipa_xsampa_converter_dir = os.path.join(repo_dir, 'ipa-xsama-converter')
forced_alignment_module_dir = os.path.join(repo_dir, 'forced_alignment')
accent_classification_dir = os.path.join(repo_dir, 'accent_classification', 'accent_classification')
toolbox_dir = os.path.join(repo_dir, 'toolbox')
-#htk_config_dir = r'c:\Users\A.Kunikoshi\source\repos\forced_alignment\forced_alignment\data\htk\preset_models\aki_dutch_2017'
-#config_hvite = os.path.join(htk_config_dir, 'config.HVite')
-#acoustic_model = os.path.join(htk_config_dir, 'hmmdefs.compo')
-#acoustic_model = r'c:\cygwin64\home\A.Kunikoshi\acoustic_model\model\barbara\hmm128-2\hmmdefs.compo'
-phonelist_txt = os.path.join(htk_dir, 'config', 'phonelist.txt')
-
WSL_dir = r'C:\OneDrive\WSL'
-#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
-fame_dir = r'c:\OneDrive\Research\rug\_data\FAME'
+novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi')
+#novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi'
-fame_s5_dir = os.path.join(fame_dir, 's5')
-fame_corpus_dir = os.path.join(fame_dir, 'corpus')
-
-experiments_dir = r'c:\OneDrive\Research\rug\experiments'
+# working directories
+rug_dir = r'c:\OneDrive\Research\rug'
+experiments_dir = os.path.join(rug_dir, 'experiments')
+htk_dir = os.path.join(experiments_dir, 'acoustic_model', 'fame', 'htk')
stimmen_dir = os.path.join(experiments_dir, 'stimmen')
-stimmen_data_dir = os.path.join(stimmen_dir, 'data')
+
+# data
+fame_dir = os.path.join(rug_dir, '_data', 'FAME')
+#fame_dir = os.path.join(WSL_dir, 'kaldi-trunk', 'egs', 'fame')
# 44.1 kHz
#stimmen_wav_dir = os.path.join(stimmen_dir, 'wav')
# 16 kHz
stimmen_wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen'
-stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi')
-
-stimmen_transcription_xlsx = os.path.join(stimmen_data_dir, 'Frisian Variants Picture Task Stimmen.xlsx')
+stimmen_transcription_xlsx = os.path.join(stimmen_dir, 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
+novo70_phoneset = os.path.join(novo_api_dir, 'asr', 'phoneset', 'nl', 'novo70.phoneset')
+
+
+
+#phonelist_txt = os.path.join(htk_dir, 'config', 'phonelist.txt')
+#fame_s5_dir = os.path.join(fame_dir, 's5')
+#fame_corpus_dir = os.path.join(fame_dir, 'corpus')
+#stimmen_result_novoapi_dir = os.path.join(stimmen_dir, 'result', 'novoapi')
+# novoapi_functions
+
-novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi')
-#novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi'
-novo70_phoneset = os.path.join(novo_api_dir, 'asr', 'phoneset', 'nl', 'novo70.phoneset')
\ No newline at end of file
diff --git a/acoustic_model/fame_functions.py b/acoustic_model/fame_functions.py
index cb87620..295ed79 100644
--- a/acoustic_model/fame_functions.py
+++ b/acoustic_model/fame_functions.py
@@ -341,3 +341,14 @@ def fix_single_quote(lexicon_file):
def word2htk(word):
return ''.join([fame_asr.translation_key_word2htk.get(i, i) for i in word])
+
+
+def ipa2htk(ipa):
+ curr_dir = os.path.dirname(os.path.abspath(__file__))
+ translation_key_ipa2asr = np.load(os.path.join(curr_dir, 'phoneset', 'fame_ipa2asr.npy')).item(0)
+
+ ipa_splitted = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones)
+ ipa_splitted = fame_ipa.phone_reduction(ipa_splitted)
+ asr_splitted = convert_phoneset.convert_phoneset(ipa_splitted, translation_key_ipa2asr)
+ htk_splitted = convert_phoneset.convert_phoneset(asr_splitted, fame_asr.translation_key_asr2htk)
+ return ''.join(htk_splitted)
\ No newline at end of file
diff --git a/acoustic_model/fame_hmm.py b/acoustic_model/fame_hmm.py
index 19d5f56..b3d1070 100644
--- a/acoustic_model/fame_hmm.py
+++ b/acoustic_model/fame_hmm.py
@@ -27,7 +27,7 @@ extract_features = 0
flat_start = 0
train_model_without_sp = 0
add_sp = 0
-train_model_with_sp = 0
+train_model_with_sp = 1
@@ -321,7 +321,8 @@ if add_sp:
## ======================= train model with short pause =======================
if train_model_with_sp:
print('==== train model with sp ====')
- for niter in range(niter_max+1, niter_max*2+1):
+ #for niter in range(niter_max+1, niter_max*2+1):
+ for niter in range(20, 50):
timer_start = time.time()
hmm_n = 'iter' + str(niter)
hmm_n_pre = 'iter' + str(niter-1)
diff --git a/acoustic_model/fame_test.py b/acoustic_model/fame_test.py
index c7b2e59..c1a432e 100644
--- a/acoustic_model/fame_test.py
+++ b/acoustic_model/fame_test.py
@@ -69,6 +69,10 @@ else:
translation_key_ipa2asr['ə:'] = 'ə'
translation_key_ipa2asr['r.'] = 'r'
translation_key_ipa2asr['r:'] = 'r'
+# added for stimmen.
+translation_key_ipa2asr['ɪ:'] = 'ɪ:'
+translation_key_ipa2asr['y:'] = 'y'
+
np.save(os.path.join('phoneset', 'fame_ipa2asr.npy'), translation_key_ipa2asr)
diff --git a/acoustic_model/htk_vs_kaldi.py b/acoustic_model/htk_vs_kaldi.py
index ca7f6af..c1e5c97 100644
--- a/acoustic_model/htk_vs_kaldi.py
+++ b/acoustic_model/htk_vs_kaldi.py
@@ -1,84 +1,145 @@
import os
os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
-
import sys
-import csv
-import subprocess
-from collections import Counter
-import re
-import numpy as np
+#import csv
+#import subprocess
+#from collections import Counter
+#import re
+import shutil
+import glob
+
+#import numpy as np
import pandas as pd
-import matplotlib.pyplot as plt
-from sklearn.metrics import confusion_matrix
+#import matplotlib.pyplot as plt
+#from sklearn.metrics import confusion_matrix
-import acoustic_model_functions as am_func
-import convert_xsampa2ipa
+#import acoustic_model_functions as am_func
+#import convert_xsampa2ipa
import defaultfiles as default
-from forced_alignment import pyhtk
+#from forced_alignment import pyhtk
+#sys.path.append(default.forced_alignment_module_dir)
+#from forced_alignment import convert_phone_set
+#import acoustic_model_functions as am_func
+import convert_xsampa2ipa
+import stimmen_functions
+import fame_functions
+import convert_phoneset
+from phoneset import fame_ipa, fame_asr
+sys.path.append(default.toolbox_dir)
+import file_handling as fh
+from htk import pyhtk
## ======================= user define =======================
-excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
-data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data')
+#excel_file = os.path.join(default.experiments_dir, 'stimmen', 'data', 'Frisian Variants Picture Task Stimmen.xlsx')
+#data_dir = os.path.join(default.experiments_dir, 'stimmen', 'data')
-wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k
+#wav_dir = r'c:\OneDrive\WSL\kaldi-trunk\egs\fame\s5\corpus\stimmen' # 16k
-acoustic_model_dir = os.path.join(default.experiments_dir, 'friesian', 'acoustic_model', 'model')
-htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short')
-fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k')
-result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result')
+#acoustic_model_dir = os.path.join(default.experiments_dir, 'friesian', 'acoustic_model', 'model')
+#htk_dict_dir = os.path.join(default.experiments_dir, 'stimmen', 'dic_short')
+#fa_dir = os.path.join(default.experiments_dir, 'stimmen', 'FA_44k')
+#result_dir = os.path.join(default.experiments_dir, 'stimmen', 'result')
-kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme')
-kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict')
-lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt')
+#kaldi_data_dir = os.path.join(default.kaldi_dir, 'data', 'alignme')
+#kaldi_dict_dir = os.path.join(default.kaldi_dir, 'data', 'local', 'dict')
+#lexicon_txt = os.path.join(kaldi_dict_dir, 'lexicon.txt')
#lex_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
#lex_asr_htk = os.path.join(default.fame_dir, 'lexicon', 'lex.asr_htk')
-
-# procedure
-make_htk_dict_files = 0
-do_forced_alignment_htk = 0
-eval_forced_alignment_htk = 0
-make_kaldi_data_files = 0
-make_kaldi_lexicon_txt = 0
-load_forced_alignment_kaldi = 1
-eval_forced_alignment_kaldi = 1
+## procedure
+#make_htk_dict_files = 0
+#do_forced_alignment_htk = 0
+#eval_forced_alignment_htk = 0
+#make_kaldi_data_files = 0
+#make_kaldi_lexicon_txt = 0
+#load_forced_alignment_kaldi = 1
+#eval_forced_alignment_kaldi = 1
-## ======================= add paths =======================
-sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
-from forced_alignment import convert_phone_set
-from forced_alignment import pyhtk
+### ======================= add paths =======================
+#sys.path.append(os.path.join(default.repo_dir, 'forced_alignment'))
+#from forced_alignment import convert_phone_set
+#from forced_alignment import pyhtk
-sys.path.append(os.path.join(default.repo_dir, 'toolbox'))
-from evaluation import plot_confusion_matrix
+#sys.path.append(os.path.join(default.repo_dir, 'toolbox'))
+#from evaluation import plot_confusion_matrix
+
+config_dir = os.path.join(default.htk_dir, 'config')
+model_dir = os.path.join(default.htk_dir, 'model')
+lattice_file = os.path.join(config_dir, 'stimmen.ltc')
+#pyhtk.create_word_lattice_file(
+# os.path.join(config_dir, 'stimmen.net'),
+# lattice_file)
+hvite_scp = os.path.join(default.htk_dir, 'tmp', 'stimmen_test.scp')
+
+## ======================= make test data ======================
+# copy wav files which are in the stimmen data.
+stimmen_test_dir = r'c:\OneDrive\Research\rug\_data\stimmen_test'
+fh.make_filelist(stimmen_test_dir, hvite_scp, file_type='wav')
+
+df = stimmen_functions.load_transcriptions()
+word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)]
+word_list = sorted(word_list)
+
+#for index, row in df.iterrows():
+# filename = row['filename']
+# if isinstance(filename, str):
+# wav_file = os.path.join(default.stimmen_wav_dir, filename)
+# if os.path.exists(wav_file):
+# shutil.copy(wav_file, os.path.join(stimmen_test_dir, filename))
+# pyhtk.create_label_file(
+# row['word'],
+# os.path.join(stimmen_test_dir, filename.replace('.wav', '.lab')))
-## ======================= convert phones ======================
-mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
+# after manually removing files which do not contain clear sound,
+# update df as df_test.
+#wav_file_list = glob.glob(os.path.join(stimmen_test_dir, '*.wav'))
+#df_test = pd.DataFrame(index=[], columns=list(df.keys()))
+#for wav_file in wav_file_list:
+# filename = os.path.basename(wav_file)
+# df_ = df[df['filename'].str.match(filename)]
+# df_test = pd.concat([df_test, df_])
-xls = pd.ExcelFile(excel_file)
-
-## check conversion
-#df = pd.read_excel(xls, 'frequency')
-#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
-# #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_)
-# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
-# if not ipa_converted == ipa:
-# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
+#output = pyhtk.recognition(
+# os.path.join(default.htk_dir, 'config', 'config.rec',
+# lattice_file,
+# os.path.join(model_dir, 'hmm1', 'iter13'),
+# dictionary_file,
+# os.path.join(config_dir, 'phonelist.txt'),
+# hvite_scp)
-## check phones included in FAME!
-# the phones used in the lexicon.
-#phonelist = am_func.get_phonelist(lex_asr)
+## check phones included in stimmen but not in FAME!
+splitted_ipas = [' '.join(
+ convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones))
+ for ipa in df['ipa']]
+stimmen_phones = set(' '.join(splitted_ipas))
+stimmen_phones = list(stimmen_phones)
+#stimmen_phones = list(set(fame_asr.phone_reduction(list(stimmen_phones))))
+#fame_phones = fame_asr.phoneset_short
+fame_phones = fame_ipa.phoneset
+stimmen_phones.sort()
+fame_phones.sort()
+print('phones which are used in stimmen transcription but not in FAME corpus are:\n{}'.format(
+ set(stimmen_phones) - set(fame_phones)
+ ))
+for ipa in df['ipa']:
+ ipa_splitted = convert_phoneset.split_word(ipa, fame_ipa.multi_character_phones)
+ if ':' in ipa_splitted:
+ print(ipa_splitted)
+
+htk = [fame_functions.ipa2htk(ipa) for ipa in df['ipa']]
+
+ipa = 'e:χ'
+fame_functions.ipa2htk(ipa)
-# the lines which include a specific phone.
-#lines = am_func.find_phone(lex_asr, 'x')
# Filename, Word, Self Xsampa
diff --git a/acoustic_model/novoapi_forced_alignment.py b/acoustic_model/novoapi_forced_alignment.py
index 932d7c1..3fd32b9 100644
--- a/acoustic_model/novoapi_forced_alignment.py
+++ b/acoustic_model/novoapi_forced_alignment.py
@@ -52,7 +52,7 @@ p = argparse.ArgumentParser()
#p.add_argument("--user", default=None)
#p.add_argument("--password", default=None)
p.add_argument("--user", default='martijn.wieling')
-p.add_argument("--password", default='fa0Thaic')
+p.add_argument("--password", default='xxxxxx')
args = p.parse_args()
#wav_file = 'c:\\OneDrive\\WSL\\test\\onetwothree.wav'
diff --git a/acoustic_model/novoapi_functions.py b/acoustic_model/novoapi_functions.py
index 0c72b45..3cd502e 100644
--- a/acoustic_model/novoapi_functions.py
+++ b/acoustic_model/novoapi_functions.py
@@ -173,7 +173,7 @@ def forced_alignment(wav_file, word, pronunciation_ipa):
# username / password cannot be passed as artuments...
p = argparse.ArgumentParser()
p.add_argument("--user", default='martijn.wieling')
- p.add_argument("--password", default='fa0Thaic')
+ p.add_argument("--password", default='xxxxxx')
args = p.parse_args()
rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)
diff --git a/acoustic_model/phoneset/fame_asr.py b/acoustic_model/phoneset/fame_asr.py
index 22e9d65..b11359b 100644
--- a/acoustic_model/phoneset/fame_asr.py
+++ b/acoustic_model/phoneset/fame_asr.py
@@ -73,12 +73,14 @@ reduction_key = {
# already removed beforehand in phoneset. Just to be sure.
phones_to_be_removed = ['ú', 's:', 'ɔ̈:']
-phoneset_short = [reduction_key.get(i, i) for i in phoneset
+def phone_reduction(phones):
+ return [reduction_key.get(i, i) for i in phones
if not i in phones_to_be_removed]
-phoneset_short = list(set(phoneset_short))
+phoneset_short = list(set(phone_reduction(phoneset)))
phoneset_short.sort()
+
## translation_key to htk format (ascii).
# phones which gives UnicodeEncodeError when phone.encode("ascii")
# are replaced with other characters.
diff --git a/acoustic_model/phoneset/fame_ipa.py b/acoustic_model/phoneset/fame_ipa.py
index 4d44f0a..8859b9f 100644
--- a/acoustic_model/phoneset/fame_ipa.py
+++ b/acoustic_model/phoneset/fame_ipa.py
@@ -5,6 +5,7 @@ phoneset = [
'i̯',
'i̯ⁿ',
'y',
+ 'y:', # not included in lex.ipa, but in stimmen.
'i',
'i.',
'iⁿ',
@@ -13,7 +14,7 @@ phoneset = [
'ɪ',
'ɪⁿ',
'ɪ.',
- #'ɪ:', # not included in lex.ipa
+ 'ɪ:', # not included in lex.ipa, but in stimmen.
'ɪ:ⁿ',
'e',
'e:',
@@ -100,7 +101,37 @@ phoneset = [
'l'
]
+## reduce the number of phones.
+# the phones which are used in stimmen transcription but not in FAME corpus.
+# replacements are based on the advice from Jelske Dijkstra on 2018/06/21.
+stimmen_replacement = {
+ 'æ': 'ɛ',
+ 'ø': 'ö', # or 'ö:'
+ 'ø:': 'ö:', # Aki added.
+ 'œ': 'ɔ̈', # or 'ɔ̈:'
+ 'œ:': 'ɔ̈:', # Aki added.
+ 'ɐ': 'a', # or 'a:'
+ 'ɐ:': 'a:', # Aki added.
+ 'ɑ': 'a', # or 'a:'
+ 'ɑ:': 'a:', # Aki added
+ 'ɒ': 'ɔ', # or 'ɔ:'
+ 'ɒ:': 'ɔ:', # Aki added.
+ 'ɾ': 'r',
+ 'ʁ': 'r',
+ 'ʊ': 'u',
+ 'χ': 'x',
+ # aki guessed.
+ 'ʀ': 'r',
+ 'ɹ': 'r',
+ 'w': 'ö'
+ }
+phoneset.extend(list(stimmen_replacement.keys()))
+
+def phone_reduction(phones):
+ return [stimmen_replacement.get(i, i) for i in phones]
+
+
## the list of multi character phones.
# for example, the length of 'i̯ⁿ' is 3, but in the codes it is treated as one letter.
multi_character_phones = [i for i in phoneset if len(i) > 1]
diff --git a/acoustic_model/phoneset/fame_ipa2asr.npy b/acoustic_model/phoneset/fame_ipa2asr.npy
index b8852ba..687111d 100644
Binary files a/acoustic_model/phoneset/fame_ipa2asr.npy and b/acoustic_model/phoneset/fame_ipa2asr.npy differ
diff --git a/acoustic_model/stimmen_functions.py b/acoustic_model/stimmen_functions.py
new file mode 100644
index 0000000..9d28093
--- /dev/null
+++ b/acoustic_model/stimmen_functions.py
@@ -0,0 +1,38 @@
+import os
+os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
+
+import pandas as pd
+
+import convert_xsampa2ipa
+import defaultfiles as default
+
+
+def load_transcriptions():
+ stimmen_transcription = pd.ExcelFile(default.stimmen_transcription_xlsx)
+ df = pd.read_excel(stimmen_transcription, 'original')
+
+ # mapping from xsampa to ipa
+ mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
+ #for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
+ # ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
+ # if not ipa_converted == ipa:
+ # print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))
+
+ ipas = []
+ for xsampa in df['Self Xsampa']:
+ if not isinstance(xsampa, float): # 'NaN'
+ # typo?
+ xsampa = xsampa.replace('r2:z@rA:\\t', 'r2:z@rA:t').replace(';', ':')
+
+ ipa = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
+ ipa = ipa.replace('ː', ':').replace(' ', '')
+ ipas.append(ipa)
+ else:
+ ipas.append('')
+
+ df_ = pd.DataFrame({'filename': df['Filename'],
+ 'word': df['Word'],
+ 'xsampa': df['Self Xsampa'],
+ 'ipa': pd.Series(ipas)})
+ df_ = df_[~df_['ipa'].str.contains('/')]
+ return df_.dropna()