Compare commits

97486e55992c04542b30fce0d6dfb31ab692db1b..b444b70af94852eed3a9892d3457c24479e0a959

No commits in common. "97486e55992c04542b30fce0d6dfb31ab692db1b" and "b444b70af94852eed3a9892d3457c24479e0a959" have entirely different histories.

7 changed files with 188 additions and 371 deletions

Binary file not shown.


@@ -4,7 +4,7 @@
 <SchemaVersion>2.0</SchemaVersion>
 <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
 <ProjectHome>.</ProjectHome>
-<StartupFile>check_novoapi.py</StartupFile>
+<StartupFile>fame_hmm.py</StartupFile>
 <SearchPath>
 </SearchPath>
 <WorkingDirectory>.</WorkingDirectory>


@@ -29,47 +29,48 @@ forced_alignment_novo70 = True

 ## ===== load novo phoneset =====
-phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa = novoapi_functions.load_novo70_phoneset()
+phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa = novoapi_functions.load_phonset()

 ## ===== extract pronunciations written in novo70 only (not_in_novo70) =====
+# As per Nederlandse phoneset_aki.xlsx recieved from David
+# [ɔː] oh / ohr
+# [ɪː] ih / ihr
+# [iː] iy
+# [œː] uh
+# [ɛː] eh
+# [w] wv in IPA written as ʋ.
+david_suggestion = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'w']

 ## read pronunciation variants.
-#stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
-#df = pd.read_excel(stimmen_transcription_, 'frequency')
-#transcription_ipa = list(df['IPA'])
-stimmen_test_dir = r'c:\OneDrive\Research\rug\_data\stimmen_test'
-df = stimmen_functions.load_transcriptions_novo70(stimmen_test_dir)
-
-## transcription mistake?
-#transcription_ipa = [ipa.replace(';', 'ː') for ipa in transcription_ipa if not ipa=='pypɪl' and not pd.isnull(ipa)]
-#transcription_ipa = [ipa.replace('ˑ', '') for ipa in transcription_ipa] # only one case.
-
-#not_in_novo70 = []
-#all_in_novo70 = []
-#for ipa in transcription_ipa:
-#    ipa = ipa.replace(':', 'ː')
-#    ipa = convert_phone_set.split_ipa(ipa)
-#    # list of phones not in novo70 phoneset.
-#    not_in_novo70_ = [phone for phone in ipa
-#                      if not phone in phoneset_ipa and not phone in david_suggestion]
-#    not_in_novo70_ = [phone.replace('sp', '') for phone in not_in_novo70_]
-#    not_in_novo70_ = [phone.replace(':', '') for phone in not_in_novo70_]
-#    not_in_novo70_ = [phone.replace('ː', '') for phone in not_in_novo70_]
-#    if len(not_in_novo70_) == 0:
-#        all_in_novo70.append(''.join(ipa))
-#    #translation_key.get(phone, phone)
-#    not_in_novo70.extend(not_in_novo70_)
-#not_in_novo70_list = list(set(not_in_novo70))
+stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
+df = pd.read_excel(stimmen_transcription_, 'frequency')
+transcription_ipa = list(df['IPA'])
+
+# transcription mistake?
+transcription_ipa = [ipa.replace(';', 'ː') for ipa in transcription_ipa if not ipa=='pypɪl' and not pd.isnull(ipa)]
+transcription_ipa = [ipa.replace('ˑ', '') for ipa in transcription_ipa] # only one case.
+
+not_in_novo70 = []
+all_in_novo70 = []
+for ipa in transcription_ipa:
+    ipa = ipa.replace(':', 'ː')
+    ipa = convert_phone_set.split_ipa(ipa)
+    # list of phones not in novo70 phoneset.
+    not_in_novo70_ = [phone for phone in ipa
+                      if not phone in phoneset_ipa and not phone in david_suggestion]
+    not_in_novo70_ = [phone.replace('sp', '') for phone in not_in_novo70_]
+    not_in_novo70_ = [phone.replace(':', '') for phone in not_in_novo70_]
+    not_in_novo70_ = [phone.replace('ː', '') for phone in not_in_novo70_]
+    if len(not_in_novo70_) == 0:
+        all_in_novo70.append(''.join(ipa))
+    #translation_key.get(phone, phone)
+    not_in_novo70.extend(not_in_novo70_)
+not_in_novo70_list = list(set(not_in_novo70))

 ## check which phones used in stimmen but not in novo70
@@ -84,43 +85,41 @@ df = stimmen_functions.load_transcriptions_novo70(stimmen_test_dir)
 # [ʊ] 'ʊ'(1) --> can be ʏ (uh)??
 # [χ] --> can be x??

-#def search_phone_ipa(x, phone_list):
-#    x_in_item = []
-#    for ipa in phone_list:
-#        ipa_original = ipa
-#        ipa = ipa.replace(':', 'ː')
-#        ipa = convert_phone_set.split_ipa(ipa)
-#        if x in ipa and not x+':' in ipa:
-#            x_in_item.append(ipa_original)
-#    return x_in_item
+def search_phone_ipa(x, phone_list):
+    x_in_item = []
+    for ipa in phone_list:
+        ipa_original = ipa
+        ipa = ipa.replace(':', 'ː')
+        ipa = convert_phone_set.split_ipa(ipa)
+        if x in ipa and not x+':' in ipa:
+            x_in_item.append(ipa_original)
+    return x_in_item

 #search_phone_ipa('ø', transcription_ipa)


 ## ===== load all transcriptions (df) =====
-#df = stimmen_functions.load_transcriptions()
+df = stimmen_functions.load_transcriptions()
 word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)]
 word_list = sorted(word_list)

 ## check frequency of each pronunciation variants
-#cols = ['word', 'ipa', 'frequency']
-#df_samples = pd.DataFrame(index=[], columns=cols)
-#for ipa in all_in_novo70:
-#    ipa = ipa.replace('ː', ':')
-#    samples = df[df['ipa'] == ipa]
-#    word = list(set(samples['word']))[0]
-#    samples_Series = pd.Series([word, ipa, len(samples)], index=df_samples.columns)
-#    df_samples = df_samples.append(samples_Series, ignore_index=True)
+cols = ['word', 'ipa', 'frequency']
+df_samples = pd.DataFrame(index=[], columns=cols)
+for ipa in all_in_novo70:
+    ipa = ipa.replace('ː', ':')
+    samples = df[df['ipa'] == ipa]
+    word = list(set(samples['word']))[0]
+    samples_Series = pd.Series([word, ipa, len(samples)], index=df_samples.columns)
+    df_samples = df_samples.append(samples_Series, ignore_index=True)

 # each word
-#df_per_word = pd.DataFrame(index=[], columns=df_samples.keys())
-#for word in word_list:
-word = word_list[2]
-df_ = df[df['word']==word]
-np.unique(list(df_['ipa']))
-#    df_samples_ = df_samples_[df_samples_['frequency']>2]
-#    df_per_word = df_per_word.append(df_samples_, ignore_index=True)
+df_per_word = pd.DataFrame(index=[], columns=df_samples.keys())
+for word in word_list:
+    df_samples_ = df_samples[df_samples['word']==word]
+    df_samples_ = df_samples_[df_samples_['frequency']>2]
+    df_per_word = df_per_word.append(df_samples_, ignore_index=True)

 #df_per_word.to_excel(os.path.join(default.stimmen_dir, 'pronunciation_variants_novo70.xlsx'), encoding="utf-8")
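Editorial note: the loop on the '+' side of this file boils down to one reusable check, namely "keep an IPA transcription only if every phone is covered by the novo70 set (or by David's suggested additions)". The following is a minimal sketch of that idea, not code from the repository; the arguments phoneset_ipa, david_suggestion and split_ipa are assumed to come from the surrounding script (e.g. convert_phone_set.split_ipa).

def covered_by_novo70(ipa, phoneset_ipa, david_suggestion, split_ipa):
    """Return True when every phone of an IPA transcription is in the novo70 set (editorial sketch)."""
    phones = split_ipa(ipa.replace(':', 'ː'))
    leftovers = [p for p in phones
                 if p not in phoneset_ipa and p not in david_suggestion]
    # discard markers that are not real phones ('sp', length marks) before deciding.
    leftovers = [p.replace('sp', '').replace(':', '').replace('ː', '') for p in leftovers]
    return len([p for p in leftovers if p != '']) == 0

# hypothetical usage, mirroring the loop above:
# all_in_novo70 = [ipa for ipa in transcription_ipa
#                  if covered_by_novo70(ipa, phoneset_ipa, david_suggestion, convert_phone_set.split_ipa)]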


@@ -14,20 +14,18 @@ def multi_character_tokenize(line, multi_character_tokens):
         line = line[1:]


-def split_word(word, phoneset):
+def split_word(word, multi_character_phones):
     """
     split a line by given phoneset.

     Args:
         word (str): a word written in given phoneset.
-        #multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_ipa.py.
-        phoneset (list): the list of phones.
+        multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_ipa.py.

     Returns:
         (word_seperated) (list): the word splitted in given phoneset.

     """
-    multi_character_phones = extract_multi_character_phones(phoneset)
     return [phone
             for phone in multi_character_tokenize(word.strip(), multi_character_phones)
             ]
@@ -45,14 +43,4 @@ def convert_phoneset(word_list, translation_key):


 def phone_reduction(phones, reduction_key):
     multi_character_tokenize(wo.strip(), multi_character_phones)
     return [reduction_key.get(i, i) for i in phones
             if not i in phones_to_be_removed]
-
-
-def extract_multi_character_phones(phoneset):
-    """
-    Args:
-        phoneset (list):
-    """
-    multi_character_phones = [i for i in phoneset if len(i) > 1]
-    multi_character_phones.sort(key=len, reverse=True)
-    return multi_character_phones
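Editorial note: the two split_word signatures in this hunk differ only in who computes the multi-character phone list. The '-' variant took the full phoneset and derived that list itself via extract_multi_character_phones; the '+' variant expects the caller to pass the list in. A rough usage sketch under that assumption, with a toy phoneset (the module name convert_phoneset is the one the other scripts import):

import convert_phoneset

phoneset = ['a', 'a:', 'b', 's', 'ts', 'tsj']

# '+' signature: the caller pre-computes the multi-character phones, longest first.
multi_character_phones = sorted([p for p in phoneset if len(p) > 1], key=len, reverse=True)
print(convert_phoneset.split_word('tsjabs', multi_character_phones))   # ['tsj', 'a', 'b', 's']

# '-' signature: the removed variant did the same internally, so the equivalent call
# used to be convert_phoneset.split_word('tsjabs', phoneset).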


@@ -16,53 +16,50 @@ import defaultfiles as default
 sys.path.append(default.toolbox_dir)
 import file_handling as fh
 from htk import pyhtk
-#from scripts import run_command


 ## ======================= user define =======================
 # procedure
-combine_all = 1
 make_lexicon = 0
 make_label = 0 # it takes roughly 4800 sec on Surface pro 2.
 make_mlf = 0
 extract_features = 0
-flat_start = 1
-train_monophone_without_sp = 1
-add_sp = 1
-train_monophone_with_re_aligned_mlf = 1
-increase_mixture = 1
+flat_start = 0
+train_monophone_without_sp = 0
+add_sp = 0
+train_monophone_with_re_aligned_mlf = 0
 train_triphone = 0
-train_triphone_tied = 0
+train_triphone_tied = 1


 # pre-defined values.
 dataset_list = ['devel', 'test', 'train']
-feature_size = 30
+feature_size = 39
 improvement_threshold = 0.3
+hmmdefs_name = 'hmmdefs'
+proto_name = 'proto'

 lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
 lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')

 config_dir = os.path.join(default.htk_dir, 'config')
 phonelist_full_txt = os.path.join(config_dir, 'phonelist_full.txt')
 tree_hed = os.path.join(config_dir, 'tree.hed')
-quests_hed = os.path.join(config_dir, 'quests.hed')
+quest_hed = os.path.join(config_dir, 'quests.hed')

 model_dir = os.path.join(default.htk_dir, 'model')
 model_mono0_dir = os.path.join(model_dir, 'mono0')
 model_mono1_dir = os.path.join(model_dir, 'mono1')
 model_mono1sp_dir = os.path.join(model_dir, 'mono1sp')
 model_mono1sp2_dir = os.path.join(model_dir, 'mono1sp2')
 model_tri1_dir = os.path.join(model_dir, 'tri1')
-model_tri1tied_dir = os.path.join(model_dir, 'tri1tied')

 # directories / files to be made.
 lexicon_dir = os.path.join(default.htk_dir, 'lexicon')
 lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr')
 lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov')
 lexicon_htk = os.path.join(lexicon_dir, 'lex.htk')
-lexicon_htk_with_sp = os.path.join(lexicon_dir, 'lex_with_sp.htk')
 lexicon_htk_triphone = os.path.join(lexicon_dir, 'lex_triphone.htk')

 feature_dir = os.path.join(default.htk_dir, 'mfc')
@@ -74,20 +71,10 @@ fh.make_new_directory(label_dir, existing_dir='leave')

 ## training
-if combine_all:
-    hcompv_scp_train = os.path.join(tmp_dir, 'all.scp')
-    mlf_file_train = os.path.join(label_dir, 'all_phone.mlf')
-    mlf_file_train_word = os.path.join(label_dir, 'all_word.mlf')
-    mlf_file_train_with_sp = os.path.join(label_dir, 'all_phone_with_sp.mlf')
-    mlf_file_train_aligned = os.path.join(label_dir, 'all_phone_aligned.mlf')
-    triphone_mlf = os.path.join(label_dir, 'all_triphone.mlf')
-else:
-    hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
-    mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
-    mlf_file_train_word = os.path.join(label_dir, 'train_word.mlf')
-    mlf_file_train_with_sp = os.path.join(label_dir, 'train_phone_with_sp.mlf')
-    mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
-    triphone_mlf = os.path.join(label_dir, 'train_triphone.mlf')
+hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
+mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
+mlf_file_train_with_sp = os.path.join(label_dir, 'train_phone_with_sp.mlf')
+mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
 hcompv_scp_train_updated = hcompv_scp_train.replace('.scp', '_updated.scp')

 ## testing
@@ -117,18 +104,19 @@ if make_lexicon:
     print('>>> fixing the lexicon...')
     fame_functions.fix_lexicon(lexicon_htk)

-    ## adding sp to the lexicon for HTK.
-    print('>>> adding sp to the lexicon...')
-    with open(lexicon_htk) as f:
-        lines = f.read().split('\n')
-    with open(lexicon_htk_with_sp, 'wb') as f:
-        f.write(bytes(' sp\n'.join(lines), 'ascii'))
+    ## add sp to the end of each line.
+    #print('>>> adding sp...')
+    #with open(lexicon_htk) as f:
+    #    lines = f.read().split('\n')
+    #lines = [line + ' sp' for line in lines]
+    #with open(lexicon_htk_with_sp, 'wb') as f:
+    #    f.write(bytes('\n'.join(lines), 'ascii'))

     print("elapsed time: {}".format(time.time() - timer_start))

 ## intialize the instance for HTK.
-chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk_with_sp, feature_size)
+chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk, feature_size)


 ## ======================= make label files =======================
@@ -164,7 +152,7 @@ if make_label:
            shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic'))

            label_file = os.path.join(label_dir_, filename + '.lab')
-           chtk.make_label_file(sentence_htk, label_file)
+           chtk.create_label_file(sentence_htk, label_file)
        else:
            os.remove(dictionary_file)
@@ -186,6 +174,7 @@ if make_mlf:
        os.remove(empty_dic_file)

    for dataset in dataset_list:
+       #wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
        feature_dir_ = os.path.join(feature_dir, dataset)
        label_dir_ = os.path.join(label_dir, dataset)
        mlf_word = os.path.join(label_dir, dataset + '_word.mlf')
@@ -194,11 +183,11 @@ if make_mlf:
        print(">>> generating a word level mlf file for {}...".format(dataset))
        chtk.label2mlf(label_dir_, mlf_word)

        print(">>> generating a phone level mlf file for {}...".format(dataset))
        chtk.mlf_word2phone(mlf_phone, mlf_word, with_sp=False)
        chtk.mlf_word2phone(mlf_phone_with_sp, mlf_word, with_sp=True)

    print("elapsed time: {}".format(time.time() - timer_start))
@@ -208,7 +197,7 @@ if extract_features:
        timer_start = time.time()

        print('==== extract features on dataset {} ===='.format(dataset))
        wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
        label_dir_ = os.path.join(label_dir, dataset)
        feature_dir_ = os.path.join(feature_dir, dataset)
        fh.make_new_directory(feature_dir_, existing_dir='delete')
@@ -228,8 +217,8 @@ if extract_features:
                        + os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc'))
                        for lab_file in lab_list]

-       #if os.path.exists(empty_mfc_file):
-       #    os.remove(empty_mfc_file)
+       if os.path.exists(empty_mfc_file):
+           os.remove(empty_mfc_file)
        with open(hcopy_scp.name, 'wb') as f:
            f.write(bytes('\n'.join(feature_list), 'ascii'))
@@ -246,64 +235,9 @@ if extract_features:
    with open(hcompv_scp, 'wb') as f:
        f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))

-   print(">>> extracting features on stimmen...")
-   chtk.wav2mfc(os.path.join(htk_stimmen_dir, 'hcopy.scp'))
-
    print("elapsed time: {}".format(time.time() - timer_start))

-
-## ======================= flat start monophones =======================
-if combine_all:
-    # script files.
-    fh.concatenate(
-        os.path.join(tmp_dir, 'devel.scp'),
-        os.path.join(tmp_dir, 'test.scp'),
-        hcompv_scp_train
-        )
-    fh.concatenate(
-        hcompv_scp_train,
-        os.path.join(tmp_dir, 'train.scp'),
-        hcompv_scp_train
-        )
-
-    # phone level mlfs.
-    fh.concatenate(
-        os.path.join(label_dir, 'devel_phone.mlf'),
-        os.path.join(label_dir, 'test_phone.mlf'),
-        mlf_file_train
-        )
-    fh.concatenate(
-        mlf_file_train,
-        os.path.join(label_dir, 'train_phone.mlf'),
-        mlf_file_train
-        )
-
-    # phone level mlfs with sp.
-    fh.concatenate(
-        os.path.join(label_dir, 'devel_phone_with_sp.mlf'),
-        os.path.join(label_dir, 'test_phone_with_sp.mlf'),
-        mlf_file_train_with_sp
-        )
-    fh.concatenate(
-        mlf_file_train_with_sp,
-        os.path.join(label_dir, 'train_phone_with_sp.mlf'),
-        mlf_file_train_with_sp
-        )
-
-    # word level mlfs.
-    fh.concatenate(
-        os.path.join(label_dir, 'devel_word.mlf'),
-        os.path.join(label_dir, 'test_word.mlf'),
-        mlf_file_train_word
-        )
-    fh.concatenate(
-        mlf_file_train_word,
-        os.path.join(label_dir, 'train_word.mlf'),
-        mlf_file_train_word
-        )
-

 ## ======================= flat start monophones =======================
 if flat_start:
     timer_start = time.time()
@@ -312,14 +246,17 @@ if flat_start:
     chtk.flat_start(hcompv_scp_train, model_mono0_dir)

-    # make macros.
+    # create macros.
     vFloors = os.path.join(model_mono0_dir, 'vFloors')
     if os.path.exists(vFloors):
-        chtk.make_macros(vFloors)
+        chtk.create_macros(vFloors)

     # allocate mean & variance to all phones in the phone list
     print('>>> allocating mean & variance to all phones in the phone list...')
-    chtk.make_hmmdefs(model_mono0_dir)
+    chtk.create_hmmdefs(
+        os.path.join(model_mono0_dir, proto_name),
+        os.path.join(model_mono0_dir, 'hmmdefs')
+        )

     print("elapsed time: {}".format(time.time() - timer_start))
@@ -383,9 +320,8 @@ if train_monophone_with_re_aligned_mlf:
        os.path.join(modeln_dir, 'macros'),
        os.path.join(modeln_dir, 'hmmdefs'),
        mlf_file_train_aligned,
-       mlf_file_train_word,
+       os.path.join(label_dir, 'train_word.mlf'),
        hcompv_scp_train)
-   chtk.fix_mlf(mlf_file_train_aligned)

    print('>>> updating the script file... ')
    chtk.update_script_file(
@@ -413,56 +349,25 @@ if train_monophone_with_re_aligned_mlf:
     print("elapsed time: {}".format(time.time() - timer_start))

-
-## ======================= increase mixture =======================
-if increase_mixture:
-    print('==== increase mixture ====')
-    timer_start = time.time()
-
-    for nmix in [2, 4, 8, 16]:
-        if nmix == 2:
-            modeln_dir_ = model_mono1sp2_dir
-        else:
-            modeln_dir_ = os.path.join(model_dir, 'mono'+str(nmix_))
-        modeln_dir = os.path.join(model_dir, 'mono'+str(nmix))
-
-        print('mixture: {}'.format(nmix))
-        fh.make_new_directory(modeln_dir, existing_dir='delete')
-
-        niter = chtk.get_niter_max(modeln_dir_)
-        chtk.increase_mixture(
-            os.path.join(modeln_dir_, 'iter'+str(niter), 'hmmdefs'),
-            nmix,
-            os.path.join(modeln_dir, 'iter0'),
-            model_type='monophone_with_sp')
-        shutil.copy2(os.path.join(modeln_dir_, 'iter'+str(niter), 'macros'),
-                     os.path.join(modeln_dir, 'iter0', 'macros'))
-
-        #improvement_threshold = -10
-        niter = chtk.re_estimation_until_saturated(
-            modeln_dir,
-            os.path.join(modeln_dir_, 'iter0'),
-            improvement_threshold,
-            hcompv_scp_train_updated,
-            os.path.join(htk_stimmen_dir, 'mfc'),
-            'mfc',
-            os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
-            mlf_file=mlf_file_train_aligned,
-            lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
-            model_type='monophone_with_sp'
-            )
-
-        nmix_ = nmix
-
-    print("elapsed time: {}".format(time.time() - timer_start))
-
-
 ## ======================= train triphone =======================
-print('>>> making triphone list... ')
-chtk.make_triphonelist(
-    mlf_file_train_aligned,
-    triphone_mlf)
-
 if train_triphone:
-    print('==== train triphone model ====')
+    print('==== traina triphone model ====')
     timer_start = time.time()
+
+    triphonelist_txt = os.path.join(config_dir, 'triphonelist.txt')
+    triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf')
+
+    print('>>> making triphone list... ')
+    chtk.make_triphonelist(
+        triphonelist_txt,
+        triphone_mlf,
+        mlf_file_train_aligned)
+
+    print('>>> making triphone header... ')
+    chtk.make_tri_hed(
+        os.path.join(config_dir, 'mktri.hed')
+        )

     print('>>> init triphone model... ')
     niter = chtk.get_niter_max(model_mono1sp2_dir)
     fh.make_new_directory(os.path.join(model_tri1_dir, 'iter0'), existing_dir='leave')
@@ -472,8 +377,8 @@ if train_triphone:
         )

     print('>>> re-estimation... ')
-    ## I wanted to train until satulated:
-    #niter = chtk.re_estimation_until_saturated(
+    # I wanted to train until satulated:
+    # #niter = chtk.re_estimation_until_saturated(
     #    model_tri1_dir,
     #    os.path.join(model_tri1_dir, 'iter0'),
     #    improvement_threshold,
@@ -490,6 +395,7 @@ if train_triphone:
     # ERROR [+8231] GetHCIModel: Cannot find hmm [i:-]r[+???]
     # therefore only two times re-estimation is performed.
     output_dir = model_tri1_dir

     for niter in range(1, 4):
         hmm_n = 'iter' + str(niter)
         hmm_n_pre = 'iter' + str(niter-1)
@@ -508,59 +414,18 @@ if train_triphone:
     print("elapsed time: {}".format(time.time() - timer_start))

-## ======================= train tied-state triphones =======================
+## ======================= train triphone =======================
 if train_triphone_tied:
-    print('==== train tied-state triphones ====')
+    print('==== traina tied-state triphone ====')
     timer_start = time.time()

     print('>>> making lexicon for triphone... ')
-    chtk.make_lexicon_triphone(phonelist_full_txt, lexicon_htk_triphone)
-    chtk.combine_phonelists(phonelist_full_txt)
+    chtk.make_triphone_full(phonelist_full_txt, lexicon_htk_triphone)

-    print('>>> making a tree header... ')
-    fame_phonetics.make_quests_hed(quests_hed)
-    stats = os.path.join(r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk\model\tri1\iter3', 'stats')
-    chtk.make_tree_header(tree_hed, quests_hed, stats, config_dir)
+    print('>>> making headers... ')
+    chtk.make_tree_header(tree_hed)
+    fame_phonetics.make_quests_hed(quest_hed)

-    print('>>> init triphone model... ')
-    niter = chtk.get_niter_max(model_tri1_dir)
-    fh.make_new_directory(os.path.join(model_tri1tied_dir, 'iter0'), existing_dir='leave')
-    chtk.init_triphone(
-        os.path.join(model_tri1_dir, 'iter'+str(niter)),
-        os.path.join(model_tri1tied_dir, 'iter0'),
-        tied=True)
-
-    # I wanted to train until satulated:
-    #niter = chtk.re_estimation_until_saturated(
-    #    model_tri1tied_dir,
-    #    os.path.join(model_tri1tied_dir, 'iter0'),
-    #    improvement_threshold,
-    #    hcompv_scp_train_updated,
-    #    os.path.join(htk_stimmen_dir, 'mfc'),
-    #    'mfc',
-    #    os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
-    #    mlf_file=triphone_mlf,
-    #    lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
-    #    model_type='triphone'
-    #    )
-    #
-    # but because the data size is limited, some triphone cannot be trained and received the error:
-    # ERROR [+8231] GetHCIModel: Cannot find hmm [i:-]r[+???]
-    # therefore only 3 times re-estimation is performed.
-    output_dir = model_tri1tied_dir
-
-    for niter in range(1, 4):
-        hmm_n = 'iter' + str(niter)
-        hmm_n_pre = 'iter' + str(niter-1)
-        _modeln_dir = os.path.join(output_dir, hmm_n)
-        _modeln_dir_pre = os.path.join(output_dir, hmm_n_pre)
-
-        fh.make_new_directory(_modeln_dir, 'leave')
-        chtk.re_estimation(
-            os.path.join(_modeln_dir_pre, 'hmmdefs'),
-            _modeln_dir,
-            hcompv_scp_train_updated,
-            mlf_file=triphone_mlf,
-            macros=os.path.join(_modeln_dir_pre, 'macros'),
-            model_type='triphone')
-
     print("elapsed time: {}".format(time.time() - timer_start))


@@ -1,19 +1,20 @@
 ## this script should be used only by Aki Kunikoshi.

-import os
 import numpy as np
-import pandas as pd
 import argparse
 import json

 from novoapi.backend import session

+import os
+#os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
 import defaultfiles as default
-import convert_phoneset


-def load_novo70_phoneset():
+def load_phonset():
+    translation_key_ipa2novo70 = dict()
+    translation_key_novo702ipa = dict()
+
     #phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx)
     #df = pd.read_excel(phonelist_novo70_, 'list')
     ## *_simple includes columns which has only one phone in.
@@ -22,23 +23,21 @@ def load_novo70_phoneset():
     #    print('{0}:{1}'.format(ipa, novo70))
     #    translation_key[ipa] = novo70
     #phonelist_novo70 = np.unique(list(df['novo70_simple']))

-    novo70_phoneset = pd.read_csv(default.novo70_phoneset, delimiter='\t', header=None, encoding="utf-8")
-    novo70_phoneset.rename(columns={0: 'novo70', 1: 'ipa', 2: 'description'}, inplace=True)
-
-    #phoneset_ipa = []
-    #phoneset_novo70 = []
-    #with open(default.novo70_phoneset, "rt", encoding="utf-8") as fin:
-    #    lines = fin.read()
-    #    lines = lines.split('\n')
-    #    for line in lines:
-    #        words = line.split('\t')
-    #        if len(words) > 1:
-    #            novo70 = words[0]
-    #            ipa = words[1]
-    #            phoneset_ipa.append(ipa)
-    #            phoneset_novo70.append(novo70)
-    #            translation_key_ipa2novo70[ipa] = novo70
-    #            translation_key_novo702ipa[novo70] = ipa
+    phoneset_ipa = []
+    phoneset_novo70 = []
+    with open(default.novo70_phoneset, "rt", encoding="utf-8") as fin:
+        lines = fin.read()
+        lines = lines.split('\n')
+        for line in lines:
+            words = line.split('\t')
+            if len(words) > 1:
+                novo70 = words[0]
+                ipa = words[1]
+                phoneset_ipa.append(ipa)
+                phoneset_novo70.append(novo70)
+                translation_key_ipa2novo70[ipa] = novo70
+                translation_key_novo702ipa[novo70] = ipa

     # As per Nederlandse phoneset_aki.xlsx recieved from David
     # [ɔː] oh / ohr # from ipa->novo70, only oh is used.
@@ -48,26 +47,15 @@ def load_novo70_phoneset():
     # [ɛː] eh
     # [w] wv in IPA written as ʋ.
     extra_ipa = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'ʋ']
     extra_novo70 = ['oh', 'ih', 'iy', 'uh', 'eh', 'wv']
-
-    phoneset_ipa = list(novo70_phoneset['ipa'])
-    phoneset_ipa.extend(extra_ipa)
-    phoneset_ipa = [i.replace('ː', ':') for i in phoneset_ipa]
-
-    phoneset_novo70 = list(novo70_phoneset['novo70'])
-    phoneset_novo70.extend(extra_novo70)
-    phoneset_novo70 = [i.replace('ː', ':') for i in phoneset_novo70]
-
-    translation_key_ipa2novo70 = dict()
-    translation_key_novo702ipa = dict()
-    for ipa, novo70 in zip(phoneset_ipa, phoneset_novo70):
-        #phoneset_ipa.append(ipa)
-        #phoneset_novo70.append(novo70)
+    for ipa, novo70 in zip(extra_ipa, extra_novo70):
+        phoneset_ipa.append(ipa)
+        phoneset_novo70.append(novo70)
         translation_key_ipa2novo70[ipa] = novo70
         translation_key_novo702ipa[novo70] = ipa
-    translation_key_novo702ipa['ohr'] = 'ɔ:'
-    translation_key_novo702ipa['ihr'] = 'ɪ:'
+    translation_key_novo702ipa['ohr'] = 'ɔː'
+    translation_key_novo702ipa['ihr'] = 'ɪː'

     phoneset_ipa = np.unique(phoneset_ipa)
     phoneset_novo70 = np.unique(phoneset_novo70)
@@ -75,6 +63,25 @@ def load_novo70_phoneset():
     return phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa


+def multi_character_tokenize(line, multi_character_tokens):
+    """
+    Tries to match one of the tokens in multi_character_tokens at each position of line,
+    starting at position 0,
+    if so tokenizes and eats that token. Otherwise tokenizes a single character.
+
+    Copied from forced_alignment.convert_phone_set.py
+    """
+    while line != '':
+        for token in multi_character_tokens:
+            if line.startswith(token) and len(token) > 0:
+                yield token
+                line = line[len(token):]
+                break
+        else:
+            yield line[:1]
+            line = line[1:]
+
+
 def split_ipa(line):
     """
     Split a line by IPA phones.
@@ -82,16 +89,13 @@ def split_ipa(line):
     :param string line: one line written in IPA.
     :return string lineSeperated: the line splitted in IPA phone.
     """
-    phoneset_ipa, _, _, _ = load_novo70_phoneset()
-    #multi_character_phones = [i for i in phoneset_ipa if len(i) > 1]
-    #multi_character_phones.sort(key=len, reverse=True)
-    #multi_character_phones = [
-    #    # IPAs in CGN.
-    #    u'ʌu', u'ɛi', u'œy', u'aː', u'eː', u'iː', u'oː', u'øː', u'ɛː', u'œː', u'ɔː', u'ɛ̃ː', u'ɑ̃ː', u'ɔ̃ː', u'œ̃', u'ɪː'
-    #    ]
-    #return [phone for phone in multi_character_tokenize(line.strip(), multi_character_phones)]
-
-    return convert_phoneset.split_word(line, phoneset_ipa)
+    multi_character_phones = [
+        # IPAs in CGN.
+        u'ʌu', u'ɛi', u'œy', u'aː', u'eː', u'iː', u'oː', u'øː', u'ɛː', u'œː', u'ɔː', u'ɛ̃ː', u'ɑ̃ː', u'ɔ̃ː', u'œ̃', u'ɪː'
+        ]
+    return [phone for phone in multi_character_tokenize(line.strip(), multi_character_phones)]


 def split_novo70(line):
@@ -100,33 +104,30 @@ def split_novo70(line):
     :param string line: one line written in novo70.
     :return string lineSeperated: the line splitted by novo70 phones.
     """
-    _, phoneset_novo70, _, _ = load_novo70_phoneset()
-    #multi_character_phones = [p for p in phoneset_novo70 if len(p) > 1]
-    #multi_character_phones = sorted(multi_character_phones, key=len, reverse=True)
-    multi_character_phones = convert_phoneset.extract_multi_character_phones(phoneset_novo70)
+    _, phoneset_novo70, _, _ = load_phonset()
+    multi_character_phones = [p for p in phoneset_novo70 if len(p) > 1]
+    multi_character_phones = sorted(multi_character_phones, key=len, reverse=True)

     return ['sp' if phone == ' ' else phone
             for phone in multi_character_tokenize(line.strip(), multi_character_phones)]


-def novo702ipa(line):
-    #pronunciation = []
-    _, _, _, translation_key = load_novo70_phoneset()
-    #for phone in split_novo70(tokens):
-    #    pronunciation.append(translation_key.get(phone, phone))
-    #return ' '.join(pronunciation)
-    return ' '.join(convert_phoneset.convert_phoneset(split_novo70(line), translation_key))
+def novo702ipa(tokens):
+    pronunciation = []
+    _, _, _, translation_key = load_phonset()
+    for phone in split_novo70(tokens):
+        pronunciation.append(translation_key.get(phone, phone))
+    return ' '.join(pronunciation)


 # numbering of novo70 should be checked.
-def ipa2novo70(line):
-    #pronunciation = []
-    _, _, translation_key, _ = load_novo70_phoneset()
-    #for phone in split_ipa(tokens):
-    #    pronunciation.append(translation_key.get(phone, phone))
-    #return ' '.join(pronunciation)
-    return ' '.join(convert_phoneset.convert_phoneset(split_ipa(line), translation_key))
+def ipa2novo70(tokens):
+    pronunciation = []
+    _, _, translation_key, _ = load_phonset()
+    for phone in split_ipa(tokens):
+        pronunciation.append(translation_key.get(phone, phone))
+    return ' '.join(pronunciation)


 def make_grammar(word, pronunciation_ipa):
     """
@@ -173,9 +174,6 @@ def forced_alignment(wav_file, word, pronunciation_ipa):
     p = argparse.ArgumentParser()
     p.add_argument("--user", default='martijn.wieling')
     p.add_argument("--password", default='xxxxxx')
     args = p.parse_args()

     rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)
@@ -196,25 +194,6 @@ def result2pronunciation(result, word):
     return pronunciation_ipa, pronunciation_novo70, llh


-def phones_not_in_novo70(ipa):
-    """ extract phones which is not in novo70 phoneset. """
-    phoneset_ipa, _, _, _ = load_novo70_phoneset()
-
-    # As per Nederlandse phoneset_aki.xlsx recieved from David
-    # [ɔː] oh / ohr
-    # [ɪː] ih / ihr
-    # [iː] iy
-    # [œː] uh
-    # [ɛː] eh
-    # [w] wv in IPA written as ʋ.
-    david_suggestion = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'w']
-    return [phone for phone in split_ipa(ipa)
-            if not phone in phoneset_ipa and not phone in david_suggestion]
-
-
 if __name__ == 'main':
     pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']
-    #grammar = make_grammar('reus', pronunciation_ipa)
-    phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa = load_novo70_phoneset()
+    grammar = make_grammar('reus', pronunciation_ipa)
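Editorial note: both the '-' and '+' versions of ipa2novo70/novo702ipa do the same thing at heart: split the input into phones, then map each phone through a translation dictionary, falling back to the phone itself when there is no entry. A minimal, self-contained sketch of that lookup follows; the dictionary below only contains a few entries taken from the extra_ipa/extra_novo70 pairs above, not the full novo70 table produced by the loader.

def translate_phones(phones, translation_key):
    """Map each phone through the translation table; unknown phones pass through unchanged (sketch)."""
    return ' '.join(translation_key.get(phone, phone) for phone in phones)

# illustrative entries taken from the extra_ipa / extra_novo70 lists above.
ipa2novo70_key = {'ɔː': 'oh', 'ɪː': 'ih', 'ʋ': 'wv'}
print(translate_phones(['ʋ', 'ɔː', 'l'], ipa2novo70_key))   # -> "wv oh l" ('l' falls through unchanged)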


@@ -7,7 +7,6 @@ import pandas as pd
 import convert_xsampa2ipa
 import defaultfiles as default
 import fame_functions
-import novoapi_functions


 def _load_transcriptions():
@@ -68,19 +67,6 @@ def load_transcriptions_clean(clean_wav_dir):
     return df_clean


-def load_transcriptions_novo70(clean_wav_dir):
-    """ extract rows of which ipa is written in novo70 phonset. """
-    df = load_transcriptions_clean(clean_wav_dir)
-    df_novo70 = pd.DataFrame(index=[], columns=list(df.keys()))
-    for index, row in df.iterrows():
-        not_in_novo70 = novoapi_functions.phones_not_in_novo70(row['ipa'])
-        if len(not_in_novo70) == 0:
-            df_novo70 = df_novo70.append(row, ignore_index=True)
-    return df_novo70
-
-
 def add_row_htk(df):
     """ df['htk'] is made from df['ipa'] and added. """
     htk = []
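Editorial note: the hunk above only shows the first line of add_row_htk's body, but its docstring describes the pattern: derive an 'htk' column from the 'ipa' column and attach it to the DataFrame. The sketch below illustrates that pattern only; it is not the repository's function, uses a different name and signature, and the lambda converter stands in for whatever IPA-to-HTK conversion the project actually uses.

import pandas as pd

def add_htk_column(df, ipa2htk):
    """Sketch: build df['htk'] from df['ipa'] with a supplied converter and return a new frame."""
    out = df.copy()
    out['htk'] = [ipa2htk(ipa) for ipa in out['ipa']]
    return out

# toy frame and converter, only to show the shape of the call.
df = pd.DataFrame({'word': ['reus'], 'ipa': ['rø:s']})
print(add_htk_column(df, ipa2htk=lambda ipa: ipa.replace(':', '')))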