Compare commits

...

2 Commits

Author     SHA1        Message                                                        Date
yemaozi88  97486e5599  dataset for experiments in check_novoapi is updated.           2019-04-22 02:03:50 +02:00
yemaozi88  2004399179  novoapi_functions.py is adjusted to use convert_phoneset.py.   2019-04-22 00:59:53 +02:00
7 changed files with 370 additions and 187 deletions

Binary file not shown.


@ -4,7 +4,7 @@
<SchemaVersion>2.0</SchemaVersion> <SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid> <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
<ProjectHome>.</ProjectHome> <ProjectHome>.</ProjectHome>
<StartupFile>fame_hmm.py</StartupFile> <StartupFile>check_novoapi.py</StartupFile>
<SearchPath> <SearchPath>
</SearchPath> </SearchPath>
<WorkingDirectory>.</WorkingDirectory> <WorkingDirectory>.</WorkingDirectory>


@ -29,48 +29,47 @@ forced_alignment_novo70 = True
## ===== load novo phoneset ===== ## ===== load novo phoneset =====
phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa = novoapi_functions.load_phonset() phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa = novoapi_functions.load_novo70_phoneset()
## ===== extract pronunciations written in novo70 only (not_in_novo70) ===== ## ===== extract pronunciations written in novo70 only (not_in_novo70) =====
# As per Nederlandse phoneset_aki.xlsx received from David
# [ɔː] oh / ohr
# [ɪː] ih / ihr
# [iː] iy
# [œː] uh
# [ɛː] eh
# [w] wv in IPA written as ʋ.
david_suggestion = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'w']
## read pronunciation variants. ## read pronunciation variants.
stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx) #stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
df = pd.read_excel(stimmen_transcription_, 'frequency') #df = pd.read_excel(stimmen_transcription_, 'frequency')
transcription_ipa = list(df['IPA']) #transcription_ipa = list(df['IPA'])
# transcription mistake?
transcription_ipa = [ipa.replace(';', 'ː') for ipa in transcription_ipa if not ipa=='pypɪl' and not pd.isnull(ipa)]
transcription_ipa = [ipa.replace('ˑ', '') for ipa in transcription_ipa] # only one case.
not_in_novo70 = [] stimmen_test_dir = r'c:\OneDrive\Research\rug\_data\stimmen_test'
all_in_novo70 = [] df = stimmen_functions.load_transcriptions_novo70(stimmen_test_dir)
for ipa in transcription_ipa:
ipa = ipa.replace(':', 'ː')
ipa = convert_phone_set.split_ipa(ipa)
# list of phones not in novo70 phoneset.
not_in_novo70_ = [phone for phone in ipa
if not phone in phoneset_ipa and not phone in david_suggestion]
not_in_novo70_ = [phone.replace('sp', '') for phone in not_in_novo70_]
not_in_novo70_ = [phone.replace(':', '') for phone in not_in_novo70_]
not_in_novo70_ = [phone.replace('ː', '') for phone in not_in_novo70_]
if len(not_in_novo70_) == 0:
all_in_novo70.append(''.join(ipa))
#translation_key.get(phone, phone) ## transcription mistake?
not_in_novo70.extend(not_in_novo70_) #transcription_ipa = [ipa.replace(';', 'ː') for ipa in transcription_ipa if not ipa=='pypɪl' and not pd.isnull(ipa)]
not_in_novo70_list = list(set(not_in_novo70)) #transcription_ipa = [ipa.replace('ˑ', '') for ipa in transcription_ipa] # only one case.
#not_in_novo70 = []
#all_in_novo70 = []
#for ipa in transcription_ipa:
# ipa = ipa.replace(':', 'ː')
# ipa = convert_phone_set.split_ipa(ipa)
# # list of phones not in novo70 phoneset.
# not_in_novo70_ = [phone for phone in ipa
# if not phone in phoneset_ipa and not phone in david_suggestion]
# not_in_novo70_ = [phone.replace('sp', '') for phone in not_in_novo70_]
# not_in_novo70_ = [phone.replace(':', '') for phone in not_in_novo70_]
# not_in_novo70_ = [phone.replace('ː', '') for phone in not_in_novo70_]
# if len(not_in_novo70_) == 0:
# all_in_novo70.append(''.join(ipa))
# #translation_key.get(phone, phone)
# not_in_novo70.extend(not_in_novo70_)
#not_in_novo70_list = list(set(not_in_novo70))
## check which phones used in stimmen but not in novo70 ## check which phones used in stimmen but not in novo70
@ -85,41 +84,43 @@ not_in_novo70_list = list(set(not_in_novo70))
# [ʊ] 'ʊ'(1) --> can be ʏ (uh)?? # [ʊ] 'ʊ'(1) --> can be ʏ (uh)??
# [χ] --> can be x?? # [χ] --> can be x??
def search_phone_ipa(x, phone_list): #def search_phone_ipa(x, phone_list):
x_in_item = [] # x_in_item = []
for ipa in phone_list: # for ipa in phone_list:
ipa_original = ipa # ipa_original = ipa
ipa = ipa.replace(':', 'ː') # ipa = ipa.replace(':', 'ː')
ipa = convert_phone_set.split_ipa(ipa) # ipa = convert_phone_set.split_ipa(ipa)
if x in ipa and not x+':' in ipa: # if x in ipa and not x+':' in ipa:
x_in_item.append(ipa_original) # x_in_item.append(ipa_original)
return x_in_item # return x_in_item
#search_phone_ipa('ø', transcription_ipa) #search_phone_ipa('ø', transcription_ipa)
## ===== load all transcriptions (df) ===== ## ===== load all transcriptions (df) =====
df = stimmen_functions.load_transcriptions() #df = stimmen_functions.load_transcriptions()
word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)] word_list = [i for i in list(set(df['word'])) if not pd.isnull(i)]
word_list = sorted(word_list) word_list = sorted(word_list)
## check frequency of each pronunciation variants ## check frequency of each pronunciation variants
cols = ['word', 'ipa', 'frequency'] #cols = ['word', 'ipa', 'frequency']
df_samples = pd.DataFrame(index=[], columns=cols) #df_samples = pd.DataFrame(index=[], columns=cols)
for ipa in all_in_novo70: #for ipa in all_in_novo70:
ipa = ipa.replace('ː', ':') # ipa = ipa.replace('ː', ':')
samples = df[df['ipa'] == ipa] # samples = df[df['ipa'] == ipa]
word = list(set(samples['word']))[0] # word = list(set(samples['word']))[0]
samples_Series = pd.Series([word, ipa, len(samples)], index=df_samples.columns) # samples_Series = pd.Series([word, ipa, len(samples)], index=df_samples.columns)
df_samples = df_samples.append(samples_Series, ignore_index=True) # df_samples = df_samples.append(samples_Series, ignore_index=True)
# each word # each word
df_per_word = pd.DataFrame(index=[], columns=df_samples.keys()) #df_per_word = pd.DataFrame(index=[], columns=df_samples.keys())
for word in word_list: #for word in word_list:
df_samples_ = df_samples[df_samples['word']==word] word = word_list[2]
df_samples_ = df_samples_[df_samples_['frequency']>2] df_ = df[df['word']==word]
df_per_word = df_per_word.append(df_samples_, ignore_index=True) np.unique(list(df_['ipa']))
#df_samples_ = df_samples_[df_samples_['frequency']>2]
#df_per_word = df_per_word.append(df_samples_, ignore_index=True)
#df_per_word.to_excel(os.path.join(default.stimmen_dir, 'pronunciation_variants_novo70.xlsx'), encoding="utf-8") #df_per_word.to_excel(os.path.join(default.stimmen_dir, 'pronunciation_variants_novo70.xlsx'), encoding="utf-8")
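
The commented-out block above counted, per word, how often each pronunciation variant occurs (df_samples with 'word', 'ipa' and 'frequency'). A minimal sketch of the same count with pandas, assuming df holds one row per response with 'word' and 'ipa' columns; the toy data below is hypothetical.

import pandas as pd

# hypothetical stand-in for the stimmen transcriptions.
df = pd.DataFrame({'word': ['reus', 'reus', 'reus', 'reus', 'man'],
                   'ipa':  ['rø:s', 'rø:s', 'rø:s', 'røs', 'mɑn']})

# one row per (word, ipa) pair with its frequency, like df_samples above.
df_samples = (df.groupby(['word', 'ipa']).size()
                .reset_index(name='frequency'))

# keep variants observed more than twice, like the df_per_word filter above.
df_per_word = df_samples[df_samples['frequency'] > 2]
print(df_per_word)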


@ -14,18 +14,20 @@ def multi_character_tokenize(line, multi_character_tokens):
line = line[1:] line = line[1:]
def split_word(word, multi_character_phones): def split_word(word, phoneset):
""" """
split a word into phones using the given phoneset. split a word into phones using the given phoneset.
Args: Args:
word (str): a word written in given phoneset. word (str): a word written in given phoneset.
multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_ipa.py. #multi_character_phones (list): the list of multicharacter phones which is considered as one phone. this can be obtained with phoneset definition such as fame_ipa.py.
phoneset (list): the list of phones.
Returns: Returns:
(word_seperated) (list): the word split into phones of the given phoneset. (word_seperated) (list): the word split into phones of the given phoneset.
""" """
multi_character_phones = extract_multi_character_phones(phoneset)
return [phone return [phone
for phone in multi_character_tokenize(word.strip(), multi_character_phones) for phone in multi_character_tokenize(word.strip(), multi_character_phones)
] ]
@ -44,3 +46,13 @@ def phone_reduction(phones, reduction_key):
multi_character_tokenize(wo.strip(), multi_character_phones) multi_character_tokenize(wo.strip(), multi_character_phones)
return [reduction_key.get(i, i) for i in phones return [reduction_key.get(i, i) for i in phones
if not i in phones_to_be_removed] if not i in phones_to_be_removed]
def extract_multi_character_phones(phoneset):
"""
Args:
phoneset (list):
"""
multi_character_phones = [i for i in phoneset if len(i) > 1]
multi_character_phones.sort(key=len, reverse=True)
return multi_character_phones
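
A minimal, self-contained sketch of how split_word and extract_multi_character_phones above fit together: multi-character phones are tried first, longest match wins, and anything else falls back to a single character. The toy word and phoneset are hypothetical.

def extract_multi_character_phones(phoneset):
    # multi-character phones, longest first, so greedy matching works.
    multi = [p for p in phoneset if len(p) > 1]
    multi.sort(key=len, reverse=True)
    return multi

def multi_character_tokenize(line, multi_character_tokens):
    # yield one phone at a time, preferring multi-character matches.
    while line != '':
        for token in multi_character_tokens:
            if line.startswith(token):
                yield token
                line = line[len(token):]
                break
        else:
            yield line[:1]
            line = line[1:]

def split_word(word, phoneset):
    multi = extract_multi_character_phones(phoneset)
    return list(multi_character_tokenize(word.strip(), multi))

# hypothetical phoneset in which 'ts' counts as a single phone.
print(split_word('mantsje', ['m', 'a', 'n', 't', 's', 'j', 'e', 'ts']))
# -> ['m', 'a', 'n', 'ts', 'j', 'e']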


@ -16,37 +16,38 @@ import defaultfiles as default
sys.path.append(default.toolbox_dir) sys.path.append(default.toolbox_dir)
import file_handling as fh import file_handling as fh
from htk import pyhtk from htk import pyhtk
#from scripts import run_command
## ======================= user define ======================= ## ======================= user define =======================
# procedure # procedure
combine_all = 1
make_lexicon = 0 make_lexicon = 0
make_label = 0 # it takes roughly 4800 sec on Surface pro 2. make_label = 0 # it takes roughly 4800 sec on Surface pro 2.
make_mlf = 0 make_mlf = 0
extract_features = 0 extract_features = 0
flat_start = 0 flat_start = 1
train_monophone_without_sp = 0 train_monophone_without_sp = 1
add_sp = 0 add_sp = 1
train_monophone_with_re_aligned_mlf = 0 train_monophone_with_re_aligned_mlf = 1
increase_mixture = 1
train_triphone = 0 train_triphone = 0
train_triphone_tied = 1 train_triphone_tied = 0
# pre-defined values. # pre-defined values.
dataset_list = ['devel', 'test', 'train'] dataset_list = ['devel', 'test', 'train']
feature_size = 39 feature_size = 30
improvement_threshold = 0.3 improvement_threshold = 0.3
hmmdefs_name = 'hmmdefs'
proto_name = 'proto'
lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr') lexicon_asr = os.path.join(default.fame_dir, 'lexicon', 'lex.asr')
lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov') lexicon_oov = os.path.join(default.fame_dir, 'lexicon', 'lex.oov')
config_dir = os.path.join(default.htk_dir, 'config') config_dir = os.path.join(default.htk_dir, 'config')
phonelist_full_txt = os.path.join(config_dir, 'phonelist_full.txt') phonelist_full_txt = os.path.join(config_dir, 'phonelist_full.txt')
tree_hed = os.path.join(config_dir, 'tree.hed') tree_hed = os.path.join(config_dir, 'tree.hed')
quest_hed = os.path.join(config_dir, 'quests.hed') quests_hed = os.path.join(config_dir, 'quests.hed')
model_dir = os.path.join(default.htk_dir, 'model') model_dir = os.path.join(default.htk_dir, 'model')
model_mono0_dir = os.path.join(model_dir, 'mono0') model_mono0_dir = os.path.join(model_dir, 'mono0')
@ -54,12 +55,14 @@ model_mono1_dir = os.path.join(model_dir, 'mono1')
model_mono1sp_dir = os.path.join(model_dir, 'mono1sp') model_mono1sp_dir = os.path.join(model_dir, 'mono1sp')
model_mono1sp2_dir = os.path.join(model_dir, 'mono1sp2') model_mono1sp2_dir = os.path.join(model_dir, 'mono1sp2')
model_tri1_dir = os.path.join(model_dir, 'tri1') model_tri1_dir = os.path.join(model_dir, 'tri1')
model_tri1tied_dir = os.path.join(model_dir, 'tri1tied')
# directories / files to be made. # directories / files to be made.
lexicon_dir = os.path.join(default.htk_dir, 'lexicon') lexicon_dir = os.path.join(default.htk_dir, 'lexicon')
lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr') lexicon_htk_asr = os.path.join(lexicon_dir, 'lex.htk_asr')
lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov') lexicon_htk_oov = os.path.join(lexicon_dir, 'lex.htk_oov')
lexicon_htk = os.path.join(lexicon_dir, 'lex.htk') lexicon_htk = os.path.join(lexicon_dir, 'lex.htk')
lexicon_htk_with_sp = os.path.join(lexicon_dir, 'lex_with_sp.htk')
lexicon_htk_triphone = os.path.join(lexicon_dir, 'lex_triphone.htk') lexicon_htk_triphone = os.path.join(lexicon_dir, 'lex_triphone.htk')
feature_dir = os.path.join(default.htk_dir, 'mfc') feature_dir = os.path.join(default.htk_dir, 'mfc')
@ -71,10 +74,20 @@ fh.make_new_directory(label_dir, existing_dir='leave')
## training ## training
if combine_all:
hcompv_scp_train = os.path.join(tmp_dir, 'all.scp')
mlf_file_train = os.path.join(label_dir, 'all_phone.mlf')
mlf_file_train_word = os.path.join(label_dir, 'all_word.mlf')
mlf_file_train_with_sp = os.path.join(label_dir, 'all_phone_with_sp.mlf')
mlf_file_train_aligned = os.path.join(label_dir, 'all_phone_aligned.mlf')
triphone_mlf = os.path.join(label_dir, 'all_triphone.mlf')
else:
hcompv_scp_train = os.path.join(tmp_dir, 'train.scp') hcompv_scp_train = os.path.join(tmp_dir, 'train.scp')
mlf_file_train = os.path.join(label_dir, 'train_phone.mlf') mlf_file_train = os.path.join(label_dir, 'train_phone.mlf')
mlf_file_train_word = os.path.join(label_dir, 'train_word.mlf')
mlf_file_train_with_sp = os.path.join(label_dir, 'train_phone_with_sp.mlf') mlf_file_train_with_sp = os.path.join(label_dir, 'train_phone_with_sp.mlf')
mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf') mlf_file_train_aligned = os.path.join(label_dir, 'train_phone_aligned.mlf')
triphone_mlf = os.path.join(label_dir, 'train_triphone.mlf')
hcompv_scp_train_updated = hcompv_scp_train.replace('.scp', '_updated.scp') hcompv_scp_train_updated = hcompv_scp_train.replace('.scp', '_updated.scp')
## testing ## testing
@ -104,19 +117,18 @@ if make_lexicon:
print('>>> fixing the lexicon...') print('>>> fixing the lexicon...')
fame_functions.fix_lexicon(lexicon_htk) fame_functions.fix_lexicon(lexicon_htk)
## add sp to the end of each line. ## adding sp to the lexicon for HTK.
#print('>>> adding sp...') print('>>> adding sp to the lexicon...')
#with open(lexicon_htk) as f: with open(lexicon_htk) as f:
# lines = f.read().split('\n') lines = f.read().split('\n')
#lines = [line + ' sp' for line in lines] with open(lexicon_htk_with_sp, 'wb') as f:
#with open(lexicon_htk_with_sp, 'wb') as f: f.write(bytes(' sp\n'.join(lines), 'ascii'))
# f.write(bytes('\n'.join(lines), 'ascii'))
print("elapsed time: {}".format(time.time() - timer_start)) print("elapsed time: {}".format(time.time() - timer_start))
## initialize the instance for HTK. ## initialize the instance for HTK.
chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk, feature_size) chtk = pyhtk.HTK(config_dir, fame_asr.phoneset_htk, lexicon_htk_with_sp, feature_size)
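
The lexicon change above appends the short-pause model sp to every pronunciation before the lexicon is handed to HTK (the HTK instance is then built from lex_with_sp.htk). A minimal sketch of that transformation on a hypothetical two-entry lexicon:

# hypothetical lexicon lines: '<word>\t<phone sequence>'
lines = ['reus\tr eu s', 'man\tm a n']

# append the short-pause phone to every pronunciation.
lines_with_sp = [line + ' sp' for line in lines]
# -> ['reus\tr eu s sp', 'man\tm a n sp']

with open('lex_with_sp.htk', 'wb') as f:  # illustrative output path
    f.write(bytes('\n'.join(lines_with_sp), 'ascii'))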
## ======================= make label files ======================= ## ======================= make label files =======================
@ -152,7 +164,7 @@ if make_label:
shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic')) shutil.move(dictionary_file, os.path.join(label_dir_, filename + '.dic'))
label_file = os.path.join(label_dir_, filename + '.lab') label_file = os.path.join(label_dir_, filename + '.lab')
chtk.create_label_file(sentence_htk, label_file) chtk.make_label_file(sentence_htk, label_file)
else: else:
os.remove(dictionary_file) os.remove(dictionary_file)
@ -174,7 +186,6 @@ if make_mlf:
os.remove(empty_dic_file) os.remove(empty_dic_file)
for dataset in dataset_list: for dataset in dataset_list:
#wav_dir_ = os.path.join(default.fame_dir, 'fame', 'wav', dataset)
feature_dir_ = os.path.join(feature_dir, dataset) feature_dir_ = os.path.join(feature_dir, dataset)
label_dir_ = os.path.join(label_dir, dataset) label_dir_ = os.path.join(label_dir, dataset)
mlf_word = os.path.join(label_dir, dataset + '_word.mlf') mlf_word = os.path.join(label_dir, dataset + '_word.mlf')
@ -183,11 +194,11 @@ if make_mlf:
print(">>> generating a word level mlf file for {}...".format(dataset)) print(">>> generating a word level mlf file for {}...".format(dataset))
chtk.label2mlf(label_dir_, mlf_word) chtk.label2mlf(label_dir_, mlf_word)
print(">>> generating a phone level mlf file for {}...".format(dataset)) print(">>> generating a phone level mlf file for {}...".format(dataset))
chtk.mlf_word2phone(mlf_phone, mlf_word, with_sp=False) chtk.mlf_word2phone(mlf_phone, mlf_word, with_sp=False)
chtk.mlf_word2phone(mlf_phone_with_sp, mlf_word, with_sp=True) chtk.mlf_word2phone(mlf_phone_with_sp, mlf_word, with_sp=True)
print("elapsed time: {}".format(time.time() - timer_start)) print("elapsed time: {}".format(time.time() - timer_start))
@ -217,8 +228,8 @@ if extract_features:
+ os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc')) + os.path.join(feature_dir_, os.path.basename(lab_file).replace('.lab', '.mfc'))
for lab_file in lab_list] for lab_file in lab_list]
if os.path.exists(empty_mfc_file): #if os.path.exists(empty_mfc_file):
os.remove(empty_mfc_file) # os.remove(empty_mfc_file)
with open(hcopy_scp.name, 'wb') as f: with open(hcopy_scp.name, 'wb') as f:
f.write(bytes('\n'.join(feature_list), 'ascii')) f.write(bytes('\n'.join(feature_list), 'ascii'))
@ -235,9 +246,64 @@ if extract_features:
with open(hcompv_scp, 'wb') as f: with open(hcompv_scp, 'wb') as f:
f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii')) f.write(bytes('\n'.join(mfc_list) + '\n', 'ascii'))
print(">>> extracting features on stimmen...")
chtk.wav2mfc(os.path.join(htk_stimmen_dir, 'hcopy.scp'))
print("elapsed time: {}".format(time.time() - timer_start)) print("elapsed time: {}".format(time.time() - timer_start))
## ======================= flat start monophones =======================
if combine_all:
# script files.
fh.concatenate(
os.path.join(tmp_dir, 'devel.scp'),
os.path.join(tmp_dir, 'test.scp'),
hcompv_scp_train
)
fh.concatenate(
hcompv_scp_train,
os.path.join(tmp_dir, 'train.scp'),
hcompv_scp_train
)
# phone level mlfs.
fh.concatenate(
os.path.join(label_dir, 'devel_phone.mlf'),
os.path.join(label_dir, 'test_phone.mlf'),
mlf_file_train
)
fh.concatenate(
mlf_file_train,
os.path.join(label_dir, 'train_phone.mlf'),
mlf_file_train
)
# phone level mlfs with sp.
fh.concatenate(
os.path.join(label_dir, 'devel_phone_with_sp.mlf'),
os.path.join(label_dir, 'test_phone_with_sp.mlf'),
mlf_file_train_with_sp
)
fh.concatenate(
mlf_file_train_with_sp,
os.path.join(label_dir, 'train_phone_with_sp.mlf'),
mlf_file_train_with_sp
)
# word level mlfs.
fh.concatenate(
os.path.join(label_dir, 'devel_word.mlf'),
os.path.join(label_dir, 'test_word.mlf'),
mlf_file_train_word
)
fh.concatenate(
mlf_file_train_word,
os.path.join(label_dir, 'train_word.mlf'),
mlf_file_train_word
)
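
fh.concatenate is used above as concatenate(file_a, file_b, output), with output sometimes equal to file_a so the combined set accumulates in place. Its implementation is not part of this diff; a sketch consistent with that usage, assuming plain text files, could look like:

import os
import shutil
import tempfile

def concatenate(file_a, file_b, output):
    # write file_a followed by file_b to output; output may equal file_a,
    # so build the result in a temporary file first, then move it into place.
    fd, tmp_path = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as fout:
        for path in (file_a, file_b):
            with open(path, 'rb') as fin:
                shutil.copyfileobj(fin, fout)
    shutil.move(tmp_path, output)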
## ======================= flat start monophones ======================= ## ======================= flat start monophones =======================
if flat_start: if flat_start:
timer_start = time.time() timer_start = time.time()
@ -246,17 +312,14 @@ if flat_start:
chtk.flat_start(hcompv_scp_train, model_mono0_dir) chtk.flat_start(hcompv_scp_train, model_mono0_dir)
# create macros. # make macros.
vFloors = os.path.join(model_mono0_dir, 'vFloors') vFloors = os.path.join(model_mono0_dir, 'vFloors')
if os.path.exists(vFloors): if os.path.exists(vFloors):
chtk.create_macros(vFloors) chtk.make_macros(vFloors)
# allocate mean & variance to all phones in the phone list # allocate mean & variance to all phones in the phone list
print('>>> allocating mean & variance to all phones in the phone list...') print('>>> allocating mean & variance to all phones in the phone list...')
chtk.create_hmmdefs( chtk.make_hmmdefs(model_mono0_dir)
os.path.join(model_mono0_dir, proto_name),
os.path.join(model_mono0_dir, 'hmmdefs')
)
print("elapsed time: {}".format(time.time() - timer_start)) print("elapsed time: {}".format(time.time() - timer_start))
@ -320,8 +383,9 @@ if train_monophone_with_re_aligned_mlf:
os.path.join(modeln_dir, 'macros'), os.path.join(modeln_dir, 'macros'),
os.path.join(modeln_dir, 'hmmdefs'), os.path.join(modeln_dir, 'hmmdefs'),
mlf_file_train_aligned, mlf_file_train_aligned,
os.path.join(label_dir, 'train_word.mlf'), mlf_file_train_word,
hcompv_scp_train) hcompv_scp_train)
chtk.fix_mlf(mlf_file_train_aligned)
print('>>> updating the script file... ') print('>>> updating the script file... ')
chtk.update_script_file( chtk.update_script_file(
@ -349,24 +413,55 @@ if train_monophone_with_re_aligned_mlf:
print("elapsed time: {}".format(time.time() - timer_start)) print("elapsed time: {}".format(time.time() - timer_start))
## ======================= train triphone ======================= ## ======================= increase mixture =======================
if train_triphone: if increase_mixture:
print('==== traina triphone model ====') print('==== increase mixture ====')
timer_start = time.time() timer_start = time.time()
for nmix in [2, 4, 8, 16]:
if nmix == 2:
modeln_dir_ = model_mono1sp2_dir
else:
modeln_dir_ = os.path.join(model_dir, 'mono'+str(nmix_))
modeln_dir = os.path.join(model_dir, 'mono'+str(nmix))
triphonelist_txt = os.path.join(config_dir, 'triphonelist.txt') print('mixture: {}'.format(nmix))
triphone_mlf = os.path.join(default.htk_dir, 'label', 'train_triphone.mlf') fh.make_new_directory(modeln_dir, existing_dir='delete')
niter = chtk.get_niter_max(modeln_dir_)
chtk.increase_mixture(
os.path.join(modeln_dir_, 'iter'+str(niter), 'hmmdefs'),
nmix,
os.path.join(modeln_dir, 'iter0'),
model_type='monophone_with_sp')
shutil.copy2(os.path.join(modeln_dir_, 'iter'+str(niter), 'macros'),
os.path.join(modeln_dir, 'iter0', 'macros'))
#improvement_threshold = -10
niter = chtk.re_estimation_until_saturated(
modeln_dir,
os.path.join(modeln_dir_, 'iter0'),
improvement_threshold,
hcompv_scp_train_updated,
os.path.join(htk_stimmen_dir, 'mfc'),
'mfc',
os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
mlf_file=mlf_file_train_aligned,
lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
model_type='monophone_with_sp'
)
nmix_ = nmix
print("elapsed time: {}".format(time.time() - timer_start))
## ======================= train triphone =======================
print('>>> making triphone list... ') print('>>> making triphone list... ')
chtk.make_triphonelist( chtk.make_triphonelist(
triphonelist_txt, mlf_file_train_aligned,
triphone_mlf, triphone_mlf)
mlf_file_train_aligned)
print('>>> making triphone header... ') if train_triphone:
chtk.make_tri_hed( print('==== train triphone model ====')
os.path.join(config_dir, 'mktri.hed') timer_start = time.time()
)
print('>>> init triphone model... ') print('>>> init triphone model... ')
niter = chtk.get_niter_max(model_mono1sp2_dir) niter = chtk.get_niter_max(model_mono1sp2_dir)
@ -377,8 +472,8 @@ if train_triphone:
) )
print('>>> re-estimation... ') print('>>> re-estimation... ')
# I wanted to train until saturated: ## I wanted to train until saturated:
# #niter = chtk.re_estimation_until_saturated( #niter = chtk.re_estimation_until_saturated(
# model_tri1_dir, # model_tri1_dir,
# os.path.join(model_tri1_dir, 'iter0'), # os.path.join(model_tri1_dir, 'iter0'),
# improvement_threshold, # improvement_threshold,
@ -395,7 +490,6 @@ if train_triphone:
# ERROR [+8231] GetHCIModel: Cannot find hmm [i:-]r[+???] # ERROR [+8231] GetHCIModel: Cannot find hmm [i:-]r[+???]
# therefore only two times re-estimation is performed. # therefore only two times re-estimation is performed.
output_dir = model_tri1_dir output_dir = model_tri1_dir
for niter in range(1, 4): for niter in range(1, 4):
hmm_n = 'iter' + str(niter) hmm_n = 'iter' + str(niter)
hmm_n_pre = 'iter' + str(niter-1) hmm_n_pre = 'iter' + str(niter-1)
@ -414,18 +508,59 @@ if train_triphone:
print("elapsed time: {}".format(time.time() - timer_start)) print("elapsed time: {}".format(time.time() - timer_start))
## ======================= train triphone ======================= ## ======================= train tied-state triphones =======================
if train_triphone_tied: if train_triphone_tied:
print('==== traina tied-state triphone ====') print('==== train tied-state triphones ====')
timer_start = time.time() timer_start = time.time()
print('>>> making lexicon for triphone... ') print('>>> making lexicon for triphone... ')
chtk.make_triphone_full(phonelist_full_txt, lexicon_htk_triphone) chtk.make_lexicon_triphone(phonelist_full_txt, lexicon_htk_triphone)
chtk.combine_phonelists(phonelist_full_txt)
print('>>> making headers... ') print('>>> making a tree header... ')
chtk.make_tree_header(tree_hed) fame_phonetics.make_quests_hed(quests_hed)
fame_phonetics.make_quests_hed(quest_hed) stats = os.path.join(r'c:\OneDrive\Research\rug\experiments\acoustic_model\fame\htk\model\tri1\iter3', 'stats')
chtk.make_tree_header(tree_hed, quests_hed, stats, config_dir)
print('>>> init triphone model... ')
niter = chtk.get_niter_max(model_tri1_dir)
fh.make_new_directory(os.path.join(model_tri1tied_dir, 'iter0'), existing_dir='leave')
chtk.init_triphone(
os.path.join(model_tri1_dir, 'iter'+str(niter)),
os.path.join(model_tri1tied_dir, 'iter0'),
tied=True)
# I wanted to train until saturated:
#niter = chtk.re_estimation_until_saturated(
# model_tri1tied_dir,
# os.path.join(model_tri1tied_dir, 'iter0'),
# improvement_threshold,
# hcompv_scp_train_updated,
# os.path.join(htk_stimmen_dir, 'mfc'),
# 'mfc',
# os.path.join(htk_stimmen_dir, 'word_lattice.ltc'),
# mlf_file=triphone_mlf,
# lexicon=os.path.join(htk_stimmen_dir, 'lexicon_recognition.dic'),
# model_type='triphone'
# )
#
# but because the data size is limited, some triphones cannot be trained, which raised the error:
# ERROR [+8231] GetHCIModel: Cannot find hmm [i:-]r[+???]
# therefore only 3 times re-estimation is performed.
output_dir = model_tri1tied_dir
for niter in range(1, 4):
hmm_n = 'iter' + str(niter)
hmm_n_pre = 'iter' + str(niter-1)
_modeln_dir = os.path.join(output_dir, hmm_n)
_modeln_dir_pre = os.path.join(output_dir, hmm_n_pre)
fh.make_new_directory(_modeln_dir, 'leave')
chtk.re_estimation(
os.path.join(_modeln_dir_pre, 'hmmdefs'),
_modeln_dir,
hcompv_scp_train_updated,
mlf_file=triphone_mlf,
macros=os.path.join(_modeln_dir_pre, 'macros'),
model_type='triphone')
print("elapsed time: {}".format(time.time() - timer_start)) print("elapsed time: {}".format(time.time() - timer_start))


@ -1,20 +1,19 @@
## this script should be used only by Aki Kunikoshi. ## this script should be used only by Aki Kunikoshi.
import os
import numpy as np import numpy as np
import pandas as pd
import argparse import argparse
import json import json
from novoapi.backend import session from novoapi.backend import session
import os
#os.chdir(r'C:\Users\Aki\source\repos\acoustic_model\acoustic_model')
import defaultfiles as default import defaultfiles as default
import convert_phoneset
def load_phonset(): def load_novo70_phoneset():
translation_key_ipa2novo70 = dict()
translation_key_novo702ipa = dict()
#phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx) #phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx)
#df = pd.read_excel(phonelist_novo70_, 'list') #df = pd.read_excel(phonelist_novo70_, 'list')
## *_simple includes columns which have only one phone in them. ## *_simple includes columns which have only one phone in them.
@ -23,21 +22,23 @@ def load_phonset():
# print('{0}:{1}'.format(ipa, novo70)) # print('{0}:{1}'.format(ipa, novo70))
# translation_key[ipa] = novo70 # translation_key[ipa] = novo70
#phonelist_novo70 = np.unique(list(df['novo70_simple'])) #phonelist_novo70 = np.unique(list(df['novo70_simple']))
novo70_phoneset = pd.read_csv(default.novo70_phoneset, delimiter='\t', header=None, encoding="utf-8")
novo70_phoneset.rename(columns={0: 'novo70', 1: 'ipa', 2: 'description'}, inplace=True)
phoneset_ipa = [] #phoneset_ipa = []
phoneset_novo70 = [] #phoneset_novo70 = []
with open(default.novo70_phoneset, "rt", encoding="utf-8") as fin: #with open(default.novo70_phoneset, "rt", encoding="utf-8") as fin:
lines = fin.read() # lines = fin.read()
lines = lines.split('\n') # lines = lines.split('\n')
for line in lines: # for line in lines:
words = line.split('\t') # words = line.split('\t')
if len(words) > 1: # if len(words) > 1:
novo70 = words[0] # novo70 = words[0]
ipa = words[1] # ipa = words[1]
phoneset_ipa.append(ipa) # phoneset_ipa.append(ipa)
phoneset_novo70.append(novo70) # phoneset_novo70.append(novo70)
translation_key_ipa2novo70[ipa] = novo70 # translation_key_ipa2novo70[ipa] = novo70
translation_key_novo702ipa[novo70] = ipa # translation_key_novo702ipa[novo70] = ipa
# As per Nederlandse phoneset_aki.xlsx received from David # As per Nederlandse phoneset_aki.xlsx received from David
# [ɔː] oh / ohr # from ipa->novo70, only oh is used. # [ɔː] oh / ohr # from ipa->novo70, only oh is used.
@ -48,14 +49,25 @@ def load_phonset():
# [w] wv in IPA written as ʋ. # [w] wv in IPA written as ʋ.
extra_ipa = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'ʋ'] extra_ipa = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'ʋ']
extra_novo70 = ['oh', 'ih', 'iy', 'uh', 'eh', 'wv'] extra_novo70 = ['oh', 'ih', 'iy', 'uh', 'eh', 'wv']
for ipa, novo70 in zip(extra_ipa, extra_novo70):
phoneset_ipa.append(ipa) phoneset_ipa = list(novo70_phoneset['ipa'])
phoneset_novo70.append(novo70) phoneset_ipa.extend(extra_ipa)
phoneset_ipa = [i.replace('ː', ':') for i in phoneset_ipa]
phoneset_novo70 = list(novo70_phoneset['novo70'])
phoneset_novo70.extend(extra_novo70)
phoneset_novo70 = [i.replace('ː', ':') for i in phoneset_novo70]
translation_key_ipa2novo70 = dict()
translation_key_novo702ipa = dict()
for ipa, novo70 in zip(phoneset_ipa, phoneset_novo70):
#phoneset_ipa.append(ipa)
#phoneset_novo70.append(novo70)
translation_key_ipa2novo70[ipa] = novo70 translation_key_ipa2novo70[ipa] = novo70
translation_key_novo702ipa[novo70] = ipa translation_key_novo702ipa[novo70] = ipa
translation_key_novo702ipa['ohr'] = 'ɔː' translation_key_novo702ipa['ohr'] = 'ɔ:'
translation_key_novo702ipa['ihr'] = 'ɪː' translation_key_novo702ipa['ihr'] = 'ɪ:'
phoneset_ipa = np.unique(phoneset_ipa) phoneset_ipa = np.unique(phoneset_ipa)
phoneset_novo70 = np.unique(phoneset_novo70) phoneset_novo70 = np.unique(phoneset_novo70)
@ -63,25 +75,6 @@ def load_phonset():
return phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa return phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa
def multi_character_tokenize(line, multi_character_tokens):
"""
Tries to match one of the tokens in multi_character_tokens at each position of line,
starting at position 0,
if so tokenizes and eats that token. Otherwise tokenizes a single character.
Copied from forced_alignment.convert_phone_set.py
"""
while line != '':
for token in multi_character_tokens:
if line.startswith(token) and len(token) > 0:
yield token
line = line[len(token):]
break
else:
yield line[:1]
line = line[1:]
def split_ipa(line): def split_ipa(line):
""" """
Split a line by IPA phones. Split a line by IPA phones.
@ -89,13 +82,16 @@ def split_ipa(line):
:param string line: one line written in IPA. :param string line: one line written in IPA.
:return string lineSeperated: the line split into IPA phones. :return string lineSeperated: the line split into IPA phones.
""" """
phoneset_ipa, _, _, _ = load_novo70_phoneset()
#multi_character_phones = [i for i in phoneset_ipa if len(i) > 1]
#multi_character_phones.sort(key=len, reverse=True)
#multi_character_phones = [
# # IPAs in CGN.
# u'ʌu', u'ɛi', u'œy', u'aː', u'eː', u'iː', u'oː', u'øː', u'ɛː', u'œː', u'ɔː', u'ɛ̃ː', u'ɑ̃ː', u'ɔ̃ː', u'œ̃', u'ɪː'
# ]
#return [phone for phone in multi_character_tokenize(line.strip(), multi_character_phones)]
multi_character_phones = [ return convert_phoneset.split_word(line, phoneset_ipa)
# IPAs in CGN.
u'ʌu', u'ɛi', u'œy', u'aː', u'eː', u'iː', u'oː', u'øː', u'ɛː', u'œː', u'ɔː', u'ɛ̃ː', u'ɑ̃ː', u'ɔ̃ː', u'œ̃', u'ɪː'
]
return [phone for phone in multi_character_tokenize(line.strip(), multi_character_phones)]
def split_novo70(line): def split_novo70(line):
@ -104,29 +100,32 @@ def split_novo70(line):
:param string line: one line written in novo70. :param string line: one line written in novo70.
:return string lineSeperated: the line split into novo70 phones. :return string lineSeperated: the line split into novo70 phones.
""" """
_, phoneset_novo70, _, _ = load_phonset() _, phoneset_novo70, _, _ = load_novo70_phoneset()
multi_character_phones = [p for p in phoneset_novo70 if len(p) > 1] #multi_character_phones = [p for p in phoneset_novo70 if len(p) > 1]
multi_character_phones = sorted(multi_character_phones, key=len, reverse=True) #multi_character_phones = sorted(multi_character_phones, key=len, reverse=True)
multi_character_phones = convert_phoneset.extract_multi_character_phones(phoneset_novo70)
return ['sp' if phone == ' ' else phone return ['sp' if phone == ' ' else phone
for phone in multi_character_tokenize(line.strip(), multi_character_phones)] for phone in multi_character_tokenize(line.strip(), multi_character_phones)]
def novo702ipa(tokens): def novo702ipa(line):
pronunciation = [] #pronunciation = []
_, _, _, translation_key = load_phonset() _, _, _, translation_key = load_novo70_phoneset()
for phone in split_novo70(tokens): #for phone in split_novo70(tokens):
pronunciation.append(translation_key.get(phone, phone)) # pronunciation.append(translation_key.get(phone, phone))
return ' '.join(pronunciation) #return ' '.join(pronunciation)
return ' '.join(convert_phoneset.convert_phoneset(split_novo70(line), translation_key))
# numbering of novo70 should be checked. # numbering of novo70 should be checked.
def ipa2novo70(tokens): def ipa2novo70(line):
pronunciation = [] #pronunciation = []
_, _, translation_key, _ = load_phonset() _, _, translation_key, _ = load_novo70_phoneset()
for phone in split_ipa(tokens): #for phone in split_ipa(tokens):
pronunciation.append(translation_key.get(phone, phone)) # pronunciation.append(translation_key.get(phone, phone))
return ' '.join(pronunciation) #return ' '.join(pronunciation)
return ' '.join(convert_phoneset.convert_phoneset(split_ipa(line), translation_key))
def make_grammar(word, pronunciation_ipa): def make_grammar(word, pronunciation_ipa):
@ -174,6 +173,9 @@ def forced_alignment(wav_file, word, pronunciation_ipa):
p = argparse.ArgumentParser() p = argparse.ArgumentParser()
p.add_argument("--user", default='martijn.wieling') p.add_argument("--user", default='martijn.wieling')
p.add_argument("--password", default='xxxxxx') p.add_argument("--password", default='xxxxxx')
args = p.parse_args() args = p.parse_args()
rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir) rec = session.Recognizer(grammar_version="1.0", lang="nl", snodeid=101, user=args.user, password=args.password, keepopen=True) # , modeldir=modeldir)
@ -194,6 +196,25 @@ def result2pronunciation(result, word):
return pronunciation_ipa, pronunciation_novo70, llh return pronunciation_ipa, pronunciation_novo70, llh
def phones_not_in_novo70(ipa):
""" extract phones which is not in novo70 phoneset. """
phoneset_ipa, _, _, _ = load_novo70_phoneset()
# As per Nederlandse phoneset_aki.xlsx received from David
# [ɔː] oh / ohr
# [ɪː] ih / ihr
# [iː] iy
# [œː] uh
# [ɛː] eh
# [w] wv in IPA written as ʋ.
david_suggestion = ['ɔː', 'ɪː', 'iː', 'œː', 'ɛː', 'w']
return [phone for phone in split_ipa(ipa)
if not phone in phoneset_ipa and not phone in david_suggestion]
if __name__ == '__main__': if __name__ == '__main__':
pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə'] pronunciation_ipa = ['rø:s', 'mɑn', 'mɑntsjə']
grammar = make_grammar('reus', pronunciation_ipa) #grammar = make_grammar('reus', pronunciation_ipa)
phoneset_ipa, phoneset_novo70, translation_key_ipa2novo70, translation_key_novo702ipa = load_novo70_phoneset()
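
novo702ipa and ipa2novo70 above now delegate to convert_phoneset.convert_phoneset; judging from the loops they replace, that helper is a per-phone dictionary lookup that leaves unknown phones untouched. A minimal sketch, with a key fragment taken from the extra_ipa/extra_novo70 pairs above (after 'ː' is replaced by ':'):

def convert_phoneset(phones, translation_key):
    # map each phone through the key; unknown phones pass through unchanged.
    return [translation_key.get(p, p) for p in phones]

key = {'ɔ:': 'oh', 'ɪ:': 'ih', 'ʋ': 'wv'}
print(' '.join(convert_phoneset(['ɔ:', 'ʋ', 'x'], key)))
# -> 'oh wv x'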


@ -7,6 +7,7 @@ import pandas as pd
import convert_xsampa2ipa import convert_xsampa2ipa
import defaultfiles as default import defaultfiles as default
import fame_functions import fame_functions
import novoapi_functions
def _load_transcriptions(): def _load_transcriptions():
@ -67,6 +68,19 @@ def load_transcriptions_clean(clean_wav_dir):
return df_clean return df_clean
def load_transcriptions_novo70(clean_wav_dir):
""" extract rows of which ipa is written in novo70 phonset. """
df = load_transcriptions_clean(clean_wav_dir)
df_novo70 = pd.DataFrame(index=[], columns=list(df.keys()))
for index, row in df.iterrows():
not_in_novo70 = novoapi_functions.phones_not_in_novo70(row['ipa'])
if len(not_in_novo70) == 0:
df_novo70 = df_novo70.append(row, ignore_index=True)
return df_novo70
def add_row_htk(df): def add_row_htk(df):
""" df['htk'] is made from df['ipa'] and added. """ """ df['htk'] is made from df['ipa'] and added. """
htk = [] htk = []
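
Taken together, the compare narrows check_novoapi.py to transcriptions that the novo70 phoneset can express. A hedged end-to-end usage sketch of the two new helpers (the test directory path is the one hard-coded above and is machine-specific):

import stimmen_functions
import novoapi_functions

stimmen_test_dir = r'c:\OneDrive\Research\rug\_data\stimmen_test'

# keep only rows whose IPA transcription uses novo70-expressible phones.
df = stimmen_functions.load_transcriptions_novo70(stimmen_test_dir)

# sanity check: the remaining rows should contain no unknown phones.
assert all(len(novoapi_functions.phones_not_in_novo70(ipa)) == 0
           for ipa in df['ipa'])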