to be sure.
This commit is contained in:
parent
bbed340228
commit
a8dbb51d0c
Binary file not shown.
@ -9,6 +9,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
|
|||||||
ProjectSection(SolutionItems) = preProject
|
ProjectSection(SolutionItems) = preProject
|
||||||
..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
|
..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
|
||||||
..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
|
..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
|
||||||
|
..\ipa-xsama-converter\converter.py = ..\ipa-xsama-converter\converter.py
|
||||||
..\forced_alignment\forced_alignment\defaultfiles.py = ..\forced_alignment\forced_alignment\defaultfiles.py
|
..\forced_alignment\forced_alignment\defaultfiles.py = ..\forced_alignment\forced_alignment\defaultfiles.py
|
||||||
..\forced_alignment\forced_alignment\forced_alignment.pyproj = ..\forced_alignment\forced_alignment\forced_alignment.pyproj
|
..\forced_alignment\forced_alignment\forced_alignment.pyproj = ..\forced_alignment\forced_alignment\forced_alignment.pyproj
|
||||||
..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
|
..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
|
||||||
|
BIN
acoustic_model/__pycache__/convert_xsampa2ipa.cpython-36.pyc
Normal file
BIN
acoustic_model/__pycache__/convert_xsampa2ipa.cpython-36.pyc
Normal file
Binary file not shown.
@ -313,26 +313,3 @@ if train_model:
|
|||||||
subprocessStr = 'HHEd -T 1 -H ' + modelN_dir + '\\' + hmmdefs_name + ' -M ' + modelN_dir_next + ' ' + header_file + ' ' + phonelist
|
subprocessStr = 'HHEd -T 1 -H ' + modelN_dir + '\\' + hmmdefs_name + ' -M ' + modelN_dir_next + ' ' + header_file + ' ' + phonelist
|
||||||
subprocess.call(subprocessStr, shell=True)
|
subprocess.call(subprocessStr, shell=True)
|
||||||
|
|
||||||
|
|
||||||
### ======================= forced alignment =======================
|
|
||||||
#if forced_alignment:
|
|
||||||
# try:
|
|
||||||
# scripts.run_command([
|
|
||||||
# 'HVite','-T', '1', '-a', '-C', configHVite,
|
|
||||||
# '-H', AcousticModel, '-m', '-I',
|
|
||||||
# mlf_file, '-i', fa_file, '-S',
|
|
||||||
# script_file, htk_dict_file, filePhoneList
|
|
||||||
# ])
|
|
||||||
# except:
|
|
||||||
# print("\033[91mHVite command failed with these input files:\033[0m")
|
|
||||||
# print(_debug_show_file('HVite config', configHVite))
|
|
||||||
# print(_debug_show_file('Accoustic model', AcousticModel))
|
|
||||||
# print(_debug_show_file('Master Label file', mlf_file))
|
|
||||||
# print(_debug_show_file('Output', fa_file))
|
|
||||||
# print(_debug_show_file('Script file', script_file))
|
|
||||||
# print(_debug_show_file('HTK dictionary', htk_dict_file))
|
|
||||||
# print(_debug_show_file('Phoneme list', filePhoneList))
|
|
||||||
# raise
|
|
||||||
|
|
||||||
|
|
||||||
##os.remove(hcopy_scp.name)
|
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
<SchemaVersion>2.0</SchemaVersion>
|
<SchemaVersion>2.0</SchemaVersion>
|
||||||
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
|
||||||
<ProjectHome>.</ProjectHome>
|
<ProjectHome>.</ProjectHome>
|
||||||
<StartupFile>acoustic_model.py</StartupFile>
|
<StartupFile>performance_check.py</StartupFile>
|
||||||
<SearchPath>
|
<SearchPath>
|
||||||
</SearchPath>
|
</SearchPath>
|
||||||
<WorkingDirectory>.</WorkingDirectory>
|
<WorkingDirectory>.</WorkingDirectory>
|
||||||
@ -25,6 +25,12 @@
|
|||||||
<Compile Include="acoustic_model_functions.py">
|
<Compile Include="acoustic_model_functions.py">
|
||||||
<SubType>Code</SubType>
|
<SubType>Code</SubType>
|
||||||
</Compile>
|
</Compile>
|
||||||
|
<Compile Include="convert_xsampa2ipa.py">
|
||||||
|
<SubType>Code</SubType>
|
||||||
|
</Compile>
|
||||||
|
<Compile Include="performance_check.py">
|
||||||
|
<SubType>Code</SubType>
|
||||||
|
</Compile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Content Include="config.ini" />
|
<Content Include="config.ini" />
|
||||||
|
128
acoustic_model/convert_xsampa2ipa.py
Normal file
128
acoustic_model/convert_xsampa2ipa.py
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
""" Conversion between IPA and Xsampa.
|
||||||
|
|
||||||
|
Note: this code is based on ipa-xsama-converter/converter.py.
|
||||||
|
https://github.com/lingz/ipa-xsama-converter/
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
#sys.path.append(ipa_xsampa_converter_dir)
|
||||||
|
#import converter
|
||||||
|
|
||||||
|
|
||||||
|
def load_converter(source, sink, ipa_xsampa_converter_dir):
    """Build the symbol mapping used to convert between phonetic alphabets.

    Args:
        source: alphabet to convert from; one of "ipa", "xsampa" or "sassc".
        sink: alphabet to convert to; one of "ipa", "xsampa" or "sassc".
        ipa_xsampa_converter_dir: directory that holds "ipa_xsampa_map.json"
            (the ipa-xsama-converter checkout).

    Returns:
        A dict mapping each source symbol to the corresponding sink symbol.
        When neither side is IPA the mapping is composed through IPA, and a
        source symbol whose IPA form cannot be expressed in the sink
        alphabet maps to None. The dict is empty when both sides are "ipa".

    Raises:
        SystemExit: if source or sink is not one of the supported alphabets.
        FileNotFoundError: if a required JSON map cannot be found.
    """
    choices = ["ipa", "xsampa", "sassc"]

    # Validate params
    if source not in choices or sink not in choices:
        print("source and destination should be one of [ipa xsampa sassc].")
        sys.exit(1)
    if source == sink:
        # Not fatal: the caller still gets a mapping back.
        print("source and destination format are the same.")

    # Mappings from disk
    with open(os.path.join(ipa_xsampa_converter_dir, "ipa_xsampa_map.json"), encoding="utf-8") as f:
        ipa_xsampa = json.load(f)

    sassc_ipa = []
    if source == "sassc" or sink == "sassc":
        # Prefer the copy next to this module; otherwise fall back to the
        # converter directory, where the other map is read from.
        script_dir = os.path.dirname(os.path.realpath(__file__))
        sassc_path = os.path.join(script_dir, "./sassc_ipa.json")
        if not os.path.isfile(sassc_path):
            sassc_path = os.path.join(ipa_xsampa_converter_dir, "sassc_ipa.json")
        with open(sassc_path, encoding="utf-8") as f:
            sassc_ipa = json.load(f)

    # some may not be used if source or sink is already IPA
    source_to_ipa = {}
    if source == "xsampa":
        for pair in ipa_xsampa:
            source_to_ipa[pair[1]] = pair[0]
    elif source == "sassc":
        for pair in sassc_ipa:
            source_to_ipa[pair[0]] = pair[1]

    ipa_to_sink = {}
    if sink == "xsampa":
        for pair in ipa_xsampa:
            ipa_to_sink[pair[0]] = pair[1]
    elif sink == "sassc":
        for pair in sassc_ipa:
            ipa_to_sink[pair[1]] = pair[0]

    # Combine them into a single mapping
    if source == "ipa":
        return ipa_to_sink
    if sink == "ipa":
        return source_to_ipa

    mapping = {}
    for symbol, ipas in source_to_ipa.items():
        converted = []
        for ipa in ipas:
            val = ipa_to_sink.get(ipa)
            if not val:
                # One untranslatable IPA character invalidates the symbol.
                converted = None
                break
            converted.append(val)
        mapping[symbol] = "".join(converted) if converted is not None else None

    return mapping
|
||||||
|
|
||||||
|
|
||||||
|
def conversion(source, sink, mapping, line):
    """Convert a string from one phonetic alphabet to another.

    The input is processed one character at a time: whitespace characters
    are copied through unchanged and every other character is looked up in
    mapping. Characters without a (non-empty) mapping are dropped and
    reported on stderr.

    Args:
        source: name of the source alphabet (not used by the conversion
            itself; kept for interface compatibility).
        sink: name of the target alphabet. When it is "sassc", characters
            that do not occur in any key of mapping are removed before the
            lookup.
        mapping: symbol mapping, can be obtained with load_converter().
        line: the text to convert; leading and trailing whitespace is
            stripped.

    Returns:
        The converted string.
    """
    line = line.strip()

    # Characters the mapping can consume. Derived from the mapping keys so
    # the function does not depend on state local to load_converter().
    allowed_chars = None
    if sink == "sassc":
        allowed_chars = {char for key in mapping for char in key}

    output = []
    for token in line:
        if token.isspace():
            output.append(token)
            continue

        # Remove extraneous chars that the mapping does not accept
        if allowed_chars is not None:
            token = "".join(char for char in token if char in allowed_chars)

        mapped = mapping.get(token)
        if not mapped:
            print("WARNING: could not map token ", token, file=sys.stderr)
        else:
            output.append(mapped)

    return "".join(output)
|
15
acoustic_model/how_to_use_ipa-xsampa_converer.txt
Normal file
15
acoustic_model/how_to_use_ipa-xsampa_converer.txt
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
Check the indent:
|
||||||
|
114: output += "\n"
|
||||||
|
|
||||||
|
Specify the encoding when the json file is loaded:
|
||||||
|
46: with open(os.path.join(script_dir, "ipa_xsampa_map.json")) as f:
|
||||||
|
--> with open(os.path.join(script_dir, "ipa_xsampa_map.json"), encoding="utf-8") as f:
|
||||||
|
|
||||||
|
Because str is Unicode by default in Python 3.6, the explicit decode/encode calls are no longer needed:
|
||||||
|
86: line = unicode(line, 'utf-8').strip()
|
||||||
|
--> line = line.strip()
|
||||||
|
117:sys.stdout.write(output.encode("utf-8"))
|
||||||
|
--> sys.stdout.write(output)
|
||||||
|
|
||||||
|
Change std input into arguments.
|
||||||
|
12: if len(sys.argv) != 4:
|
@ -1,4 +1,49 @@
|
|||||||
### ======================= forced alignment =======================
|
import os
|
||||||
|
import sys
|
||||||
|
import csv
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import convert_xsampa2ipa
|
||||||
|
|
||||||
|
|
||||||
|
## ======================= user define =======================
|
||||||
|
forced_alignment_module = r'C:\Users\Aki\source\repos\forced_alignment'
|
||||||
|
ipa_xsampa_converter_dir = r'C:\Users\Aki\source\repos\ipa-xsama-converter'
|
||||||
|
csvfile = r"C:\OneDrive\Research\rug\stimmen\Frisian Variants Picture Task Stimmen.csv"
|
||||||
|
|
||||||
|
|
||||||
|
sys.path.append(forced_alignment_module)
|
||||||
|
from forced_alignment import convert_phone_set
|
||||||
|
|
||||||
|
|
||||||
|
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir)
|
||||||
|
#word_xsampa = 'e:j@X'
|
||||||
|
#word_ipa = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, word_xsampa)
|
||||||
|
|
||||||
|
# Read the stimulus list and convert every X-SAMPA pronunciation into the
# phone set returned by convert_phone_set.ipa2famehtk().
with open(csvfile, encoding="utf-8") as fin:
    lines = csv.reader(fin, delimiter=';', lineterminator="\n", skipinitialspace=True)
    next(lines, None)  # skip the headers

    filenames = []
    words = []
    pronunciations = []
    for line in lines:
        # Check the column count first so short or blank rows are skipped
        # instead of raising IndexError, and compare the word by value
        # ("is not" tests object identity, not string content).
        if len(line) > 5 and line[1] != '':
            filenames.append(line[0])
            words.append(line[1])
            word_xsampa = line[3]
            word_ipa = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, word_xsampa)
            # Replace the IPA length mark with a plain colon before the
            # phone-set conversion.
            word_ipa = word_ipa.replace('ː', ':')
            word_famehtk = convert_phone_set.ipa2famehtk(word_ipa)
            pronunciations.append(word_famehtk)

phonelist = ' '.join(pronunciations)
# Keep the result: the bare expression was discarded when run as a script.
unique_phones = np.unique(phonelist.split(' '))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## ======================= forced alignment =======================
|
||||||
#if forced_alignment:
|
#if forced_alignment:
|
||||||
# try:
|
# try:
|
||||||
# scripts.run_command([
|
# scripts.run_command([
|
||||||
|
Loading…
Reference in New Issue
Block a user