to be sure.

2018-06-13 07:22:53 +09:00
parent bbed340228
commit a8dbb51d0c
8 changed files with 197 additions and 25 deletions
--- a/.vs/acoustic_model/v15/.suo
+++ b/.vs/acoustic_model/v15/.suo
--- a/acoustic_model.sln
+++ b/acoustic_model.sln
@@ -9,6 +9,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
 	ProjectSection(SolutionItems) = preProject
 		..\forced_alignment\forced_alignment\__init__.py = ..\forced_alignment\forced_alignment\__init__.py
 		..\forced_alignment\forced_alignment\convert_phone_set.py = ..\forced_alignment\forced_alignment\convert_phone_set.py
 		..\ipa-xsama-converter\converter.py = ..\ipa-xsama-converter\converter.py
 		..\forced_alignment\forced_alignment\defaultfiles.py = ..\forced_alignment\forced_alignment\defaultfiles.py
 		..\forced_alignment\forced_alignment\forced_alignment.pyproj = ..\forced_alignment\forced_alignment\forced_alignment.pyproj
 		..\forced_alignment\forced_alignment\htk_dict.py = ..\forced_alignment\forced_alignment\htk_dict.py
--- a/acoustic_model/pycache/convert_xsampa2ipa.cpython-36.pyc
+++ b/acoustic_model/pycache/convert_xsampa2ipa.cpython-36.pyc
--- a/acoustic_model/acoustic_model.py
+++ b/acoustic_model/acoustic_model.py
@@ -313,26 +313,3 @@ if train_model:
 		subprocessStr =	'HHEd -T 1 -H ' + modelN_dir + '\\' + hmmdefs_name + ' -M ' + modelN_dir_next + ' ' + header_file + ' ' + phonelist
 		subprocess.call(subprocessStr, shell=True)
 ### ======================= forced alignment =======================
 #if forced_alignment:
 #	try:
 #		scripts.run_command([
 #			'HVite','-T', '1', '-a', '-C', configHVite,
 #			'-H', AcousticModel, '-m', '-I',
 #			mlf_file, '-i', fa_file, '-S',
 #			script_file, htk_dict_file, filePhoneList
 #		])
 #	except:
 #		print("\033[91mHVite command failed with these input files:\033[0m")
 #		print(_debug_show_file('HVite config', configHVite))
 #		print(_debug_show_file('Accoustic model', AcousticModel))
 #		print(_debug_show_file('Master Label file', mlf_file))
 #		print(_debug_show_file('Output', fa_file))
 #		print(_debug_show_file('Script file', script_file))
 #		print(_debug_show_file('HTK dictionary', htk_dict_file))
 #		print(_debug_show_file('Phoneme list', filePhoneList))
 #		raise
 ##os.remove(hcopy_scp.name)
--- a/acoustic_model/acoustic_model.pyproj
+++ b/acoustic_model/acoustic_model.pyproj
@@ -4,7 +4,7 @@
    <SchemaVersion>2.0</SchemaVersion>
    <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
    <ProjectHome>.</ProjectHome>
-    <StartupFile>acoustic_model.py</StartupFile>
+    <StartupFile>performance_check.py</StartupFile>
    <SearchPath>
    </SearchPath>
    <WorkingDirectory>.</WorkingDirectory>
@@ -25,6 +25,12 @@
    <Compile Include="acoustic_model_functions.py">
      <SubType>Code</SubType>
    </Compile>
    <Compile Include="convert_xsampa2ipa.py">
      <SubType>Code</SubType>
    </Compile>
    <Compile Include="performance_check.py">
      <SubType>Code</SubType>
    </Compile>
  </ItemGroup>
  <ItemGroup>
    <Content Include="config.ini" />
--- a/acoustic_model/convert_xsampa2ipa.py
+++ b/acoustic_model/convert_xsampa2ipa.py
@@ -0,0 +1,128 @@
 """ Conversion between IPA and Xsampa.
 Note: this code is based on ipa-xsama-converter/converter.py.
 https://github.com/lingz/ipa-xsama-converter/
 """
 import json
 import sys
 import os
 #sys.path.append(ipa_xsampa_converter_dir)
 #import converter
 def load_converter(source, sink, ipa_xsampa_converter_dir):
 	"""Load the symbol mapping that converts `source` notation into `sink`.

 	Args:
 		source: notation of the input; one of "ipa", "xsampa" or "sassc".
 		sink: notation of the output; one of "ipa", "xsampa" or "sassc".
 		ipa_xsampa_converter_dir: directory containing "ipa_xsampa_map.json".
 			"sassc_ipa.json" is read from the directory of this script and
 			only when either side is "sassc".

 	Returns:
 		dict mapping a source symbol to the matching sink symbol. When
 		neither side is IPA the mapping is composed through IPA; a source
 		symbol whose IPA form cannot be fully expressed in the sink notation
 		maps to None.

 	Raises:
 		SystemExit: when source or sink is not a supported notation.
 	"""
 	choices = ["ipa", "xsampa", "sassc"]
 	# Validate params
 	if source not in choices or sink not in choices:
 		print("source and destination should be one of [ipa xsampa sassc].")
 		sys.exit(1)
 	if source == sink:
 		# Not fatal: the mapping is still built, this is only a notice.
 		print("source and destination format are the same.")
 	# Mappings from disk; each JSON file holds a list of pairs.
 	with open(os.path.join(ipa_xsampa_converter_dir, "ipa_xsampa_map.json"), encoding="utf-8") as f:
 		ipa_xsampa = json.load(f)  # pairs of [ipa, xsampa]
 	sassc_ipa = []  # pairs of [sassc, ipa]
 	if source == "sassc" or sink == "sassc":
 		script_dir = os.path.dirname(os.path.realpath(__file__))
 		# Read with the same explicit encoding as the IPA/X-SAMPA map so the
 		# result does not depend on the platform's default code page.
 		with open(os.path.join(script_dir, "./sassc_ipa.json"), encoding="utf-8") as f:
 			sassc_ipa = json.load(f)
 	# source -> IPA (stays empty when the source already is IPA)
 	source_to_ipa = {}
 	if source == "xsampa":
 		source_to_ipa = {pair[1]: pair[0] for pair in ipa_xsampa}
 	elif source == "sassc":
 		source_to_ipa = {pair[0]: pair[1] for pair in sassc_ipa}
 	# IPA -> sink (stays empty when the sink already is IPA)
 	ipa_to_sink = {}
 	if sink == "xsampa":
 		ipa_to_sink = {pair[0]: pair[1] for pair in ipa_xsampa}
 	elif sink == "sassc":
 		ipa_to_sink = {pair[1]: pair[0] for pair in sassc_ipa}
 	# Combine them into a single mapping
 	if source == "ipa":
 		return ipa_to_sink
 	if sink == "ipa":
 		return source_to_ipa
 	mapping = {}
 	# dict.items() replaces the Python 2 only dict.iteritems().
 	for k, ipas in source_to_ipa.items():
 		parts = []
 		for ipa in ipas:
 			val = ipa_to_sink.get(ipa)
 			if not val:
 				# One untranslatable IPA character invalidates the whole symbol.
 				parts = None
 				break
 			parts.append(val)
 		mapping[k] = "".join(parts) if parts is not None else None
 	return mapping
 def conversion(source, sink, mapping, line, sassc_active_ipa=None):
 	"""Convert `line` character by character using `mapping`.

 	Leading and trailing whitespace is stripped from `line`. Every remaining
 	character is treated as one token: whitespace is copied through
 	unchanged, mapped characters are replaced, and characters without a
 	mapping are dropped after a warning is written to stderr.

 	Args:
 		source: notation of the input (not used by the conversion itself).
 		sink: notation of the output; "sassc" enables character filtering.
 		mapping: can be obtained with load_converter().
 		line: the string to convert.
 		sassc_active_ipa: optional container of the characters that are
 			accepted when sink is "sassc". When omitted, the characters
 			occurring in the keys of `mapping` are used.

 	Returns:
 		The converted string.
 	"""
 	if sink == "sassc" and sassc_active_ipa is None:
 		# The set of accepted characters is not part of `mapping`, so derive
 		# it from the symbols the mapping can translate.
 		sassc_active_ipa = {char for key in mapping for char in key}
 	output = []
 	for token in line.strip():
 		if token.isspace():
 			output.append(token)
 			continue
 		# Remove extraneous chars that the sink notation does not accept
 		if sink == "sassc":
 			token = "".join(char for char in token if char in sassc_active_ipa)
 		mapped = mapping.get(token)
 		if not mapped:
 			print("WARNING: could not map token ", token, file=sys.stderr)
 		else:
 			output.append(mapped)
 	return "".join(output)
--- a/acoustic_model/how_to_use_ipa-xsampa_converer.txt
+++ b/acoustic_model/how_to_use_ipa-xsampa_converer.txt
@@ -0,0 +1,15 @@
 Check the indent:
 114: output += "\n"
 Specify the encoding when the json file is loaded:
 46: with open(os.path.join(script_dir, "ipa_xsampa_map.json")) as f:
 --> with open(os.path.join(script_dir, "ipa_xsampa_map.json"), encoding="utf-8") as f:
 Because str is already Unicode in Python 3.6, remove the explicit decoding and encoding:
 86: line = unicode(line, 'utf-8').strip()
 --> line = line.strip()
 117:sys.stdout.write(output.encode("utf-8"))
 --> sys.stdout.write(output)
 Take the input from arguments instead of standard input.
 12: if len(sys.argv) != 4:
--- a/acoustic_model/performance_check.py
+++ b/acoustic_model/performance_check.py
@@ -1,4 +1,49 @@
-### ======================= forced alignment =======================
+import os
 import sys
 import csv
 import subprocess
 import numpy as np
 import convert_xsampa2ipa
 ## ======================= user define =======================
 # Machine-specific locations; adjust these before running the script.
 forced_alignment_module  = r'C:\Users\Aki\source\repos\forced_alignment'
 ipa_xsampa_converter_dir = r'C:\Users\Aki\source\repos\ipa-xsama-converter'
 csvfile = r"C:\OneDrive\Research\rug\stimmen\Frisian Variants Picture Task Stimmen.csv"
 # The forced_alignment package is imported from its repository checkout,
 # so the path has to be registered before the import below.
 sys.path.append(forced_alignment_module)
 from forced_alignment import convert_phone_set
 mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', ipa_xsampa_converter_dir)
 #word_xsampa = 'e:j@X'
 #word_ipa = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, word_xsampa)
 filenames	   = []
 words		   = []
 pronunciations = []
 with open(csvfile, encoding="utf-8") as fin:
 	lines = csv.reader(fin, delimiter=';', lineterminator="\n", skipinitialspace=True)
 	next(lines, None) # skip the headers
 	for line in lines:
 		# Check the row length first so short or blank rows are skipped
 		# instead of raising IndexError, and compare the word by value
 		# rather than by identity.
 		if len(line) > 5 and line[1] != '':
 			filenames.append(line[0])
 			words.append(line[1])
 			word_xsampa = line[3]
 			word_ipa = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, word_xsampa)
 			# Use an ASCII colon as the length mark before the phone set conversion.
 			word_ipa = word_ipa.replace('ː', ':')
 			# NOTE(review): presumably maps IPA to the FAME HTK phone set - confirm
 			# against forced_alignment.convert_phone_set.
 			word_famehtk = convert_phone_set.ipa2famehtk(word_ipa)
 			pronunciations.append(word_famehtk)
 phonelist = ' '.join(pronunciations)
 # Keep the distinct phones; the bare call discarded its result.
 phoneset = np.unique(phonelist.split(' '))
 ## ======================= forced alignment =======================
 #if forced_alignment:
 #	try:
 #		scripts.run_command([