8 changed files with 197 additions and 25 deletions
Binary file not shown.
Binary file not shown.
@ -0,0 +1,128 @@
@@ -0,0 +1,128 @@
|
||||
""" Conversion between IPA and Xsampa. |
||||
|
||||
Note: this code is based on ipa-xsama-converter/converter.py. |
||||
https://github.com/lingz/ipa-xsama-converter/ |
||||
""" |
||||
import json |
||||
import sys |
||||
import os |
||||
|
||||
|
||||
#sys.path.append(ipa_xsampa_converter_dir) |
||||
#import converter |
||||
|
||||
|
||||
def load_converter(source, sink, ipa_xsampa_converter_dir):
    """Build the symbol mapping used to convert from `source` to `sink`.

    Args:
        source: notation of the input; one of "ipa", "xsampa" or "sassc".
        sink: notation of the output; one of "ipa", "xsampa" or "sassc".
        ipa_xsampa_converter_dir: directory that contains
            "ipa_xsampa_map.json", a JSON list of [ipa, xsampa] pairs.

    Returns:
        A dict mapping each source symbol to the corresponding sink symbol.
        When neither side is IPA, the conversion goes through IPA; a source
        symbol whose IPA form cannot be completely expressed in the sink
        notation is mapped to None.

    Raises:
        SystemExit: (status 1) if `source` or `sink` is not a known notation.

    Note:
        When "sassc" is involved, "sassc_ipa.json" (a JSON list of
        [sassc, ipa] pairs) is read from the directory of this module, not
        from `ipa_xsampa_converter_dir`.
    """
    choices = ("ipa", "xsampa", "sassc")

    # Validate params
    if source not in choices or sink not in choices:
        print("source and destination should be one of [ipa xsampa sassc].")
        sys.exit(1)
    if source == sink:
        # Not fatal: the (possibly trivial) mapping is still built below.
        print("source and destination format are the same.")

    # Mappings from disk; some may not be used if source or sink is IPA.
    map_path = os.path.join(ipa_xsampa_converter_dir, "ipa_xsampa_map.json")
    with open(map_path, encoding="utf-8") as f:
        ipa_xsampa = json.load(f)

    sassc_ipa = []
    if "sassc" in (source, sink):
        script_dir = os.path.dirname(os.path.realpath(__file__))
        # The table contains IPA characters, so decode it explicitly as UTF-8
        # instead of relying on the platform default encoding.
        sassc_path = os.path.join(script_dir, "sassc_ipa.json")
        with open(sassc_path, encoding="utf-8") as f:
            sassc_ipa = json.load(f)

    source_to_ipa = {}
    if source == "xsampa":
        source_to_ipa = {pair[1]: pair[0] for pair in ipa_xsampa}
    elif source == "sassc":
        source_to_ipa = {pair[0]: pair[1] for pair in sassc_ipa}

    ipa_to_sink = {}
    if sink == "xsampa":
        ipa_to_sink = {pair[0]: pair[1] for pair in ipa_xsampa}
    elif sink == "sassc":
        ipa_to_sink = {pair[1]: pair[0] for pair in sassc_ipa}

    # Combine them into a single mapping
    if source == "ipa":
        return ipa_to_sink
    if sink == "ipa":
        return source_to_ipa

    mapping = {}
    # dict.items() replaces the Python 2 only dict.iteritems().
    for symbol, ipas in source_to_ipa.items():
        # Translate the IPA form character by character; a single
        # untranslatable character invalidates the whole symbol.
        converted = [ipa_to_sink.get(ipa) for ipa in ipas]
        mapping[symbol] = "".join(converted) if all(converted) else None
    return mapping
||||
|
||||
|
||||
def conversion(source, sink, mapping, line):
    """Convert one line of text, character by character, using `mapping`.

    Args:
        source: notation of the input. Kept for interface compatibility; the
            conversion itself does not depend on it.
        sink: notation of the output. When it is "sassc", characters that do
            not occur in any key of `mapping` are discarded before lookup.
        mapping: dict from source symbols to sink symbols; can be obtained
            with load_converter().
        line: the text to convert. Leading and trailing whitespace is
            stripped; inner whitespace characters are copied unchanged.

    Returns:
        The converted string. Characters without a (non-empty) mapping are
        omitted from the result and reported on stderr.

    Note:
        The line is processed one character at a time, so keys of `mapping`
        longer than one character are never matched.
    """
    # The original referenced an undefined name `sassc_active_ipa` here,
    # which raised NameError for sink == "sassc". The set of characters that
    # can possibly be mapped is derived from the mapping keys instead.
    mappable_chars = None
    if sink == "sassc":
        mappable_chars = {char for key in mapping for char in key}

    output = []
    for token in line.strip():
        if token.isspace():
            output.append(token)
            continue
        # Remove extraneous chars that the mapping does not accept
        if mappable_chars is not None and token not in mappable_chars:
            token = ""
        mapped = mapping.get(token)
        if not mapped:
            print("WARNING: could not map token ", token, file=sys.stderr)
        else:
            output.append(mapped)

    return "".join(output)
@ -0,0 +1,15 @@
@@ -0,0 +1,15 @@
|
||||
Check the indent: |
||||
114: output += "\n" |
||||
|
||||
Specify the encoding when the json file is loaded: |
||||
46: with open(os.path.join(script_dir, "ipa_xsampa_map.json")) as f: |
||||
--> with open(os.path.join(script_dir, "ipa_xsampa_map.json"), encoding="utf-8") as f: |
||||
|
||||
Because Python 3 strings are Unicode by default (this code targets Python 3.6), the explicit decode/encode calls are no longer needed: |
||||
86: line = unicode(line, 'utf-8').strip() |
||||
--> line = line.strip() |
||||
117:sys.stdout.write(output.encode("utf-8")) |
||||
--> sys.stdout.write(output) |
||||
|
||||
Read the inputs from command-line arguments instead of standard input: |
||||
12: if len(sys.argv) != 4: |
Loading…
Reference in new issue