""" Conversion between IPA and Xsampa. Note: this code is based on ipa-xsama-converter/converter.py. https://github.com/lingz/ipa-xsama-converter/ """ import json import sys import os #sys.path.append(ipa_xsampa_converter_dir) #import converter def load_converter(source, sink, ipa_xsampa_converter_dir): """load the converter. source and sink are either of "ipa", "xsampa" or "sassc". """ choices = ["ipa", "xsampa", "sassc"] # Validate params try: choice1 = choices.index(source) choice2 = choices.index(sink) if choice1 == choice2: print("source and destination format are the same.") except ValueError: print("source and destination should be one of [ipa xsampa sassc].") exit(1) # Mappings from disk # some may not be used if source or sink is already IPA source_to_ipa = {} ipa_to_sink = {} ipa_xsampa = [] sassc_ipa = [] # The IPAs that actually occur within SASSC sassc_active_ipa = {} script_dir = os.path.dirname(os.path.realpath(__file__)) with open(os.path.join(ipa_xsampa_converter_dir, "ipa_xsampa_map.json"), encoding="utf-8") as f: ipa_xsampa = json.load(f) sassc_active = source == "sassc" or sink == "sassc" if sassc_active: with open(os.path.join(script_dir, "./sassc_ipa.json")) as f: sassc_ipa = json.load(f) for pair in sassc_ipa: for char in pair[1]: sassc_active_ipa[char] = 1 if source == "xsampa": for pair in ipa_xsampa: source_to_ipa[pair[1]] = pair[0] elif source == "sassc": for pair in sassc_ipa: source_to_ipa[pair[0]] = pair[1] if sink == "xsampa": for pair in ipa_xsampa: ipa_to_sink[pair[0]] = pair[1] elif sink == "sassc": for pair in sassc_ipa: ipa_to_sink[pair[1]] = pair[0] # Combine them into a single mapping mapping = {} if source == "ipa": mapping = ipa_to_sink elif sink == "ipa": mapping = source_to_ipa else: for k, ipas in source_to_ipa.iteritems(): map_out = "" failed = False for ipa in ipas: val = ipa_to_sink.get(ipa) if not val: failed = True break map_out += val mapping[k] = map_out if not failed else None return mapping def conversion(source, sink, mapping, line): """ conversion. Args: mapping: can be obtained with load_converter(). line: must be seperated, by default the seperator is whitespace. """ # Edit this to change the seperator SEPERATOR = " " line = line.strip() output = [] #if sassc_active: # tokens = line.split(SEPERATOR) #else: tokens = line for token in tokens: if token.isspace(): output.append(token) continue # Remove extraneous chars that IPA does not accept if sink == "sassc": cleaned_token = u"" for char in token: if sassc_active_ipa.get(char): cleaned_token += char token = cleaned_token mapped = mapping.get(token) if not mapped: print("WARNING: could not map token ", token, file=sys.stderr) else: output.append(mapped) #if sassc_active: # output = SEPERATOR.join(output) #else: output = "".join(output) return output