novo_api for python 3.x is added.

This commit is contained in:
yemaozi88 2018-12-30 23:47:55 +01:00
parent a77ed9d4dd
commit e5cf182a18
16 changed files with 769 additions and 10 deletions

Binary file not shown.

View File

@ -4,7 +4,7 @@
<SchemaVersion>2.0</SchemaVersion> <SchemaVersion>2.0</SchemaVersion>
<ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid> <ProjectGuid>4d8c8573-32f0-4a62-9e62-3ce5cc680390</ProjectGuid>
<ProjectHome>.</ProjectHome> <ProjectHome>.</ProjectHome>
<StartupFile>performance_check.py</StartupFile> <StartupFile>check_novoapi.py</StartupFile>
<SearchPath> <SearchPath>
</SearchPath> </SearchPath>
<WorkingDirectory>.</WorkingDirectory> <WorkingDirectory>.</WorkingDirectory>
@ -25,6 +25,7 @@
<Compile Include="acoustic_model_functions.py"> <Compile Include="acoustic_model_functions.py">
<SubType>Code</SubType> <SubType>Code</SubType>
</Compile> </Compile>
<Compile Include="check_novoapi.py" />
<Compile Include="convert_xsampa2ipa.py"> <Compile Include="convert_xsampa2ipa.py">
<SubType>Code</SubType> <SubType>Code</SubType>
</Compile> </Compile>
@ -34,7 +35,7 @@
<Compile Include="fa_test.py"> <Compile Include="fa_test.py">
<SubType>Code</SubType> <SubType>Code</SubType>
</Compile> </Compile>
<Compile Include="performance_check.py"> <Compile Include="htk_vs_kaldi.py">
<SubType>Code</SubType> <SubType>Code</SubType>
</Compile> </Compile>
</ItemGroup> </ItemGroup>

View File

@ -20,13 +20,8 @@ from forced_alignment import pyhtk
import novoapi import novoapi
## ======================= convert phones ======================
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx) ## ======================= novo phoneset ======================
## novo phoneset
translation_key = dict() translation_key = dict()
#phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx) #phonelist_novo70_ = pd.ExcelFile(default.phonelist_novo70_xlsx)
@ -54,3 +49,14 @@ with open(default.cmu69_phoneset, "rt", encoding="utf-8") as fin:
phoneset_ipa = np.unique(phoneset_ipa) phoneset_ipa = np.unique(phoneset_ipa)
phoneset_novo70 = np.unique(phoneset_novo70) phoneset_novo70 = np.unique(phoneset_novo70)
## ======================= convert phones ======================
mapping = convert_xsampa2ipa.load_converter('xsampa', 'ipa', default.ipa_xsampa_converter_dir)
stimmen_transcription_ = pd.ExcelFile(default.stimmen_transcription_xlsx)
df = pd.read_excel(stimmen_transcription_, 'check')
#for xsampa, ipa in zip(df['X-SAMPA'], df['IPA']):
# #ipa_converted = convert_xsampa2ipa.conversion('xsampa', 'ipa', mapping, xsampa_)
# ipa_converted = convert_xsampa2ipa.xsampa2ipa(mapping, xsampa)
# if not ipa_converted == ipa:
# print('{0}: {1} - {2}'.format(xsampa, ipa_converted, ipa))

View File

@ -40,5 +40,6 @@ stimmen_transcription_xlsx = os.path.join(experiments_dir, 'stimmen', 'data', 'F
stimmen_data_dir = os.path.join(experiments_dir, 'stimmen', 'data') stimmen_data_dir = os.path.join(experiments_dir, 'stimmen', 'data')
phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt') phonelist_friesian_txt = os.path.join(experiments_dir, 'friesian', 'acoustic_model', 'config', 'phonelist_friesian.txt')
novo_api_dir = os.path.join(WSL_dir, 'python-novo-api') novo_api_dir = os.path.join(WSL_dir, 'python-novo-api', 'novoapi')
cmu69_phoneset = os.path.join(novo_api_dir, 'novoapi', 'asr', 'phoneset', 'en', 'cmu69.phoneset') #novo_api_dir = r'c:\Python36-32\Lib\site-packages\novoapi'
cmu69_phoneset = os.path.join(novo_api_dir, 'asr', 'phoneset', 'en', 'cmu69.phoneset')

5
novoapi/__init__.py Normal file
View File

@ -0,0 +1,5 @@
#!/usr/bin/env python
__version__ = "0.2"

# Explicit relative import: Python 3 does not resolve a bare ``import backend``
# against the package directory, and the other ``__init__`` modules of this
# package already use the ``from . import ...`` form.
from . import backend

6
novoapi/asr/__init__.py Normal file
View File

@ -0,0 +1,6 @@
#!/usr/bin/env python
#import segments
#import spraaklab
from . import segments
from . import spraaklab

View File

@ -0,0 +1,4 @@
#!/usr/bin/env python
from .segments import Segmentation
from .praat import seg2tg

View File

@ -0,0 +1,77 @@
#!/usr/bin/env python
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
import codecs
def print_header(output, begin, end, nr_tiers):
    """Write the TextGrid file header to *output*.

    output: writable text stream (only ``write`` is used).
    begin, end: values substituted with ``%s`` into the xmin/xmax lines.
    nr_tiers: integer written as the ``size`` line.
    """
    # The Python 2 ``print >> output, ...`` statement still parses under
    # Python 3 but raises TypeError when executed, so write the lines directly.
    lines = [
        'File type = "ooTextFile"',
        'Object class = "TextGrid"',
        '',
        'xmin = %s' % begin,
        'xmax = %s' % end,
        'tiers? <exists>',
        'size = %d' % nr_tiers,
        'item []:',
    ]
    output.write("".join(line + "\n" for line in lines))
def print_info_tier(output, title, begin, end, label):
    """Write an interval tier with a single interval spanning begin..end.

    output: writable text stream (only ``write`` is used).
    title: tier name.
    begin, end: values substituted with ``%s`` into the xmin/xmax lines of
        both the tier and its single interval.
    label: text of the single interval.
    """
    # ``print >> output`` raises TypeError under Python 3; write explicitly.
    lines = [
        '\titem [%d]:' % 0,
        '\t\tclass = "IntervalTier"',
        '\t\tname = "%s"' % title,
        '\t\txmin = %s' % begin,
        '\t\txmax = %s' % end,
        '\t\tintervals: size = %d' % 1,
        '\t\tintervals [1]:',
        '\t\t\txmin = %s' % begin,
        '\t\t\txmax = %s' % end,
        '\t\t\ttext = "%s"' % label,
    ]
    output.write("".join(line + "\n" for line in lines))
#def print_tier(output, title, begin, end, segs, (format, formatter)):
def print_tier(output, title, begin, end, segs, format, formatter=None):
    """Write one interval tier containing *segs* to *output*.

    output: writable text stream (only ``write`` is used).
    title: tier name.
    begin, end: values substituted with ``%s`` into the tier's xmin/xmax lines.
    segs: sized iterable of dicts with 'begin', 'end' and 'label' keys; begin
        and end are written as ``int(value) / 100.0``.
    format: %-style format for the interval text, or a ``(format, formatter)``
        pair (the older tuple-style calling convention).
    formatter: callable applied to each label before it is formatted. May be
        omitted only when *format* is given as a pair.

    Raises TypeError when no formatter is supplied in either form.
    """
    if isinstance(format, (tuple, list)):
        # Callers written against the tuple-parameter signature pass one pair.
        format, formatter = format
    elif formatter is None:
        raise TypeError("print_tier() requires a formatter")

    def emit(line):
        # Replaces ``print >> output, line`` which fails under Python 3.
        output.write(line + "\n")

    # NOTE(review): the tier index is always written as 0, as in the original.
    emit('\titem [%d]:' % 0)
    emit('\t\tclass = "IntervalTier"')
    emit('\t\tname = "%s"' % title)
    emit('\t\txmin = %s' % begin)
    emit('\t\txmax = %s' % end)
    emit('\t\tintervals: size = %d' % len(segs))
    text_template = '\t\t\ttext = "' + format + '"'
    for count, seg in enumerate(segs, 1):
        emit('\t\tintervals [%d]:' % count)
        emit('\t\t\txmin = %s' % repr(int(seg['begin']) / 100.0))
        emit('\t\t\txmax = %s' % repr(int(seg['end']) / 100.0))
        emit(text_template % formatter(seg['label']))
def seg2tg(fname, segments):
    """Write *segments* to the file *fname* as a three-tier TextGrid.

    fname: path of the file to create (written as UTF-8).
    segments: indexable sequence of word segments exposing ``begin``, ``end``,
        ``label``, ``score`` and optionally ``llh`` and ``phones``.

    The tiers are "confidence" (``llh`` when present, otherwise ``score``),
    "words" and "phones". Nothing is written when *segments* is empty.
    """
    if not segments:
        return
    confidences = []
    word_labels = []
    phones = []
    for s in segments:
        conf = s.llh if hasattr(s, "llh") else s.score
        confidences.append({'begin': s.begin, 'end': s.end, 'label': conf})
        word_labels.append({'begin': s.begin, 'end': s.end, 'label': s.label})
        # Segments without a phone breakdown contribute nothing to the tier.
        for p in getattr(s, "phones", ()):
            phones.append({'begin': p.begin, 'end': p.end, 'label': p.label})
    begin = repr(int(segments[0].begin) / 100.0)
    end = repr(int(segments[-1].end) / 100.0)
    nr_tiers = 3
    # ``with`` closes the file even if one of the writers raises.
    with codecs.open(fname, "w", encoding="utf-8") as output:
        print_header(output, begin, end, nr_tiers)
        # format and formatter are separate positional arguments of print_tier;
        # passing them as one tuple left ``formatter`` unfilled.
        print_tier(output, "confidence", begin, end, confidences, '%.3f', lambda x: x)
        print_tier(output, "words", begin, end, word_labels, '%s', lambda x: x)
        print_tier(output, "phones", begin, end, phones, '%s', lambda x: x)

View File

@ -0,0 +1,99 @@
#!/usr/bin/env python
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
## These classes can be initialized with dictionaries, as they are returned by the python spraaklab recognition system.
class Segment(object):
    """One recognised word or phone, built from a recognition-result dict."""

    def __init__(self, segment):
        """Populate the segment from *segment*.

        segment: dict with "begin", "end", "label" and "score" keys, and
            optionally "beginTime", "endTime", "llh" and "phones". When
            "phones" is present the segment is a word and its phones are
            wrapped in a Segmentation; otherwise it is a phone.
        """
        self.begin = segment["begin"]
        self.end = segment["end"]
        # Explicit times win; otherwise derive them from begin/end divided by 100.
        self.begintime = segment.get("beginTime", self.begin / 100.0)
        self.endtime = segment.get("endTime", self.end / 100.0)
        self.label = segment["label"]
        self.score = segment["score"]
        if "llh" in segment:
            self.llh = segment["llh"]
        if "phones" in segment:
            self.type = "word"
            self.phones = Segmentation(segment["phones"], ["sil"])
            # An empty phone list has no first element to probe.
            if len(self.phones) > 0 and hasattr(self.phones[0], "llh"):
                ## the current word llh for error detection
                self.minllh = min(s.llh for s in self.phones)
        else:
            self.type = "phone"

    def __repr__(self):
        res = "%8.3f -- %8.3f score %8.3f " % (self.begintime, self.endtime, self.score)
        if hasattr(self, "llh"):
            res += "llh %8.3f " % self.llh
        label = self.label
        if not isinstance(label, str):
            # Python 2 unicode label: encode so the repr stays a byte string.
            # Under Python 3 the label is already str and str + bytes would raise.
            label = label.encode("utf8")
        return res + label

    def export(self):
        """Return the segment as a plain dict, recursing into phones."""
        r = {"begin": self.begin, "end": self.end, "label": self.label, "score": self.score, "type": self.type}
        if hasattr(self, "llh"):
            r["llh"] = self.llh
        if hasattr(self, "phones"):
            r["phones"] = self.phones.export()
        return r
class Segmentation(object):
    """An ordered collection of Segment objects (all words or all phones)."""

    def __init__(self, segments, sils=("<s>", "</s>", "!sil")):
        """Create a segmentation from a spraaklab recognition structure.

        segments: an array of words (or phones), represented by a dict with
        "begin", "end", "label", "score", and "llh" keys. Words can also have
        "phones" which is another array of segments.
        sils: labels skipped by default in the aggregate methods.
        """
        self.segments = [Segment(s) for s in segments]
        self.type = self.segments[0].type if self.segments else None
        # Copy, so instances never share (or mutate) one default list object.
        self.sils = list(sils)
        self.orig = segments  ## in case we want to have access to the original recognition structure

    def _skipped(self, skip):
        """Return the labels to ignore: *skip*, or ``self.sils`` when *skip* is falsy."""
        return skip if skip else self.sils

    def __getitem__(self, item):
        return self.segments[item]

    def __repr__(self):
        ns = len(self.segments)
        header = "Segmentation with %d %s%s" % (ns, self.type, "" if ns == 1 else "s")
        return header + "".join("\n  " + repr(seg) for seg in self.segments)

    def __len__(self):
        return len(self.segments)

    def score(self, skip=None):
        """Return the summed score of all segments whose label is not skipped."""
        ignored = self._skipped(skip)
        s = 0.0
        for seg in self.segments:
            if seg.label not in ignored:
                s += seg.score
        return s

    def llhs(self, skip=None):
        """Return the llh of every non-skipped segment that has one."""
        ignored = self._skipped(skip)
        return [seg.llh for seg in self.segments if hasattr(seg, "llh") and seg.label not in ignored]

    def llh(self, skip=None):
        """Return the sum of ``llhs(skip)``."""
        return sum(self.llhs(skip))

    def minllh(self, skip=None):
        """Return the smallest value of ``llhs(skip)``, or None when there is none."""
        llhs = self.llhs(skip)
        return min(llhs) if llhs else None

    def labels(self, skip=None):
        """Return the labels of all non-skipped segments, in order."""
        ignored = self._skipped(skip)
        return [seg.label for seg in self.segments if seg.label not in ignored]

    def sentence(self, skip=None):
        """Return the non-skipped labels joined by single spaces."""
        return " ".join(self.labels(skip))

    def export(self):
        """Return the exported form of every segment as a list."""
        return [seg.export() for seg in self.segments]

View File

@ -0,0 +1,4 @@
#!/usr/bin/env python
#import schema
from . import schema

View File

@ -0,0 +1,273 @@
#!/usr/bin/env python
## (c) 2017 NovoLanguage, author: David A. van Leeuwen
## The purpose of this to define the grammar structure in a json schema, so that it can be validated,
## (de)serialized, and perhaps even automatically converted to a Python class structure.
import json
import jsonschema
# JSON schema for the v1.0 grammar format. The document root must match the
# "group" definition; the other definitions are the building blocks that
# groups are assembled from.
grammar_schema_v10 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    "$ref": "#/definitions/group",
    "definitions": {
        # A non-empty list of phone symbols.
        "phones": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "minItems": 1
        },
        # One pronunciation variant; only "phones" is mandatory.
        "pronunciation": {
            "type": "object",
            "properties": {
                "phones": {
                    "$ref": "#/definitions/phones"
                },
                "syllables": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/syllable"
                    },
                    "minItems": 1
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this pronunciation from other variants"
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["phones"]
        },
        # Syllable description: non-negative integer fields, of which "begin"
        # and "end" are mandatory.
        "syllable": {
            "type": "object",
            "properties": {
                "begin": {
                    "type": "integer",
                    "minimum": 0
                },
                "end": {
                    "type": "integer",
                    "minimum": 0
                },
                "stress": {
                    "type": "integer",
                    "minimum": 0
                },
                "tone": {
                    "type": "integer",
                    "minimum": 0
                }
            },
            "required": ["begin", "end"]
        },
        # A word; only "label" is mandatory. "kind", when given, must be "word".
        "word": {
            "type": "object",
            "properties": {
                "kind": {
                    "type": "string",
                    "enum": ["word"]
                },
                "label": {
                    "type": "string"
                },
                # Accepts a single pronunciation object, a non-empty list of
                # pronunciation objects and/or phone lists, or a bare phone list.
                "pronunciation": {
                    "anyOf": [
                        {
                            "$ref": "#/definitions/pronunciation"
                        },
                        {
                            "type": "array",
                            "items": {
                                "anyOf": [
                                    {
                                        "$ref": "#/definitions/pronunciation"
                                    },
                                    {
                                        "$ref": "#/definitions/phones"
                                    }
                                ]
                            },
                            "minItems": 1
                        },
                        {
                            "$ref": "#/definitions/phones"
                        }
                    ]
                },
                "syllables": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/syllable"
                    }
                },
                "graphemes": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "id": {
                    "type": "integer",
                    "description": "ID to distinguish this word from other words (with possibly the same label)"
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["label"]
        },
        # Anything that may appear inside a group: exactly one of a word
        # object, a nested group, or a plain string / null.
        "element": {
            "title": "element",
            "oneOf": [
                {
                    "$ref": "#/definitions/word"
                },
                {
                    "$ref": "#/definitions/group"
                },
                {
                    "type": ["string", "null"]
                }
            ]
        },
        # A group of elements; "kind" selects one of the three listed
        # combination types and "elements" must be non-empty.
        "group": {
            "title": "element group",
            "type": "object",
            "properties": {
                "kind": {
                    "type": "string",
                    "enum": ["sequence", "alternatives", "order"]
                },
                "elements": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/element"
                    },
                    "minItems": 1,
                },
                "meta": {
                    "type": "object"
                }
            },
            "required": ["kind", "elements"]
        }
    }
}
# JSON schema for the v0.1 grammar format: an object with a "type" and a list
# of one to five "parts", each part being a string or an array of strings.
# NOTE(review): no "required" list is declared, so both properties are
# optional as far as this schema is concerned.
grammar_schema_v01 = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage grammar v0.1",
    "description": "A grammar specification for the NovoLanguage Automatic Speech Recognition",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["multiple_choice", "word_order"]
        },
        "parts": {
            "type": "array",
            "minItems": 1,
            "maxItems": 5,
            "items": {
                "type": ["string", "array"],
                "items": {
                    "type": ["string"]
                }
            }
        }
    }
}
# JSON schema for the RPC message that wraps a grammar. "type" and "data" are
# mandatory; "data" is only checked to be an object here.
grammar_rpc_schema = {
    "$schema": "http://json-schema.org/schema#",
    "title": "NovoLanguage RPC grammar",
    "type": "object",
    "properties": {
        "type": {
            "type": "string",
            "enum": ["confusion_network"]
        },
        # NOTE(review): the default is spelled "v0.1" here, while
        # validate_rpc_grammar falls back to and compares against "0.1" --
        # confirm which spelling is intended.
        "version": {
            "type": "string",
            "default": "v0.1"
        },
        "data": {
            "type": "object"
        },
        "return_dict": {
            "type": "boolean"
        },
        "return_objects": {
            "type": "array",
            "items": {
                "type": "string",
                "enum": ["dict", "grammar"]
            }
        },
        "phoneset": {
            "type": "string",
            "enum": ["cmu69", "novo70", "mdbg115"]
        },
        "parallel_silence": {
            "type": "boolean"
        }
    },
    "required": ["type", "data"]
}
def validate(object, schema=grammar_schema_v10):
    """Report whether *object* conforms to *schema*.

    object: a dict, or a JSON string that is decoded first.
    schema: JSON schema to check against (the v1.0 grammar schema by default).

    Returns True when validation succeeds and False when jsonschema reports a
    ValidationError; any other exception propagates unchanged.
    Raises TypeError when the (decoded) object is not a dict.
    """
    candidate = json.loads(object) if isinstance(object, str) else object
    if not isinstance(candidate, dict):
        raise TypeError("Expected dict or json string")
    try:
        jsonschema.validate(candidate, schema)
    except jsonschema.ValidationError:
        return False
    return True
def validate_rpc_grammar(message):
    """Validate an RPC grammar message and the grammar it carries.

    The envelope is checked against the RPC schema first; its "data" is then
    checked against the grammar schema selected by "version" ("0.1" when the
    key is absent). Raises ValueError on any failure; returns None otherwise.
    """
    if not validate(message, grammar_rpc_schema):
        raise ValueError("Not a valid RPC grammar")
    version = message.get("version", "0.1")
    data = message["data"]
    # version -> (schema for the payload, error text when it does not match)
    by_version = {
        "0.1": (grammar_schema_v01, "Not a valid grammar v0.1"),
        "1.0": (grammar_schema_v10, "Not a valid grammar v1.0"),
    }
    if version not in by_version:
        raise ValueError("Unsupported schema version")
    payload_schema, complaint = by_version[version]
    if not validate(data, payload_schema):
        raise ValueError(complaint)
## test
def test(data=None, schema=None):
    """Validate *data* against *schema* and print the outcome.

    data: grammar to check; a built-in sample v1.0 grammar is used when falsy.
    schema: JSON schema to validate against; defaults to grammar_schema_v10.
    """
    if not data:
        data = {"kind": "sequence", "elements": [
            {"kind": "alternatives", "elements": ["a plain string", "an alternative string"]},
            {"kind": "word", "label": "a word", "pronunciation": {"phones": ["ah", "w", "er", "d"]}},
            {"kind": "order", "elements": [{"kind": "word", "label": "another word", "visible": False}, "last word"]}]}
    if schema is None:
        # The body previously referenced an undefined name ``schema`` and so
        # always raised NameError; the sample data is a v1.0 grammar.
        schema = grammar_schema_v10
    try:
        jsonschema.validate(data, schema)
    except jsonschema.ValidationError as e:
        print("{0} validated not OK {1}".format(data, e.message))
    else:
        print("{} validated OK".format(data))
if __name__ == "__main__":
test()

View File

@ -0,0 +1,4 @@
#!/usr/bin/env python
#import session
from . import session

254
novoapi/backend/session.py Normal file
View File

@ -0,0 +1,254 @@
#!/usr/bin/env python
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
## Recognition interface for actual backend. Adapted from player.asr.debug.
import json
import sys
import wave
import requests
import websocket
import logging
import collections
import time
from .. import asr
# Module-level logger used by the session code below.
logger = logging.getLogger(__name__)
## turn off annoying warnings
# NOTE(review): this silences urllib3 warnings globally; Recognizer issues its
# requests with verify=False -- confirm that is intended outside development.
requests.packages.urllib3.disable_warnings()
logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(logging.WARN)
# Size of each audio slice sent over the websocket by Recognizer.recognize_data.
buffer_size = 4096
# Host, scheme, port and API version combined into Recognizer.api_url.
gm = "gm.novolanguage.com" ## dev
protocol = "https"
port = 443
apiversion = 0
# Number of live Recognizer objects per session uuid (incremented in
# init_session, decremented in __del__).
sessions = collections.Counter()
def segmentation(result):
    """Convert a raw backend recognition result into an asr.segments Segmentation.

    result: list of word dicts, each with "confidence" ("prob" and "llr"),
        "label" (with a "raw" entry), "begin", "end" and a "phones" list whose
        items carry "confidence", "begin" and "end".

    The dicts are rewritten in place: "score" and "llh" are copied out of
    "confidence", a word's "label" is replaced by its "raw" value, and
    "begin"/"end" are divided by 10.
    """
    def flatten_confidence(item):
        item["score"] = item["confidence"]["prob"]
        item["llh"] = item["confidence"]["llr"]

    def rescale_times(item):
        # NOTE(review): under Python 3 ``/=`` is true division, so these
        # become floats -- confirm downstream code expects that.
        item["begin"] /= 10
        item["end"] /= 10

    for word in result:
        flatten_confidence(word)
        word["label"] = word["label"]["raw"]
        rescale_times(word)
        for phone in word["phones"]:
            flatten_confidence(phone)
            rescale_times(phone)
    return asr.segments.Segmentation(result)
class rpcid:
    """Process-wide counter handing out increasing JSON-RPC request ids."""

    # Last id that was handed out; 0 means none yet.
    id = 0

    @staticmethod
    def next():
        """Advance the shared counter by one and return the new value."""
        issued = rpcid.id + 1
        rpcid.id = issued
        return issued
class Recognizer(object):
    """Client for one recognition session on the backend.

    Construction logs the user in through the REST API, requests a session and
    opens a websocket to it. The ``recognize_*`` methods send audio (or an
    audio URL) over that websocket and return the result as a Segmentation.
    """

    def __init__(self, lang="en", gm=gm, grammar_version="0.1", user=None, password=None, snodeid=None, keepopen=False):
        """Log in and open a session.

        lang: language code sent as "l2" in the session request.
        gm: host name used to build the API url.
        grammar_version: default version used by set_alternatives_grammar.
        user, password: credentials for the login request.
        snodeid: optional value passed as "snodeid" in the session request.
        keepopen: when true, the session is not deleted on destruction.
        """
        self.lang = lang
        self.keepopen = keepopen
        self.api_url = "%s://%s:%d/v%d" % (protocol, gm, port, apiversion)
        self.verify = False
        self.headers = {"Content-Type": "application/json"}
        self.login_user(user, password)
        data = {"l2": lang, "local": False, "skipupload": True}
        if snodeid:
            data["snodeid"] = snodeid
        self.conn = None
        self.init_session(data)
        self.grammar_version = grammar_version
        self.last_message = None

    def login_user(self, username, password):
        """Obtain the user's authentication token and store it in the headers.

        Exits the process (``sys.exit(-1)``) when the request cannot be posted
        or the response reports errors; raises AssertionError on a non-OK
        HTTP status.
        """
        logger.info('obtain auth token at %s', self.api_url)
        data = {
            'username': username,
            'password': password
        }
        try:
            r = requests.post(self.api_url + '/publishers/1/login', headers=self.headers, data=json.dumps(data), verify=self.verify)
        except Exception as e:
            # Python 3 exceptions have no ``.message``; log the exception itself.
            logger.error("Cannot post request to GM API for user login: %s", e)
            sys.exit(-1)
        if not r.ok:
            # Same exception type as the former ``assert``, but not stripped by -O.
            raise AssertionError(r.reason)
        result = r.json()
        if "errors" in result["response"]:
            logger.info("Error in logging in: %s", result["response"]["errors"])
            sys.exit(-1)
        user_auth_token = result['response']['user']['authentication_token']
        # NOTE(review): this writes the authentication token to the log.
        logger.info("User auth token is: %s", user_auth_token)
        # set auth token in header
        self.headers['Authentication-Token'] = user_auth_token

    def init_session(self, data, direct=False, use_ip=False):
        """Request a session and connect a websocket to it.

        data: session request body.
        direct: connect straight to the snode ip/port instead of the proxy.
        use_ip: with the proxy route, use its "ip" rather than its "hostname".

        Returns without a connection (after logging) when the session request
        or a status poll fails. Raises when the backend reports an error for
        the session or the websocket cannot be opened.
        """
        logger.info('Request new session: %s', data)
        r = requests.post(self.api_url + '/sessions', headers=self.headers, data=json.dumps(data), verify=self.verify)
        if not r.ok:
            logger.error("New session request failed: %s", r.text)
            return
        status_url = r.headers.get("location")
        if status_url:
            ## we got a redirect
            status = {}
            while True:
                logger.debug("Checking %s", status_url)
                s = requests.get(status_url, verify=self.verify)
                if not s.ok:
                    logger.error('Checking Failed: %s', s.text)
                    return
                status = s.json()
                if status['status'] == 'PENDING':
                    logger.debug("Status: %s", status['status'])
                    time.sleep(1)
                else:
                    break
            session = status['result'][0]  ## [1] is another status code...
            if "error" in session:
                logger.error("Error in getting a snode: %s", session["error"])
                raise Exception("Error in getting a snode: %s" % (session["error"],))
        else:
            session = r.json()
        try:
            logger.info("Session: %r", session)
            if direct:
                snode_ip = session["snode"]["ip"]
                proxy_url = snode_ip
                snode_port = session["port"]
                ws_url = "%s://%s:%d/" % ("ws", snode_ip, snode_port)
            else:
                field = "ip" if use_ip else "hostname"
                proxy_url = session['snode']['datacentre']['proxy'][field]
                ws_url = 'wss://' + proxy_url + '/' + session['uuid']
            logger.info("Connecting to websocket: %s", ws_url)
            conn = websocket.create_connection(ws_url, sslopt={"check_hostname": self.verify})
            logger.info("Connected.")
        except Exception as e:
            logger.error("Unable to connect to websocket: %s", e)
            raise
        self.session_id = session['id']
        self.proxy_url = proxy_url
        self.conn = conn
        self.session = session
        sessions[session["uuid"]] += 1

    def setgrammar(self, grammar):  ## backend grammar object: {"data": {...}, "type": "confusion_network"}
        """Validate *grammar*, send it as a ``set_grammar`` call and return the decoded reply."""
        data = {"jsonrpc": "2.0",
                'type': 'jsonrpc',
                'method': 'set_grammar',
                'params': grammar,
                "id": rpcid.next()}
        asr.spraaklab.schema.validate_rpc_grammar(grammar)
        self.conn.send(json.dumps(data))
        result = json.loads(self.conn.recv())
        if result.get("error"):
            logger.error("Exercise validation error: %s", result)
        return result

    def set_alternatives_grammar(self, *args, **kwargs):
        """Build a grammar with alternatives_grammar (defaulting "version" to
        this recognizer's grammar_version) and install it with setgrammar."""
        if "version" not in kwargs:
            kwargs["version"] = self.grammar_version
        return self.setgrammar(alternatives_grammar(*args, **kwargs))

    def recognize_wav(self, wavf):
        """Recognize the audio in the wave file *wavf*.

        The file must be single-channel, 2 bytes per sample, 16000 Hz and
        uncompressed; otherwise an error is logged and None is returned.
        """
        w = wave.open(wavf, 'r')
        try:
            nchannels, sampwidth, framerate, nframes, comptype, compname = w.getparams()
            if nchannels > 1:
                logger.error("Please use .wav with only 1 channel, found %d channels in %s", nchannels, wavf)
                return
            if sampwidth != 2:
                logger.error("Please use .wav with 2-byte PCM data, found %d bytes in %s", sampwidth, wavf)
                return
            if framerate != 16000.0:
                logger.error("Please use .wav sampled at 16000 Hz, found %1.0f in %s", framerate, wavf)
                return
            if comptype != 'NONE':
                logger.error("Please use .wav with uncompressed data, found %s in %s", compname, wavf)
                return
            buf = w.readframes(nframes)
        finally:
            # Close the file on the early-return paths as well.
            w.close()
        return self.recognize_data(buf)

    def recognize_data(self, buf):
        """Send the raw audio bytes *buf* and return the recognition result.

        Raises RuntimeError when the backend reply contains an "error".
        """
        start = time.time()
        for j in range(0, len(buf), buffer_size):
            # Send the slice itself: under Python 3, str() of a bytes object
            # yields its "b'...'" representation rather than the audio data.
            self.conn.send_binary(buf[j:j + buffer_size])
        self.conn.send(json.dumps({"jsonrpc": "2.0", "method": "get_result", "id": rpcid.next()}))
        logger.info("Waiting for recognition result...")
        self.last_message = self.conn.recv()  ## keep result for the interested applications
        message = json.loads(self.last_message)
        dur = time.time() - start
        logger.info("Recognition took %5.3f seconds", dur)
        if "error" in message:
            raise RuntimeError("Error from recognition backend: %r" % message.get("error"))
        return segmentation(message["result"]["words"])

    def recognize_url(self, url):
        """Ask the backend to recognize the audio at *url* and return the result.

        Raises RuntimeError when the backend reply contains an "error".
        """
        start = time.time()
        data = json.dumps({"jsonrpc": "2.0", "method": "send_audio", "id": rpcid.next(), "params": {"type": "url", "data": url, "details": ["word", "utterance"]}})
        self.conn.send(data)
        logger.info("Waiting for recognition result...")
        self.last_message = self.conn.recv()  ## keep result for the interested applications
        print(self.last_message)
        message = json.loads(self.last_message)
        dur = time.time() - start
        logger.info("Recognition took %5.3f seconds", dur)
        if "error" in message:
            raise RuntimeError("Error from recognition backend: %r" % message.get("error"))
        return segmentation(message["result"]["words"])

    def __del__(self):
        # init_session can return (or __init__ can fail) before ``session`` or
        # ``conn`` exist; there is nothing to release in that case.
        session = getattr(self, "session", None)
        if session is None:
            return
        conn = getattr(self, "conn", None)
        sessions[session["uuid"]] -= 1
        if conn and sessions[session["uuid"]] <= 0:
            conn.close()
            url = self.api_url + '/sessions/%d' % self.session_id
            if self.keepopen:
                logger.info("Keeping session open...")
            else:
                logger.info("Closing session: %s", url)
                r = requests.delete(url, headers=self.headers, verify=self.verify)
                if not r.ok:
                    # Exceptions raised from __del__ are ignored anyway; log instead.
                    logger.error("Closing session failed: %s", r.reason)
def alternatives_grammar(parts, version="0.1", ret=None):
    """Make a grammar of alternatives, as array(sequence)-of-array(alternatives)-of-strings.

    parts: the sequence of alternative lists.
    version: "0.1" or "1.0"; anything else raises ValueError.
    ret: optional list of requested return objects. For "0.1" only the
        presence of "dict" matters; for "1.0" the list is passed through.

    Returns the RPC grammar dict after validating it.
    """
    grammar = {"type": "confusion_network", "version": version}
    ret_is_list = isinstance(ret, list)

    if version == "0.1":
        grammar["data"] = {"type": "multiple_choice", "parts": parts}
        if ret_is_list and "dict" in ret:
            grammar["return_dict"] = True
    elif version == "1.0":
        def as_element(alt):
            # A multi-word alternative becomes a sequence of its words.
            words = alt.split(" ")
            if len(words) > 1:
                return {"kind": "sequence", "elements": words}
            return alt

        sequence = [
            {"kind": "alternatives", "elements": [as_element(alt) for alt in part]}
            for part in parts
        ]
        grammar["data"] = {"kind": "sequence", "elements": sequence}
        if ret_is_list:
            grammar["return_objects"] = ret
    else:
        raise ValueError("Unsupported version: %s" % version)

    asr.spraaklab.schema.validate_rpc_grammar(grammar)
    return grammar

View File

@ -0,0 +1,25 @@
#!/usr/bin/env python
## from https://stackoverflow.com/questions/1447287/format-floats-with-standard-json-module
class PrettyFloat(float):
    """A float whose repr is limited to 15 significant digits."""

    def __repr__(self):
        return "{0:.15g}".format(self)
def pretty_floats(obj):
    """Return a copy of *obj* with every float wrapped in PrettyFloat.

    Dicts are rebuilt with converted values; lists and tuples are rebuilt as
    lists of converted items; any other object is returned unchanged.
    """
    if isinstance(obj, float):
        return PrettyFloat(obj)
    if isinstance(obj, dict):
        return {k: pretty_floats(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        # Build a real list: under Python 3 ``map`` returns a one-shot
        # iterator, which is neither indexable nor JSON-serialisable.
        return [pretty_floats(item) for item in obj]
    return obj
def rounded_floats(obj, ndigits=15):
    """Return a copy of *obj* with every float rounded and wrapped in PrettyFloat.

    obj: float, dict, list, tuple or any other object (returned unchanged).
    ndigits: number of digits passed to ``round`` for each float.

    Dicts are rebuilt with converted values; lists and tuples are rebuilt as
    lists of converted items.
    """
    if isinstance(obj, float):
        return PrettyFloat(round(obj, ndigits))
    if isinstance(obj, dict):
        return {k: rounded_floats(v, ndigits) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        # Build a real list: under Python 3 ``map`` returns a one-shot
        # iterator, which is neither indexable nor JSON-serialisable.
        return [rounded_floats(item, ndigits) for item in obj]
    return obj