#!/usr/bin/env python

# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen

"""Write segmentations as Praat TextGrid files (long text format)."""

import codecs


def _write_line(output, text=''):
    """Write *text* followed by a newline to the file-like *output*."""
    output.write(text + '\n')


def _escape(value):
    """Return *value* as text with double quotes doubled.

    TextGrid string values are delimited by double quotes; an embedded
    quote has to be written as two quotes or the file becomes unparsable.
    """
    return ('%s' % value).replace('"', '""')


def _identity(value):
    """Return *value* unchanged (formatter for labels needing no conversion)."""
    return value


def _print_tier_preamble(output, title, begin, end, size, index):
    """Write the lines shared by every interval tier, up to the size line."""
    _write_line(output, '\titem [%d]:' % index)
    _write_line(output, '\t\tclass = "IntervalTier"')
    _write_line(output, '\t\tname = "%s"' % _escape(title))
    _write_line(output, '\t\txmin = %s' % begin)
    _write_line(output, '\t\txmax = %s' % end)
    _write_line(output, '\t\tintervals: size = %d' % size)


def print_header(output, begin, end, nr_tiers):
    """Write the TextGrid file header to *output*.

    Args:
        output: file-like object with a ``write`` method.
        begin: value written as the global ``xmin`` (formatted with ``%s``).
        end: value written as the global ``xmax`` (formatted with ``%s``).
        nr_tiers: number of tiers that will follow the header.
    """
    header = (
        'File type = "ooTextFile"',
        'Object class = "TextGrid"',
        '',
        'xmin = %s' % begin,
        'xmax = %s' % end,
        # The long TextGrid format requires the <exists> flag here; without
        # it the "tiers?" line carries no value for a reader to pick up.
        'tiers? <exists> ',
        'size = %d' % nr_tiers,
        'item []:',
    )
    for line in header:
        _write_line(output, line)


def print_info_tier(output, title, begin, end, label, index=0):
    """Write an interval tier holding one interval that spans the whole tier.

    Args:
        output: file-like object with a ``write`` method.
        title: tier name.
        begin: tier (and interval) ``xmin``.
        end: tier (and interval) ``xmax``.
        label: text of the single interval.
        index: number written in the ``item [N]:`` line.  Defaults to 0,
            the value this function always wrote before; pass the tier's
            1-based position to follow the TextGrid convention.
    """
    _print_tier_preamble(output, title, begin, end, 1, index)
    _write_line(output, '\t\tintervals [1]:')
    _write_line(output, '\t\t\txmin = %s' % begin)
    _write_line(output, '\t\t\txmax = %s' % end)
    _write_line(output, '\t\t\ttext = "%s"' % _escape(label))


def print_tier(output, title, begin, end, segs, format, formatter, index=0):
    """Write one interval tier with an interval per entry of *segs*.

    Args:
        output: file-like object with a ``write`` method.
        title: tier name.
        begin: tier ``xmin`` (already formatted by the caller).
        end: tier ``xmax`` (already formatted by the caller).
        segs: sequence of dicts with keys ``'begin'``, ``'end'`` and
            ``'label'``.  ``begin``/``end`` are converted with ``int`` and
            divided by 100.0 before being written (presumably hundredths
            of a second -> seconds; confirm against the producer).
        format: ``%``-style format applied to the formatted label.
        formatter: callable applied to each ``'label'`` before formatting.
        index: number written in the ``item [N]:`` line.  Defaults to 0,
            the value this function always wrote before; pass the tier's
            1-based position to follow the TextGrid convention.
    """
    _print_tier_preamble(output, title, begin, end, len(segs), index)
    for count, seg in enumerate(segs, 1):
        _write_line(output, '\t\tintervals [%d]:' % count)
        _write_line(output,
                    '\t\t\txmin = %s' % repr(int(seg['begin']) / 100.0))
        _write_line(output,
                    '\t\t\txmax = %s' % repr(int(seg['end']) / 100.0))
        text = format % formatter(seg['label'])
        _write_line(output, '\t\t\ttext = "%s"' % _escape(text))


def seg2tg(fname, segments):
    """Write *segments* to *fname* as a three-tier TextGrid (UTF-8).

    The tiers are "confidence" (each segment's ``llh`` if it has one,
    otherwise its ``score``), "words" (segment labels) and "phones" (the
    labels of every entry in each segment's ``phones``).

    Args:
        fname: path of the file to create.
        segments: sequence of objects with ``begin``, ``end``, ``label``,
            ``phones`` and either ``llh`` or ``score``; each phone has
            ``begin``, ``end`` and ``label``.  Nothing is written when the
            sequence is empty.
    """
    if not segments:
        return

    confidences = []
    word_labels = []
    phones = []
    for s in segments:
        conf = s.llh if hasattr(s, "llh") else s.score
        confidences.append({'begin': s.begin, 'end': s.end, 'label': conf})
        word_labels.append({'begin': s.begin, 'end': s.end, 'label': s.label})
        for p in s.phones:
            phones.append({'begin': p.begin, 'end': p.end, 'label': p.label})

    begin = repr(int(segments[0].begin) / 100.0)
    end = repr(int(segments[-1].end) / 100.0)

    tiers = (
        ("confidence", confidences, '%.3f'),
        ("words", word_labels, '%s'),
        ("phones", phones, '%s'),
    )
    # Open the file only after the input has been walked successfully, and
    # use a context manager so the handle is closed even if writing fails.
    with codecs.open(fname, "w", encoding="utf-8") as output:
        print_header(output, begin, end, len(tiers))
        for index, (title, segs, fmt) in enumerate(tiers, 1):
            # print_tier takes the format and the formatter as two separate
            # arguments; passing them as one tuple raised TypeError.
            print_tier(output, title, begin, end, segs, fmt, _identity, index)