78 lines
2.7 KiB
Python
78 lines
2.7 KiB
Python
|
#!/usr/bin/env python
|
||
|
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen
|
||
|
|
||
|
import codecs
|
||
|
|
||
|
def print_header(output, begin, end, nr_tiers):
    """Write the long-format TextGrid file header to *output*.

    Args:
        output: Writable text stream; only its ``write`` method is used.
        begin: Value written after ``xmin =`` (interpolated with ``%s``).
        end: Value written after ``xmax =`` (interpolated with ``%s``).
        nr_tiers: Number written after ``size =`` (interpolated with ``%d``).
    """
    lines = (
        'File type = "ooTextFile"',
        'Object class = "TextGrid"',
        '',
        'xmin = %s' % begin,
        'xmax = %s' % end,
        'tiers? <exists>',
        'size = %d' % nr_tiers,
        'item []:',
    )
    # output.write() instead of the Python 2-only ``print >> output``
    # statement: it emits the same text and also runs on Python 3.
    output.write('\n'.join(lines) + '\n')
|
||
|
|
||
|
|
||
|
def print_info_tier(output, title, begin, end, label):
    """Write an interval tier containing one interval that spans the tier.

    The single interval reuses *begin* and *end* as its own bounds and
    carries *label* as its text.

    Args:
        output: Writable text stream; only its ``write`` method is used.
        title: Tier name, written inside double quotes.
        begin: Value written as ``xmin`` of both the tier and the interval.
        end: Value written as ``xmax`` of both the tier and the interval.
        label: Text of the interval, written inside double quotes.
    """
    # A double quote inside a TextGrid string must be doubled, otherwise
    # it terminates the quoted text early.
    name = ('%s' % title).replace('"', '""')
    text = ('%s' % label).replace('"', '""')
    lines = (
        # NOTE(review): the tier index is hard-coded to 0; Praat itself
        # writes 1-based indices -- confirm whether any reader depends on it.
        '\titem [%d]:' % 0,
        '\t\tclass = "IntervalTier"',
        '\t\tname = "%s"' % name,
        '\t\txmin = %s' % begin,
        '\t\txmax = %s' % end,
        '\t\tintervals: size = %d' % 1,
        '\t\tintervals [1]:',
        '\t\t\txmin = %s' % begin,
        '\t\t\txmax = %s' % end,
        '\t\t\ttext = "%s"' % text,
    )
    # output.write() instead of the Python 2-only ``print >> output``
    # statement: it emits the same text and also runs on Python 3.
    output.write('\n'.join(lines) + '\n')
|
||
|
|
||
|
|
||
|
def print_tier(output, title, begin, end, segs, format, formatter):
    """Write one interval tier with an interval for every entry of *segs*.

    *format* and *formatter* are two separate parameters; an earlier
    version of this function took them as a single tuple parameter, which
    is Python 2-only syntax.

    Args:
        output: Writable text stream; only its ``write`` method is used.
        title: Tier name, written inside double quotes.
        begin: Value written as the tier's ``xmin``.
        end: Value written as the tier's ``xmax``.
        segs: Sequence of mappings with the keys ``'begin'``, ``'end'`` and
            ``'label'``.  ``begin``/``end`` are passed through ``int()``
            and divided by 100.0 (presumably centiseconds converted to
            seconds -- confirm against the callers).
        format: ``%``-style format string applied to the formatted label.
        formatter: Callable applied to each ``seg['label']`` before it is
            interpolated into *format*.
    """
    # output.write() instead of the Python 2-only ``print >> output``
    # statement: it emits the same text and also runs on Python 3.
    write = output.write
    name = ('%s' % title).replace('"', '""')
    header = (
        # NOTE(review): the tier index is hard-coded to 0, so every tier is
        # written as ``item [0]:``; Praat itself writes 1-based indices --
        # confirm whether any reader depends on it.
        '\titem [%d]:' % 0,
        '\t\tclass = "IntervalTier"',
        '\t\tname = "%s"' % name,
        '\t\txmin = %s' % begin,
        '\t\txmax = %s' % end,
        '\t\tintervals: size = %d' % len(segs),
    )
    write('\n'.join(header) + '\n')

    for count, seg in enumerate(segs, 1):
        # A double quote inside a TextGrid string must be doubled,
        # otherwise it terminates the quoted text early.
        text = (format % formatter(seg['label'])).replace('"', '""')
        interval = (
            '\t\tintervals [%d]:' % count,
            '\t\t\txmin = %s' % repr(int(seg['begin']) / 100.0),
            '\t\t\txmax = %s' % repr(int(seg['end']) / 100.0),
            '\t\t\ttext = "' + text + '"',
        )
        write('\n'.join(interval) + '\n')
|
||
|
|
||
|
|
||
|
def seg2tg(fname, segments):
    """Write *segments* to *fname* as a three-tier, UTF-8 TextGrid file.

    The tiers are, in order: ``confidence`` (one interval per segment,
    labelled with the segment's ``llh`` attribute if it has one, otherwise
    its ``score``), ``words`` (the segment labels) and ``phones`` (one
    interval for every entry of each segment's ``phones``).

    Nothing is written, and no file is created, when *segments* is empty.

    Args:
        fname: Path of the file to write.
        segments: Sequence of objects with ``begin``, ``end``, ``label``,
            ``phones`` and either ``llh`` or ``score`` attributes; each
            phone needs ``begin``, ``end`` and ``label``.
    """
    if not segments:
        return

    confidences = []
    word_labels = []
    phones = []
    for seg in segments:
        conf = seg.llh if hasattr(seg, "llh") else seg.score
        confidences.append({'begin': seg.begin, 'end': seg.end, 'label': conf})
        word_labels.append({'begin': seg.begin, 'end': seg.end, 'label': seg.label})
        phones.extend(
            {'begin': p.begin, 'end': p.end, 'label': p.label}
            for p in seg.phones
        )

    # The grid spans from the first segment's begin to the last one's end;
    # same int()/100.0 conversion that print_tier applies to intervals.
    begin = repr(int(segments[0].begin) / 100.0)
    end = repr(int(segments[-1].end) / 100.0)

    def as_is(label):
        return label

    tiers = (
        ("confidence", confidences, '%.3f'),
        ("words", word_labels, '%s'),
        ("phones", phones, '%s'),
    )

    # Open the file only after all data has been gathered, and inside
    # ``with``, so a failure cannot leak the handle or leave a file that
    # was truncated before any content was ready.
    with codecs.open(fname, "w", encoding="utf-8") as output:
        print_header(output, begin, end, len(tiers))
        for title, segs, fmt in tiers:
            # print_tier takes the format string and the formatter as two
            # separate positional arguments, not as one tuple.
            print_tier(output, title, begin, end, segs, fmt, as_is)
|