acoustic_model/novoapi/asr/segments/praat.py

#!/usr/bin/env python
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen

import codecs

def print_header(output, begin, end, nr_tiers):
    """Write the file-level header of a long-format Praat TextGrid.

    Args:
        output: Writable text stream; only its ``write`` method is used.
        begin: Value written after ``xmin = `` (interpolated with ``%s``).
        end: Value written after ``xmax = `` (interpolated with ``%s``).
        nr_tiers: Integer tier count written after ``size = ``.
    """
    # ``print >> output, ...`` only works on Python 2; on Python 3 it parses
    # as a shift expression and raises TypeError at run time.  ``write``
    # behaves the same on both interpreters.
    header_lines = (
        'File type = "ooTextFile"',
        'Object class = "TextGrid"',
        '',
        'xmin = %s' % begin,
        'xmax = %s' % end,
        'tiers? <exists>',
        'size = %d' % nr_tiers,
        'item []:',
    )
    output.write('\n'.join(header_lines) + '\n')


def print_info_tier(output, title, begin, end, label):
    """Write an IntervalTier holding one interval that spans the whole tier.

    Args:
        output: Writable text stream; only its ``write`` method is used.
        title: Tier name, written between double quotes.
        begin: Value used for both the tier and the interval ``xmin``.
        end: Value used for both the tier and the interval ``xmax``.
        label: Text of the single interval, written between double quotes.
    """
    # NOTE(review): the tier index is hard-coded to 0, exactly as before;
    # confirm whether consumers of the file expect 1-based item numbers.
    #
    # ``print >> output`` is Python 2-only and fails at run time on Python 3,
    # so the lines are emitted through ``write`` instead.
    tier_lines = (
        '\titem [%d]:' % 0,
        '\t\tclass = "IntervalTier"',
        '\t\tname = "%s"' % title,
        '\t\txmin = %s' % begin,
        '\t\txmax = %s' % end,
        '\t\tintervals: size = %d' % 1,
        '\t\tintervals [1]:',
        '\t\t\txmin = %s' % begin,
        '\t\t\txmax = %s' % end,
        '\t\t\ttext = "%s"' % label,
    )
    output.write('\n'.join(tier_lines) + '\n')


def print_tier(output, title, begin, end, segs, format, formatter=None):
    """Write one IntervalTier with an interval for every entry of *segs*.

    Args:
        output: Writable text stream; only its ``write`` method is used.
        title: Tier name, written between double quotes.
        begin: Tier ``xmin`` (interpolated with ``%s``).
        end: Tier ``xmax`` (interpolated with ``%s``).
        segs: Sized iterable of mappings with ``'begin'``, ``'end'`` and
            ``'label'`` keys.  ``begin``/``end`` are passed through ``int()``
            and divided by 100.0 before being written.
        format: ``%``-style template for the interval text (e.g. ``'%s'``).
            For compatibility with the older tuple-taking signature it may
            also be a ``(format, formatter)`` pair when *formatter* is
            omitted.
        formatter: Callable applied to each label before it is interpolated
            into *format*.  When ``None`` the label is used unchanged.
    """
    # Callers written against the former ``(format, formatter)`` tuple
    # parameter pass both values as a single argument; unpack that here so
    # both calling conventions keep working.
    if formatter is None and isinstance(format, (tuple, list)):
        format, formatter = format

    # ``print >> output`` is Python 2-only and fails at run time on Python 3,
    # so everything is emitted through ``write``.
    output.write('\titem [%d]:\n' % 0)
    output.write('\t\tclass = "IntervalTier"\n')
    output.write('\t\tname = "%s"\n' % title)
    output.write('\t\txmin = %s\n' % begin)
    output.write('\t\txmax = %s\n' % end)
    output.write('\t\tintervals: size = %d\n' % len(segs))

    text_template = '\t\t\ttext = "' + format + '"\n'
    for number, seg in enumerate(segs, 1):
        label = seg['label']
        if formatter is not None:
            label = formatter(label)
        output.write('\t\tintervals [%d]:\n' % number)
        output.write('\t\t\txmin = %s\n' % repr(int(seg['begin']) / 100.0))
        output.write('\t\t\txmax = %s\n' % repr(int(seg['end']) / 100.0))
        output.write(text_template % label)


def seg2tg(fname, segments):
    """Write *segments* to *fname* as a three-tier Praat TextGrid (UTF-8).

    The tiers are written in this order: ``"confidence"``, ``"words"`` and
    ``"phones"``.

    Args:
        fname: Path of the file to create or overwrite.
        segments: Sequence of word-level objects exposing ``begin``, ``end``,
            ``label``, ``phones`` and either ``llh`` or ``score`` (``llh`` is
            preferred when present).  Every item of ``phones`` exposes
            ``begin``, ``end`` and ``label``.  Time values are converted with
            ``int(...) / 100.0`` -- presumably centiseconds; confirm against
            the producer of the segments.

    Returns:
        None.  Nothing is written when *segments* is empty.
    """
    if not segments:
        return

    def unchanged(label):
        return label

    # Collect all tier data before touching the file system, so malformed
    # segments do not leave an empty or truncated file behind.
    confidences = [
        {'begin': s.begin, 'end': s.end,
         'label': s.llh if hasattr(s, "llh") else s.score}
        for s in segments
    ]
    word_labels = [
        {'begin': s.begin, 'end': s.end, 'label': s.label}
        for s in segments
    ]
    phones = [
        {'begin': p.begin, 'end': p.end, 'label': p.label}
        for s in segments
        for p in s.phones
    ]

    begin = repr(int(segments[0].begin) / 100.0)
    end = repr(int(segments[-1].end) / 100.0)
    nr_tiers = 3

    # ``with`` guarantees the file is closed even if writing a tier fails.
    with codecs.open(fname, "w", encoding="utf-8") as output:
        print_header(output, begin, end, nr_tiers)
        # print_tier takes the format string and the formatter as two separate
        # arguments; passing them as one tuple raised TypeError.
        print_tier(output, "confidence", begin, end, confidences, '%.3f', unchanged)
        print_tier(output, "words", begin, end, word_labels, '%s', unchanged)
        print_tier(output, "phones", begin, end, phones, '%s', unchanged)
novo_api for python 3.x is added. 2018-12-30 23:47:55 +01:00			`#!/usr/bin/env python`
			`# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen`

			`import codecs`

			`def print_header(output, begin, end, nr_tiers):`
			`print >> output, 'File type = "ooTextFile"'`
			`print >> output, 'Object class = "TextGrid"'`
			`print >> output, ''`
			`print >> output, 'xmin = %s' % begin`
			`print >> output, 'xmax = %s' % end`
			`print >> output, 'tiers? <exists>'`
			`print >> output, 'size = %d' % nr_tiers`
			`print >> output, 'item []:'`


			`def print_info_tier(output, title, begin, end, label):`
			`print >> output, '\titem [%d]:' % 0`
			`print >> output, '\t\tclass = "IntervalTier"'`
			`print >> output, '\t\tname = "%s"' % title`
			`print >> output, '\t\txmin = %s' % begin`
			`print >> output, '\t\txmax = %s' % end`
			`print >> output, '\t\tintervals: size = %d' % 1`

			`print >> output, '\t\tintervals [1]:'`
			`print >> output, '\t\t\txmin = %s' % begin`
			`print >> output, '\t\t\txmax = %s' % end`
			`print >> output, '\t\t\ttext = "%s"' % label`


			`#def print_tier(output, title, begin, end, segs, (format, formatter)):`
			`def print_tier(output, title, begin, end, segs, format, formatter):`
			`print >> output, '\titem [%d]:' % 0`
			`print >> output, '\t\tclass = "IntervalTier"'`
			`print >> output, '\t\tname = "%s"' % title`
			`print >> output, '\t\txmin = %s' % begin`
			`print >> output, '\t\txmax = %s' % end`
			`print >> output, '\t\tintervals: size = %d' % len(segs)`

			`count = 1`
			`for seg in segs:`
			`#print seg`
			`print >> output, '\t\tintervals [%d]:' % count`
			`print >> output, '\t\t\txmin = %s' % repr(int(seg['begin']) / 100.0)`
			`print >> output, '\t\t\txmax = %s' % repr(int(seg['end']) / 100.0)`
			`string = '\t\t\ttext = "' + format + '"'`
			`print >> output, string % formatter(seg['label'])`
			`count += 1`


			`def seg2tg(fname, segments):`
			`if not segments:`
			`return`
			`output = codecs.open(fname, "w", encoding="utf-8")`

			`confidences = []`
			`word_labels = []`
			`phones = []`

			`for s in segments:`
			`conf = s.llh if hasattr(s, "llh") else s.score`
			`confidences.append({'begin': s.begin, 'end': s.end, 'label': conf})`
			`word_labels.append({'begin': s.begin, 'end': s.end, 'label': s.label})`
			`for p in s.phones:`
			`phones.append({'begin': p.begin, 'end': p.end, 'label': p.label})`


			`begin = repr(int(segments[0].begin) / 100.0)`
			`end = repr(int(segments[-1].end) / 100.0)`

			`nr_tiers = 3`
			`print_header(output, begin, end, nr_tiers)`
			`print_tier(output, "confidence", begin, end, confidences, ('%.3f', lambda x: x))`
			`print_tier(output, "words", begin, end, word_labels, ('%s', lambda x: x))`
			`print_tier(output, "phones", begin, end, phones, ('%s', lambda x: x))`

			`output.close()`