acoustic_model/novoapi_for_python3x/asr/segments/segments.py

#!/usr/bin/env python
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen

## These classes can be initialized with dictionaries, as they are returned by the python spraaklab recognition system.

class Segment(object):
    """A single recognized unit: a word (when it carries phones) or a phone.

    Instances are built from one dict of a spraaklab recognition result.
    The attributes ``llh``, ``phones`` and ``minllh`` only exist when the
    corresponding information is present in that dict; use ``hasattr`` to
    test for them.
    """

    def __init__(self, segment):
        """Populate the segment from a recognition dict.

        segment: dict with the required keys "begin", "end", "label" and
        "score" (a missing one raises KeyError).  Optional keys are
        "beginTime" / "endTime" (default: begin / 100.0 and end / 100.0),
        "llh", and "phones" (a list of dicts of the same shape).
        """
        self.begin = segment["begin"]
        self.end = segment["end"]
        self.begintime = segment.get("beginTime", self.begin / 100.0)
        self.endtime = segment.get("endTime", self.end / 100.0)
        self.label = segment["label"]
        self.score = segment["score"]
        if "llh" in segment:
            self.llh = segment["llh"]
        if "phones" in segment:
            self.type = "word"
            self.phones = Segmentation(segment["phones"], ["sil"])
            ## A word may arrive with an empty phone list; only look at the
            ## first phone when there is one, instead of raising IndexError.
            if len(self.phones) > 0 and hasattr(self.phones[0], "llh"):
                ## the current word llh for error detection
                self.minllh = min(s.llh for s in self.phones)
        else:
            self.type = "phone"

    def __repr__(self):
        """Return a one-line summary: times, score, optional llh and label."""
        res = "%8.3f -- %8.3f score %8.3f " % (self.begintime, self.endtime, self.score)
        if hasattr(self, "llh"):
            res += "llh %8.3f " % self.llh
        ## On Python 3 the label is already text; appending label.encode()
        ## would add bytes to str and raise TypeError.  Only byte labels
        ## need converting.
        label = self.label
        if isinstance(label, bytes):
            label = label.decode("utf8")
        res += label
        return res

    def export(self):
        """Return the segment as a plain dict, nested phones included.

        The result always has "begin", "end", "label", "score" and "type";
        "llh" and "phones" are added only when the segment has them.
        """
        r = {
            "begin": self.begin,
            "end": self.end,
            "label": self.label,
            "score": self.score,
            "type": self.type,
        }
        if hasattr(self, "llh"):
            r["llh"] = self.llh
        if hasattr(self, "phones"):
            r["phones"] = self.phones.export()
        return r

class Segmentation(object):
    """An ordered sequence of Segment objects, all words or all phones.

    Supports ``len()``, indexing and iteration.  The aggregate methods take
    an optional ``skip`` list of labels to leave out; when ``skip`` is None
    or empty, the segmentation's own silence labels (``self.sils``) are
    skipped instead.
    """

    ## Silence labels used when the caller does not pass its own list.
    DEFAULT_SILS = ("<s>", "</s>", "!sil")

    def __init__(self, segments, sils=None):
        """Create a segmentation from a spraaklab recognition structure.

        segments: an array of words (or phones), represented by a dict with
        "begin", "end", "label", "score", and "llh" keys.  Words can also have
        "phones" which is another array of segments.
        sils: labels regarded as silence; defaults to "<s>", "</s>", "!sil".
        """
        self.segments = [Segment(s) for s in segments]
        if self.segments:
            self.type = self.segments[0].type
        else:
            self.type = None
        ## Give every instance its own copy of the default labels, so that
        ## changing one segmentation's sils cannot leak into the others.
        self.sils = list(self.DEFAULT_SILS) if sils is None else sils
        self.orig = segments ## in case we want to have access to the original recognition structure

    def _skipped(self, skip):
        """Return the labels to leave out: skip, or self.sils when skip is empty/None."""
        return skip if skip else self.sils

    def __getitem__(self, item):
        """Return the segment (or slice of segments) at position item."""
        return self.segments[item]

    def __repr__(self):
        """Return a header line followed by one line per segment."""
        ns = len(self.segments)
        ## An empty segmentation has no type; use a neutral noun rather
        ## than printing "None".
        noun = self.type if self.type is not None else "segment"
        header = "Segmentation with %d %s%s" % (ns, noun, "" if ns == 1 else "s")
        return "\n ".join([header] + [repr(seg) for seg in self.segments])

    def __len__(self):
        """Return the number of segments."""
        return len(self.segments)

    def score(self, skip=None):
        """Return the sum of the scores of all segments whose label is not skipped."""
        skip = self._skipped(skip)
        return sum((seg.score for seg in self.segments if seg.label not in skip), 0.0)

    def llhs(self, skip=None):
        """Return the llh values of the non-skipped segments that have one."""
        skip = self._skipped(skip)
        return [seg.llh for seg in self.segments if hasattr(seg, "llh") and seg.label not in skip]

    def llh(self, skip=None):
        """Return the sum of llhs(skip); 0 when there are none."""
        return sum(self.llhs(skip))

    def minllh(self, skip=None):
        """Return the smallest value of llhs(skip), or None when there are none."""
        llhs = self.llhs(skip)
        return min(llhs) if llhs else None

    def labels(self, skip=None):
        """Return the labels of all segments whose label is not skipped."""
        skip = self._skipped(skip)
        return [seg.label for seg in self.segments if seg.label not in skip]

    def sentence(self, skip=None):
        """Return labels(skip) joined by single spaces."""
        return " ".join(self.labels(skip))

    def export(self):
        """Return a list with the export() dict of every segment."""
        return [seg.export() for seg in self.segments]
novo_api for python 3.x is added. 2018-12-30 23:47:55 +01:00			`#!/usr/bin/env python`
			`# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen`

			`## These classes can be initialized with dictionaries, as they are returned by the python spraaklab recognition system.`

			`class Segment(object):`
			`def __init__(self, segment):`
			`self.begin = segment["begin"]`
			`self.end = segment["end"]`
			`self.begintime = segment.get("beginTime", self.begin / 100.0)`
			`self.endtime = segment.get("endTime", self.end / 100.0)`
			`self.label = segment["label"]`
			`self.score = segment["score"]`
			`if "llh" in segment:`
			`self.llh = segment["llh"]`
			`if "phones" in segment:`
			`self.type = "word"`
			`self.phones = Segmentation(segment["phones"], ["sil"])`
			`if hasattr(self.phones[0], "llh"):`
			`self.minllh = min([s.llh for s in self.phones]) ## the current word llh for error detection`
			`else:`
			`self.type = "phone"`

			`def __repr__(self):`
			`res = "%8.3f -- %8.3f score %8.3f " % (self.begintime, self.endtime, self.score)`
			`if hasattr(self, "llh"):`
			`res += "llh %8.3f " % self.llh`
			`res += self.label.encode("utf8")`
			`return res`

			`def export(self):`
			`r = {"begin": self.begin, "end": self.end, "label": self.label, "score": self.score, "type": self.type}`
			`if hasattr(self, "llh"):`
			`r["llh"] = self.llh`
			`if hasattr(self, "phones"):`
			`r["phones"] = self.phones.export()`
			`return r`

			`class Segmentation(object):`
			`def __init__(self, segments, sils=["<s>", "</s>", "!sil"]):`
			`"""Create a segmentation from a spraaklab recognition structure.`
			`segments: an array of words (or phones), represented by a dict with`
			`"begin", "end", "label", "score", and "llh" keys. Words can also have`
			`"phones" which is another array of segments."""`
			`self.segments = [Segment(s) for s in segments]`
			`if self.segments:`
			`self.type = self.segments[0].type`
			`else:`
			`self.type = None`
			`self.sils = sils`
			`self.orig = segments ## in case we want to have access to the original recognition structure`

			`def __getitem__(self, item):`
			`return self.segments[item]`

			`def __repr__(self):`
			`ns = len(self.segments)`
			`res = "Segmentation with %d %s%s" % (ns, self.type, "" if ns==1 else "s")`
			`for seg in self.segments:`
			`res += "\n " + repr(seg)`
			`return res`

			`def __len__(self):`
			`return len(self.segments)`

			`def score(self, skip=None):`
			`if not skip:`
			`skip = self.sils`
			`s = 0.0`
			`for seg in self.segments:`
			`if seg.label not in skip:`
			`s += seg.score`
			`return s`

			`def llhs(self, skip=None):`
			`if not skip:`
			`skip = self.sils`
			`return [seg.llh for seg in self.segments if hasattr(seg, "llh") and seg.label not in skip]`

			`def llh(self, skip=None):`
			`return sum(self.llhs(skip))`

			`def minllh(self, skip=None):`
			`llhs = self.llhs(skip)`
			`if llhs:`
			`return min(llhs)`
			`else:`
			`return None`

			`def labels(self, skip=None):`
			`if not skip:`
			`skip = self.sils`
			`return [seg.label for seg in self.segments if seg.label not in skip]`

			`def sentence(self, skip=None):`
			`return " ".join(self.labels(skip))`

			`def export(self):`
			`return [seg.export() for seg in self.segments]`