#!/usr/bin/env python
# (c) 2015--2018 NovoLanguage, author: David A. van Leeuwen

## These classes can be initialized with dictionaries, as they are returned
## by the python spraaklab recognition system.


class Segment(object):
    """A single recognized unit (a word or a phone) built from a dict.

    Required keys of ``segment``: "begin", "end", "label", "score".
    Optional keys: "beginTime", "endTime", "llh", "phones".

    Attributes set on the instance:
      begin, end          -- copied from the dict
      begintime, endtime  -- "beginTime"/"endTime" when present, otherwise
                             begin / 100.0 and end / 100.0
      label, score        -- copied from the dict
      llh                 -- only when the dict has "llh"
      type                -- "word" when the dict has "phones", else "phone"
      phones              -- only for words: a Segmentation of the phones,
                             created with ["sil"] as its silence labels
      minllh              -- only for words with at least one phone that
                             carries an llh: the smallest such llh
    """

    def __init__(self, segment):
        self.begin = segment["begin"]
        self.end = segment["end"]
        self.begintime = segment.get("beginTime", self.begin / 100.0)
        self.endtime = segment.get("endTime", self.end / 100.0)
        self.label = segment["label"]
        self.score = segment["score"]
        if "llh" in segment:
            self.llh = segment["llh"]
        if "phones" in segment:
            self.type = "word"
            self.phones = Segmentation(segment["phones"], ["sil"])
            # Only look at phones that actually have an llh: an empty phone
            # list, or phones without llh, must not raise here.
            phone_llhs = [p.llh for p in self.phones if hasattr(p, "llh")]
            if phone_llhs:
                ## the current word llh for error detection
                self.minllh = min(phone_llhs)
        else:
            self.type = "phone"

    def _label_text(self):
        """Return the label as the native ``str`` type of the interpreter."""
        label = self.label
        if isinstance(label, str):
            return label
        if hasattr(label, "encode"):
            # Python 2 ``unicode`` -> utf8 encoded byte string (native str)
            return label.encode("utf8")
        # Python 3 ``bytes`` -> text
        return label.decode("utf8")

    def __repr__(self):
        res = "%8.3f -- %8.3f score %8.3f " % (self.begintime, self.endtime, self.score)
        if hasattr(self, "llh"):
            res += "llh %8.3f " % self.llh
        # Appending label.encode("utf8") directly fails on Python 3
        # (str + bytes), so normalise to the native string type first.
        res += self._label_text()
        return res

    def export(self):
        """Return a plain dict with begin, end, label, score and type,
        plus "llh" and "phones" (exported recursively) when available."""
        r = {
            "begin": self.begin,
            "end": self.end,
            "label": self.label,
            "score": self.score,
            "type": self.type,
        }
        if hasattr(self, "llh"):
            r["llh"] = self.llh
        if hasattr(self, "phones"):
            r["phones"] = self.phones.export()
        return r


class Segmentation(object):
    """An ordered collection of Segment objects.

    Most query methods take an optional ``skip`` argument: a container of
    labels to leave out.  When ``skip`` is falsy (``None`` but also an
    empty container) the instance's ``sils`` is used instead.
    """

    def __init__(self, segments, sils=None):
        """Create a segmentation from a spraaklab recognition structure.

        segments: an array of words (or phones), each represented by a dict
                  with "begin", "end", "label" and "score" keys and
                  optionally an "llh" key.  Words can also have "phones",
                  which is another array of segments.
        sils:     labels that are skipped by default in score(), llhs(),
                  labels() etc.  Defaults to a fresh ["", "", "!sil"] list
                  for every instance.
        """
        if sils is None:
            # Build the default per instance; a list literal in the signature
            # would be shared (and mutable) across all instances.
            # NOTE(review): the first two defaults are empty strings; they may
            # be markers that were lost in an earlier conversion -- confirm.
            sils = ["", "", "!sil"]
        self.segments = [Segment(s) for s in segments]
        if self.segments:
            self.type = self.segments[0].type
        else:
            self.type = None
        self.sils = sils
        ## in case we want to have access to the original recognition structure
        self.orig = segments

    def __getitem__(self, item):
        return self.segments[item]

    def __repr__(self):
        ns = len(self.segments)
        header = "Segmentation with %d %s%s" % (ns, self.type, "" if ns == 1 else "s")
        return header + "".join("\n " + repr(seg) for seg in self.segments)

    def __len__(self):
        return len(self.segments)

    def score(self, skip=None):
        """Return the sum (a float) of the scores of all non-skipped segments."""
        if not skip:
            skip = self.sils
        return sum((seg.score for seg in self.segments if seg.label not in skip), 0.0)

    def llhs(self, skip=None):
        """Return the llh values of the non-skipped segments that have one."""
        if not skip:
            skip = self.sils
        return [seg.llh for seg in self.segments
                if hasattr(seg, "llh") and seg.label not in skip]

    def llh(self, skip=None):
        """Return the sum of llhs(skip)."""
        return sum(self.llhs(skip))

    def minllh(self, skip=None):
        """Return the smallest value of llhs(skip), or None when it is empty."""
        llhs = self.llhs(skip)
        if llhs:
            return min(llhs)
        else:
            return None

    def labels(self, skip=None):
        """Return the labels of the non-skipped segments, in order."""
        if not skip:
            skip = self.sils
        return [seg.label for seg in self.segments if seg.label not in skip]

    def sentence(self, skip=None):
        """Return labels(skip) joined by single spaces."""
        return " ".join(self.labels(skip))

    def export(self):
        """Return the list of exported segments (see Segment.export)."""
        return [seg.export() for seg in self.segments]