kaldi.decoder._decoder — PyKaldi 0.1.1 documentation
from .. import fstext as _fst
from .. import lat as _lat

from ._faster_decoder import *
from ._biglm_faster_decoder import *
from ._lattice_faster_decoder import *
from ._lattice_faster_decoder_ext import *
from ._lattice_biglm_faster_decoder import *
from ._lattice_faster_online_decoder import *
from ._lattice_faster_online_decoder_ext import *


class _DecoderBase(object):
    """Python-level API shared by all decoder wrappers."""

    def get_best_path(self, use_final_probs=True):
        """Returns the best path found by the decoder as a lattice.

        Args:
            use_final_probs (bool): When ``True`` and a final state of the
                graph was reached, the final probabilities given by the
                graph are included in the output. Otherwise every final
                probability is treated as one.

        Returns:
            LatticeVectorFst: The best path.

        Raises:
            RuntimeError: In the unusual case where no tokens survive.
        """
        best_path = _fst.LatticeVectorFst()
        # The wrapped call fills `best_path` in place and reports success.
        if self._get_best_path(best_path, use_final_probs):
            return best_path
        raise RuntimeError("Decoding failed. No tokens survived.")


class _LatticeDecoderBase(_DecoderBase):
    """Python-level API shared by lattice generating decoder wrappers."""

    def get_raw_lattice(self, use_final_probs=True):
        """Returns the raw state-level lattice.

        The returned raw lattice is topologically sorted.

        Args:
            use_final_probs (bool): When ``True`` and a final state of the
                graph was reached, the final probabilities given by the
                graph are included in the output. Otherwise every final
                probability is treated as one.

        Returns:
            LatticeVectorFst: The state-level lattice.

        Raises:
            RuntimeError: In the unusual case where no tokens survive.
        """
        raw_lattice = _fst.LatticeVectorFst()
        # The wrapped call fills `raw_lattice` in place and reports success.
        if self._get_raw_lattice(raw_lattice, use_final_probs):
            return raw_lattice
        raise RuntimeError("Decoding failed. No tokens survived.")

    def get_lattice(self, use_final_probs=True):
        """Returns the lattice-determinized compact lattice.

        The result is a deterministic compact lattice that has a unique
        path for each word sequence.

        Args:
            use_final_probs (bool): When ``True`` and a final state of the
                graph was reached, the final probabilities given by the
                graph are included in the output. Otherwise every final
                probability is treated as one.

        Returns:
            CompactLatticeVectorFst: The lattice-determinized compact
            lattice.

        Raises:
            RuntimeError: In the unusual case where no tokens survive.
        """
        compact_lattice = _fst.CompactLatticeVectorFst()
        # The wrapped call fills `compact_lattice` in place and reports
        # success.
        if self._get_lattice(compact_lattice, use_final_probs):
            return compact_lattice
        raise RuntimeError("Decoding failed. No tokens survived.")


class _LatticeOnlineDecoderBase(_LatticeDecoderBase):
    """Python-level API shared by lattice generating online decoders."""

    def get_raw_lattice_pruned(self, beam, use_final_probs=True):
        """Prunes and returns the raw state-level lattice.

        Works like :meth:`get_raw_lattice` but only processes tokens whose
        extra-cost is smaller than the best-cost plus the specified beam.
        Calling this is worthwhile only if :attr:`beam` is less than the
        lattice-beam specified in the decoder options. Otherwise it returns
        essentially the same thing as :meth:`get_raw_lattice`, only more
        slowly.

        The returned raw lattice is topologically sorted.

        Args:
            beam (float): Pruning beam.
            use_final_probs (bool): When ``True`` and a final state of the
                graph was reached, the final probabilities given by the
                graph are included in the output. Otherwise every final
                probability is treated as one.

        Returns:
            LatticeVectorFst: The state-level lattice.

        Raises:
            RuntimeError: In the unusual case where no tokens survive.
        """
        pruned_lattice = _fst.LatticeVectorFst()
        # Note the argument order of the wrapped call: the output FST comes
        # first, then `use_final_probs`, then `beam`.
        if self._get_raw_lattice_pruned(pruned_lattice, use_final_probs, beam):
            return pruned_lattice
        raise RuntimeError("Decoding failed. No tokens survived.")


class FasterDecoder(_DecoderBase, FasterDecoder):
    """Faster decoder.

    Args:
        fst (StdFst): Decoding graph `HCLG`.
        opts (FasterDecoderOptions): Decoder options.
    """

    def __init__(self, fst, opts):
        super(FasterDecoder, self).__init__(fst, opts)
        # Hold on to the graph so it stays in scope while the decoder is
        # alive.
        self._fst = fst
class BiglmFasterDecoder(_DecoderBase, BiglmFasterDecoder):
    """Faster decoder for decoding with big language models.

    Performs online composition between the decoding graph :attr:`fst` and
    the difference language model :attr:`lm_diff_fst` while decoding.

    Args:
        fst (StdFst): Decoding graph.
        opts (BiglmFasterDecoderOptions): Decoder options.
        lm_diff_fst (StdDeterministicOnDemandFst): The deterministic
            on-demand FST representing the difference in scores between the
            LM to decode with and the LM the decoding graph :attr:`fst` was
            compiled with.
    """

    def __init__(self, fst, opts, lm_diff_fst):
        super(BiglmFasterDecoder, self).__init__(fst, opts, lm_diff_fst)
        # Hold on to both FSTs so they stay in scope while the decoder is
        # alive.
        self._fst = fst
        self._lm_diff_fst = lm_diff_fst
class LatticeFasterDecoder(_LatticeDecoderBase, LatticeFasterDecoder):
    """Lattice generating faster decoder.

    Args:
        fst (StdFst): Decoding graph `HCLG`.
        opts (LatticeFasterDecoderOptions): Decoder options.
    """

    def __init__(self, fst, opts):
        super(LatticeFasterDecoder, self).__init__(fst, opts)
        # Hold on to the graph so it stays in scope while the decoder is
        # alive.
        self._fst = fst
class LatticeFasterGrammarDecoder(_LatticeDecoderBase,
                                  LatticeFasterGrammarDecoder):
    """Lattice generating faster grammar decoder.

    Args:
        fst (GrammarFst): Decoding graph `HCLG`.
        opts (LatticeFasterDecoderOptions): Decoder options.
    """

    def __init__(self, fst, opts):
        super(LatticeFasterGrammarDecoder, self).__init__(fst, opts)
        # Hold on to the graph so it stays in scope while the decoder is
        # alive.
        self._fst = fst
class LatticeBiglmFasterDecoder(_LatticeDecoderBase,
                                LatticeBiglmFasterDecoder):
    """Lattice generating faster decoder for decoding with big language models.

    Works like :class:`LatticeFasterDecoder`, but performs online
    composition between the decoding graph :attr:`fst` and the difference
    language model :attr:`lm_diff_fst`.

    Args:
        fst (StdFst): Decoding graph `HCLG`.
        opts (LatticeFasterDecoderOptions): Decoder options.
        lm_diff_fst (StdDeterministicOnDemandFst): The deterministic
            on-demand FST representing the difference in scores between the
            LM to decode with and the LM the decoding graph :attr:`fst` was
            compiled with.
    """

    def __init__(self, fst, opts, lm_diff_fst):
        super(LatticeBiglmFasterDecoder, self).__init__(fst, opts,
                                                        lm_diff_fst)
        # Hold on to both FSTs so they stay in scope while the decoder is
        # alive.
        self._fst = fst
        self._lm_diff_fst = lm_diff_fst
class LatticeFasterOnlineDecoder(_LatticeOnlineDecoderBase,
                                 LatticeFasterOnlineDecoder):
    """Lattice generating faster online decoder.

    Similar to :class:`LatticeFasterDecoder` but computes the best path
    without generating the entire raw lattice and finding the best path
    through it. Instead, it traces back through the lattice.

    Args:
        fst (StdFst): Decoding graph `HCLG`.
        opts (LatticeFasterDecoderOptions): Decoder options.
    """

    def __init__(self, fst, opts):
        super(LatticeFasterOnlineDecoder, self).__init__(fst, opts)
        self._fst = fst  # keep a reference to FST to keep it in scope

    def get_lattice(self, use_final_probs=True):
        """Gets the lattice-determinized compact lattice.

        The raw lattice is determinized with pruning, using the
        ``lattice_beam`` and ``det_opts.max_mem`` values from the decoder
        options.

        Args:
            use_final_probs (bool): Forwarded to :meth:`get_raw_lattice`.
                If ``True`` and a final state of the graph is reached, then
                the output will include final probabilities given by the
                graph. Otherwise all final probabilities are treated as one.

        Returns:
            CompactLatticeVectorFst: The lattice-determinized compact
            lattice.

        Raises:
            RuntimeError: If no tokens survive, or if the determinized
                lattice has no states left.
        """
        # The inherited wrapper passes an output FST as the first argument
        # of `_get_lattice` and expects a success flag back. The Python
        # implementation below takes only `use_final_probs` and returns the
        # lattice itself, so going through the inherited wrapper raises
        # TypeError. Call the Python implementation directly instead.
        return self._get_lattice(use_final_probs)

    # This method is missing from the C++ class so we implement it here.
    def _get_lattice(self, use_final_probs=True):
        """Builds the determinized compact lattice from the raw lattice.

        Unlike the ``_get_lattice`` the base class wrapper expects, this
        creates and returns the output FST itself and raises on failure.
        """
        raw_fst = self.get_raw_lattice(use_final_probs).invert().arcsort()
        lat_opts = _lat.DeterminizeLatticePrunedOptions()
        config = self.get_options()
        # Reuse the memory limit configured for the decoder's own
        # determinization options.
        lat_opts.max_mem = config.det_opts.max_mem
        ofst = _fst.CompactLatticeVectorFst()
        _lat.determinize_lattice_pruned(raw_fst, config.lattice_beam,
                                        ofst, lat_opts)
        ofst.connect()
        if ofst.num_states() == 0:
            raise RuntimeError("Decoding failed. No tokens survived.")
        return ofst
class LatticeFasterOnlineGrammarDecoder(_LatticeOnlineDecoderBase,
                                        LatticeFasterOnlineGrammarDecoder):
    """Lattice generating faster online grammar decoder.

    Similar to :class:`LatticeFasterGrammarDecoder` but computes the best
    path without generating the entire raw lattice and finding the best path
    through it. Instead, it traces back through the lattice.

    Args:
        fst (GrammarFst): Decoding graph `HCLG`.
        opts (LatticeFasterDecoderOptions): Decoder options.
    """

    def __init__(self, fst, opts):
        super(LatticeFasterOnlineGrammarDecoder, self).__init__(fst, opts)
        self._fst = fst  # keep a reference to FST to keep it in scope

    def get_lattice(self, use_final_probs=True):
        """Gets the lattice-determinized compact lattice.

        The raw lattice is determinized with pruning, using the
        ``lattice_beam`` and ``det_opts.max_mem`` values from the decoder
        options.

        Args:
            use_final_probs (bool): Forwarded to :meth:`get_raw_lattice`.
                If ``True`` and a final state of the graph is reached, then
                the output will include final probabilities given by the
                graph. Otherwise all final probabilities are treated as one.

        Returns:
            CompactLatticeVectorFst: The lattice-determinized compact
            lattice.

        Raises:
            RuntimeError: If no tokens survive, or if the determinized
                lattice has no states left.
        """
        # The inherited wrapper passes an output FST as the first argument
        # of `_get_lattice` and expects a success flag back. The Python
        # implementation below takes only `use_final_probs` and returns the
        # lattice itself, so going through the inherited wrapper raises
        # TypeError. Call the Python implementation directly instead.
        return self._get_lattice(use_final_probs)

    # This method is missing from the C++ class so we implement it here.
    def _get_lattice(self, use_final_probs=True):
        """Builds the determinized compact lattice from the raw lattice.

        Unlike the ``_get_lattice`` the base class wrapper expects, this
        creates and returns the output FST itself and raises on failure.
        """
        raw_fst = self.get_raw_lattice(use_final_probs).invert().arcsort()
        lat_opts = _lat.DeterminizeLatticePrunedOptions()
        config = self.get_options()
        # Reuse the memory limit configured for the decoder's own
        # determinization options.
        lat_opts.max_mem = config.det_opts.max_mem
        ofst = _fst.CompactLatticeVectorFst()
        _lat.determinize_lattice_pruned(raw_fst, config.lattice_beam,
                                        ofst, lat_opts)
        ofst.connect()
        if ofst.num_states() == 0:
            raise RuntimeError("Decoding failed. No tokens survived.")
        return ofst
# Export every name visible at module level at this point, except names that
# start with an underscore and names that end in ``Base``.
__all__ = [name for name in dir()
           if not (name.startswith('_') or name.endswith('Base'))]