the refinery.units.pattern.resub documentation

Expand source code Browse git
from __future__ import annotations

from typing import Match

from refinery.lib.meta import metavars
from refinery.lib.types import Param, buf
from refinery.units.pattern import Arg, RefinedMatch, SingleRegexTransformUnit


# NOTE: the class docstring below is left untouched on purpose; it appears to be
# what the `{0}` placeholder of the `docs` template expands to in the generated
# documentation.
class resub(
    SingleRegexTransformUnit,
    docs='{0}\n\nSubstitutions are interpreted as format strings: {SingleRegexTransformUnit}',
):
    """
    A unit for performing substitutions based on a binary regular expression pattern.
    """
    # The constructor only declares the command-line interface and forwards every
    # option unchanged to the base class.
    def __init__(
        self,
        regex: Param[str, Arg(
            help='Regular expression to be searched and replaced. The default is "{default}".'
        )] = r'\s+',
        subst: Param[buf, Arg(
            'subst',
            help=(
                'Substitution value: use {1} for group 1, {0} for entire match. '
                'The default value is an empty string, i.e. matches are removed from the input by default.'
            ),
        )] = b'',
        unesc: Param[bool, Arg.Switch(
            '-n',
            help='Interpret and replace string escape sequences in the substutution expression.',
        )] = False,
        multiline=False,
        ignorecase=False,
        count=0,
    ):
        super().__init__(
            regex=regex, subst=subst, unesc=unesc,
            multiline=multiline, ignorecase=ignorecase, count=count)

    # Replaces matches of the configured regex in `data`; each replacement is
    # obtained by formatting the substitution template with the match groups.
    def process(self, data):
        self.log_info('pattern:', getattr(self.regex, 'pattern', self.regex))
        self.log_info('replace:', self.args.subst)

        variables = metavars(data)
        unescape = self.args.unesc
        # The template is handled as text; bytes outside ASCII are turned into
        # backslash escape sequences by the 'backslashreplace' error handler.
        template = self.args.subst.decode('ascii', 'backslashreplace')
        compiled = self.regex

        def substitute(match: Match[bytes]):
            # Called once per match: expose positional and named groups to the
            # formatter and return the formatted replacement.
            groups = RefinedMatch(match, compiled.groups, compiled.groupindex)
            replacement = variables.format_bin(
                template,
                self.codec,
                groups.grouplist(),
                groups.groupdict(),
                escaped=unescape,
            )
            self.log_debug('substitution:', repr(replacement), clip=True)
            return replacement

        # Only pass a replacement limit when one was configured; a falsy count
        # leaves the choice to the default of the pattern's sub method.
        limit = self.args.count
        options = {'count': limit} if limit else {}
        return compiled.sub(substitute, data, **options)
class resub (regex='\\s+', subst=b'', unesc=False, multiline=False, ignorecase=False, count=0)

A unit for performing substitutions based on a binary regular expression pattern.

Substitutions are interpreted as format strings: Besides the syntax {k} to insert the k-th match group, the unit supports processing the contents of match groups with arbitrary refinery units and other multibin handlers. The format definitions use the following syntax:

{field[!modifier]:handlers}

The field can specify an extracted meta variable, or the positional index of an extracted value. The optional multibin suffix handlers is used to post-process the value of this field. For example, {2:hex:zl:b64} means: Take the second match group, hex-decode it, decompress it using zl, and finally decode it using base64. The optional modifier can be one of these:

  • !r: Computes the Python repr() of the field before processing it.
  • !s: Field is a UTF-8 string literal, not a variable.
  • !a: Field is a latin1 string literal.
  • !u: Field is a UTF-16LE string literal.
  • !h: Field is a hex-encoded literal (shortcut for !s:h).
  • !q: Field is a URL-encoded literal (shortcut for !s:q).
  • !n: Field is an escape-sequence literal (shortcut for !s:n).
  • !z: Field evaluates to integer N; returns N zero bytes.
Expand source code Browse git
# NOTE: the class docstring below is left untouched on purpose; it appears to be
# what the `{0}` placeholder of the `docs` template expands to in the generated
# documentation.
class resub(
    SingleRegexTransformUnit,
    docs='{0}\n\nSubstitutions are interpreted as format strings: {SingleRegexTransformUnit}',
):
    """
    A unit for performing substitutions based on a binary regular expression pattern.
    """
    # The constructor only declares the command-line interface and forwards every
    # option unchanged to the base class.
    def __init__(
        self,
        regex: Param[str, Arg(
            help='Regular expression to be searched and replaced. The default is "{default}".'
        )] = r'\s+',
        subst: Param[buf, Arg(
            'subst',
            help=(
                'Substitution value: use {1} for group 1, {0} for entire match. '
                'The default value is an empty string, i.e. matches are removed from the input by default.'
            ),
        )] = b'',
        unesc: Param[bool, Arg.Switch(
            '-n',
            help='Interpret and replace string escape sequences in the substutution expression.',
        )] = False,
        multiline=False,
        ignorecase=False,
        count=0,
    ):
        super().__init__(
            regex=regex, subst=subst, unesc=unesc,
            multiline=multiline, ignorecase=ignorecase, count=count)

    # Replaces matches of the configured regex in `data`; each replacement is
    # obtained by formatting the substitution template with the match groups.
    def process(self, data):
        self.log_info('pattern:', getattr(self.regex, 'pattern', self.regex))
        self.log_info('replace:', self.args.subst)

        variables = metavars(data)
        unescape = self.args.unesc
        # The template is handled as text; bytes outside ASCII are turned into
        # backslash escape sequences by the 'backslashreplace' error handler.
        template = self.args.subst.decode('ascii', 'backslashreplace')
        compiled = self.regex

        def substitute(match: Match[bytes]):
            # Called once per match: expose positional and named groups to the
            # formatter and return the formatted replacement.
            groups = RefinedMatch(match, compiled.groups, compiled.groupindex)
            replacement = variables.format_bin(
                template,
                self.codec,
                groups.grouplist(),
                groups.groupdict(),
                escaped=unescape,
            )
            self.log_debug('substitution:', repr(replacement), clip=True)
            return replacement

        # Only pass a replacement limit when one was configured; a falsy count
        # leaves the choice to the default of the pattern's sub method.
        limit = self.args.count
        options = {'count': limit} if limit else {}
        return compiled.sub(substitute, data, **options)

Ancestors

Subclasses

Class variables

var reverse

The type of the None singleton.

Inherited members