Expand source code Browse git
from __future__ import annotations
import msgpack
from refinery.lib.patterns import formats, pattern_with_size_limits
from refinery.lib.types import Param
from refinery.units import Chunk
from refinery.units.pattern import Arg, PatternExtractor
class carve(PatternExtractor):
    """
    Extracts and optionally decodes data in named formats from the input: base64, hex, string literals.
    The complete list of supported formats is as follows:\n\n{}
    """
    # NOTE: the docstring above doubles as a runtime template; its `{}` placeholder
    # is filled in with the table of formats right after the class definition.

    def __init__(
        self, format: Param[str, Arg.String(metavar='format', help='Specify one of the available long or short format specifiers.')],
        unique: Param[bool, Arg.Switch('-q', help='Yield every match only once.')] = False,
        decode: Param[bool, Arg.Switch('-d', help='Automatically decode known patterns.')] = False,
        single: Param[bool, Arg.Switch('-s', help='Only get the biggest match; equivalent to -qlt1')] = False,
        min=1, max=0, len=0,
        stripspace=False, longest=False, take=0, utf16=True, ascii=True
    ):
        # The single switch is shorthand: unique matches, longest first, take one.
        if single:
            unique, longest, take = True, True, 1

        # Resolve the user-supplied specifier; any lookup failure is reported as
        # a ValueError that names the offending specifier.
        try:
            format = formats.from_dashname(format)
        except Exception:
            raise ValueError(F'{format} is not a valid format')

        super().__init__(
            min=min,
            max=max,
            len=len,
            stripspace=stripspace,
            duplicates=not unique,
            longest=longest,
            take=take,
            ascii=ascii,
            utf16=utf16,
            format=format
        )

        def _decoder_for(fmt):
            # Map a format to the callable that decodes one carved chunk. Every
            # decoder is imported inside its own branch, so only the module of
            # the decoder that is actually selected gets imported. Formats that
            # have no decoder fall through to None.
            if fmt == formats.str:
                from ..encoding.esc import esc
                return esc(unicode=True, quoted=True)
            if fmt == formats.int:
                from ..encoding.base import base
                return base()
            if fmt in (formats.base16, formats.base16s, formats.hex):
                from ..encoding.hex import hex
                return hex()
            if fmt == formats.hexdump:
                from ..formats.hexload import hexload
                return hexload()
            if fmt == formats.intarray:
                from ..blockwise.pack import pack
                return pack()
            if fmt == formats.strarray:
                from ..encoding.esc import esc

                def _pack_strings(data: Chunk) -> bytes:
                    # Unescape each string literal found in the chunk and emit
                    # the resulting list as a single msgpack buffer.
                    literals = [
                        hit[0] | esc | bytes
                        for hit in formats.str.value.finditer(data)
                    ]
                    return msgpack.packb(literals) or B''

                return _pack_strings
            if fmt in (formats.base64, formats.base64s):
                from ..encoding.b64 import b64
                return b64()
            if fmt in (formats.base85, formats.base85s):
                from ..encoding.b85 import b85
                return b85()
            if fmt == formats.base64u:
                from ..encoding.b64 import b64
                return b64(urlsafe=True)
            if fmt == formats.base32:
                from ..encoding.b32 import b32
                return b32()
            if fmt == formats.ps1str:
                from ..encoding.escps import escps
                return escps()
            if fmt == formats.htmlesc:
                from ..encoding.htmlesc import htmlesc
                return htmlesc()
            if fmt == formats.vbastr:
                from ..encoding.escps import escps
                return escps()
            if fmt == formats.hexarray:
                from ..blockwise.pack import pack
                return pack(0x10)
            if fmt == formats.wshenc:
                from ..encoding.wshenc import wshenc
                return wshenc()
            if fmt == formats.uuenc:
                from ..encoding.uuenc import uuenc
                return uuenc()
            if fmt in (formats.urlquote, formats.urlhex):
                from ..encoding.url import url
                return url()
            return None

        # Without the decode switch, matches are passed through untouched.
        self.decoder = _decoder_for(self.args.format) if decode else None

    def process(self, data):
        # Build the search pattern from the selected format and the size bounds:
        # the lower bound is clamped to at least 1, the upper bound is taken as
        # an absolute value.
        bounds = self._getbounds()
        expression = self.args.format.value
        lower = max(1, bounds.min)
        upper = abs(bounds.max)
        pattern = pattern_with_size_limits(expression, lower, upper)
        self.log_info('using pattern:', pattern.str.pattern)

        matches = iter(self.matches_filtered(memoryview(data), pattern.bin))
        decoder = self.decoder

        if decoder is None:
            yield from matches
            return

        for chunk in matches:
            # A chunk that fails to decode is logged and dropped; the remaining
            # matches are still processed.
            try:
                yield decoder(chunk)
            except Exception as E:
                self.log_info(F'decoder failure: {E!s}')
# Substitute the table of supported formats for the `{}` placeholder in the
# carve docstring. The truthiness check skips this when the docstring is None
# or empty.
__d = carve.__doc__
if __d:
    carve.__doc__ = __d.format(formats.make_table_with_shorts('FORMAT'))
class csd(carve):
    """
    Short for carve & decode; carves the single largest buffer of a given format from the input
    and decodes it with the appropriate decoder. See the carve help for detailed information on
    format specifiers.
    """

    def __init__(self, format, utf16=True, ascii=True, stripspace=False):
        # Fixed configuration of carve: single-match mode with decoding switched
        # on; the remaining options are forwarded from the caller unchanged.
        super().__init__(
            format, single=True, decode=True,
            utf16=utf16, ascii=ascii, stripspace=stripspace)
class csb(carve):
    """
    Short for carve single buffer; carves the single largest buffer of a given format from the
    input data and returns it. See the carve help for detailed information on format specifiers.
    """

    def __init__(self, format, utf16=True, ascii=True, stripspace=False):
        # Fixed configuration of carve: single-match mode with decoding switched
        # off; the remaining options are forwarded from the caller unchanged.
        super().__init__(
            format, single=True, decode=False,
            utf16=utf16, ascii=ascii, stripspace=stripspace)
class carve (format, unique=False, decode=False, single=False, min=1, max=0, len=0, stripspace=False, longest=False, take=0, utf16=True, ascii=True)-
Extracts and optionally decodes data in named formats from the input: base64, hex, string literals.
The complete list of supported formats is as follows:
| FORMAT | SHORT | DESCRIPTION |
|---|---|---|
| integer | int | any integer literal expression |
| float | flt | floating point literals |
| number | num | either an integer or a float |
| string | str | c-syntax string literal |
| cmdstr | | Windows command line escaped string literal |
| ps1str | | PowerShell escaped string literal |
| vbastr | | VBS/VBA string literal |
| vbaint | | VBS/VBA integer literal |
| printable | ps | printable strings (includes whitespace) |
| urlquote | uq | url-encoded characters, default char set |
| urlhex | uh | hex-encoded buffer using URL escape sequences |
| htmlesc | | sequence of HTML-escape characters |
| intarray | [int] | integers separated by commas or semicolons |
| strarray | [str] | strings separated by commas or semicolons |
| numarray | [num] | numbers separated by commas or semicolons |
| hexarray | [hex] | hex sequences separated by commas or semicolons |
| letters | | alphabetic characters |
| wshenc | | encoded Windows Scripting Host Scripts (JS/VBS) |
| alnum | | alphanumeric characters |
| base32 | b32 | Base32 encoded strings |
| base58 | b58 | Base58 encoded strings |
| base62 | b62 | Base62 encoded strings |
| base64 | b64 | Base64 encoded strings |
| base85 | b85 | Base85 encoded strings |
| ascii85 | a85 | Ascii85 encoded strings |
| z85 | | Z85 encoded strings |
| base92 | b92 | Base92 encoded strings |
| base64u | b64u | Base64 encoded strings using URL-safe alphabet |
| hex | | hexadecimal strings |
| base16 | b16 | uppercase hexadecimal strings |
| base16s | b16s | hexadecimal strings |
| base64s | b64s | Base64 encoded strings, separated by whitespace |
| base85s | b85s | Base85 encoded string, separated by whitespace |
| a85s | | Ascii85 encoded string, separated by whitespace |
| z85s | | Z85 encoded string, separated by whitespace |
| utf8 | | sequences of bytes that can be decoded as UTF8 |
| hexdump | hd | typical hexdump output |
| uuenc | | UUEncoded data |

Expand source code Browse git
class carve(PatternExtractor): """ Extracts and optionally decodes data in named formats from the input: base64, hex, string literals. The complete list of supported formats is as follows:\n\n{} """ def __init__( self, format: Param[str, Arg.String(metavar='format', help='Specify one of the available long or short format specifiers.')], unique: Param[bool, Arg.Switch('-q', help='Yield every match only once.')] = False, decode: Param[bool, Arg.Switch('-d', help='Automatically decode known patterns.')] = False, single: Param[bool, Arg.Switch('-s', help='Only get the biggest match; equivalent to -qlt1')] = False, min=1, max=0, len=0, stripspace=False, longest=False, take=0, utf16=True, ascii=True ): if single: take = 1 longest = True unique = True try: format = formats.from_dashname(format) except Exception: raise ValueError(F'{format} is not a valid format') super().__init__( min=min, max=max, len=len, stripspace=stripspace, duplicates=not unique, longest=longest, take=take, ascii=ascii, utf16=utf16, format=format ) if not decode: decoder = None elif self.args.format == formats.str: from ..encoding.esc import esc decoder = esc(unicode=True, quoted=True) elif self.args.format == formats.int: from ..encoding.base import base decoder = base() elif self.args.format in (formats.base16, formats.base16s, formats.hex): from ..encoding.hex import hex decoder = hex() elif self.args.format == formats.hexdump: from ..formats.hexload import hexload decoder = hexload() elif self.args.format == formats.intarray: from ..blockwise.pack import pack decoder = pack() elif self.args.format == formats.strarray: from ..encoding.esc import esc def _decoder(data: Chunk) -> bytes: # noqa return msgpack.packb([ m[0] | esc | bytes for m in formats.str.value.finditer(data)]) or B'' decoder = _decoder elif self.args.format in (formats.base64, formats.base64s): from ..encoding.b64 import b64 decoder = b64() elif self.args.format in (formats.base85, formats.base85s): from ..encoding.b85 import b85 
decoder = b85() elif self.args.format == formats.base64u: from ..encoding.b64 import b64 decoder = b64(urlsafe=True) elif self.args.format == formats.base32: from ..encoding.b32 import b32 decoder = b32() elif self.args.format == formats.ps1str: from ..encoding.escps import escps decoder = escps() elif self.args.format == formats.htmlesc: from ..encoding.htmlesc import htmlesc decoder = htmlesc() elif self.args.format == formats.vbastr: from ..encoding.escps import escps decoder = escps() elif self.args.format == formats.hexarray: from ..blockwise.pack import pack decoder = pack(0x10) elif self.args.format == formats.wshenc: from ..encoding.wshenc import wshenc decoder = wshenc() elif self.args.format == formats.uuenc: from ..encoding.uuenc import uuenc decoder = uuenc() elif self.args.format in ( formats.urlquote, formats.urlhex, ): from ..encoding.url import url decoder = url() else: decoder = None self.decoder = decoder def process(self, data): sizes = self._getbounds() pattern = pattern_with_size_limits( self.args.format.value, max(1, sizes.min), abs(sizes.max)) self.log_info('using pattern:', pattern.str.pattern) it = iter(self.matches_filtered(memoryview(data), pattern.bin)) if (decoder := self.decoder) is None: yield from it else: for chunk in it: try: yield decoder(chunk) except Exception as E: self.log_info(F'decoder failure: {E!s}')Ancestors
Subclasses
Class variables
var reverse-
The type of the None singleton.
Inherited members
PatternExtractor:FilterEverythingRequiresactassemblecodecconsolefilterfinishhandlesis_quietis_reversibleisattylabelledleniencylog_alwayslog_debuglog_detachlog_faillog_infolog_levellog_warnloggermatchesmatches_filterednamenozzleoptional_dependenciesprocessreadread1required_dependenciesresetrunsourcesuperinit
class csd (format, utf16=True, ascii=True, stripspace=False)-
Short for carve & decode; carves the single largest buffer of a given format from the input and decodes it with the appropriate decoder. See the carve help for detailed information on format specifiers.
Expand source code Browse git
class csd(carve): """ Short for carve & decode; carves the single largest buffer of a given format from the input and decodes it with the appropriate decoder. See the carve help for detailed information on format specifiers. """ def __init__(self, format, utf16=True, ascii=True, stripspace=False): super().__init__( format, decode=True, single=True, utf16=utf16, ascii=ascii, stripspace=stripspace, )Ancestors
Subclasses
Inherited members
carve:FilterEverythingRequiresactassemblecodecconsolefilterfinishhandlesis_quietis_reversibleisattylabelledleniencylog_alwayslog_debuglog_detachlog_faillog_infolog_levellog_warnloggermatchesmatches_filterednamenozzleoptional_dependenciesreadread1required_dependenciesresetreverserunsourcesuperinit
PatternExtractor:
class csb (format, utf16=True, ascii=True, stripspace=False)-
Short for carve single buffer; carves the single largest buffer of a given format from the input data and returns it. See the carve help for detailed information on format specifiers.
Expand source code Browse git
class csb(carve): """ Short for carve single buffer; carves the single largest buffer of a given format from the input data and returns it. See the carve help for detailed information on format specifiers. """ def __init__(self, format, utf16=True, ascii=True, stripspace=False): super().__init__( format, decode=False, single=True, utf16=utf16, ascii=ascii, stripspace=stripspace, )Ancestors
Subclasses
Inherited members
carve:FilterEverythingRequiresactassemblecodecconsolefilterfinishhandlesis_quietis_reversibleisattylabelledleniencylog_alwayslog_debuglog_detachlog_faillog_infolog_levellog_warnloggermatchesmatches_filterednamenozzleoptional_dependenciesreadread1required_dependenciesresetreverserunsourcesuperinit
PatternExtractor: