bpo-45150: Add hashlib.file_digest() for efficient file hashing by tiran · Pull Request #31930 · python/cpython

Expand Up @@ -10,6 +10,7 @@ from binascii import unhexlify import hashlib import importlib import io import itertools import os import sys Expand All @@ -20,6 +21,7 @@ from test import support from test.support import _4G, bigmemtest from test.support.import_helper import import_fresh_module from test.support import os_helper from test.support import threading_helper from test.support import warnings_helper from http.client import HTTPException Expand Down Expand Up @@ -371,6 +373,31 @@ def check(self, name, data, hexdigest, shake=False, **kwargs): if not shake: self.assertEqual(len(digest), m.digest_size)
if not shake and kwargs.get("key") is None: # skip shake and blake2 extended parameter tests self.check_file_digest(name, data, hexdigest)
def check_file_digest(self, name, data, hexdigest):
    """Check hashlib.file_digest() against a known digest of *data*.

    The algorithm is selected in every available way: by the plain
    *name* and by each entry of ``self.constructors_to_test[name]``.
    For each selector the digest is computed twice -- from an in-memory
    ``io.BytesIO`` and from a file written to ``os_helper.TESTFN`` --
    and both results must equal *hexdigest*, compared in lower case.
    The scratch file is removed before returning.
    """
    expected = hexdigest.lower()
    selectors = [name, *self.constructors_to_test[name]]
    path = os_helper.TESTFN

    with open(path, "wb") as sink:
        sink.write(data)

    try:
        for selector in selectors:
            # In-memory stream, explicitly positioned at its start.
            stream = io.BytesIO(data)
            stream.seek(0)
            from_memory = hashlib.file_digest(stream, selector)
            self.assertEqual(from_memory.hexdigest(), expected)

            # The same bytes read back from the file on disk.
            with open(path, "rb") as source:
                from_disk = hashlib.file_digest(source, selector)
            self.assertEqual(from_disk.hexdigest(), expected)
    finally:
        # Remove the scratch file even when an assertion fails.
        os.unlink(path)
def check_no_unicode(self, algorithm_name): # Unicode objects are not allowed as input. constructors = self.constructors_to_test[algorithm_name] Expand Down Expand Up @@ -1117,6 +1144,33 @@ def test_normalized_name(self): self.assertNotIn("blake2b512", hashlib.algorithms_available) self.assertNotIn("sha3-512", hashlib.algorithms_available)
def test_file_digest(self):
    # file_digest() over a file written as 10 chunks of 64 KiB must
    # agree with a sha256 object fed the same chunks incrementally.
    path = os_helper.TESTFN
    chunk = b'a' * 65536
    reference = hashlib.sha256()
    self.addCleanup(os.unlink, path)
    with open(path, "wb") as sink:
        for _ in range(10):
            reference.update(chunk)
            sink.write(chunk)

    with open(path, "rb") as source:
        result = hashlib.file_digest(source, hashlib.sha256)

    # Same digest, same algorithm name, same hash object type.
    self.assertEqual(reference.hexdigest(), result.hexdigest())
    self.assertEqual(reference.name, result.name)
    self.assertIs(type(reference), type(result))

    # Something that is not a file object at all raises ValueError.
    with self.assertRaises(ValueError):
        hashlib.file_digest(None, "sha256")

    # So do a file opened in text mode and one opened write-only.
    for mode in ("r", "wb"):
        with self.assertRaises(ValueError), open(path, mode) as fobj:
            hashlib.file_digest(fobj, "sha256")

# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()