bpo-34010: Fix tarfile read performance regression (GH-8020) · python/cpython@12a08c4

Original file line number | Diff line number | Diff line change

@@ -525,7 +525,7 @@ def read(self, size=None):

525525

if not buf:

526526

break

527527

t.append(buf)

528-

buf = "".join(t)

528+

buf = b"".join(t)

529529

else:

530530

buf = self._read(size)

531531

self.pos += len(buf)

@@ -538,6 +538,7 @@ def _read(self, size):

538538

return self.__read(size)

539539
540540

c = len(self.dbuf)

541+

t = [self.dbuf]

541542

while c < size:

542543

buf = self.__read(self.bufsize)

543544

if not buf:

@@ -546,26 +547,27 @@ def _read(self, size):

546547

buf = self.cmp.decompress(buf)

547548

except self.exception:

548549

raise ReadError("invalid compressed data")

549-

self.dbuf += buf

550+

t.append(buf)

550551

c += len(buf)

551-

buf = self.dbuf[:size]

552-

self.dbuf = self.dbuf[size:]

553-

return buf

552+

t = b"".join(t)

553+

self.dbuf = t[size:]

554+

return t[:size]

554555
555556

def __read(self, size):

556557

"""Return size bytes from stream. If internal buffer is empty,

557558

read another block from the stream.

558559

"""

559560

c = len(self.buf)

561+

t = [self.buf]

560562

while c < size:

561563

buf = self.fileobj.read(self.bufsize)

562564

if not buf:

563565

break

564-

self.buf += buf

566+

t.append(buf)

565567

c += len(buf)

566-

buf = self.buf[:size]

567-

self.buf = self.buf[size:]

568-

return buf

568+

t = b"".join(t)

569+

self.buf = t[size:]

570+

return t[:size]

569571

# class _Stream

570572
571573

class _StreamProxy(object):