From 2ac657d26b13cbf2b1d49355df2ce3f74ebb0db3 Mon Sep 17 00:00:00 2001
From: Chris Markiewicz
Date: Wed, 8 Jan 2025 15:03:39 -0500
Subject: [PATCH 1/4] gh-128646: Implement GzipFile.readinto() functions

---
 Lib/gzip.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/Lib/gzip.py b/Lib/gzip.py
index 1a3c82ce7e0711..21bb4b085fd4d6 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -338,6 +338,20 @@ def read1(self, size=-1):
             size = io.DEFAULT_BUFFER_SIZE
         return self._buffer.read1(size)
 
+    def readinto(self, b):
+        self._check_not_closed()
+        if self.mode != READ:
+            import errno
+            raise OSError(errno.EBADF, "readinto() on write-only GzipFile object")
+        return self._buffer.readinto(b)
+
+    def readinto1(self, b):
+        self._check_not_closed()
+        if self.mode != READ:
+            import errno
+            raise OSError(errno.EBADF, "readinto1() on write-only GzipFile object")
+        return self._buffer.readinto1(b)
+
     def peek(self, n):
         self._check_not_closed()
         if self.mode != READ:

From db7f971ad66a55c1d5f6a96e9aeef1c8944945cc Mon Sep 17 00:00:00 2001
From: Chris Markiewicz
Date: Wed, 8 Jan 2025 15:15:04 -0500
Subject: [PATCH 2/4] doc: Add news blurb

---
 .../Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst

diff --git a/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst
new file mode 100644
index 00000000000000..034a66b704dea8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst
@@ -0,0 +1,4 @@
+Eagerly write to buffers passed to :class:`gzip.GzipFile`'s
+:meth:`~io.BufferedIOBase.readinto` and
+:meth:`~io.BufferedIOBase.readinto1` implementations,
+avoiding unnecessary allocations. Patch by Chris Markiewicz.

From b2601d420f7c4fb22beb6f162e244e57810e7e84 Mon Sep 17 00:00:00 2001
From: "Christopher J. Markiewicz"
Date: Wed, 12 Feb 2025 11:23:38 -0500
Subject: [PATCH 3/4] test: GzipFile.readinto{,1}

---
 Lib/test/test_gzip.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index bf6e1703db8451..eab61f04b2aaa9 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -141,6 +141,38 @@ def test_read1(self):
                 self.assertEqual(f.tell(), nread)
         self.assertEqual(b''.join(blocks), data1 * 50)
 
+    def test_readinto(self):
+        # 10MB of uncompressible data to ensure multiple reads
+        large_data = os.urandom(10 * 2**20)
+        with gzip.GzipFile(self.filename, 'wb') as f:
+            f.write(large_data)
+
+        buf = bytearray(len(large_data))
+        with gzip.GzipFile(self.filename, 'r') as f:
+            nbytes = f.readinto(buf)
+        self.assertEqual(nbytes, len(large_data))
+        self.assertEqual(buf, large_data)
+
+    def test_readinto1(self):
+        # 10MB of uncompressible data to ensure multiple reads
+        large_data = os.urandom(10 * 2**20)
+        with gzip.GzipFile(self.filename, 'wb') as f:
+            f.write(large_data)
+
+        nread = 0
+        buf = bytearray(len(large_data))
+        memview = memoryview(buf)  # Simplifies slicing
+        with gzip.GzipFile(self.filename, 'r') as f:
+            for count in range(200):
+                nbytes = f.readinto1(memview[nread:])
+                if not nbytes:
+                    break
+                nread += nbytes
+                self.assertEqual(f.tell(), nread)
+        self.assertEqual(buf, large_data)
+        # readinto1() should require multiple loops
+        self.assertGreater(count, 1)
+
     @bigmemtest(size=_4G, memuse=1)
     def test_read_large(self, size):
         # Read chunk size over UINT_MAX should be supported, despite zlib's

From e9ff55a2870680d121dcffc874392924e111e578 Mon Sep 17 00:00:00 2001
From: "Christopher J. Markiewicz"
Date: Thu, 20 Feb 2025 11:40:15 -0500
Subject: [PATCH 4/4] rf: Add helper function to check for READ capability

---
 Lib/gzip.py | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/Lib/gzip.py b/Lib/gzip.py
index 21bb4b085fd4d6..216ac1e87d08c7 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -318,11 +318,15 @@ def _write_raw(self, data):
 
         return length
 
-    def read(self, size=-1):
-        self._check_not_closed()
+    def _check_read(self, caller):
         if self.mode != READ:
             import errno
-            raise OSError(errno.EBADF, "read() on write-only GzipFile object")
+            msg = f"{caller}() on write-only GzipFile object"
+            raise OSError(errno.EBADF, msg)
+
+    def read(self, size=-1):
+        self._check_not_closed()
+        self._check_read("read")
         return self._buffer.read(size)
 
     def read1(self, size=-1):
@@ -330,9 +334,7 @@ def read1(self, size=-1):
 
         Reads up to a buffer's worth of data if size is negative."""
         self._check_not_closed()
-        if self.mode != READ:
-            import errno
-            raise OSError(errno.EBADF, "read1() on write-only GzipFile object")
+        self._check_read("read1")
 
         if size < 0:
             size = io.DEFAULT_BUFFER_SIZE
@@ -340,23 +342,17 @@ def read1(self, size=-1):
 
     def readinto(self, b):
         self._check_not_closed()
-        if self.mode != READ:
-            import errno
-            raise OSError(errno.EBADF, "readinto() on write-only GzipFile object")
+        self._check_read("readinto")
         return self._buffer.readinto(b)
 
     def readinto1(self, b):
         self._check_not_closed()
-        if self.mode != READ:
-            import errno
-            raise OSError(errno.EBADF, "readinto1() on write-only GzipFile object")
+        self._check_read("readinto1")
         return self._buffer.readinto1(b)
 
     def peek(self, n):
         self._check_not_closed()
-        if self.mode != READ:
-            import errno
-            raise OSError(errno.EBADF, "peek() on write-only GzipFile object")
+        self._check_read("peek")
         return self._buffer.peek(n)
 
     @property