Skip to content

Commit 54793e7

Browse files
committed
Rewrite the memory-optimized _85encode for performance
Use a dedicated generator function to iterate over the buffer more efficiently, unpacking 512 bytes at a time until fewer than 512 bytes remain. This makes far fewer calls to the unpack method.
1 parent 619ac7f commit 54793e7

File tree

1 file changed

+27
-8
lines changed

1 file changed

+27
-8
lines changed

Lib/base64.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -299,19 +299,37 @@ def b16decode(s, casefold=False):
299299
_A85START = b"<~"
300300
_A85END = b"~>"
301301

302+
def _85buffer_iter_words(b):
    """Yield the big-endian unsigned 32-bit words contained in *b*.

    Utility generator for _85encode.  *b* is a byte-oriented buffer
    (bytes, bytearray or a byte-format memoryview) of any length.  When
    the length is not a multiple of 4, the final word is built from the
    trailing bytes padded on the right with NUL bytes.

    Full 512-byte chunks are decoded 128 words per call, so far fewer
    unpack calls are made than when decoding one word at a time.
    """
    length = len(b)
    bulk_end = length - length % 512    # end of the last full 512-byte chunk
    words_end = length - length % 4     # end of the last complete 4-byte word

    # unpack_from() reads directly from the buffer at a given offset, so
    # no temporary slice has to be copied for every chunk.
    unpack512_from = struct.Struct("!128I").unpack_from
    unpack4_from = struct.Struct("!I").unpack_from

    for offset in range(0, bulk_end, 512):
        yield from unpack512_from(b, offset)

    for offset in range(bulk_end, words_end, 4):
        yield unpack4_from(b, offset)[0]

    if words_end < length:
        # Incomplete last word: pad it with NUL bytes up to 4 bytes.
        # bytes() lets the concatenation work for any buffer type.
        tail = bytes(b[words_end:])
        yield unpack4_from(tail + b'\0' * (4 - len(tail)))[0]
326+
302327
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
303328
# Helper function for a85encode and b85encode
304329
if not isinstance(b, bytes_types):
305330
b = memoryview(b).tobytes()
306331

307-
padding = (-len(b)) % 4
308-
if padding:
309-
b = b + b'\0' * padding
310-
311-
unpack = struct.Struct("!I").unpack
312-
ibytes = (b[i:i+4] for i in range(0, len(b), 4)) # 4 bytes each
313-
words = (unpack(i)[0] for i in ibytes)
314-
332+
words = _85buffer_iter_words(b)
315333
chunks = (b'z' if foldnuls and not word else
316334
b'y' if foldspaces and word == 0x20202020 else
317335
(chars2[word // 614125] +
@@ -325,6 +343,7 @@ def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
325343
ret += chunk
326344

327345
# update of the last chunk afterwards
346+
padding = (-len(b)) % 4
328347
if chunk and padding and not pad:
329348
ret[-len(chunk):] = []
330349

0 commit comments

Comments
 (0)