Skip to content

Commit 67e6eba

Browse files
committed
Rename TOTAL_PARTITIONS to MAX_TOTAL_PARTITIONS and add a comment explaining it
1 parent f6bd5d6 commit 67e6eba

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

python/pyspark/shuffle.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -182,7 +182,8 @@ class ExternalMerger(Merger):
182182
499950000
183183
"""
184184

185-
TOTAL_PARTITIONS = 4096
185+
# the max total partitions created recursively
186+
MAX_TOTAL_PARTITIONS = 4096
186187

187188
def __init__(self, aggregator, memory_limit=512, serializer=None,
188189
localdirs=None, scale=1, partitions=64, batch=10000):
@@ -196,7 +197,7 @@ def __init__(self, aggregator, memory_limit=512, serializer=None,
196197
self.partitions = partitions
197198
# check the memory after # of items merged
198199
self.batch = batch
199-
# scale is used to scale down the hash of key for recursive hash map,
200+
# scale is used to scale down the hash of key for recursive hash map
200201
self.scale = scale
201202
# unpartitioned merged data
202203
self.data = {}
@@ -362,7 +363,7 @@ def _external_items(self):
362363
False)
363364

364365
# limit the total partitions
365-
if (self.scale * self.partitions < self.TOTAL_PARTITIONS
366+
if (self.scale * self.partitions < self.MAX_TOTAL_PARTITIONS
366367
and j < self.spills - 1
367368
and get_used_memory() > hard_limit):
368369
self.data.clear() # will read from disk again

0 commit comments

Comments
 (0)