Skip to content

Commit f52e0ad

Browse files
committed
Fix Python tests for real
1 parent 2f3afa3 commit f52e0ad

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

python/pyspark/rdd.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,7 +1188,10 @@ def aggregateByKey(self, zeroValue, seqFunc, combFunc, numPartitions=None):
11881188
for merging values between partitions. To avoid memory allocation, both of these functions are
11891189
allowed to modify and return their first argument instead of creating a new U.
11901190
"""
1191-
return self.combineByKey(lambda v: seqFunc(zeroValue, v), seqFunc, combFunc, numPartitions)
1191+
def createZero():
1192+
return copy.deepcopy(zeroValue)
1193+
1194+
return self.combineByKey(lambda v: seqFunc(createZero(), v), seqFunc, combFunc, numPartitions)
11921195

11931196
def foldByKey(self, zeroValue, func, numPartitions=None):
11941197
"""
@@ -1201,7 +1204,10 @@ def foldByKey(self, zeroValue, func, numPartitions=None):
12011204
>>> rdd.foldByKey(0, add).collect()
12021205
[('a', 2), ('b', 1)]
12031206
"""
1204-
return self.combineByKey(lambda v: func(zeroValue, v), func, func, numPartitions)
1207+
def createZero():
1208+
return copy.deepcopy(zeroValue)
1209+
1210+
return self.combineByKey(lambda v: func(createZero(), v), func, func, numPartitions)
12051211

12061212

12071213
# TODO: support variant with custom partitioner

0 commit comments

Comments
 (0)