2929from pyspark .files import SparkFiles
3030from pyspark .java_gateway import launch_gateway
3131from pyspark .serializers import PickleSerializer , BatchedSerializer , UTF8Deserializer , \
32- PairDeserializer , CompressedSerializer
32+ PairDeserializer , CompressedSerializer , AutoBatchedSerializer
3333from pyspark .storagelevel import StorageLevel
3434from pyspark .rdd import RDD
3535from pyspark .traceback_utils import CallSite , first_spark_call
@@ -67,7 +67,7 @@ class SparkContext(object):
6767 _default_batch_size_for_serialized_input = 10
6868
6969 def __init__ (self , master = None , appName = None , sparkHome = None , pyFiles = None ,
70- environment = None , batchSize = 1024 , serializer = PickleSerializer (), conf = None ,
70+ environment = None , batchSize = 0 , serializer = PickleSerializer (), conf = None ,
7171 gateway = None ):
7272 """
7373 Create a new SparkContext. At least the master and app name should be set,
@@ -83,8 +83,9 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
8383 :param environment: A dictionary of environment variables to set on
8484 worker nodes.
8585 :param batchSize: The number of Python objects represented as a single
86- Java object. Set 1 to disable batching or -1 to use an
87- unlimited batch size.
86+ Java object. Set 1 to disable batching, or 0 to choose batch size
87+ based on size of objects automatically, or -1 to use an unlimited
88+ batch size.
8889 :param serializer: The serializer for RDDs.
8990 :param conf: A L{SparkConf} object setting Spark properties.
9091 :param gateway: Use an existing gateway and JVM, otherwise a new JVM
@@ -117,6 +118,8 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize,
117118 self ._unbatched_serializer = serializer
118119 if batchSize == 1 :
119120 self .serializer = self ._unbatched_serializer
121+ elif batchSize == 0 :
122+ self .serializer = AutoBatchedSerializer (self ._unbatched_serializer )
120123 else :
121124 self .serializer = BatchedSerializer (self ._unbatched_serializer ,
122125 batchSize )
0 commit comments