@@ -427,9 +427,9 @@ class SparkContext(config: SparkConf) extends Logging {
    * Read a text file from HDFS, a local file system (available on all nodes), or any
    * Hadoop-supported file system URI, and return it as an RDD of Strings.
    */
-  def textFile(path: String, minSplits: Int = defaultMinSplits): RDD[String] = {
+  def textFile(path: String, minPartitions: Int = defaultMinPartitions): RDD[String] = {
     hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text],
-      minSplits).map(pair => pair._2.toString)
+      minPartitions).map(pair => pair._2.toString)
   }

   /**
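
For reference, a minimal usage sketch of the renamed textFile parameter; the SparkContext setup, path, and partition count below are illustrative and not part of this change:

    import org.apache.spark.{SparkConf, SparkContext}

    // Hypothetical local setup; any existing SparkContext works the same way.
    val sc = new SparkContext(new SparkConf().setAppName("textFile-example").setMaster("local[2]"))
    // minPartitions is only a lower bound on the number of input partitions.
    val lines = sc.textFile("hdfs://namenode:8020/data/logs/*.log", minPartitions = 8)
    println(lines.count())
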
@@ -457,9 +457,10 @@ class SparkContext(config: SparkConf) extends Logging {
    *
    * @note Small files are preferred, large file is also allowable, but may cause bad performance.
    *
-   * @param minSplits A suggestion value of the minimal splitting number for input data.
+   * @param minPartitions A suggestion value of the minimal splitting number for input data.
    */
-  def wholeTextFiles(path: String, minSplits: Int = defaultMinSplits): RDD[(String, String)] = {
+  def wholeTextFiles(path: String, minPartitions: Int = defaultMinPartitions):
+  RDD[(String, String)] = {
     val job = new NewHadoopJob(hadoopConfiguration)
     NewFileInputFormat.addInputPath(job, new Path(path))
     val updateConf = job.getConfiguration
@@ -469,7 +470,7 @@ class SparkContext(config: SparkConf) extends Logging {
       classOf[String],
       classOf[String],
       updateConf,
-      minSplits)
+      minPartitions)
   }

   /**
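
A hedged sketch of the renamed wholeTextFiles parameter, reusing the `sc` from the sketch above; the directory path is hypothetical:

    // Each element is a (fileName, fileContent) pair, one per small file.
    val files = sc.wholeTextFiles("hdfs://namenode:8020/data/small-xml", minPartitions = 4)
    val sizes = files.map { case (fileName, content) => (fileName, content.length) }
    sizes.collect().foreach(println)
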
@@ -481,7 +482,7 @@ class SparkContext(config: SparkConf) extends Logging {
    * @param inputFormatClass Class of the InputFormat
    * @param keyClass Class of the keys
    * @param valueClass Class of the values
-   * @param minSplits Minimum number of Hadoop Splits to generate.
+   * @param minPartitions Minimum number of Hadoop Splits to generate.
    *
    * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
    * record, directly caching the returned RDD will create many references to the same object.
@@ -493,11 +494,11 @@ class SparkContext(config: SparkConf) extends Logging {
       inputFormatClass: Class[_ <: InputFormat[K, V]],
       keyClass: Class[K],
       valueClass: Class[V],
-      minSplits: Int = defaultMinSplits
+      minPartitions: Int = defaultMinPartitions
       ): RDD[(K, V)] = {
     // Add necessary security credentials to the JobConf before broadcasting it.
     SparkHadoopUtil.get.addCredentials(conf)
-    new HadoopRDD(this, conf, inputFormatClass, keyClass, valueClass, minSplits)
+    new HadoopRDD(this, conf, inputFormatClass, keyClass, valueClass, minPartitions)
   }

   /** Get an RDD for a Hadoop file with an arbitrary InputFormat
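
A sketch of calling hadoopRDD with an explicit JobConf after the rename; the input path is illustrative and `sc` is assumed from the earlier sketch:

    import org.apache.hadoop.io.{LongWritable, Text}
    import org.apache.hadoop.mapred.{FileInputFormat, JobConf, TextInputFormat}

    val jobConf = new JobConf()
    FileInputFormat.setInputPaths(jobConf, "hdfs://namenode:8020/data/events")
    val records = sc.hadoopRDD(jobConf, classOf[TextInputFormat], classOf[LongWritable],
      classOf[Text], minPartitions = 16)
    // Copy out of the reused Writable before caching, per the note in the scaladoc.
    val values = records.map(pair => pair._2.toString)
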
@@ -512,7 +513,7 @@ class SparkContext(config: SparkConf) extends Logging {
       inputFormatClass: Class[_ <: InputFormat[K, V]],
       keyClass: Class[K],
       valueClass: Class[V],
-      minSplits: Int = defaultMinSplits
+      minPartitions: Int = defaultMinPartitions
       ): RDD[(K, V)] = {
     // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it.
     val confBroadcast = broadcast(new SerializableWritable(hadoopConfiguration))
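
And the equivalent call through the fully explicit hadoopFile overload shown in this hunk (path again illustrative):

    import org.apache.hadoop.io.{LongWritable, Text}
    import org.apache.hadoop.mapred.TextInputFormat

    val raw = sc.hadoopFile("hdfs://namenode:8020/data/raw", classOf[TextInputFormat],
      classOf[LongWritable], classOf[Text], minPartitions = 8)
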
@@ -524,15 +525,15 @@ class SparkContext(config: SparkConf) extends Logging {
       inputFormatClass,
       keyClass,
       valueClass,
-      minSplits)
+      minPartitions)
   }

   /**
    * Smarter version of hadoopFile() that uses class tags to figure out the classes of keys,
    * values and the InputFormat so that users don't need to pass them directly. Instead, callers
    * can just write, for example,
    * {{{
-   * val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path, minSplits)
+   * val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path, minPartitions)
    * }}}
    *
    * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
@@ -541,13 +542,13 @@ class SparkContext(config: SparkConf) extends Logging {
    * a `map` function.
    */
   def hadoopFile[K, V, F <: InputFormat[K, V]]
-      (path: String, minSplits: Int)
+      (path: String, minPartitions: Int)
       (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = {
     hadoopFile(path,
         fm.runtimeClass.asInstanceOf[Class[F]],
         km.runtimeClass.asInstanceOf[Class[K]],
         vm.runtimeClass.asInstanceOf[Class[V]],
-        minSplits)
+        minPartitions)
   }

   /**
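
A sketch of the class-tag variant from this hunk, which infers the Hadoop classes from the type parameters; the path is hypothetical:

    import org.apache.hadoop.io.{LongWritable, Text}
    import org.apache.hadoop.mapred.TextInputFormat

    val file = sc.hadoopFile[LongWritable, Text, TextInputFormat](
      "hdfs://namenode:8020/data/raw", minPartitions = 8)
    // Convert the reused Text Writable to an immutable String before caching.
    val cached = file.map { case (_, text) => text.toString }.cache()
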
@@ -565,7 +566,7 @@ class SparkContext(config: SparkConf) extends Logging {
    */
   def hadoopFile[K, V, F <: InputFormat[K, V]](path: String)
       (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] =
-    hadoopFile[K, V, F](path, defaultMinSplits)
+    hadoopFile[K, V, F](path, defaultMinPartitions)

   /** Get an RDD for a Hadoop file with an arbitrary new API InputFormat. */
   def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]]
@@ -626,10 +627,10 @@ class SparkContext(config: SparkConf) extends Logging {
   def sequenceFile[K, V](path: String,
       keyClass: Class[K],
       valueClass: Class[V],
-      minSplits: Int
+      minPartitions: Int
       ): RDD[(K, V)] = {
     val inputFormatClass = classOf[SequenceFileInputFormat[K, V]]
-    hadoopFile(path, inputFormatClass, keyClass, valueClass, minSplits)
+    hadoopFile(path, inputFormatClass, keyClass, valueClass, minPartitions)
   }

   /** Get an RDD for a Hadoop SequenceFile with given key and value types.
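
A sketch of the explicit-class sequenceFile overload; the file path and Writable types are assumptions for illustration:

    import org.apache.hadoop.io.{IntWritable, Text}

    val counts = sc.sequenceFile("hdfs://namenode:8020/data/counts.seq",
      classOf[Text], classOf[IntWritable], minPartitions = 6)
    counts.map { case (k, v) => (k.toString, v.get) }.collect()
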
@@ -641,7 +642,7 @@ class SparkContext(config: SparkConf) extends Logging {
     * */
   def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V]
       ): RDD[(K, V)] =
-    sequenceFile(path, keyClass, valueClass, defaultMinSplits)
+    sequenceFile(path, keyClass, valueClass, defaultMinPartitions)

   /**
    * Version of sequenceFile() for types implicitly convertible to Writables through a
@@ -665,7 +666,7 @@ class SparkContext(config: SparkConf) extends Logging {
    * a `map` function.
    */
   def sequenceFile[K, V]
-      (path: String, minSplits: Int = defaultMinSplits)
+      (path: String, minPartitions: Int = defaultMinPartitions)
       (implicit km: ClassTag[K], vm: ClassTag[V],
           kcf: () => WritableConverter[K], vcf: () => WritableConverter[V])
       : RDD[(K, V)] = {
@@ -674,7 +675,7 @@ class SparkContext(config: SparkConf) extends Logging {
     val format = classOf[SequenceFileInputFormat[Writable, Writable]]
     val writables = hadoopFile(path, format,
         kc.writableClass(km).asInstanceOf[Class[Writable]],
-        vc.writableClass(vm).asInstanceOf[Class[Writable]], minSplits)
+        vc.writableClass(vm).asInstanceOf[Class[Writable]], minPartitions)
     writables.map { case (k, v) => (kc.convert(k), vc.convert(v)) }
   }

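A sketch of the WritableConverter-based variant, assuming the implicit converters from the SparkContext companion object are in scope (as in Spark 1.0) and the same hypothetical path as above:

    import org.apache.spark.SparkContext._  // implicit WritableConverters for String, Int, etc.

    val typed = sc.sequenceFile[String, Int]("hdfs://namenode:8020/data/counts.seq", minPartitions = 6)
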
@@ -688,9 +689,9 @@ class SparkContext(config: SparkConf) extends Logging {
    */
   def objectFile[T: ClassTag](
       path: String,
-      minSplits: Int = defaultMinSplits
+      minPartitions: Int = defaultMinPartitions
       ): RDD[T] = {
-    sequenceFile(path, classOf[NullWritable], classOf[BytesWritable], minSplits)
+    sequenceFile(path, classOf[NullWritable], classOf[BytesWritable], minPartitions)
       .flatMap(x => Utils.deserialize[Array[T]](x._2.getBytes))
   }

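A round-trip sketch for objectFile; the output directory is illustrative:

    // saveAsObjectFile writes a SequenceFile of serialized Array[T] batches,
    // which objectFile then deserializes back into an RDD[Int].
    sc.parallelize(1 to 1000).saveAsObjectFile("hdfs://namenode:8020/tmp/ints")
    val restored = sc.objectFile[Int]("hdfs://namenode:8020/tmp/ints", minPartitions = 4)
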
@@ -1183,8 +1184,12 @@ class SparkContext(config: SparkConf) extends Logging {
   def defaultParallelism: Int = taskScheduler.defaultParallelism

   /** Default min number of partitions for Hadoop RDDs when not given by user */
+  @deprecated("use defaultMinPartitions", "1.0.0")
   def defaultMinSplits: Int = math.min(defaultParallelism, 2)

+  /** Default min number of partitions for Hadoop RDDs when not given by user */
+  def defaultMinPartitions: Int = math.min(defaultParallelism, 2)
+
   private val nextShuffleId = new AtomicInteger(0)

   private[spark] def newShuffleId(): Int = nextShuffleId.getAndIncrement()
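
The migration path for callers of the default, sketched below: the old name keeps compiling but emits a deprecation warning, and both return the same value:

    val oldDefault = sc.defaultMinSplits      // compiles with a deprecation warning since 1.0.0
    val newDefault = sc.defaultMinPartitions  // preferred replacement
    assert(oldDefault == newDefault)          // both are math.min(defaultParallelism, 2)
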
@@ -1268,7 +1273,7 @@ object SparkContext extends Logging {
       rdd: RDD[(K, V)]) =
     new SequenceFileRDDFunctions(rdd)

-  implicit def rddToOrderedRDDFunctions[K <% Ordered[K]: ClassTag, V: ClassTag](
+  implicit def rddToOrderedRDDFunctions[K: Ordering: ClassTag, V: ClassTag](
       rdd: RDD[(K, V)]) =
     new OrderedRDDFunctions[K, V, (K, V)](rdd)

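The last hunk swaps the `K <% Ordered[K]` view bound for a `K: Ordering` context bound, so a key type only needs an implicit Ordering in scope instead of extending Ordered. A sketch with a hypothetical key type:

    import org.apache.spark.SparkContext._  // brings rddToOrderedRDDFunctions into scope

    // Hypothetical key type that does not extend Ordered[Version].
    case class Version(major: Int, minor: Int)
    implicit val versionOrdering: Ordering[Version] = Ordering.by((v: Version) => (v.major, v.minor))

    val releases = sc.parallelize(Seq(Version(1, 2) -> "one-two", Version(0, 9) -> "zero-nine"))
    releases.sortByKey().collect()  // resolved via the implicit Ordering[Version]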