@@ -25,6 +25,7 @@ import scala.collection.{Map, mutable}
2525import scala .collection .JavaConversions ._
2626import scala .collection .mutable .ArrayBuffer
2727import scala .reflect .ClassTag
28+ import scala .util .DynamicVariable
2829
2930import com .clearspring .analytics .stream .cardinality .HyperLogLogPlus
3031import org .apache .hadoop .conf .{Configurable , Configuration }
@@ -961,7 +962,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
961962 val outfmt = job.getOutputFormatClass
962963 val jobFormat = outfmt.newInstance
963964
964- if (self.conf.getBoolean(" spark.hadoop.validateOutputSpecs" , true )) {
965+ val validationDisabled = PairRDDFunctions .disableOutputSpecValidation.value
966+ if (! validationDisabled && self.conf.getBoolean(" spark.hadoop.validateOutputSpecs" , true )) {
965967 // FileOutputFormat ignores the filesystem parameter
966968 jobFormat.checkOutputSpecs(job)
967969 }
@@ -1039,7 +1041,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
10391041 logDebug(" Saving as hadoop file of type (" + keyClass.getSimpleName + " , " +
10401042 valueClass.getSimpleName + " )" )
10411043
1042- if (self.conf.getBoolean(" spark.hadoop.validateOutputSpecs" , true )) {
1044+ val validationDisabled = PairRDDFunctions .disableOutputSpecValidation.value
1045+ if (! validationDisabled && self.conf.getBoolean(" spark.hadoop.validateOutputSpecs" , true )) {
10431046 // FileOutputFormat ignores the filesystem parameter
10441047 val ignoredFs = FileSystem .get(hadoopConf)
10451048 hadoopConf.getOutputFormat.checkOutputSpecs(ignoredFs, hadoopConf)
@@ -1118,4 +1121,10 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
11181121
11191122private [spark] object PairRDDFunctions {
11201123 val RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES = 256
1124+ /**
1125+ * Dynamically scoped flag, `false` by default. While it is `true`, save actions skip the output
1126+ * spec check even when `spark.hadoop.validateOutputSpecs` is enabled. Used by Spark Streaming, whose
1127+ * ability to recover from checkpoints may be broken by that validation. See SPARK-4835 for details.
1128+ */
1129+ val disableOutputSpecValidation : DynamicVariable [Boolean ] = new DynamicVariable [Boolean ](false )
11211130}
0 commit comments