
Commit 80b36f3

fix compilation
1 parent 53cfb06 commit 80b36f3

3 files changed (+40, -31 lines)


sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala

Lines changed: 1 addition & 1 deletion
@@ -189,13 +189,13 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
     }
 
     val allPaths = (CaseInsensitiveMap(extraOptions.toMap).get("path") ++ paths).toSeq
-    option("path", allPaths.mkString(","))
     val cls = DataSource.lookupDataSource(source, sparkSession.sessionState.conf, allPaths)
     if (classOf[DataSourceV2].isAssignableFrom(cls)) {
       val ds = cls.newInstance().asInstanceOf[DataSourceV2]
       if (ds.isInstanceOf[ReadSupport] || ds.isInstanceOf[ReadSupportWithSchema]) {
         val sessionOptions = DataSourceV2Utils.extractSessionConfigs(
           ds = ds, conf = sparkSession.sessionState.conf)
+        option("path", allPaths.mkString(","))
         Dataset.ofRows(sparkSession, DataSourceV2Relation.create(
           ds, extraOptions.toMap ++ sessionOptions,
           userSpecifiedSchema = userSpecifiedSchema))
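
In short, the "path" option is now only folded back into the reader options on the DataSourceV2 read-support branch, after the session configs are extracted. A minimal, self-contained sketch (plain Scala, not Spark code; the option and path values are made up) of how the comma-joined path entry ends up in the merged reader options:

object PathOptionSketch {
  def main(args: Array[String]): Unit = {
    // Stand-ins for .option(...) settings and the paths passed to .load(...).
    val extraOptions = Map("mode" -> "FAILFAST")
    val paths = Seq("/data/a", "/data/b")

    // Mirrors `(CaseInsensitiveMap(extraOptions.toMap).get("path") ++ paths).toSeq`
    // (minus the case-insensitive wrapper) and `allPaths.mkString(",")` from the diff.
    val allPaths = (extraOptions.get("path") ++ paths).toSeq
    val readerOptions = extraOptions + ("path" -> allPaths.mkString(","))

    println(readerOptions) // Map(mode -> FAILFAST, path -> /data/a,/data/b)
  }
}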

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala

Lines changed: 30 additions & 17 deletions
@@ -23,6 +23,7 @@ import scala.collection.JavaConverters._
 import scala.language.{existentials, implicitConversions}
 import scala.util.{Failure, Success, Try}
 
+import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 
 import org.apache.spark.deploy.SparkHadoopUtil
@@ -538,23 +539,8 @@ case class DataSource(
       checkFilesExist: Boolean): Seq[Path] = {
     val allPaths = caseInsensitiveOptions.get("path") ++ paths
     val hadoopConf = sparkSession.sessionState.newHadoopConf()
-    allPaths.flatMap { path =>
-      val hdfsPath = new Path(path)
-      val fs = hdfsPath.getFileSystem(hadoopConf)
-      val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
-      val globPath = SparkHadoopUtil.get.globPathIfNecessary(fs, qualified)
-
-      if (checkEmptyGlobPath && globPath.isEmpty) {
-        throw new AnalysisException(s"Path does not exist: $qualified")
-      }
-
-      // Sufficient to check head of the globPath seq for non-glob scenario
-      // Don't need to check once again if files exist in streaming mode
-      if (checkFilesExist && !fs.exists(globPath.head)) {
-        throw new AnalysisException(s"Path does not exist: ${globPath.head}")
-      }
-      globPath
-    }.toSeq
+    DataSource.checkAndGlobPathIfNecessary(allPaths.toSeq, hadoopConf,
+      checkEmptyGlobPath, checkFilesExist)
   }
 }
 
@@ -701,6 +687,33 @@ object DataSource extends Logging {
     }
   }
 
+  /**
+   * Checks and returns files in all the paths.
+   */
+  private[sql] def checkAndGlobPathIfNecessary(
+      paths: Seq[String],
+      hadoopConf: Configuration,
+      checkEmptyGlobPath: Boolean,
+      checkFilesExist: Boolean): Seq[Path] = {
+    paths.flatMap { path =>
+      val hdfsPath = new Path(path)
+      val fs = hdfsPath.getFileSystem(hadoopConf)
+      val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
+      val globPath = SparkHadoopUtil.get.globPathIfNecessary(fs, qualified)
+
+      if (checkEmptyGlobPath && globPath.isEmpty) {
+        throw new AnalysisException(s"Path does not exist: $qualified")
+      }
+
+      // Sufficient to check head of the globPath seq for non-glob scenario
+      // Don't need to check once again if files exist in streaming mode
+      if (checkFilesExist && !fs.exists(globPath.head)) {
+        throw new AnalysisException(s"Path does not exist: ${globPath.head}")
+      }
+      globPath
+    }
+  }
+
   /**
    * When creating a data source table, the `path` option has a special meaning: the table location.
    * This method extracts the `path` option and treat it as table location to build a
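
For reference, a standalone analogue of the path-resolution pattern that the new checkAndGlobPathIfNecessary helper centralizes. This is a hedged sketch, not the Spark helper: it uses FileSystem.globStatus (which also checks existence for non-glob paths, unlike SparkHadoopUtil.globPathIfNecessary) and a plain IllegalArgumentException in place of AnalysisException; only the Hadoop client libraries are assumed on the classpath.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

object GlobCheckSketch {
  // Qualify each input path against its FileSystem, expand globs, and fail
  // early when nothing matches (approximating checkEmptyGlobPath/checkFilesExist).
  def checkAndGlob(paths: Seq[String], conf: Configuration): Seq[Path] = {
    paths.flatMap { p =>
      val hdfsPath = new Path(p)
      val fs = hdfsPath.getFileSystem(conf)
      val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
      // globStatus returns null when a non-glob path is missing and an empty
      // array when a glob matches nothing; treat both as "path does not exist".
      val matches = Option(fs.globStatus(qualified)).map(_.toSeq).getOrElse(Nil)
      if (matches.isEmpty) {
        throw new IllegalArgumentException(s"Path does not exist: $qualified")
      }
      matches.map(_.getPath)
    }
  }

  def main(args: Array[String]): Unit = {
    println(checkAndGlob(Seq("file:///tmp/*.txt"), new Configuration()))
  }
}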

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileSourceReader.scala

Lines changed: 9 additions & 13 deletions
@@ -63,27 +63,23 @@ trait FileSourceReader extends DataSourceReader
   protected val isCaseSensitive = sqlConf.caseSensitiveAnalysis
   protected val ignoreCorruptFiles = sqlConf.ignoreCorruptFiles
   protected val ignoreMissingFiles = sqlConf.ignoreMissingFiles
-  private val rootPathsSpecified = {
+  private lazy val rootPathsSpecified = {
     val filePath = options.get("path")
     if (!filePath.isPresent) {
       throw new AnalysisException("Reading data source requires a" +
         " path (e.g. data backed by a local or distributed file system).")
     }
-    DataSource.checkAndGlobPathIfNecessary(hadoopConf, filePath.get, checkFilesExist = true)
-  }
-  private val fileStatusCache = FileStatusCache.getOrCreate(sparkSession)
-  protected val partitionSchema = {
-    val tempFileIndex = {
-      new InMemoryFileIndex(sparkSession, rootPathsSpecified,
-        options.asMap().asScala.toMap, None, fileStatusCache)
-    }
-    PartitioningUtils.combineInferredAndUserSpecifiedPartitionSchema(
-      tempFileIndex, userSpecifiedSchema, isCaseSensitive)
+    DataSource.checkAndGlobPathIfNecessary(Seq(filePath.get), hadoopConf,
+      checkEmptyGlobPath = false, checkFilesExist = false)
   }
 
-  protected val fileIndex =
+  protected lazy val fileIndex = {
+    val fileStatusCache = FileStatusCache.getOrCreate(sparkSession)
     new InMemoryFileIndex(sparkSession, rootPathsSpecified,
-      options.asMap().asScala.toMap, Some(partitionSchema), fileStatusCache)
+      options.asMap().asScala.toMap, userSpecifiedSchema, fileStatusCache)
+  }
+
+  protected lazy val partitionSchema = fileIndex.partitionSchema
 
   protected lazy val dataSchema = userSpecifiedSchema.orElse {
     inferSchema(fileIndex.allFiles())
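
Aside from the updated call to the four-argument helper, the reader members become lazy vals. A minimal sketch (plain Scala, not Spark code; the trait and field names are invented) of the property this buys in a trait: lazy members are computed on first access, so a member that depends on something the concrete subclass supplies no longer risks seeing an uninitialized value during construction.

object LazyValSketch {
  trait Reader {
    def path: String // supplied by the concrete reader
    // With an eager `val` here, Seq(path) could run during trait initialization,
    // before the subclass has provided `path`; `lazy val` defers it to first use.
    lazy val rootPaths: Seq[String] = Seq(path)
    lazy val fileIndex: String = rootPaths.mkString(",")
  }

  def main(args: Array[String]): Unit = {
    val r = new Reader { val path = "/data/events" }
    println(r.fileIndex) // /data/events
  }
}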
