
Commit b561d09

committed
address comments
1 parent 4eeae6d commit b561d09

File tree

2 files changed: +15 -15 lines changed


sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala

Lines changed: 2 additions & 4 deletions
@@ -83,11 +83,10 @@ case class OrcPartitionReaderFactory(
 
   override def buildReader(file: PartitionedFile): PartitionReader[InternalRow] = {
     val conf = broadcastedConf.value.value
-    val filePath = new Path(new URI(file.filePath))
-
     if (aggregation.nonEmpty) {
       return buildReaderWithAggregates(file, conf)
     }
+    val filePath = new Path(new URI(file.filePath))
 
     val resultedColPruneInfo =
       Utils.tryWithResource(createORCReader(filePath, conf)) { reader =>
@@ -127,11 +126,10 @@ case class OrcPartitionReaderFactory(
 
   override def buildColumnarReader(file: PartitionedFile): PartitionReader[ColumnarBatch] = {
     val conf = broadcastedConf.value.value
-    val filePath = new Path(new URI(file.filePath))
-
     if (aggregation.nonEmpty) {
       return buildColumnarReaderWithAggregates(file, conf)
     }
+    val filePath = new Path(new URI(file.filePath))
 
     val resultedColPruneInfo =
       Utils.tryWithResource(createORCReader(filePath, conf)) { reader =>
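
Both hunks apply the same micro-refactor: the `filePath` construction (`new Path(new URI(file.filePath))`) is moved below the aggregate early return, so the URI parsing is skipped entirely when `aggregation.nonEmpty` short-circuits into the aggregate reader, which never uses `filePath`. A minimal, self-contained sketch of the pattern, with hypothetical names rather than the actual Spark classes:

```scala
// Sketch only: FileSlice and buildReader are stand-ins, not Spark APIs.
object DeferredWorkSketch {
  final case class FileSlice(path: String, hasAggregate: Boolean)

  def buildReader(file: FileSlice): String = {
    // Handle the early-return branch first: the aggregate path never uses
    // the parsed file path, so parsing it up front would be wasted work.
    if (file.hasAggregate) {
      return "aggregate-reader"
    }
    // Only now pay for the parsing, mirroring how the commit moves
    // `new Path(new URI(file.filePath))` below the early return.
    val parsedPath = new java.net.URI(file.path).getPath
    s"row-reader for $parsedPath"
  }

  def main(args: Array[String]): Unit = {
    println(buildReader(FileSlice("file:///tmp/data.orc", hasAggregate = true)))
    println(buildReader(FileSlice("file:///tmp/data.orc", hasAggregate = false)))
  }
}
```

The non-aggregate path is unchanged; the only effect is avoiding work on the branch that never needs it.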

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceAggregatePushDownSuite.scala

Lines changed: 13 additions & 11 deletions
@@ -263,24 +263,25 @@ trait FileSourceAggregatePushDownSuite
 
   test("aggregate with partition group by can be pushed down") {
     withTempPath { dir =>
-      spark.range(10).selectExpr("id", "id % 3 as p")
+      spark.range(10).selectExpr("id", "id % 3 as P")
         .write.partitionBy("p").format(format).save(dir.getCanonicalPath)
       withTempView("tmp") {
         spark.read.format(format).load(dir.getCanonicalPath).createOrReplaceTempView("tmp");
+        val query = "SELECT count(*), count(id), p, max(id), p, count(p), max(id)," +
+          " min(id), p FROM tmp group by p"
+        val expected = sql(query).collect
         Seq("false", "true").foreach { enableVectorizedReader =>
           withSQLConf(aggPushDownEnabledKey -> "true",
             vectorizedReaderEnabledKey -> enableVectorizedReader) {
-            val df = sql("SELECT count(*), count(id), p, max(id), p, count(p), max(id)," +
-              " min(id), p FROM tmp group by p")
+            val df = sql(query)
             df.queryExecution.optimizedPlan.collect {
               case _: DataSourceV2ScanRelation =>
                 val expected_plan_fragment =
                   "PushedAggregation: [COUNT(*), COUNT(id), MAX(id), COUNT(p), MIN(id)], " +
                     "PushedFilters: [], PushedGroupBy: [p]"
                 checkKeywordsExistsInExplain(df, expected_plan_fragment)
             }
-            checkAnswer(df, Seq(Row(3, 3, 1, 7, 1, 3, 7, 1, 1), Row(3, 3, 2, 8, 2, 3, 8, 2, 2),
-              Row(4, 4, 0, 9, 0, 4, 9, 0, 0)))
+            checkAnswer(df, expected)
           }
         }
       }
@@ -297,23 +298,24 @@ trait FileSourceAggregatePushDownSuite
         .partitionBy("p2", "p1", "p4", "p3")
         .format(format)
         .save(dir.getCanonicalPath)
+
       withTempView("tmp") {
-        spark.read.format(format).load(dir.getCanonicalPath).createOrReplaceTempView("tmp");
+        spark.read.format(format).load(dir.getCanonicalPath).createOrReplaceTempView("tmp")
+        val query = "SELECT count(*), count(value), max(value), min(value)," +
+          " p4, p2, p3, p1 FROM tmp GROUP BY p1, p2, p3, p4"
+        val expected = sql(query).collect
         Seq("false", "true").foreach { enableVectorizedReader =>
           withSQLConf(aggPushDownEnabledKey -> "true",
             vectorizedReaderEnabledKey -> enableVectorizedReader) {
-            val df = sql("SELECT count(*), count(value), max(value), min(value)," +
-              " p4, p2, p3, p1 FROM tmp GROUP BY p1, p2, p3, p4")
+            val df = sql(query)
             df.queryExecution.optimizedPlan.collect {
               case _: DataSourceV2ScanRelation =>
                 val expected_plan_fragment =
                   "PushedAggregation: [COUNT(*), COUNT(value), MAX(value), MIN(value)]," +
                     " PushedFilters: [], PushedGroupBy: [p1, p2, p3, p4]"
                 checkKeywordsExistsInExplain(df, expected_plan_fragment)
             }
-            checkAnswer(df, Seq(Row(1, 1, 5, 5, 8, 1, 5, 2), Row(1, 1, 4, 4, 9, 1, 4, 2),
-              Row(2, 2, 6, 3, 8, 1, 4, 2), Row(4, 4, 10, 1, 6, 2, 5, 1),
-              Row(3, 3, 6, -4, 10, 2, 9, 2)))
+            checkAnswer(df, expected)
           }
         }
       }
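
Both test hunks follow one pattern: the SQL text is defined once, and `expected` is collected before `aggPushDownEnabledKey` is turned on, so each reader configuration is verified against Spark's default (non-pushed-down) execution instead of hand-maintained `Row` literals. A minimal sketch of that baseline-comparison structure, using stand-in helpers rather than the real test harness:

```scala
// Sketch only: `run` is a stand-in for executing SQL under a configuration;
// imagine the flag toggles between the vectorized and row-based code paths,
// which must agree on the answer.
object BaselineTestSketch {
  def run(query: String, vectorizedReader: Boolean): Seq[(Long, Long)] =
    Seq((3L, 7L), (3L, 8L), (4L, 9L)) // same result regardless of the flag

  def main(args: Array[String]): Unit = {
    val query = "SELECT count(*), max(id) FROM tmp GROUP BY p"
    // Baseline collected once, under default settings, mirroring
    // `val expected = sql(query).collect` in the diff above.
    val expected = run(query, vectorizedReader = false)
    Seq(false, true).foreach { vectorizedReader =>
      val actual = run(query, vectorizedReader)
      assert(actual == expected, s"mismatch with vectorizedReader=$vectorizedReader")
    }
    println("all reader variants match the baseline")
  }
}
```

Comparing against a computed baseline keeps the assertion valid even if the test data changes, and it directly checks the property that matters: pushdown must not change query results.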
