
Commit d4f12b1

add first test case
Squashed fix-up commits: fix compilation of tests; fix tests; revise the test; fix test; add missing file; plus further rounds of revising the test.
1 parent 963ca0a commit d4f12b1

File tree

3 files changed: +31 lines, -2 lines


sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala

Lines changed: 0 additions & 1 deletion
@@ -152,7 +152,6 @@ case class InMemoryRelation(
   private def buildBuffers(): Unit = {
     val output = child.output
 
-    // TODO: need better abstraction for two iterators here
     val batchedRDD = child.execute().mapPartitionsInternal { rowIterator =>
       new CachedBatchIterator(rowIterator, output, batchSize, useCompression, batchStats,
         usePartitionLevelMetadata)
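
The removed TODO concerned the two layered iterators here: a row iterator consumed by a batch-producing iterator. A minimal sketch of that batching pattern, assuming a hypothetical SimpleBatch type and plain Scala collections in place of Spark's CachedBatchIterator internals:

// Sketch only: batch a row iterator into fixed-size chunks, keeping
// per-batch min/max so a scan can prune batches later. SimpleBatch and
// batched are illustrative names, not Spark APIs.
object BatchingSketch {
  case class SimpleBatch(rows: Array[Int], min: Int, max: Int)

  def batched(rows: Iterator[Int], batchSize: Int): Iterator[SimpleBatch] =
    rows.grouped(batchSize).map { chunk =>
      SimpleBatch(chunk.toArray, chunk.min, chunk.max)
    }

  def main(args: Array[String]): Unit = {
    // Three batches: 0..3, 4..7, 8..9, each with its own statistics.
    batched((0 until 10).iterator, batchSize = 4).foreach { b =>
      println(s"rows=${b.rows.length} min=${b.min} max=${b.max}")
    }
  }
}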

sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala

Lines changed: 1 addition & 1 deletion
@@ -223,10 +223,10 @@ case class InMemoryTableScanExec(
             if !partitionFilter.eval(cachedIter.partitionStats) =>
           // scalastyle:off
           println(s"skipped partition $index")
-          // scalastyle:on
           Iterator()
         case _ =>
           doFilterCachedBatches(cachedBatchIterator, schema, partitionFilter)
+          // scalastyle:on
       }
     }
   }
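
The hunk above shows the scan-side decision: when the partition filter, evaluated against partitionStats, proves that no row in a cached partition can match, the scan returns an empty iterator instead of reading any cached batches. A minimal standalone sketch of that skip-or-scan shape, where PartitionStats and the value >= threshold predicate are illustrative stand-ins for Spark's partitionFilter machinery:

// Sketch only: skip a whole partition when its statistics rule out any
// match; otherwise fall through to per-batch filtering. Types here are
// illustrative, not Spark internals.
object PruneSketch {
  case class PartitionStats(min: Int, max: Int)

  def scanPartition(
      batches: Iterator[Array[Int]],
      stats: PartitionStats,
      threshold: Int): Iterator[Array[Int]] = {
    if (stats.max < threshold) {
      Iterator.empty // whole-partition skip, like Iterator() above
    } else {
      batches.map(_.filter(_ >= threshold)) // like doFilterCachedBatches
    }
  }

  def main(args: Array[String]): Unit = {
    val pruned = scanPartition(Iterator(Array(1, 2, 3)), PartitionStats(1, 3), threshold = 10)
    println(pruned.isEmpty) // true: the batches were never touched
  }
}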

sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala

Lines changed: 30 additions & 0 deletions
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.columnar
 import java.nio.charset.StandardCharsets
 import java.sql.{Date, Timestamp}
 
+import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
 import org.apache.spark.sql.{DataFrame, QueryTest, Row}
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, In}
 import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning
@@ -479,4 +480,33 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext {
       }
     }
   }
+
+  test("table cache can prune unnecessary partitions correctly") {
+    // scalastyle:off
+    var bytesReadWithoutPruning = 0L
+    var bytesReadWithPruning = 0L
+    var inMemoryPartitionMetadata = false
+    sparkContext.addSparkListener(new SparkListener() {
+      override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
+        val metrics = taskEnd.taskMetrics
+        if (inMemoryPartitionMetadata) {
+          bytesReadWithPruning += metrics.inputMetrics.bytesRead
+        } else {
+          bytesReadWithoutPruning += metrics.inputMetrics.bytesRead
+        }
+      }
+    })
+    Seq("true", "false").foreach { enabled =>
+      withSQLConf(SQLConf.IN_MEMORY_PARTITION_METADATA.key -> enabled) {
+        inMemoryPartitionMetadata = conf.inMemoryPartitionMetadata
+        val df1 = (0 until 1000000).toDF("value").repartition(4).cache()
+        df1.where("value >= 999999").collect()
+        val resultArr = df1.where("value >= 999999").collect()
+        assert(resultArr.length == 1)
+        assert(resultArr.head.getInt(0) == 999999)
+        df1.unpersist(true)
+      }
+    }
+    assert(bytesReadWithoutPruning > bytesReadWithPruning * 3)
+  }
 }
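
A note on how the new test measures pruning: the registered SparkListener adds each task's inputMetrics.bytesRead to one of two counters, chosen by whether IN_MEMORY_PARTITION_METADATA was enabled for that run. With one million rows repartitioned into four partitions, only the partition that actually contains 999999 can satisfy value >= 999999; the other three should have maximums below the threshold and be skippable, which is why the final assertion expects the unpruned run to read more than three times as many bytes. The first collect() appears to be a warm-up that populates the cache so the second, checked collect() exercises the cached scan path, and df1.unpersist(true) blocks until the cached data is dropped so the two configurations stay independent.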
