Commit 1b2e6f6
Wesley Tang authored and srowen committed
[SPARK-16664][SQL] Fix persist call on DataFrames with more than 200 columns
## What changes were proposed in this pull request?

Cherry-pick from d1d5069 and fix the test case.

## How was this patch tested?

Test updated.

Author: Wesley Tang <[email protected]>

Closes #14404 from breakdawn/branch-1.6.
1 parent 03913af commit 1b2e6f6
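For context, the user-visible symptom: persisting a DataFrame wider than the code generator's statement-grouping threshold silently wiped the cached data. A minimal reproduction sketch, adapted from the test added below (assumes a branch-1.6 spark-shell, where `sc` and `sqlContext` are the standard bindings):

    import org.apache.spark.sql.Row
    import org.apache.spark.sql.types.{LongType, StructField, StructType}

    val size = 201L  // 202 columns in total, past the 200-statement grouping threshold
    val rdd = sc.makeRDD(Seq(Row.fromSeq(0L to size)))
    val schema = StructType((0L to size).map(i => StructField("name" + i, LongType, true)))
    val df = sqlContext.createDataFrame(rdd, schema)
    df.persist()
    df.take(1)  // before this fix: the cached row came back wiped instead of holding 0..201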

3 files changed: +12 -3 lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala

Lines changed: 2 additions & 2 deletions
@@ -129,7 +129,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
     val groupedAccessorsItr = initializeAccessors.grouped(numberOfStatementsThreshold)
     val groupedExtractorsItr = extractors.grouped(numberOfStatementsThreshold)
     var groupedAccessorsLength = 0
-    groupedAccessorsItr.zipWithIndex.map { case (body, i) =>
+    groupedAccessorsItr.zipWithIndex.foreach { case (body, i) =>
       groupedAccessorsLength += 1
       val funcName = s"accessors$i"
       val funcCode = s"""
@@ -139,7 +139,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
         """.stripMargin
       ctx.addNewFunction(funcName, funcCode)
     }
-    groupedExtractorsItr.zipWithIndex.map { case (body, i) =>
+    groupedExtractorsItr.zipWithIndex.foreach { case (body, i) =>
       val funcName = s"extractors$i"
       val funcCode = s"""
         |private void $funcName() {
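The one-word change above is the whole fix: `grouped` returns an `Iterator`, and `map` on an `Iterator` is lazy, so the bodies that call `ctx.addNewFunction` (and, for the accessors, increment `groupedAccessorsLength`) never ran once the mapped result was discarded; `foreach` evaluates eagerly. A self-contained sketch of the pitfall in plain Scala, outside Spark's code-gen context:

    object LazyIteratorDemo extends App {
      var registered = 0

      // map on an Iterator is lazy: the body only runs when the result is
      // consumed. The result is discarded here, so nothing runs at all --
      // the same shape as the `-` lines above.
      Seq("a", "b", "c").iterator.map { _ => registered += 1 }
      println(registered)  // 0

      // foreach is eager, which is what the `+` lines switch to.
      Seq("a", "b", "c").iterator.foreach { _ => registered += 1 }
      println(registered)  // 3
    }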

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 8 additions & 0 deletions
@@ -1186,4 +1186,12 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
       Seq(1 -> "a").toDF("i", "j").filter($"i".cast(StringType) === "1"),
       Row(1, "a"))
   }
+
+  test("SPARK-16664: persist with more than 200 columns") {
+    val size = 201L
+    val rdd = sparkContext.makeRDD(Seq(Row.fromSeq(0L to size)))
+    val schema = (0L to size).map(i => StructField("name" + i, LongType, true))
+    val df = sqlContext.createDataFrame(rdd, StructType(schema))
+    assert(df.persist.take(1).apply(0).toSeq(100).asInstanceOf[Long] == 100)
+  }
 }
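One detail worth noting in this test: `0L to size` is inclusive, so with `size = 201L` the row actually has 202 fields, comfortably past the 200-statement threshold that triggers the buggy grouped code path. The assertion then reads field 100, which was written as `100L`. A quick sanity check of that arithmetic in plain Scala, no Spark required:

    val size = 201L
    val values = (0L to size).toIndexedSeq  // NumericRange is inclusive: 202 elements
    assert(values.length == 202)
    assert(values(100) == 100L)  // field "name100" holds 100L, as the test asserts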

sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala

Lines changed: 2 additions & 1 deletion
@@ -225,7 +225,8 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext {
     val columnTypes1 = List.fill(length1)(IntegerType)
     val columnarIterator1 = GenerateColumnAccessor.generate(columnTypes1)

-    val length2 = 10000
+    // SPARK-16664: the limit of janino is 8117
+    val length2 = 8117
     val columnTypes2 = List.fill(length2)(IntegerType)
     val columnarIterator2 = GenerateColumnAccessor.generate(columnTypes2)
   }
