Skip to content

Commit 3e776d7

Browse files
gatorsmile authored and dongjoon-hyun committed
[SPARK-25727][SQL] Add outputOrdering to otherCopyArgs in InMemoryRelation
## What changes were proposed in this pull request?

Add `outputOrdering` to `otherCopyArgs` in InMemoryRelation so that this field is copied when we do a tree transformation.

```
val data = Seq(100).toDF("count").cache()
data.queryExecution.optimizedPlan.toJSON
```

The above code can generate the following error:

```
assertion failed: InMemoryRelation fields: output, cacheBuilder, statsOfPlanToCache, outputOrdering, values: List(count#178), CachedRDDBuilder(true,10000,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [value#176 AS count#178]
+- LocalTableScan [value#176]
,None), Statistics(sizeInBytes=12.0 B, hints=none)
java.lang.AssertionError: assertion failed: InMemoryRelation fields: output, cacheBuilder, statsOfPlanToCache, outputOrdering, values: List(count#178), CachedRDDBuilder(true,10000,StorageLevel(disk, memory, deserialized, 1 replicas),*(1) Project [value#176 AS count#178]
+- LocalTableScan [value#176]
,None), Statistics(sizeInBytes=12.0 B, hints=none)
	at scala.Predef$.assert(Predef.scala:170)
	at org.apache.spark.sql.catalyst.trees.TreeNode.jsonFields(TreeNode.scala:611)
	at org.apache.spark.sql.catalyst.trees.TreeNode.org$apache$spark$sql$catalyst$trees$TreeNode$$collectJsonValue$1(TreeNode.scala:599)
	at org.apache.spark.sql.catalyst.trees.TreeNode.jsonValue(TreeNode.scala:604)
	at org.apache.spark.sql.catalyst.trees.TreeNode.toJSON(TreeNode.scala:590)
```

## How was this patch tested?

Added a test.

Closes #22715 from gatorsmile/copyArgs1.

Authored-by: gatorsmile <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 6c3f2c6)
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 883ca3f commit 3e776d7

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ case class InMemoryRelation(
206206
outputOrdering).asInstanceOf[this.type]
207207
}
208208

209-
override protected def otherCopyArgs: Seq[AnyRef] = Seq(statsOfPlanToCache)
209+
override protected def otherCopyArgs: Seq[AnyRef] = Seq(statsOfPlanToCache, outputOrdering)
210210

211211
override def simpleString: String =
212212
s"InMemoryRelation [${Utils.truncatedString(output, ", ")}], ${cacheBuilder.storageLevel}"

sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,12 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext {
488488
}
489489
}
490490

491+
test("SPARK-25727 - otherCopyArgs in InMemoryRelation does not include outputOrdering") {
492+
val data = Seq(100).toDF("count").cache()
493+
val json = data.queryExecution.optimizedPlan.toJSON
494+
assert(json.contains("outputOrdering") && json.contains("statsOfPlanToCache"))
495+
}
496+
491497
test("SPARK-22673: InMemoryRelation should utilize existing stats of the plan to be cached") {
492498
// This test case depends on the size of parquet in statistics.
493499
withSQLConf(

0 commit comments

Comments
 (0)