From 2b20b3c2ac5e7312097ba23e4c3b130317d56f26 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 14 Jun 2018 17:51:00 -0700 Subject: [PATCH] revert [SPARK-21743][SQL] top-most limit should not cause memory leak --- .../org/apache/spark/sql/execution/SparkStrategies.scala | 7 +------ .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 5 ----- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index c6565fcf6655..a0a641bc9667 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -70,12 +70,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case Limit(IntegerLiteral(limit), Project(projectList, Sort(order, true, child))) => TakeOrderedAndProjectExec(limit, order, projectList, planLater(child)) :: Nil case Limit(IntegerLiteral(limit), child) => - // With whole stage codegen, Spark releases resources only when all the output data of the - // query plan are consumed. It's possible that `CollectLimitExec` only consumes a little - // data from child plan and finishes the query without releasing resources. Here we wrap - // the child plan with `LocalLimitExec`, to stop the processing of whole stage codegen and - // trigger the resource releasing work, after we consume `limit` rows. - CollectLimitExec(limit, LocalLimitExec(limit, planLater(child))) :: Nil + CollectLimitExec(limit, planLater(child)) :: Nil case other => planLater(other) :: Nil } case Limit(IntegerLiteral(limit), Sort(order, true, child)) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index ebebf62ca4cf..bc57efeef69c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2720,11 +2720,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("SPARK-21743: top-most limit should not cause memory leak") { - // In unit test, Spark will fail the query if memory leak detected. - spark.range(100).groupBy("id").count().limit(1).collect() - } - test("SPARK-21652: rule confliction of InferFiltersFromConstraints and ConstantPropagation") { withTempView("t1", "t2") { Seq((1, 1)).toDF("col1", "col2").createOrReplaceTempView("t1")