-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-29800][SQL] Rewrite non-correlated EXISTS subquery use ScalaSubquery to optimize perf #26437
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2d762b4
1c577bc
5fa971b
1401349
7b943aa
95e446d
20cda42
8e3ce4f
c290411
866ddc7
3de0ecc
32f85c3
e47a757
4c86605
626e41f
ce76e0c
4a4ca9b
88f804d
7668bd6
a6b8485
34046be
4c6c04d
ac6a4d2
59162c6
89a1721
fb98b54
67b4281
821ed40
e319fee
2c387f2
2aff8eb
2b7b417
88fcdbf
9f084ee
8c6060a
9a9d9d1
173942d
26258b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -52,6 +52,21 @@ object ReplaceExpressions extends Rule[LogicalPlan] { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Rewrite non correlated exists subquery to use ScalarSubquery | ||
| * WHERE EXISTS (SELECT A FROM TABLE B WHERE COL1 > 10) | ||
| * will be rewritten to | ||
| * WHERE (SELECT 1 FROM (SELECT A FROM TABLE B WHERE COL1 > 10) LIMIT 1) IS NOT NULL | ||
| */ | ||
| object RewriteNonCorrelatedExists extends Rule[LogicalPlan] { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shall we add a test for this rule?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
With test case Get error Because of Any good advise for test case, where I add test case can avoid this problem? @cloud-fan @viirya |
||
| override def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { | ||
| case exists: Exists if exists.children.isEmpty => | ||
| IsNotNull( | ||
| ScalarSubquery( | ||
| plan = Limit(Literal(1), Project(Seq(Alias(Literal(1), "col")()), exists.plan)), | ||
| exprId = exists.exprId)) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Computes the current date and time to make sure we return the same result in a single query. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -96,7 +96,8 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { | |
| def apply(plan: LogicalPlan): LogicalPlan = plan transform { | ||
| case Filter(condition, child) => | ||
| val (withSubquery, withoutSubquery) = | ||
| splitConjunctivePredicates(condition).partition(SubqueryExpression.hasInOrExistsSubquery) | ||
| splitConjunctivePredicates(condition) | ||
| .partition(SubqueryExpression.hasInOrCorrelatedExistsSubquery) | ||
|
Comment on lines
+99
to
+100
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unrelated change?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, nvm, I saw it. |
||
|
|
||
| // Construct the pruned filter condition. | ||
| val newFilter: LogicalPlan = withoutSubquery match { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier | |
| import org.apache.spark.sql.catalyst.expressions.SubqueryExpression | ||
| import org.apache.spark.sql.catalyst.plans.logical.{BROADCAST, Join, JoinStrategyHint, SHUFFLE_HASH} | ||
| import org.apache.spark.sql.catalyst.util.DateTimeConstants | ||
| import org.apache.spark.sql.execution.{RDDScanExec, SparkPlan} | ||
| import org.apache.spark.sql.execution.{ExecSubqueryExpression, RDDScanExec, SparkPlan} | ||
| import org.apache.spark.sql.execution.columnar._ | ||
| import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec | ||
| import org.apache.spark.sql.functions._ | ||
|
|
@@ -89,10 +89,19 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi | |
| sum | ||
| } | ||
|
|
||
| private def getNumInMemoryTablesInSubquery(plan: SparkPlan): Int = { | ||
| plan.expressions.flatMap(_.collect { | ||
| case sub: ExecSubqueryExpression => getNumInMemoryTablesRecursively(sub.plan) | ||
| }).sum | ||
| } | ||
|
|
||
| private def getNumInMemoryTablesRecursively(plan: SparkPlan): Int = { | ||
| plan.collect { | ||
| case InMemoryTableScanExec(_, _, relation) => | ||
| getNumInMemoryTablesRecursively(relation.cachedPlan) + 1 | ||
| case inMemoryTable @ InMemoryTableScanExec(_, _, relation) => | ||
| getNumInMemoryTablesRecursively(relation.cachedPlan) + | ||
| getNumInMemoryTablesInSubquery(inMemoryTable) + 1 | ||
| case p => | ||
| getNumInMemoryTablesInSubquery(p) | ||
|
Comment on lines
+100
to
+104
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this change needed for this PR? Looks like not directly related?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| }.sum | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -891,9 +891,9 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | |
|
|
||
| val sqlText = | ||
| """ | ||
| |SELECT * FROM t1 | ||
| |SELECT * FROM t1 a | ||
| |WHERE | ||
| |NOT EXISTS (SELECT * FROM t1) | ||
| |NOT EXISTS (SELECT * FROM t1 b WHERE a.i = b.i) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why need to change the existing test?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| """.stripMargin | ||
| val optimizedPlan = sql(sqlText).queryExecution.optimizedPlan | ||
| val join = optimizedPlan.collectFirst { case j: Join => j }.get | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
non correlated -> uncorrelated