Skip to content

Commit c52bcf7

Browse files
rxincmonkey
authored andcommitted
[SPARK-16475][SQL] broadcast hint for SQL queries - follow up
## What changes were proposed in this pull request? A small update to apache#16925 1. Rename SubstituteHints -> ResolveHints to be more consistent with rest of the rules. 2. Added more documentation in the rule and be more defensive / future proof to skip views as well as CTEs. ## How was this patch tested? This pull request contains no real logic change and all behavior should be covered by existing tests. Author: Reynold Xin <[email protected]> Closes apache#16939 from rxin/SPARK-16475.
1 parent 69ac9f8 commit c52bcf7

File tree

3 files changed

+18
-20
lines changed

3 files changed

+18
-20
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ class Analyzer(
115115

116116
lazy val batches: Seq[Batch] = Seq(
117117
Batch("Hints", fixedPoint,
118-
new SubstituteHints.SubstituteBroadcastHints(conf),
119-
SubstituteHints.RemoveAllHints),
118+
new ResolveHints.ResolveBroadcastHints(conf),
119+
ResolveHints.RemoveAllHints),
120120
Batch("Substitution", fixedPoint,
121121
CTESubstitution,
122122
WindowsSubstitution,
Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,9 @@ import org.apache.spark.sql.catalyst.trees.CurrentOrigin
2929
* Note that this is separatedly into two rules because in the future we might introduce new hint
3030
* rules that have different ordering requirements from broadcast.
3131
*/
32-
object SubstituteHints {
32+
object ResolveHints {
3333

3434
/**
35-
* Substitute Hints.
36-
*
37-
* The only hint currently available is broadcast join hint.
38-
*
3935
* For broadcast hint, we accept "BROADCAST", "BROADCASTJOIN", and "MAPJOIN", and a sequence of
4036
* relation aliases can be specified in the hint. A broadcast hint plan node will be inserted
4137
* on top of any relation (that is not aliased differently), subquery, or common table expression
@@ -47,7 +43,7 @@ object SubstituteHints {
4743
*
4844
* This rule must happen before common table expressions.
4945
*/
50-
class SubstituteBroadcastHints(conf: CatalystConf) extends Rule[LogicalPlan] {
46+
class ResolveBroadcastHints(conf: CatalystConf) extends Rule[LogicalPlan] {
5147
private val BROADCAST_HINT_NAMES = Set("BROADCAST", "BROADCASTJOIN", "MAPJOIN")
5248

5349
def resolver: Resolver = conf.resolver
@@ -61,18 +57,21 @@ object SubstituteHints {
6157
case r: UnresolvedRelation =>
6258
val alias = r.alias.getOrElse(r.tableIdentifier.table)
6359
if (toBroadcast.exists(resolver(_, alias))) BroadcastHint(plan) else plan
64-
case r: SubqueryAlias =>
65-
if (toBroadcast.exists(resolver(_, r.alias))) {
66-
BroadcastHint(plan)
67-
} else {
68-
// Don't recurse down subquery aliases if there are no match.
69-
recurse = false
70-
plan
71-
}
72-
case _: BroadcastHint =>
73-
// Found a broadcast hint; don't change the plan but also don't recurse down.
60+
61+
case r: SubqueryAlias if toBroadcast.exists(resolver(_, r.alias)) =>
62+
BroadcastHint(plan)
63+
64+
case _: BroadcastHint | _: View | _: With | _: SubqueryAlias =>
65+
// Don't traverse down these nodes.
66+
// For an existing broadcast hint, there is no point going down (if we do, we either
67+
// won't change the structure, or will introduce another broadcast hint that is useless.
68+
// The rest (view, with, subquery) indicates different scopes that we shouldn't traverse
69+
// down. Note that technically when this rule is executed, we haven't completed view
70+
// resolution yet and as a result the view part should be deadcode. I'm leaving it here
71+
// to be more future proof in case we change the view we do view resolution.
7472
recurse = false
7573
plan
74+
7675
case _ =>
7776
plan
7877
}
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,13 @@
1717

1818
package org.apache.spark.sql.catalyst.analysis
1919

20-
import org.apache.spark.sql.catalyst.TableIdentifier
2120
import org.apache.spark.sql.catalyst.dsl.expressions._
2221
import org.apache.spark.sql.catalyst.dsl.plans._
2322
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
2423
import org.apache.spark.sql.catalyst.plans.Inner
2524
import org.apache.spark.sql.catalyst.plans.logical._
2625

27-
class SubstituteHintsSuite extends AnalysisTest {
26+
class ResolveHintsSuite extends AnalysisTest {
2827
import org.apache.spark.sql.catalyst.analysis.TestRelations._
2928

3029
test("invalid hints should be ignored") {

0 commit comments

Comments
 (0)