Skip to content

Commit 3aa52be

Browse files
ueshinmarmbrus
authored andcommitted
[SPARK-2366] [SQL] Add column pruning for the right side of LeftSemi join.
The right side of `LeftSemi` join needs columns only used in join condition. Author: Takuya UESHIN <[email protected]> Closes #1301 from ueshin/issues/SPARK-2366 and squashes the following commits: 7677a39 [Takuya UESHIN] Update comments. 786d3a0 [Takuya UESHIN] Rename method name. e0957b1 [Takuya UESHIN] Add column pruning for the right side of LeftSemi join. (cherry picked from commit 3da8df9) Signed-off-by: Michael Armbrust <[email protected]>
1 parent b77715a commit 3aa52be

File tree

1 file changed

+20
-8
lines changed
  • sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer

1 file changed

+20
-8
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ object Optimizer extends RuleExecutor[LogicalPlan] {
5252
* - Inserting Projections beneath the following operators:
5353
* - Aggregate
5454
* - Project <- Join
55+
* - LeftSemiJoin
5556
* - Collapse adjacent projections, performing alias substitution.
5657
*/
5758
object ColumnPruning extends Rule[LogicalPlan] {
@@ -62,19 +63,22 @@ object ColumnPruning extends Rule[LogicalPlan] {
6263

6364
// Eliminate unneeded attributes from either side of a Join.
6465
case Project(projectList, Join(left, right, joinType, condition)) =>
65-
// Collect the list of off references required either above or to evaluate the condition.
66+
// Collect the list of all references required either above or to evaluate the condition.
6667
val allReferences: Set[Attribute] =
6768
projectList.flatMap(_.references).toSet ++ condition.map(_.references).getOrElse(Set.empty)
6869

6970
/** Applies a projection only when the child is producing unnecessary attributes */
70-
def prunedChild(c: LogicalPlan) =
71-
if ((c.outputSet -- allReferences.filter(c.outputSet.contains)).nonEmpty) {
72-
Project(allReferences.filter(c.outputSet.contains).toSeq, c)
73-
} else {
74-
c
75-
}
71+
def pruneJoinChild(c: LogicalPlan) = prunedChild(c, allReferences)
7672

77-
Project(projectList, Join(prunedChild(left), prunedChild(right), joinType, condition))
73+
Project(projectList, Join(pruneJoinChild(left), pruneJoinChild(right), joinType, condition))
74+
75+
// Eliminate unneeded attributes from right side of a LeftSemiJoin.
76+
case Join(left, right, LeftSemi, condition) =>
77+
// Collect the list of all references required to evaluate the condition.
78+
val allReferences: Set[Attribute] =
79+
condition.map(_.references).getOrElse(Set.empty)
80+
81+
Join(left, prunedChild(right, allReferences), LeftSemi, condition)
7882

7983
// Combine adjacent Projects.
8084
case Project(projectList1, Project(projectList2, child)) =>
@@ -97,6 +101,14 @@ object ColumnPruning extends Rule[LogicalPlan] {
97101
// Eliminate no-op Projects
98102
case Project(projectList, child) if child.output == projectList => child
99103
}
104+
105+
/** Applies a projection only when the child is producing unnecessary attributes */
106+
private def prunedChild(c: LogicalPlan, allReferences: Set[Attribute]) =
107+
if ((c.outputSet -- allReferences.filter(c.outputSet.contains)).nonEmpty) {
108+
Project(allReferences.filter(c.outputSet.contains).toSeq, c)
109+
} else {
110+
c
111+
}
100112
}
101113

102114
/**

0 commit comments

Comments
 (0)