From b38a21ef6146784e4b93ef4ce8c899f1eee14572 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Mon, 16 Nov 2015 18:30:26 -0800
Subject: [PATCH 1/5] SPARK-11633

---
 .../spark/sql/catalyst/analysis/Analyzer.scala   |  3 ++-
 .../spark/sql/hive/execution/SQLQuerySuite.scala | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 2f4670b55bdba..5a5b71e52dd79 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -425,7 +425,8 @@ class Analyzer(
            */
           j
         case Some((oldRelation, newRelation)) =>
-          val attributeRewrites = AttributeMap(oldRelation.output.zip(newRelation.output))
+          val attributeRewrites =
+            AttributeMap(oldRelation.output.zip(newRelation.output).filter(x => x._1 != x._2))
           val newRight = right transformUp {
            case r if r == oldRelation => newRelation
          } transformUp {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 3427152b2da02..5e00546a74c00 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -51,6 +51,8 @@ case class Order(
     state: String,
     month: Int)
 
+case class Individual(F1: Integer, F2: Integer)
+
 case class WindowData(
     month: Int,
     area: String,
@@ -1479,4 +1481,18 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
         |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test
       """.stripMargin), Row("value1", "12", 3.14, "hello"))
   }
+
+  test("SPARK-11633: HiveContext throws TreeNode Exception: Failed to Copy Node") {
+    val rdd1 = sparkContext.parallelize(Seq(Individual(1, 3), Individual(2, 1)))
+    val df = hiveContext.createDataFrame(rdd1)
+    df.registerTempTable("foo")
+    val df2 = sql("select f1, F2 as F2 from foo")
+    df2.registerTempTable("foo2")
+    df2.registerTempTable("foo3")
+
+    checkAnswer(sql(
+      """
+        SELECT a.F1 FROM foo2 a INNER JOIN foo3 b ON a.F2=b.F2
+      """.stripMargin), Row(2) :: Row(1) :: Nil)
+  }
 }

From 0546772f151f83d6d3cf4d000cbe341f52545007 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Fri, 20 Nov 2015 10:56:45 -0800
Subject: [PATCH 2/5] converge

---
 .../spark/sql/catalyst/analysis/Analyzer.scala   |  3 +--
 .../spark/sql/hive/execution/SQLQuerySuite.scala | 15 ---------------
 2 files changed, 1 insertion(+), 17 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 7c9512fbd00aa..47962ebe6ef82 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -417,8 +417,7 @@ class Analyzer(
            */
           j
         case Some((oldRelation, newRelation)) =>
-          val attributeRewrites =
-            AttributeMap(oldRelation.output.zip(newRelation.output).filter(x => x._1 != x._2))
+          val attributeRewrites = AttributeMap(oldRelation.output.zip(newRelation.output))
           val newRight = right transformUp {
            case r if r == oldRelation => newRelation
          } transformUp {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 5e00546a74c00..61d9dcd37572b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -51,8 +51,6 @@ case class Order(
     state: String,
     month: Int)
 
-case class Individual(F1: Integer, F2: Integer)
-
 case class WindowData(
     month: Int,
     area: String,
@@ -1481,18 +1479,5 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
         |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test
       """.stripMargin), Row("value1", "12", 3.14, "hello"))
   }
-
-  test("SPARK-11633: HiveContext throws TreeNode Exception: Failed to Copy Node") {
-    val rdd1 = sparkContext.parallelize(Seq(Individual(1, 3), Individual(2, 1)))
-    val df = hiveContext.createDataFrame(rdd1)
-    df.registerTempTable("foo")
-    val df2 = sql("select f1, F2 as F2 from foo")
-    df2.registerTempTable("foo2")
-    df2.registerTempTable("foo3")
-
-    checkAnswer(sql(
-      """
-        SELECT a.F1 FROM foo2 a INNER JOIN foo3 b ON a.F2=b.F2
-      """.stripMargin), Row(2) :: Row(1) :: Nil)
   }
 }

From b37a64f13956b6ddd0e38ddfd9fe1caee611f1a8 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Fri, 20 Nov 2015 10:58:37 -0800
Subject: [PATCH 3/5] converge

---
 .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 61d9dcd37572b..3427152b2da02 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1479,5 +1479,4 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
         |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test
       """.stripMargin), Row("value1", "12", 3.14, "hello"))
   }
-  }
 }

From d6a6e9cc31b0f7547b35cf25884135ea65b03676 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Sun, 3 Jan 2016 18:40:26 -0800
Subject: [PATCH 4/5] outer join elimination by parent join.
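
The idea in DataFrame terms, as a minimal sketch (illustration only, not part of
the patch; it reuses the data from the tests added below and assumes a
spark-shell style session with the SQL implicits in scope):

    val a = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a")
    val b = Seq((1, 3, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b")
    val c = Seq((1, 3, "1"), (3, 6, "5")).toDF("int", "int2", "str").as("c")

    // Child join: LEFT OUTER, so b's columns are null-supplying.
    val child = a.join(b, $"a.int" === $"b.int", "left")
    // Parent join: INNER, and its condition c.int = b.int can never evaluate
    // to true when b.int is null, so the null-extended rows from the child
    // join are discarded anyway and the child join can safely become INNER.
    val query = c.join(child, $"c.int" === $"b.int", "inner")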
---
 .../sql/catalyst/optimizer/Optimizer.scala    | 104 +++++++++++-
 .../apache/spark/sql/DataFrameJoinSuite.scala | 155 ++++++++++++++++++
 2 files changed, 258 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 0b1c74293bb8b..dace0fc208586 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.{CleanupAliases, EliminateSubQueri
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.planning.ExtractFiltersAndInnerJoins
-import org.apache.spark.sql.catalyst.plans.{FullOuter, Inner, LeftOuter, LeftSemi, RightOuter}
+import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.types._
@@ -44,6 +44,7 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] {
       // Operator push down
       SetOperationPushDown,
       SamplePushDown,
+      OuterJoinElimination,
       ReorderJoin,
       PushPredicateThroughJoin,
       PushPredicateThroughProject,
@@ -768,6 +769,107 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper {
   }
 }
 
+/**
+ * Elimination of Outer Join by Parent Join Condition
+ *
+ * When an outer join is involved in another join (the parent join) whose join type is inner,
+ * left-semi, left-outer, or right-outer, check whether the parent join's condition satisfies
+ * both of the following:
+ *
+ * 1) it contains null-filtering predicates against columns in the null-supplying side of the
+ *    parent join, and
+ * 2) those columns come from the child join.
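+ *
+ * For example (illustration only, with hypothetical tables t1, t2, t3): in
+ *   SELECT ... FROM t3 JOIN (t1 LEFT OUTER JOIN t2 ON t1.k = t2.k) ON t3.k = t2.k
+ * the parent join's condition t3.k = t2.k can never evaluate to true when t2.k is null, so
+ * every null-extended row produced by the child outer join is discarded and the child join
+ * may safely run as an inner join.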
+ *
+ * If such join predicates exist, apply the following elimination rules:
+ * - full outer -> inner if both sides of the child join have such predicates
+ * - left outer -> inner if the right side of the child join has such predicates
+ * - right outer -> inner if the left side of the child join has such predicates
+ * - full outer -> left outer if only the left side of the child join has such predicates
+ * - full outer -> right outer if only the right side of the child join has such predicates
+ */
+object OuterJoinElimination extends Rule[LogicalPlan] with PredicateHelper {
+
+  private def containsAttr(plan: LogicalPlan, attr: Attribute): Boolean =
+    plan.outputSet.exists(_.semanticEquals(attr))
+
+  private def hasNullFilteringPredicate(predicate: Expression, plan: LogicalPlan): Boolean = {
+    predicate match {
+      case EqualTo(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case EqualTo(_, ar: AttributeReference) if containsAttr(plan, ar) => true
+      case EqualNullSafe(ar: AttributeReference, l)
+        if !l.nullable && containsAttr(plan, ar) => true
+      case EqualNullSafe(l, ar: AttributeReference)
+        if !l.nullable && containsAttr(plan, ar) => true
+      case GreaterThan(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case GreaterThan(_, ar: AttributeReference) if containsAttr(plan, ar) => true
+      case GreaterThanOrEqual(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case GreaterThanOrEqual(_, ar: AttributeReference) if containsAttr(plan, ar) => true
+      case LessThan(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case LessThan(_, ar: AttributeReference) if containsAttr(plan, ar) => true
+      case LessThanOrEqual(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case LessThanOrEqual(_, ar: AttributeReference) if containsAttr(plan, ar) => true
+      case In(ar: AttributeReference, _) if containsAttr(plan, ar) => true
+      case IsNotNull(ar: AttributeReference) if containsAttr(plan, ar) => true
+      case And(l, r) => hasNullFilteringPredicate(l, plan) || hasNullFilteringPredicate(r, plan)
+      case Or(l, r) => hasNullFilteringPredicate(l, plan) && hasNullFilteringPredicate(r, plan)
+      case Not(e) => !hasNullFilteringPredicate(e, plan)
+      case _ => false
+    }
+  }
+
+  private def buildNewJoin(
+      otherCondition: Expression,
+      left: LogicalPlan,
+      right: LogicalPlan,
+      joinType: JoinType,
+      condition: Option[Expression]): Join = {
+    val leftHasNonNullPredicate = hasNullFilteringPredicate(otherCondition, left)
+    val rightHasNonNullPredicate = hasNullFilteringPredicate(otherCondition, right)
+
+    joinType match {
+      case RightOuter if leftHasNonNullPredicate =>
+        Join(left, right, Inner, condition)
+      case LeftOuter if rightHasNonNullPredicate =>
+        Join(left, right, Inner, condition)
+      case FullOuter if leftHasNonNullPredicate && rightHasNonNullPredicate =>
+        Join(left, right, Inner, condition)
+      case FullOuter if leftHasNonNullPredicate =>
+        Join(left, right, LeftOuter, condition)
+      case FullOuter if rightHasNonNullPredicate =>
+        Join(left, right, RightOuter, condition)
+      case _ => Join(left, right, joinType, condition)
+    }
+  }
+
+  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+
+    // Case 1: the parent join is Inner|LeftSemi|LeftOuter and the child join is on the right side
+    case pj @ Join(
+        pLeft,
+        j @ Join(left, right, RightOuter | LeftOuter | FullOuter, condition),
+        Inner | LeftSemi | LeftOuter,
+        Some(pJoinCond)) =>
+      Join(
+        pLeft,
+        buildNewJoin(pJoinCond, left, right, j.joinType, condition),
+        pj.joinType,
+        Some(pJoinCond))
+
+    // Case 2: the parent join is Inner|LeftSemi|RightOuter and the child join is on the left side
+    case pj @ Join(
+        j @ Join(left, right, RightOuter | LeftOuter | FullOuter, condition),
+        pRight,
+        Inner | LeftSemi | RightOuter,
+        Some(pJoinCond)) =>
+      Join(
+        buildNewJoin(pJoinCond, left, right, j.joinType, condition),
+        pRight,
+        pj.joinType,
+        Some(pJoinCond))
+  }
+}
+
 /**
  * Pushes down [[Filter]] operators where the `condition` can be
  * evaluated using only the attributes of the left or right side of a join. Other
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index 39a65413bd592..0a1d28ab0278a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql
 
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical.{Project, Join}
 import org.apache.spark.sql.execution.joins.BroadcastHashJoin
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.test.SharedSQLContext
@@ -140,4 +142,157 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
       assert(df1.join(broadcast(pf1)).count() === 4)
     }
   }
+
+  test("join - left outer to inner by the parent join's join condition") {
+    val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a")
+    val df2 = Seq((1, 3, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b")
+    val df3 = Seq((1, 3, "1"), (3, 6, "5")).toDF("int", "int2", "str").as("c")
+
+    // Left -> Inner
+    val right = df.join(df2, $"a.int" === $"b.int", "left")
+    val left2Inner =
+      df3.join(right, $"c.int" === $"b.int", "inner").select($"a.*", $"b.*", $"c.*")
+
+    left2Inner.explain(true)
+
+    // The order before conversion: Left Then Inner
+    assert(left2Inner.queryExecution.analyzed.collect {
+      case j @ Join(_, Join(_, _, LeftOuter, _), Inner, _) => j
+    }.size === 1)
+
+    // The order after conversion: Inner Then Inner
+    assert(left2Inner.queryExecution.optimizedPlan.collect {
+      case j @ Join(_, Join(_, _, Inner, _), Inner, _) => j
+    }.size === 1)
+
+    checkAnswer(
+      left2Inner,
+      Row(1, 2, "1", 1, 3, "1", 1, 3, "1") :: Nil)
+  }
+
+  test("join - right outer to inner by the parent join's join condition") {
+    val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a")
+    val df2 = Seq((1, 3, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b")
+    val df3 = Seq((1, 9, "8"), (5, 0, "4")).toDF("int", "int2", "str").as("c")
+
+    // Right -> Inner
+    val right2Inner = df.join(df2, $"a.int" === $"b.int", "right")
+      .join(df3, $"a.int" === $"b.int", "inner").select($"a.*", $"b.*", $"c.*")
+
+    // The order before conversion: Right Then Inner
+    assert(right2Inner.queryExecution.analyzed.collect {
+      case j @ Join(Join(_, _, RightOuter, _), _, Inner, _) => j
+    }.size === 1)
+
+    // The order after conversion: Inner Then Inner
+    assert(right2Inner.queryExecution.optimizedPlan.collect {
+      case j @ Join(Join(_, _, Inner, _), _, Inner, _) => j
+    }.size === 1)
+
+    checkAnswer(
+      right2Inner,
+      Row(1, 2, "1", 1, 3, "1", 1, 9, "8") ::
+      Row(1, 2, "1", 1, 3, "1", 5, 0, "4") :: Nil)
+  }
+
+  test("join - full outer to inner by the parent join's join condition") {
+    val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a")
+    val df2 = Seq((1, 2, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b")
+    val df3 = Seq((1, 3, "1"), (3, 6, "5")).toDF("int", "int2", "str").as("c")
"str").as("c") + + // Full -> Inner + val right = df.join(df2, $"a.int" === $"b.int", "full") + val full2Inner = df3.join(right, $"c.int" === $"a.int" && $"b.int" === 1, "inner") + .select($"a.*", $"b.*", $"c.*") + + // The order before conversion: Left Then Inner + assert(full2Inner.queryExecution.analyzed.collect { + case j@Join(_, Join(_, _, FullOuter, _), Inner, _) => j + }.size === 1) + + // The order after conversion: Inner Then Inner + assert(full2Inner.queryExecution.optimizedPlan.collect { + case j@Join(_, Join(_, _, Inner, _), Inner, _) => j + }.size === 1) + + checkAnswer( + full2Inner, + Row(1, 2, "1", 1, 2, "1", 1, 3, "1") :: Nil) + } + + test("join - full outer to right by the parent join's join condition") { + val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a") + val df2 = Seq((1, 2, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b") + val df3 = Seq((1, 3, "1"), (3, 6, "5")).toDF("int", "int2", "str").as("c") + + // Full -> Right + val right = df.join(df2, $"a.int" === $"b.int", "full") + val full2Right = df3.join(right, $"b.int" === 1, "leftsemi") + + // The order before conversion: Left Then Inner + assert(full2Right.queryExecution.analyzed.collect { + case j@Join(_, Join(_, _, FullOuter, _), LeftSemi, _) => j + }.size === 1) + + // The order after conversion: Inner Then Inner + assert(full2Right.queryExecution.optimizedPlan.collect { + case j@Join(_, Project(_, Join(_, _, RightOuter, _)), LeftSemi, _) => j + }.size === 1) + + checkAnswer( + full2Right, + Row(1, 3, "1") :: Row(3, 6, "5") :: Nil) + } + + + test("join - full outer to left by the parent join's join condition #1") { + val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a") + val df2 = Seq((1, 2, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b") + val df3 = Seq((1, 3, "1"), (4, 6, "5")).toDF("int", "int2", "str").as("c") + + // Full -> Left + val right = df.join(df2, $"a.int" === $"b.int", "full") + val full2Left = df3.join(right, $"c.int" === $"a.int", "left") + .select($"a.*", $"b.*", $"c.*") + + // The order before conversion: Full Then Left + assert(full2Left.queryExecution.analyzed.collect { + case j@Join(_, Join(_, _, FullOuter, _), LeftOuter, _) => j + }.size === 1) + + // The order after conversion: Left Then Left + assert(full2Left.queryExecution.optimizedPlan.collect { + case j@Join(_, Join(_, _, LeftOuter, _), LeftOuter, _) => j + }.size === 1) + + checkAnswer( + full2Left, + Row(1, 2, "1", 1, 2, "1", 1, 3, "1") :: + Row(null, null, null, null, null, null, 4, 6, "5") :: Nil) + } + + test("join - full outer to left by the parent join's join condition #2") { + val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str").as("a") + val df2 = Seq((1, 2, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("b") + val df3 = Seq((1, 3, "1"), (4, 6, "5")).toDF("int", "int2", "str").as("c") + + // Full -> Left + val full2Left = df.join(df2, $"a.int" === $"b.int", "full") + .join(df3, $"c.int" === $"a.int", "right").select($"a.*", $"b.*", $"c.*") + + // The order before conversion: Full Then Right + assert(full2Left.queryExecution.analyzed.collect { + case j@Join(Join(_, _, FullOuter, _), _, RightOuter, _) => j + }.size === 1) + + // The order after conversion: Left Then Right + assert(full2Left.queryExecution.optimizedPlan.collect { + case j@Join(Join(_, _, LeftOuter, _), _, RightOuter, _) => j + }.size === 1) + + checkAnswer( + full2Left, + Row(1, 2, "1", 1, 2, "1", 1, 3, "1") :: + Row(null, null, null, null, null, null, 4, 6, "5") :: Nil) + } } From 
From 1a9ebdff1da8b0661738db1c7cf466344261ed33 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Wed, 24 Feb 2016 23:36:57 -0800
Subject: [PATCH 5/5] integrate it into the existing outer-join elimination.

---
 .../sql/catalyst/optimizer/Optimizer.scala    | 24 ++++++++++++++++---
 .../apache/spark/sql/DataFrameJoinSuite.scala |  2 +-
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index ec4bf08544dd8..6165663cd0436 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -945,8 +945,10 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper {
 }
 
 /**
- * Elimination of outer joins, if the predicates can restrict the result sets so that
- * all null-supplying rows are eliminated
+ * Elimination of Outer Joins
+ *
+ * Rule Set 1: check whether the Filter condition's predicates restrict the result set
+ * so that all null-supplying rows are eliminated:
  *
  * - full outer -> inner if both sides have such predicates
  * - left outer -> inner if the right side has such predicates
@@ -954,7 +956,21 @@ object ReorderJoin extends Rule[LogicalPlan] with PredicateHelper {
  * - full outer -> left outer if only the left side has such predicates
  * - full outer -> right outer if only the right side has such predicates
  *
- * This rule should be executed before pushing down the Filter
+ * This rule set should be executed before pushing down the Filter.
+ *
+ * Rule Set 2: when an outer join is involved in another join (the parent join) whose join
+ * type is inner, left-semi, left-outer, or right-outer, check whether the parent join's
+ * condition satisfies both of the following:
+ * 1) it contains null-filtering predicates against columns in the null-supplying side of
+ *    the parent join, and
+ * 2) those columns come from the child join.
+ *
+ * If such join predicates exist, apply the following elimination rules:
+ * - full outer -> inner if both sides of the child join have such predicates
+ * - left outer -> inner if the right side of the child join has such predicates
+ * - right outer -> inner if the left side of the child join has such predicates
+ * - full outer -> left outer if only the left side of the child join has such predicates
+ * - full outer -> right outer if only the right side of the child join has such predicates
  */
 object OuterJoinElimination extends Rule[LogicalPlan] with PredicateHelper {
@@ -997,10 +1013,12 @@ object OuterJoinElimination extends Rule[LogicalPlan] with PredicateHelper {
   }
 
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+    // Rule Set 1: elimination using Filter conditions/constraints
     case f @ Filter(condition, j @ Join(_, _, RightOuter | LeftOuter | FullOuter, _)) =>
       val newJoinType = buildNewJoinType(f.condition, f.constraints, j)
       if (j.joinType == newJoinType) f else Filter(condition, j.copy(joinType = newJoinType))
 
+    // Rule Set 2: elimination using the parent join's conditions/constraints
     // Case 1: when parent join is Inner|LeftSemi|LeftOuter and the child join is on the right side
     case pj @ Join(
         _,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index 77cf015e9cdaf..514d53fb5e020 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.sql
 
 import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.plans.logical.{Project, Join}
+import org.apache.spark.sql.catalyst.plans.logical.{Join, Project}
 import org.apache.spark.sql.execution.joins.BroadcastHashJoin
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.test.SharedSQLContext
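
For completeness, a Filter-based (Rule Set 1) example in the same spirit as the
tests above (a sketch, not part of the patch; assumes spark-shell style implicits):

    val l = Seq((1, "x"), (2, "y")).toDF("k", "v").as("l")
    val r = Seq((1, "z")).toDF("k", "w").as("r")
    // The WHERE predicate r.w = 'z' can never be true on a null-extended row
    // from r, the null-supplying side, so the LEFT OUTER join can be rewritten
    // to INNER before the Filter is pushed down.
    val q = l.join(r, $"l.k" === $"r.k", "left").where($"r.w" === "z")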