Skip to content

Commit afd0deb

Browse files
sameeragarwal authored and davies committed
[SPARK-14137] [SPARK-14150] [SQL] Infer IsNotNull constraints from non-nullable attributes
## What changes were proposed in this pull request?

This PR adds support for automatically inferring `IsNotNull` constraints from any non-nullable attributes that are part of an operator's output. This also fixes the issue that causes the optimizer to hit the maximum number of iterations for certain queries in #11828.

## How was this patch tested?

Unit test in `ConstraintPropagationSuite`.

Author: Sameer Agarwal <[email protected]>

Closes #11953 from sameeragarwal/infer-isnotnull.
1 parent ca00335 commit afd0deb

File tree

3 files changed

+33
-18
lines changed

3 files changed

+33
-18
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,29 +39,37 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
3939
}
4040

4141
/**
42-
* Infers a set of `isNotNull` constraints from a given set of equality/comparison expressions.
43-
* For e.g., if an expression is of the form (`a > 5`), this returns a constraint of the form
44-
* `isNotNull(a)`
42+
* Infers a set of `isNotNull` constraints from a given set of equality/comparison expressions as
43+
* well as non-nullable attributes. For e.g., if an expression is of the form (`a > 5`), this
44+
* returns a constraint of the form `isNotNull(a)`
4545
*/
4646
private def constructIsNotNullConstraints(constraints: Set[Expression]): Set[Expression] = {
47-
// Currently we only propagate constraints if the condition consists of equality
48-
// and ranges. For all other cases, we return an empty set of constraints
49-
constraints.map {
47+
var isNotNullConstraints = Set.empty[Expression]
48+
49+
// First, we propagate constraints if the condition consists of equality and ranges. For all
50+
// other cases, we return an empty set of constraints
51+
constraints.foreach {
5052
case EqualTo(l, r) =>
51-
Set(IsNotNull(l), IsNotNull(r))
53+
isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r))
5254
case GreaterThan(l, r) =>
53-
Set(IsNotNull(l), IsNotNull(r))
55+
isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r))
5456
case GreaterThanOrEqual(l, r) =>
55-
Set(IsNotNull(l), IsNotNull(r))
57+
isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r))
5658
case LessThan(l, r) =>
57-
Set(IsNotNull(l), IsNotNull(r))
59+
isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r))
5860
case LessThanOrEqual(l, r) =>
59-
Set(IsNotNull(l), IsNotNull(r))
61+
isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r))
6062
case Not(EqualTo(l, r)) =>
61-
Set(IsNotNull(l), IsNotNull(r))
62-
case _ =>
63-
Set.empty[Expression]
64-
}.foldLeft(Set.empty[Expression])(_ union _.toSet)
63+
isNotNullConstraints ++= Set(IsNotNull(l), IsNotNull(r))
64+
case _ => // No inference
65+
}
66+
67+
// Second, we infer additional constraints from non-nullable attributes that are part of the
68+
// operator's output
69+
val nonNullableAttributes = output.filterNot(_.nullable)
70+
isNotNullConstraints ++= nonNullableAttributes.map(IsNotNull).toSet
71+
72+
isNotNullConstraints -- constraints
6573
}
6674

6775
/**

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
2323
import org.apache.spark.sql.catalyst.dsl.plans._
2424
import org.apache.spark.sql.catalyst.expressions._
2525
import org.apache.spark.sql.catalyst.plans.logical._
26+
import org.apache.spark.sql.types.{IntegerType, StringType}
2627

2728
class ConstraintPropagationSuite extends SparkFunSuite {
2829

@@ -217,4 +218,12 @@ class ConstraintPropagationSuite extends SparkFunSuite {
217218
IsNotNull(resolveColumn(tr, "a")),
218219
IsNotNull(resolveColumn(tr, "b")))))
219220
}
221+
222+
test("infer IsNotNull constraints from non-nullable attributes") {
223+
val tr = LocalRelation('a.int, AttributeReference("b", IntegerType, nullable = false)(),
224+
AttributeReference("c", StringType, nullable = false)())
225+
226+
verifyConstraints(tr.analyze.constraints,
227+
ExpressionSet(Seq(IsNotNull(resolveColumn(tr, "b")), IsNotNull(resolveColumn(tr, "c")))))
228+
}
220229
}

sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -341,9 +341,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
341341
"udf_round_3",
342342
"view_cast",
343343

344-
// enable this after fixing SPARK-14137
345-
"union20",
346-
347344
// These tests check the VIEW table definition, but Spark handles CREATE VIEW itself and
348345
// generates different View Expanded Text.
349346
"alter_view_as_select",
@@ -1046,6 +1043,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
10461043
"union18",
10471044
"union19",
10481045
"union2",
1046+
"union20",
10491047
"union22",
10501048
"union23",
10511049
"union24",

0 commit comments

Comments
 (0)