Skip to content

Commit 1afcf77

Browse files
lianchengmarmbrus
authored andcommitted
[SPARK-6452] [SQL] Checks for missing attributes and unresolved operator for all types of operator
In `CheckAnalysis`, `Filter` and `Aggregate` are checked in separate case clauses, thus never hit those clauses for unresolved operators and missing input attributes. This PR also removes the `prettyString` call when generating error message for missing input attributes. Because result of `prettyString` doesn't contain expression ID, and may give confusing messages like > resolved attributes a missing from a cc rxin <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/5129) <!-- Reviewable:end --> Author: Cheng Lian <[email protected]> Closes apache#5129 from liancheng/spark-6452 and squashes the following commits: 52cdc69 [Cheng Lian] Addresses comments 029f9bd [Cheng Lian] Checks for missing attributes and unresolved operator for all types of operator
1 parent 4ce2782 commit 1afcf77

File tree

3 files changed

+33
-7
lines changed

3 files changed

+33
-7
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class CheckAnalysis {
6363
s"filter expression '${f.condition.prettyString}' " +
6464
s"of type ${f.condition.dataType.simpleString} is not a boolean.")
6565

66-
case aggregatePlan@Aggregate(groupingExprs, aggregateExprs, child) =>
66+
case Aggregate(groupingExprs, aggregateExprs, child) =>
6767
def checkValidAggregateExpression(expr: Expression): Unit = expr match {
6868
case _: AggregateExpression => // OK
6969
case e: Attribute if !groupingExprs.contains(e) =>
@@ -85,13 +85,18 @@ class CheckAnalysis {
8585

8686
cleaned.foreach(checkValidAggregateExpression)
8787

88+
case _ => // Fallbacks to the following checks
89+
}
90+
91+
operator match {
8892
case o if o.children.nonEmpty && o.missingInput.nonEmpty =>
89-
val missingAttributes = o.missingInput.map(_.prettyString).mkString(",")
90-
val input = o.inputSet.map(_.prettyString).mkString(",")
93+
val missingAttributes = o.missingInput.mkString(",")
94+
val input = o.inputSet.mkString(",")
9195

92-
failAnalysis(s"resolved attributes $missingAttributes missing from $input")
96+
failAnalysis(
97+
s"resolved attribute(s) $missingAttributes missing from $input " +
98+
s"in operator ${operator.simpleString}")
9399

94-
// Catch all
95100
case o if !o.resolved =>
96101
failAnalysis(
97102
s"unresolved operator ${operator.simpleString}")

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,12 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanType]] extends TreeNode[PlanTy
4747
* Attributes that are referenced by expressions but not provided by this nodes children.
4848
* Subclasses should override this method if they produce attributes internally as it is used by
4949
* assertions designed to prevent the construction of invalid plans.
50+
*
51+
* Note that virtual columns should be excluded. Currently, we only support the grouping ID
52+
* virtual column.
5053
*/
51-
def missingInput: AttributeSet = (references -- inputSet)
52-
.filter(_.name != VirtualColumn.groupingIdName)
54+
def missingInput: AttributeSet =
55+
(references -- inputSet).filter(_.name != VirtualColumn.groupingIdName)
5356

5457
/**
5558
* Runs [[transform]] with `rule` on all expressions present in this query operator.

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,4 +199,22 @@ class AnalysisSuite extends FunSuite with BeforeAndAfter {
199199
assert(pl(3).dataType == DecimalType.Unlimited)
200200
assert(pl(4).dataType == DoubleType)
201201
}
202+
203+
test("SPARK-6452 regression test") {
204+
// CheckAnalysis should throw AnalysisException when Aggregate contains missing attribute(s)
205+
val plan =
206+
Aggregate(
207+
Nil,
208+
Alias(Sum(AttributeReference("a", StringType)(exprId = ExprId(1))), "b")() :: Nil,
209+
LocalRelation(
210+
AttributeReference("a", StringType)(exprId = ExprId(2))))
211+
212+
assert(plan.resolved)
213+
214+
val message = intercept[AnalysisException] {
215+
caseSensitiveAnalyze(plan)
216+
}.getMessage
217+
218+
assert(message.contains("resolved attribute(s) a#1 missing from a#2"))
219+
}
202220
}

0 commit comments

Comments
 (0)