Skip to content

Commit e136f47

Browse files
tianyimarmbrus
authored andcommitted
[SPARK-3688][SQL]LogicalPlan can't resolve column correctlly
This PR fixed the resolving problem described in https://issues.apache.org/jira/browse/SPARK-3688 ``` CREATE TABLE t1(x INT); CREATE TABLE t2(a STRUCT<x: INT>, k INT); SELECT a.x FROM t1 a JOIN t2 b ON a.x = b.k; ``` Author: tianyi <[email protected]> Closes #4524 from tianyi/SPARK-3688 and squashes the following commits: 237a256 [tianyi] resolve a name with table.column pattern first. (cherry picked from commit 44b2311) Signed-off-by: Michael Armbrust <[email protected]>
1 parent 1bb3631 commit e136f47

7 files changed

+42
-18
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,29 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging {
128128
def resolve(name: String, resolver: Resolver): Option[NamedExpression] =
129129
resolve(name, output, resolver)
130130

131+
def resolveAsTableColumn(
132+
nameParts: Array[String],
133+
resolver: Resolver,
134+
attribute: Attribute): List[(Attribute, List[String])] = {
135+
if (attribute.qualifiers.find(resolver(_, nameParts.head)).nonEmpty && nameParts.size > 1) {
136+
val remainingParts = nameParts.drop(1)
137+
resolveAsColumn(remainingParts, resolver, attribute)
138+
} else {
139+
Nil
140+
}
141+
}
142+
143+
def resolveAsColumn(
144+
nameParts: Array[String],
145+
resolver: Resolver,
146+
attribute: Attribute): List[(Attribute, List[String])] = {
147+
if (resolver(attribute.name, nameParts.head)) {
148+
(attribute.withName(nameParts.head), nameParts.tail.toList) :: Nil
149+
} else {
150+
Nil
151+
}
152+
}
153+
131154
/** Performs attribute resolution given a name and a sequence of possible attributes. */
132155
protected def resolve(
133156
name: String,
@@ -136,24 +159,15 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging {
136159

137160
val parts = name.split("\\.")
138161

139-
// Collect all attributes that are output by this nodes children where either the first part
140-
// matches the name or where the first part matches the scope and the second part matches the
141-
// name. Return these matches along with any remaining parts, which represent dotted access to
142-
// struct fields.
143-
val options = input.flatMap { option =>
144-
// If the first part of the desired name matches a qualifier for this possible match, drop it.
145-
val remainingParts =
146-
if (option.qualifiers.find(resolver(_, parts.head)).nonEmpty && parts.size > 1) {
147-
parts.drop(1)
148-
} else {
149-
parts
150-
}
151-
152-
if (resolver(option.name, remainingParts.head)) {
153-
// Preserve the case of the user's attribute reference.
154-
(option.withName(remainingParts.head), remainingParts.tail.toList) :: Nil
155-
} else {
156-
Nil
162+
// We will try to resolve this name as `table.column` pattern first.
163+
var options = input.flatMap { option =>
164+
resolveAsTableColumn(parts, resolver, option)
165+
}
166+
167+
// If none of attributes match `table.column` pattern, we try to resolve it as a column.
168+
if(options.isEmpty) {
169+
options = input.flatMap { option =>
170+
resolveAsColumn(parts, resolver, option)
157171
}
158172
}
159173

sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-0-c6d02549aec166e16bfc44d5905fa33a

Whitespace-only changes.

sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-1-a8987ff8c7b9ca95bf8b32314694ed1f

Whitespace-only changes.

sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-2-26f54240cf5b909086fc34a34d7fdb56

Whitespace-only changes.

sql/hive/src/test/resources/golden/test ambiguousReferences resolved as hive-3-d08d5280027adea681001ad82a5a6974

Whitespace-only changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,15 @@ class HiveResolutionSuite extends HiveComparisonTest {
9999
assert(sql("SELECT nestedArray[0].a FROM nestedRepeatedTest").collect().head(0) === 1)
100100
}
101101

102+
createQueryTest("test ambiguousReferences resolved as hive",
103+
"""
104+
|CREATE TABLE t1(x INT);
105+
|CREATE TABLE t2(a STRUCT<x: INT>, k INT);
106+
|INSERT OVERWRITE TABLE t1 SELECT 1 FROM src LIMIT 1;
107+
|INSERT OVERWRITE TABLE t2 SELECT named_struct("x",1),1 FROM src LIMIT 1;
108+
|SELECT a.x FROM t1 a JOIN t2 b ON a.x = b.k;
109+
""".stripMargin)
110+
102111
/**
103112
* Negative examples. Currently only left here for documentation purposes.
104113
* TODO(marmbrus): Test that catalyst fails on these queries.

0 commit comments

Comments
 (0)