Skip to content

Commit 990efad

Browse files
maropu authored and gatorsmile committed
[SPARK-20963][SQL] Support column aliases for join relations in FROM clause
## What changes were proposed in this pull request?
This PR adds parsing rules to support column aliases for join relations in the FROM clause. This PR is a sub-task of #18079.
## How was this patch tested?
Added tests in `AnalysisSuite`, `PlanParserSuite`, and `SQLQueryTestSuite`.
Author: Takeshi Yamamuro <[email protected]>
Closes #18772 from maropu/SPARK-20963-2.
1 parent 41568e9 commit 990efad

File tree

6 files changed

+104
-22
lines changed

6 files changed

+104
-22
lines changed

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -473,11 +473,11 @@ identifierComment
473473
;
474474

475475
relationPrimary
476-
: tableIdentifier sample? tableAlias #tableName
477-
| '(' queryNoWith ')' sample? tableAlias #aliasedQuery
478-
| '(' relation ')' sample? (AS? strictIdentifier)? #aliasedRelation
479-
| inlineTable #inlineTableDefault2
480-
| functionTable #tableValuedFunction
476+
: tableIdentifier sample? tableAlias #tableName
477+
| '(' queryNoWith ')' sample? tableAlias #aliasedQuery
478+
| '(' relation ')' sample? tableAlias #aliasedRelation
479+
| inlineTable #inlineTableDefault2
480+
| functionTable #tableValuedFunction
481481
;
482482

483483
inlineTable

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -739,12 +739,14 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
739739
/**
740740
* Create an alias (SubqueryAlias) for a join relation. This is practically the same as
741741
* visitAliasedQuery and visitNamedExpression, ANTLR4 however requires us to use 3 different
742-
* hooks.
742+
* hooks. We could add alias names for output columns, for example:
743+
* {{{
744+
* SELECT a, b, c, d FROM (src1 s1 INNER JOIN src2 s2 ON s1.id = s2.id) dst(a, b, c, d)
745+
* }}}
743746
*/
744747
override def visitAliasedRelation(ctx: AliasedRelationContext): LogicalPlan = withOrigin(ctx) {
745-
plan(ctx.relation)
746-
.optionalMap(ctx.sample)(withSample)
747-
.optionalMap(ctx.strictIdentifier)(aliasPlan)
748+
val relation = plan(ctx.relation).optionalMap(ctx.sample)(withSample)
749+
mayApplyAliasPlan(ctx.tableAlias, relation)
748750
}
749751

750752
/**
@@ -756,31 +758,43 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
756758
* }}}
757759
*/
758760
override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) {
759-
val alias = if (ctx.tableAlias.strictIdentifier == null) {
761+
val relation = plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample)
762+
if (ctx.tableAlias.strictIdentifier == null) {
760763
// For un-aliased subqueries, use a default alias name that is not likely to conflict with
761764
// normal subquery names, so that parent operators can only access the columns in subquery by
762765
// unqualified names. Users can still use this special qualifier to access columns if they
763766
// know it, but that's not recommended.
764-
"__auto_generated_subquery_name"
767+
SubqueryAlias("__auto_generated_subquery_name", relation)
765768
} else {
766-
ctx.tableAlias.strictIdentifier.getText
767-
}
768-
val subquery = SubqueryAlias(alias, plan(ctx.queryNoWith).optionalMap(ctx.sample)(withSample))
769-
if (ctx.tableAlias.identifierList != null) {
770-
val columnAliases = visitIdentifierList(ctx.tableAlias.identifierList)
771-
UnresolvedSubqueryColumnAliases(columnAliases, subquery)
772-
} else {
773-
subquery
769+
mayApplyAliasPlan(ctx.tableAlias, relation)
774770
}
775771
}
776772

777773
/**
778-
* Create an alias (SubqueryAlias) for a LogicalPlan.
774+
* Create an alias ([[SubqueryAlias]]) for a [[LogicalPlan]].
779775
*/
780776
private def aliasPlan(alias: ParserRuleContext, plan: LogicalPlan): LogicalPlan = {
781777
SubqueryAlias(alias.getText, plan)
782778
}
783779

780+
/**
781+
* If aliases are specified in a FROM clause, create a subquery alias ([[SubqueryAlias]]) and
782+
* column aliases for a [[LogicalPlan]].
783+
*/
784+
private def mayApplyAliasPlan(tableAlias: TableAliasContext, plan: LogicalPlan): LogicalPlan = {
785+
if (tableAlias.strictIdentifier != null) {
786+
val subquery = SubqueryAlias(tableAlias.strictIdentifier.getText, plan)
787+
if (tableAlias.identifierList != null) {
788+
val columnNames = visitIdentifierList(tableAlias.identifierList)
789+
UnresolvedSubqueryColumnAliases(columnNames, subquery)
790+
} else {
791+
subquery
792+
}
793+
} else {
794+
plan
795+
}
796+
}
797+
784798
/**
785799
* Create a Sequence of Strings for a parenthesis enclosed alias list.
786800
*/

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
2525
import org.apache.spark.sql.catalyst.dsl.expressions._
2626
import org.apache.spark.sql.catalyst.dsl.plans._
2727
import org.apache.spark.sql.catalyst.expressions._
28-
import org.apache.spark.sql.catalyst.plans.Cross
28+
import org.apache.spark.sql.catalyst.plans.{Cross, Inner}
2929
import org.apache.spark.sql.catalyst.plans.logical._
3030
import org.apache.spark.sql.types._
3131

@@ -490,4 +490,26 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers {
490490
Seq("Number of column aliases does not match number of columns. " +
491491
"Number of column aliases: 5; number of columns: 4."))
492492
}
493+
494+
test("SPARK-20963 Support aliases for join relations in FROM clause") {
495+
def joinRelationWithAliases(outputNames: Seq[String]): LogicalPlan = {
496+
val src1 = LocalRelation('id.int, 'v1.string).as("s1")
497+
val src2 = LocalRelation('id.int, 'v2.string).as("s2")
498+
UnresolvedSubqueryColumnAliases(
499+
outputNames,
500+
SubqueryAlias(
501+
"dst",
502+
src1.join(src2, Inner, Option(Symbol("s1.id") === Symbol("s2.id"))))
503+
).select(star())
504+
}
505+
assertAnalysisSuccess(joinRelationWithAliases("col1" :: "col2" :: "col3" :: "col4" :: Nil))
506+
assertAnalysisError(
507+
joinRelationWithAliases("col1" :: Nil),
508+
Seq("Number of column aliases does not match number of columns. " +
509+
"Number of column aliases: 1; number of columns: 4."))
510+
assertAnalysisError(
511+
joinRelationWithAliases("col1" :: "col2" :: "col3" :: "col4" :: "col5" :: Nil),
512+
Seq("Number of column aliases does not match number of columns. " +
513+
"Number of column aliases: 5; number of columns: 4."))
514+
}
493515
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,19 @@ class PlanParserSuite extends AnalysisTest {
506506
).select(star()))
507507
}
508508

509+
test("SPARK-20963 Support aliases for join relations in FROM clause") {
510+
val src1 = UnresolvedRelation(TableIdentifier("src1")).as("s1")
511+
val src2 = UnresolvedRelation(TableIdentifier("src2")).as("s2")
512+
assertEqual(
513+
"SELECT * FROM (src1 s1 INNER JOIN src2 s2 ON s1.id = s2.id) dst(a, b, c, d)",
514+
UnresolvedSubqueryColumnAliases(
515+
Seq("a", "b", "c", "d"),
516+
SubqueryAlias(
517+
"dst",
518+
src1.join(src2, Inner, Option(Symbol("s1.id") === Symbol("s2.id"))))
519+
).select(star()))
520+
}
521+
509522
test("inline table") {
510523
assertEqual("values 1, 2, 3, 4",
511524
UnresolvedInlineTable(Seq("col1"), Seq(1, 2, 3, 4).map(x => Seq(Literal(x)))))

sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,10 @@ SELECT a AS col1, b AS col2 FROM testData AS t(c, d);
1818

1919
-- Subquery aliases in FROM clause
2020
SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2);
21+
22+
-- Aliases for join relations in FROM clause
23+
CREATE OR REPLACE TEMPORARY VIEW src1 AS SELECT * FROM VALUES (1, "a"), (2, "b"), (3, "c") AS src1(id, v1);
24+
25+
CREATE OR REPLACE TEMPORARY VIEW src2 AS SELECT * FROM VALUES (2, 1.0), (3, 3.2), (1, 8.5) AS src2(id, v2);
26+
27+
SELECT * FROM (src1 s1 INNER JOIN src2 s2 ON s1.id = s2.id) dst(a, b, c, d);

sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 8
2+
-- Number of queries: 11
33

44

55
-- !query 0
@@ -69,3 +69,29 @@ SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2)
6969
struct<col1:int,col2:int>
7070
-- !query 7 output
7171
1 1
72+
73+
74+
-- !query 8
75+
CREATE OR REPLACE TEMPORARY VIEW src1 AS SELECT * FROM VALUES (1, "a"), (2, "b"), (3, "c") AS src1(id, v1)
76+
-- !query 8 schema
77+
struct<>
78+
-- !query 8 output
79+
80+
81+
82+
-- !query 9
83+
CREATE OR REPLACE TEMPORARY VIEW src2 AS SELECT * FROM VALUES (2, 1.0), (3, 3.2), (1, 8.5) AS src2(id, v2)
84+
-- !query 9 schema
85+
struct<>
86+
-- !query 9 output
87+
88+
89+
90+
-- !query 10
91+
SELECT * FROM (src1 s1 INNER JOIN src2 s2 ON s1.id = s2.id) dst(a, b, c, d)
92+
-- !query 10 schema
93+
struct<a:int,b:string,c:int,d:decimal(2,1)>
94+
-- !query 10 output
95+
1 a 1 8.5
96+
2 b 2 1
97+
3 c 3 3.2

0 commit comments

Comments
 (0)