Skip to content

Commit 24d3428

Browse files
maropu authored and
gatorsmile committed
[SPARK-20841][SQL] Support table column aliases in FROM clause
## What changes were proposed in this pull request? This PR adds parsing rules to support table column aliases in the FROM clause. ## How was this patch tested? Added tests in `AnalysisSuite`, `PlanParserSuite`, and `SQLQueryTestSuite`. Author: Takeshi Yamamuro <[email protected]> Closes #18079 from maropu/SPARK-20841.
1 parent 06c155c commit 24d3428

File tree

13 files changed

+165
-19
lines changed

13 files changed

+165
-19
lines changed

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ identifierComment
472472
;
473473

474474
relationPrimary
475-
: tableIdentifier sample? (AS? strictIdentifier)? #tableName
475+
: tableIdentifier sample? tableAlias #tableName
476476
| '(' queryNoWith ')' sample? (AS? strictIdentifier) #aliasedQuery
477477
| '(' relation ')' sample? (AS? strictIdentifier)? #aliasedRelation
478478
| inlineTable #inlineTableDefault2
@@ -711,7 +711,7 @@ nonReserved
711711
| ADD
712712
| OVER | PARTITION | RANGE | ROWS | PRECEDING | FOLLOWING | CURRENT | ROW | LAST | FIRST | AFTER
713713
| MAP | ARRAY | STRUCT
714-
| LATERAL | WINDOW | REDUCE | TRANSFORM | USING | SERDE | SERDEPROPERTIES | RECORDREADER
714+
| LATERAL | WINDOW | REDUCE | TRANSFORM | SERDE | SERDEPROPERTIES | RECORDREADER
715715
| DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | LINES | SEPARATED
716716
| EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | GLOBAL | TEMPORARY | OPTIONS
717717
| GROUPING | CUBE | ROLLUP

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,25 @@ class Analyzer(
593593
def resolveRelation(plan: LogicalPlan): LogicalPlan = plan match {
594594
case u: UnresolvedRelation if !isRunningDirectlyOnFiles(u.tableIdentifier) =>
595595
val defaultDatabase = AnalysisContext.get.defaultDatabase
596-
val relation = lookupTableFromCatalog(u, defaultDatabase)
596+
val foundRelation = lookupTableFromCatalog(u, defaultDatabase)
597+
598+
// Add `Project` to rename output column names if a query has alias names:
599+
// e.g., SELECT col1, col2 FROM testData AS t(col1, col2)
600+
val relation = if (u.outputColumnNames.nonEmpty) {
601+
val outputAttrs = foundRelation.output
602+
// Checks that the number of aliases equals the number of columns in the table.
603+
if (u.outputColumnNames.size != outputAttrs.size) {
604+
u.failAnalysis(s"Number of column aliases does not match number of columns. " +
605+
s"Table name: ${u.tableName}; number of column aliases: " +
606+
s"${u.outputColumnNames.size}; number of columns: ${outputAttrs.size}.")
607+
}
608+
val aliases = outputAttrs.zip(u.outputColumnNames).map {
609+
case (attr, name) => Alias(attr, name)()
610+
}
611+
Project(aliases, foundRelation)
612+
} else {
613+
foundRelation
614+
}
597615
resolveRelation(relation)
598616
// The view's child should be a logical plan parsed from the `desc.viewText`, the variable
599617
// `viewText` should be defined, or else we throw an error on the generation of the View

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,9 @@ object ResolveTableValuedFunctions extends Rule[LogicalPlan] {
131131
val outputAttrs = resolvedFunc.output
132132
// Checks if the number of the aliases is equal to expected one
133133
if (u.outputNames.size != outputAttrs.size) {
134-
u.failAnalysis(s"expected ${outputAttrs.size} columns but " +
135-
s"found ${u.outputNames.size} columns")
134+
u.failAnalysis(s"Number of given aliases does not match number of output columns. " +
135+
s"Function name: ${u.functionName}; number of aliases: " +
136+
s"${u.outputNames.size}; number of output columns: ${outputAttrs.size}.")
136137
}
137138
val aliases = outputAttrs.zip(u.outputNames).map {
138139
case (attr, name) => Alias(attr, name)()

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,21 @@ class UnresolvedException[TreeType <: TreeNode[_]](tree: TreeType, function: Str
3636

3737
/**
3838
* Holds the name of a relation that has yet to be looked up in a catalog.
39+
* Alias names can be assigned to the columns of a relation:
40+
* {{{
41+
* // Assign alias names
42+
* SELECT col1, col2 FROM testData AS t(col1, col2);
43+
* }}}
44+
*
45+
* @param tableIdentifier table name
46+
* @param outputColumnNames alias names of columns. If these names are given, the analyzer adds
47+
* [[Project]] to rename the columns.
3948
*/
40-
case class UnresolvedRelation(tableIdentifier: TableIdentifier) extends LeafNode {
49+
case class UnresolvedRelation(
50+
tableIdentifier: TableIdentifier,
51+
outputColumnNames: Seq[String] = Seq.empty)
52+
extends LeafNode {
53+
4154
/** Returns a `.` separated name for this relation. */
4255
def tableName: String = tableIdentifier.unquotedString
4356

@@ -71,6 +84,11 @@ case class UnresolvedInlineTable(
7184
* // Assign alias names
7285
* select t.a from range(10) t(a);
7386
* }}}
87+
*
88+
* @param functionName name of this table-value function
89+
* @param functionArgs list of function arguments
90+
* @param outputNames alias names of function output columns. If these names are given, the analyzer
91+
* adds [[Project]] to rename the output columns.
7492
*/
7593
case class UnresolvedTableValuedFunction(
7694
functionName: String,

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -676,12 +676,16 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
676676
* Create an aliased table reference. This is typically used in FROM clauses.
677677
*/
678678
override def visitTableName(ctx: TableNameContext): LogicalPlan = withOrigin(ctx) {
679-
val table = UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier))
680-
681-
val tableWithAlias = Option(ctx.strictIdentifier).map(_.getText) match {
682-
case Some(strictIdentifier) =>
683-
SubqueryAlias(strictIdentifier, table)
684-
case _ => table
679+
val tableId = visitTableIdentifier(ctx.tableIdentifier)
680+
val table = if (ctx.tableAlias.identifierList != null) {
681+
UnresolvedRelation(tableId, visitIdentifierList(ctx.tableAlias.identifierList))
682+
} else {
683+
UnresolvedRelation(tableId)
684+
}
685+
val tableWithAlias = if (ctx.tableAlias.strictIdentifier != null) {
686+
SubqueryAlias(ctx.tableAlias.strictIdentifier.getText, table)
687+
} else {
688+
table
685689
}
686690
tableWithAlias.optionalMap(ctx.sample)(withSample)
687691
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,23 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers {
465465
assertAnalysisSuccess(rangeWithAliases(2 :: 6 :: 2 :: Nil, "c" :: Nil))
466466
assertAnalysisError(
467467
rangeWithAliases(3 :: Nil, "a" :: "b" :: Nil),
468-
Seq("expected 1 columns but found 2 columns"))
468+
Seq("Number of given aliases does not match number of output columns. "
469+
+ "Function name: range; number of aliases: 2; number of output columns: 1."))
470+
}
471+
472+
test("SPARK-20841 Support table column aliases in FROM clause") {
473+
def tableColumnsWithAliases(outputNames: Seq[String]): LogicalPlan = {
474+
SubqueryAlias("t", UnresolvedRelation(TableIdentifier("TaBlE3"), outputNames))
475+
.select(star())
476+
}
477+
assertAnalysisSuccess(tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: Nil))
478+
assertAnalysisError(
479+
tableColumnsWithAliases("col1" :: Nil),
480+
Seq("Number of column aliases does not match number of columns. Table name: TaBlE3; " +
481+
"number of column aliases: 1; number of columns: 4."))
482+
assertAnalysisError(
483+
tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: "col5" :: Nil),
484+
Seq("Number of column aliases does not match number of columns. Table name: TaBlE3; " +
485+
"number of column aliases: 5; number of columns: 4."))
469486
}
470487
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ trait AnalysisTest extends PlanTest {
3535
val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
3636
catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
3737
catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true)
38+
catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true)
3839
new Analyzer(catalog, conf) {
3940
override val extendedResolutionRules = EliminateSubqueryAliases :: Nil
4041
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
package org.apache.spark.sql.catalyst.parser
1919

20-
import org.apache.spark.sql.catalyst.FunctionIdentifier
21-
import org.apache.spark.sql.catalyst.analysis.{UnresolvedGenerator, UnresolvedInlineTable, UnresolvedTableValuedFunction}
20+
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
21+
import org.apache.spark.sql.catalyst.analysis.{UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedTableValuedFunction}
2222
import org.apache.spark.sql.catalyst.expressions._
2323
import org.apache.spark.sql.catalyst.plans._
2424
import org.apache.spark.sql.catalyst.plans.logical._
@@ -493,6 +493,13 @@ class PlanParserSuite extends PlanTest {
493493
.select(star()))
494494
}
495495

496+
test("SPARK-20841 Support table column aliases in FROM clause") {
497+
assertEqual(
498+
"SELECT * FROM testData AS t(col1, col2)",
499+
SubqueryAlias("t", UnresolvedRelation(TableIdentifier("testData"), Seq("col1", "col2")))
500+
.select(star()))
501+
}
502+
496503
test("inline table") {
497504
assertEqual("values 1, 2, 3, 4",
498505
UnresolvedInlineTable(Seq("col1"), Seq(1, 2, 3, 4).map(x => Seq(Literal(x)))))

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class TableIdentifierParserSuite extends SparkFunSuite {
4949
"insert", "int", "into", "is", "lateral", "like", "local", "none", "null",
5050
"of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke",
5151
"rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
52-
"true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike",
52+
"true", "truncate", "update", "user", "values", "with", "regexp", "rlike",
5353
"bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
5454
"int", "smallint", "timestamp", "at")
5555

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
-- Test data.
2+
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS testData(a, b);
3+
4+
-- Table column aliases in FROM clause
5+
SELECT * FROM testData AS t(col1, col2) WHERE col1 = 1;
6+
7+
SELECT * FROM testData AS t(col1, col2) WHERE col1 = 2;
8+
9+
SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k;
10+
11+
-- Aliasing the wrong number of columns in the FROM clause
12+
SELECT * FROM testData AS t(col1, col2, col3);
13+
14+
SELECT * FROM testData AS t(col1);
15+
16+
-- Check alias duplication
17+
SELECT a AS col1, b AS col2 FROM testData AS t(c, d);

0 commit comments

Comments
 (0)