From 5544ff21289ffbe5fa19da143c462573ab99fa64 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 17 Apr 2017 20:05:38 +0900 Subject: [PATCH 01/11] Add string concatenate operator || to Spark SQL --- .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 4 +++- .../spark/sql/execution/SparkSqlParser.scala | 10 +++++++++- .../spark/sql/execution/SparkSqlParserSuite.scala | 15 +++++++++++++-- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 14c511f67060..0e1160f97437 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -562,6 +562,7 @@ primaryExpression | '(' query ')' #subqueryExpression | qualifiedName '(' (setQuantifier? namedExpression (',' namedExpression)*)? ')' (OVER windowSpec)? #functionCall + | primaryExpression (CONCAT_PIPE primaryExpression)+ #concat | value=primaryExpression '[' index=valueExpression ']' #subscript | identifier #columnReference | base=primaryExpression '.' 
fieldName=identifier #dereference @@ -582,7 +583,7 @@ comparisonOperator ; arithmeticOperator - : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | HAT + : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCATE_PIPE | HAT ; predicateOperator @@ -861,6 +862,7 @@ DIV: 'DIV'; TILDE: '~'; AMPERSAND: '&'; PIPE: '|'; +CONCAT_PIPE: '||'; HAT: '^'; PERCENTLIT: 'PERCENT'; diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 20dacf88504f..55cad7b1777c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -27,7 +27,7 @@ import org.antlr.v4.runtime.tree.TerminalNode import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions.{Concat, Expression} import org.apache.spark.sql.catalyst.parser._ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -1483,4 +1483,12 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { query: LogicalPlan): LogicalPlan = { RepartitionByExpression(expressions, query, conf.numShufflePartitions) } + + /** + * Create a [[Concat]] expression for pipeline concatenation. 
+ */ + override def visitConcat(ctx: ConcatContext): Expression = { + val exprs = ctx.primaryExpression().asScala + Concat(expression(exprs.head) +: exprs.drop(1).map(expression)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 908b955abbf0..4d0bfd8d7313 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -18,13 +18,14 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.catalyst.analysis.UnresolvedAlias import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType} -import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder} +import org.apache.spark.sql.catalyst.expressions.{Ascending, Concat, Literal, SortOrder} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, RepartitionByExpression, Sort} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, Project, RepartitionByExpression, Sort} import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -290,4 +291,14 @@ class SparkSqlParserSuite extends PlanTest { basePlan, numPartitions = newConf.numShufflePartitions))) } + + test("pipeline concatenation") { + val concat = Concat( + UnresolvedAttribute("a") :: + Concat(UnresolvedAttribute("b") :: UnresolvedAttribute("c") :: Nil) :: + Nil) + 
assertEqual( + "SELECT a || b || c FROM t", + Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t")))) + } } From 05d490ecec3feb7c2dab32d377fe2a9a1f33fffb Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 21 Apr 2017 10:37:35 +0900 Subject: [PATCH 02/11] Apply comments --- .../spark/sql/execution/SparkSqlParser.scala | 2 +- .../sql-tests/inputs/string-functions.sql | 3 +++ .../results/string-functions.sql.out | 20 +++++++++++++------ .../sql/execution/SparkSqlParserSuite.scala | 7 +++---- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 55cad7b1777c..127b0fb088f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -1489,6 +1489,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { */ override def visitConcat(ctx: ConcatContext): Expression = { val exprs = ctx.primaryExpression().asScala - Concat(expression(exprs.head) +: exprs.drop(1).map(expression)) + Concat(exprs.map(expression)) } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index f21981ef7b72..b9cdda318a39 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -1,3 +1,6 @@ +-- A pipe operation for string concatenation +select 'a' || 'b' || 'c'; + -- Argument number exception select concat_ws(); select format_string(); diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 6961e9b65922..b98dc86e7e9e 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,20 +1,28 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 2 +-- Number of queries: 3 -- !query 0 -select concat_ws() +select 'a' || 'b' || 'c' -- !query 0 schema -struct<> +struct -- !query 0 output -org.apache.spark.sql.AnalysisException -requirement failed: concat_ws requires at least one argument.; line 1 pos 7 +abc -- !query 1 -select format_string() +select concat_ws() -- !query 1 schema struct<> -- !query 1 output org.apache.spark.sql.AnalysisException +requirement failed: concat_ws requires at least one argument.; line 1 pos 7 + + +-- !query 2 +select format_string() +-- !query 2 schema +struct<> +-- !query 2 output +org.apache.spark.sql.AnalysisException requirement failed: format_string() should take at least 1 argument; line 1 pos 7 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 4d0bfd8d7313..0cfd4c2a44a4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -18,14 +18,13 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.SaveMode -import org.apache.spark.sql.catalyst.analysis.UnresolvedAlias import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType} -import org.apache.spark.sql.catalyst.expressions.{Ascending, Concat, Literal, SortOrder} +import 
org.apache.spark.sql.catalyst.expressions.{Ascending, Concat, SortOrder} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, Project, RepartitionByExpression, Sort} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, RepartitionByExpression, Sort} import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} From afcd950f95dbfa6d80858f59a2a5d7385bcebe6a Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 21 Apr 2017 13:43:04 +0900 Subject: [PATCH 03/11] Apply review comments --- .../sql-tests/inputs/string-functions.sql | 6 +++--- .../sql-tests/results/string-functions.sql.out | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index b9cdda318a39..7005cafe35ca 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -1,6 +1,6 @@ --- A pipe operation for string concatenation -select 'a' || 'b' || 'c'; - -- Argument number exception select concat_ws(); select format_string(); + +-- A pipe operator for string concatenation +select 'a' || 'b' || 'c'; diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index b98dc86e7e9e..bc406fecc50d 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -3,26 +3,26 @@ -- !query 0 -select 'a' || 'b' || 'c' +select concat_ws() -- !query 0 schema -struct +struct<> -- !query 0 output -abc 
+org.apache.spark.sql.AnalysisException +requirement failed: concat_ws requires at least one argument.; line 1 pos 7 -- !query 1 -select concat_ws() +select format_string() -- !query 1 schema struct<> -- !query 1 output org.apache.spark.sql.AnalysisException -requirement failed: concat_ws requires at least one argument.; line 1 pos 7 +requirement failed: format_string() should take at least 1 argument; line 1 pos 7 -- !query 2 -select format_string() +select 'a' || 'b' || 'c' -- !query 2 schema -struct<> +struct -- !query 2 output -org.apache.spark.sql.AnalysisException -requirement failed: format_string() should take at least 1 argument; line 1 pos 7 +abc From f89d131c5681a5d492bb4cb8230f44cdde1cf62f Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 21 Apr 2017 14:31:58 +0900 Subject: [PATCH 04/11] Brush up parsing rules --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 7 +++++-- .../org/apache/spark/sql/execution/SparkSqlParser.scala | 8 ++++++-- .../apache/spark/sql/execution/SparkSqlParserSuite.scala | 3 ++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 0e1160f97437..b5ed4eb6cc44 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -538,7 +538,7 @@ predicate ; valueExpression - : primaryExpression #valueExpressionDefault + : builtinOpExpression #valueExpressionDefault | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary | left=valueExpression operator=(PLUS | MINUS) right=valueExpression #arithmeticBinary @@ -548,6 +548,10 @@ valueExpression | left=valueExpression comparisonOperator right=valueExpression #comparison ; 
+builtinOpExpression + : primaryExpression (CONCAT_PIPE primaryExpression)* #concat + ; + primaryExpression : name=(CURRENT_DATE | CURRENT_TIMESTAMP) #timeFunctionCall | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase @@ -562,7 +566,6 @@ primaryExpression | '(' query ')' #subqueryExpression | qualifiedName '(' (setQuantifier? namedExpression (',' namedExpression)*)? ')' (OVER windowSpec)? #functionCall - | primaryExpression (CONCAT_PIPE primaryExpression)+ #concat | value=primaryExpression '[' index=valueExpression ']' #subscript | identifier #columnReference | base=primaryExpression '.' fieldName=identifier #dereference diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 127b0fb088f6..91b57a2f5006 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -1488,7 +1488,11 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { * Create a [[Concat]] expression for pipeline concatenation. 
*/ override def visitConcat(ctx: ConcatContext): Expression = { - val exprs = ctx.primaryExpression().asScala - Concat(exprs.map(expression)) + if (ctx.primaryExpression().size > 1) { + val exprs = ctx.primaryExpression().asScala + Concat(exprs.map(expression)) + } else { + expression(ctx.primaryExpression(0)) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 0cfd4c2a44a4..85f85395a76c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -294,7 +294,8 @@ class SparkSqlParserSuite extends PlanTest { test("pipeline concatenation") { val concat = Concat( UnresolvedAttribute("a") :: - Concat(UnresolvedAttribute("b") :: UnresolvedAttribute("c") :: Nil) :: + UnresolvedAttribute("b") :: + UnresolvedAttribute("c") :: Nil) assertEqual( "SELECT a || b || c FROM t", From 595754548cfd66210c80cc95ea066a959a2d337d Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 21 Apr 2017 18:51:12 +0900 Subject: [PATCH 05/11] Move the logic to CatalystSqlParser --- .../spark/sql/catalyst/parser/AstBuilder.scala | 12 ++++++++++++ .../apache/spark/sql/execution/SparkSqlParser.scala | 12 ------------ .../sql-tests/results/string-functions.sql.out | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d2a9b4a9a9f5..a4475fc37e58 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -912,6 +912,18 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { } } + /** + * Create a [[Concat]] 
expression for pipeline concatenation. + */ + override def visitConcat(ctx: ConcatContext): Expression = { + if (ctx.primaryExpression().size > 1) { + val exprs = ctx.primaryExpression().asScala + Concat(exprs.map(expression)) + } else { + expression(ctx.primaryExpression(0)) + } + } + /** * Create a predicated expression. A predicated expression is a normal expression with a * predicate attached to it, for example: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 91b57a2f5006..6e3fd9ec7927 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -1483,16 +1483,4 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { query: LogicalPlan): LogicalPlan = { RepartitionByExpression(expressions, query, conf.numShufflePartitions) } - - /** - * Create a [[Concat]] expression for pipeline concatenation. 
- */ - override def visitConcat(ctx: ConcatContext): Expression = { - if (ctx.primaryExpression().size > 1) { - val exprs = ctx.primaryExpression().asScala - Concat(exprs.map(expression)) - } else { - expression(ctx.primaryExpression(0)) - } - } } diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index bc406fecc50d..bcfb121a2bb5 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -23,6 +23,6 @@ requirement failed: format_string() should take at least 1 argument; line 1 pos -- !query 2 select 'a' || 'b' || 'c' -- !query 2 schema -struct +struct -- !query 2 output abc From cb4b26e5e3bf112afadf69f0eacbd71a464fedaf Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 8 May 2017 22:34:45 +0900 Subject: [PATCH 06/11] Make the concat precedence the same with +/- --- .../spark/sql/catalyst/parser/SqlBase.g4 | 10 +- .../sql/catalyst/parser/AstBuilder.scala | 14 +- .../spark/sql/execution/SparkSqlParser.scala | 2 +- .../inputs/{arithmetic.sql => operator.sql} | 8 + .../sql-tests/results/operator.sql.out | 286 ++++++++++++++++++ .../results/string-functions.sql.out | 2 +- .../sql/execution/SparkSqlParserSuite.scala | 6 +- 7 files changed, 304 insertions(+), 24 deletions(-) rename sql/core/src/test/resources/sql-tests/inputs/{arithmetic.sql => operator.sql} (73%) create mode 100644 sql/core/src/test/resources/sql-tests/results/operator.sql.out diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index b5ed4eb6cc44..270e4ce2019a 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -538,20 +538,16 @@ predicate ; 
valueExpression - : builtinOpExpression #valueExpressionDefault + : primaryExpression #valueExpressionDefault | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary - | left=valueExpression operator=(PLUS | MINUS) right=valueExpression #arithmeticBinary + | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary | left=valueExpression comparisonOperator right=valueExpression #comparison ; -builtinOpExpression - : primaryExpression (CONCAT_PIPE primaryExpression)* #concat - ; - primaryExpression : name=(CURRENT_DATE | CURRENT_TIMESTAMP) #timeFunctionCall | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase @@ -586,7 +582,7 @@ comparisonOperator ; arithmeticOperator - : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCATE_PIPE | HAT + : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT ; predicateOperator diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a4475fc37e58..e87f3a46a890 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -912,18 +912,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { } } - /** - * Create a [[Concat]] expression for pipeline concatenation. 
- */ - override def visitConcat(ctx: ConcatContext): Expression = { - if (ctx.primaryExpression().size > 1) { - val exprs = ctx.primaryExpression().asScala - Concat(exprs.map(expression)) - } else { - expression(ctx.primaryExpression(0)) - } - } - /** * Create a predicated expression. A predicated expression is a normal expression with a * predicate attached to it, for example: @@ -1010,6 +998,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { Add(left, right) case SqlBaseParser.MINUS => Subtract(left, right) + case SqlBaseParser.CONCAT_PIPE => + Concat(left :: right :: Nil) case SqlBaseParser.AMPERSAND => BitwiseAnd(left, right) case SqlBaseParser.HAT => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 6e3fd9ec7927..20dacf88504f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -27,7 +27,7 @@ import org.antlr.v4.runtime.tree.TerminalNode import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.{Concat, Expression} +import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.parser._ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans.logical._ diff --git a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/operator.sql similarity index 73% rename from sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql rename to sql/core/src/test/resources/sql-tests/inputs/operator.sql index f62b10ca0037..18c2c3eb9ebb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql +++ 
b/sql/core/src/test/resources/sql-tests/inputs/operator.sql @@ -32,3 +32,11 @@ select 1 - 2; select 2 * 5; select 5 % 3; select pmod(-7, 3); + +-- check operator precedence +explain select 'a' || 1 + 2; +explain select 1 - 2 || 'b'; +explain select 2 * 4 + 3 || 'b'; +explain select 3 + 1 || 'a' || 4 / 2; +explain select 1 == 1 OR 'a' || 'b' == 'ab'; +explain select 'a' || 'c' == 'ac' AND 2 == 3; diff --git a/sql/core/src/test/resources/sql-tests/results/operator.sql.out b/sql/core/src/test/resources/sql-tests/results/operator.sql.out new file mode 100644 index 000000000000..e0236f41187e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/operator.sql.out @@ -0,0 +1,286 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 34 + + +-- !query 0 +select -100 +-- !query 0 schema +struct<-100:int> +-- !query 0 output +-100 + + +-- !query 1 +select +230 +-- !query 1 schema +struct<230:int> +-- !query 1 output +230 + + +-- !query 2 +select -5.2 +-- !query 2 schema +struct<-5.2:decimal(2,1)> +-- !query 2 output +-5.2 + + +-- !query 3 +select +6.8e0 +-- !query 3 schema +struct<6.8:decimal(2,1)> +-- !query 3 output +6.8 + + +-- !query 4 +select -key, +key from testdata where key = 2 +-- !query 4 schema +struct<(- key):int,key:int> +-- !query 4 output +-2 2 + + +-- !query 5 +select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1 +-- !query 5 schema +struct<(- (key + 1)):int,((- key) + 1):int,(key + 5):int> +-- !query 5 output +-2 0 6 + + +-- !query 6 +select -max(key), +max(key) from testdata +-- !query 6 schema +struct<(- max(key)):int,max(key):int> +-- !query 6 output +-100 100 + + +-- !query 7 +select - (-10) +-- !query 7 schema +struct<(- -10):int> +-- !query 7 output +10 + + +-- !query 8 +select + (-key) from testdata where key = 32 +-- !query 8 schema +struct<(- key):int> +-- !query 8 output +-32 + + +-- !query 9 +select - (+max(key)) from testdata +-- !query 9 schema +struct<(- max(key)):int> +-- !query 9 output +-100 + + 
+-- !query 10 +select - - 3 +-- !query 10 schema +struct<(- -3):int> +-- !query 10 output +3 + + +-- !query 11 +select - + 20 +-- !query 11 schema +struct<(- 20):int> +-- !query 11 output +-20 + + +-- !query 12 +select + + 100 +-- !query 12 schema +struct<100:int> +-- !query 12 output +100 + + +-- !query 13 +select - - max(key) from testdata +-- !query 13 schema +struct<(- (- max(key))):int> +-- !query 13 output +100 + + +-- !query 14 +select + - key from testdata where key = 33 +-- !query 14 schema +struct<(- key):int> +-- !query 14 output +-33 + + +-- !query 15 +select 5 / 2 +-- !query 15 schema +struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double> +-- !query 15 output +2.5 + + +-- !query 16 +select 5 / 0 +-- !query 16 schema +struct<(CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)):double> +-- !query 16 output +NULL + + +-- !query 17 +select 5 / null +-- !query 17 schema +struct<(CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)):double> +-- !query 17 output +NULL + + +-- !query 18 +select null / 5 +-- !query 18 schema +struct<(CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)):double> +-- !query 18 output +NULL + + +-- !query 19 +select 5 div 2 +-- !query 19 schema +struct +-- !query 19 output +2 + + +-- !query 20 +select 5 div 0 +-- !query 20 schema +struct +-- !query 20 output +NULL + + +-- !query 21 +select 5 div null +-- !query 21 schema +struct +-- !query 21 output +NULL + + +-- !query 22 +select null div 5 +-- !query 22 schema +struct +-- !query 22 output +NULL + + +-- !query 23 +select 1 + 2 +-- !query 23 schema +struct<(1 + 2):int> +-- !query 23 output +3 + + +-- !query 24 +select 1 - 2 +-- !query 24 schema +struct<(1 - 2):int> +-- !query 24 output +-1 + + +-- !query 25 +select 2 * 5 +-- !query 25 schema +struct<(2 * 5):int> +-- !query 25 output +10 + + +-- !query 26 +select 5 % 3 +-- !query 26 schema +struct<(5 % 3):int> +-- !query 26 output +2 + + +-- !query 27 +select pmod(-7, 3) +-- !query 27 schema +struct +-- !query 27 output +2 + + +-- !query 28 +explain select 'a' || 1 + 
2 +-- !query 28 schema +struct +-- !query 28 output +== Physical Plan == +*Project [null AS (CAST(concat(a, CAST(1 AS STRING)) AS DOUBLE) + CAST(2 AS DOUBLE))#x] ++- Scan OneRowRelation[] + + +-- !query 29 +explain select 1 - 2 || 'b' +-- !query 29 schema +struct +-- !query 29 output +== Physical Plan == +*Project [-1b AS concat(CAST((1 - 2) AS STRING), b)#x] ++- Scan OneRowRelation[] + + +-- !query 30 +explain select 2 * 4 + 3 || 'b' +-- !query 30 schema +struct +-- !query 30 output +== Physical Plan == +*Project [11b AS concat(CAST(((2 * 4) + 3) AS STRING), b)#x] ++- Scan OneRowRelation[] + + +-- !query 31 +explain select 3 + 1 || 'a' || 4 / 2 +-- !query 31 schema +struct +-- !query 31 output +== Physical Plan == +*Project [4a2.0 AS concat(concat(CAST((3 + 1) AS STRING), a), CAST((CAST(4 AS DOUBLE) / CAST(2 AS DOUBLE)) AS STRING))#x] ++- Scan OneRowRelation[] + + +-- !query 32 +explain select 1 == 1 OR 'a' || 'b' == 'ab' +-- !query 32 schema +struct +-- !query 32 output +== Physical Plan == +*Project [true AS ((1 = 1) OR (concat(a, b) = ab))#x] ++- Scan OneRowRelation[] + + +-- !query 33 +explain select 'a' || 'c' == 'ac' AND 2 == 3 +-- !query 33 schema +struct +-- !query 33 output +== Physical Plan == +*Project [false AS ((concat(a, c) = ac) AND (2 = 3))#x] ++- Scan OneRowRelation[] diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index bcfb121a2bb5..8ee075118e10 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -23,6 +23,6 @@ requirement failed: format_string() should take at least 1 argument; line 1 pos -- !query 2 select 'a' || 'b' || 'c' -- !query 2 schema -struct +struct -- !query 2 output abc diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 85f85395a76c..b32fb90e1007 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -293,10 +293,10 @@ class SparkSqlParserSuite extends PlanTest { test("pipeline concatenation") { val concat = Concat( - UnresolvedAttribute("a") :: - UnresolvedAttribute("b") :: + Concat(UnresolvedAttribute("a") :: UnresolvedAttribute("b") :: Nil) :: UnresolvedAttribute("c") :: - Nil) + Nil + ) assertEqual( "SELECT a || b || c FROM t", Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t")))) From 8890b94189eb087bf51da5c3dd0880c33b8a1f20 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 10 May 2017 13:04:55 +0900 Subject: [PATCH 07/11] Remove arithmetic.sq.out --- .../sql-tests/results/arithmetic.sql.out | 226 ------------------ 1 file changed, 226 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out deleted file mode 100644 index ce42c016a710..000000000000 --- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out +++ /dev/null @@ -1,226 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 28 - - --- !query 0 -select -100 --- !query 0 schema -struct<-100:int> --- !query 0 output --100 - - --- !query 1 -select +230 --- !query 1 schema -struct<230:int> --- !query 1 output -230 - - --- !query 2 -select -5.2 --- !query 2 schema -struct<-5.2:decimal(2,1)> --- !query 2 output --5.2 - - --- !query 3 -select +6.8e0 --- !query 3 schema -struct<6.8:decimal(2,1)> --- !query 3 output -6.8 - - --- !query 4 -select -key, +key from testdata where key = 2 --- !query 4 schema -struct<(- key):int,key:int> --- !query 4 output --2 2 
- - --- !query 5 -select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1 --- !query 5 schema -struct<(- (key + 1)):int,((- key) + 1):int,(key + 5):int> --- !query 5 output --2 0 6 - - --- !query 6 -select -max(key), +max(key) from testdata --- !query 6 schema -struct<(- max(key)):int,max(key):int> --- !query 6 output --100 100 - - --- !query 7 -select - (-10) --- !query 7 schema -struct<(- -10):int> --- !query 7 output -10 - - --- !query 8 -select + (-key) from testdata where key = 32 --- !query 8 schema -struct<(- key):int> --- !query 8 output --32 - - --- !query 9 -select - (+max(key)) from testdata --- !query 9 schema -struct<(- max(key)):int> --- !query 9 output --100 - - --- !query 10 -select - - 3 --- !query 10 schema -struct<(- -3):int> --- !query 10 output -3 - - --- !query 11 -select - + 20 --- !query 11 schema -struct<(- 20):int> --- !query 11 output --20 - - --- !query 12 -select + + 100 --- !query 12 schema -struct<100:int> --- !query 12 output -100 - - --- !query 13 -select - - max(key) from testdata --- !query 13 schema -struct<(- (- max(key))):int> --- !query 13 output -100 - - --- !query 14 -select + - key from testdata where key = 33 --- !query 14 schema -struct<(- key):int> --- !query 14 output --33 - - --- !query 15 -select 5 / 2 --- !query 15 schema -struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double> --- !query 15 output -2.5 - - --- !query 16 -select 5 / 0 --- !query 16 schema -struct<(CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)):double> --- !query 16 output -NULL - - --- !query 17 -select 5 / null --- !query 17 schema -struct<(CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)):double> --- !query 17 output -NULL - - --- !query 18 -select null / 5 --- !query 18 schema -struct<(CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)):double> --- !query 18 output -NULL - - --- !query 19 -select 5 div 2 --- !query 19 schema -struct --- !query 19 output -2 - - --- !query 20 -select 5 div 0 --- !query 20 schema -struct --- !query 20 output -NULL - - --- !query 
21 -select 5 div null --- !query 21 schema -struct --- !query 21 output -NULL - - --- !query 22 -select null div 5 --- !query 22 schema -struct --- !query 22 output -NULL - - --- !query 23 -select 1 + 2 --- !query 23 schema -struct<(1 + 2):int> --- !query 23 output -3 - - --- !query 24 -select 1 - 2 --- !query 24 schema -struct<(1 - 2):int> --- !query 24 output --1 - - --- !query 25 -select 2 * 5 --- !query 25 schema -struct<(2 * 5):int> --- !query 25 output -10 - - --- !query 26 -select 5 % 3 --- !query 26 schema -struct<(5 % 3):int> --- !query 26 output -2 - - --- !query 27 -select pmod(-7, 3) --- !query 27 schema -struct --- !query 27 output -2 From df3869abaca8db751cfabee47a83f7ad5b4caf5b Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 10 May 2017 15:35:42 +0900 Subject: [PATCH 08/11] Rename files --- .../resources/sql-tests/inputs/{operator.sql => operators.sql} | 0 .../sql-tests/results/{operator.sql.out => operators.sql.out} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename sql/core/src/test/resources/sql-tests/inputs/{operator.sql => operators.sql} (100%) rename sql/core/src/test/resources/sql-tests/results/{operator.sql.out => operators.sql.out} (100%) diff --git a/sql/core/src/test/resources/sql-tests/inputs/operator.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql similarity index 100% rename from sql/core/src/test/resources/sql-tests/inputs/operator.sql rename to sql/core/src/test/resources/sql-tests/inputs/operators.sql diff --git a/sql/core/src/test/resources/sql-tests/results/operator.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out similarity index 100% rename from sql/core/src/test/resources/sql-tests/results/operator.sql.out rename to sql/core/src/test/resources/sql-tests/results/operators.sql.out From c88652c6d3fe1070cc24109081d72834f576f73f Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Thu, 11 May 2017 12:18:45 +0900 Subject: [PATCH 09/11] Add a new rule to collapse multiple 
concats in Optimizer --- .../sql/catalyst/optimizer/Optimizer.scala | 26 ++++++++++++++ .../resources/sql-tests/inputs/operators.sql | 12 +++---- .../sql-tests/inputs/string-functions.sql | 6 +++- .../sql-tests/results/operators.sql.out | 12 +++---- .../results/string-functions.sql.out | 34 ++++++++++++++++--- 5 files changed, 73 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f2b9764b0f08..446d9d4d63a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -87,6 +87,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) CollapseRepartition, CollapseProject, CollapseWindow, + CollapseConcat, CombineFilters, CombineLimits, CombineUnions, @@ -608,6 +609,31 @@ object CollapseWindow extends Rule[LogicalPlan] { } } +/** + * Collapse nested [[Concat]] expressions. 
+ */ +object CollapseConcat extends Rule[LogicalPlan] { + + private def extractConcatExprs(e: Concat): Seq[Expression] = { + e.children.foldLeft(mutable.ArrayBuffer[Expression]()) { case (exprList, e) => + exprList ++= (e match { + case concat: Concat => extractConcatExprs(concat) + case _ => e :: Nil + }) + } + } + + def apply(plan: LogicalPlan): LogicalPlan = plan.transform { + case p @ Project(exprs, _) if exprs.exists(_.collect { case _: Concat => true }.size > 1) => + val projectList = exprs.map { expr => + expr.transformDown { + case concat: Concat => Concat(extractConcatExprs(concat)) + } + }.asInstanceOf[Seq[NamedExpression]] + p.copy(projectList = projectList) + } +} + /** * Generate a list of additional filters from an operator's existing constraint but remove those * that are either already part of the operator's condition or are part of the operator's child diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql index 18c2c3eb9ebb..56efcb0e0b89 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/operators.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql @@ -34,9 +34,9 @@ select 5 % 3; select pmod(-7, 3); -- check operator precedence -explain select 'a' || 1 + 2; -explain select 1 - 2 || 'b'; -explain select 2 * 4 + 3 || 'b'; -explain select 3 + 1 || 'a' || 4 / 2; -explain select 1 == 1 OR 'a' || 'b' == 'ab'; -explain select 'a' || 'c' == 'ac' AND 2 == 3; +EXPLAIN SELECT 'a' || 1 + 2; +EXPLAIN SELECT 1 - 2 || 'b'; +EXPLAIN SELECT 2 * 4 + 3 || 'b'; +EXPLAIN SELECT 3 + 1 || 'a' || 4 / 2; +EXPLAIN SELECT 1 == 1 OR 'a' || 'b' == 'ab'; +EXPLAIN SELECT 'a' || 'c' == 'ac' AND 2 == 3; diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 7005cafe35ca..01cf82e04a33 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ 
b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -3,4 +3,8 @@ select concat_ws(); select format_string(); -- A pipe operator for string concatenation -select 'a' || 'b' || 'c'; +SELECT 'a' || 'b'; + +-- Check if catalyst collapses multiple `Concat`s +EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col +FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)); diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index e0236f41187e..4bbf87d1eda6 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -227,7 +227,7 @@ struct -- !query 28 -explain select 'a' || 1 + 2 +EXPLAIN SELECT 'a' || 1 + 2 -- !query 28 schema struct -- !query 28 output @@ -237,7 +237,7 @@ struct -- !query 29 -explain select 1 - 2 || 'b' +EXPLAIN SELECT 1 - 2 || 'b' -- !query 29 schema struct -- !query 29 output @@ -247,7 +247,7 @@ struct -- !query 30 -explain select 2 * 4 + 3 || 'b' +EXPLAIN SELECT 2 * 4 + 3 || 'b' -- !query 30 schema struct -- !query 30 output @@ -257,7 +257,7 @@ struct -- !query 31 -explain select 3 + 1 || 'a' || 4 / 2 +EXPLAIN SELECT 3 + 1 || 'a' || 4 / 2 -- !query 31 schema struct -- !query 31 output @@ -267,7 +267,7 @@ struct -- !query 32 -explain select 1 == 1 OR 'a' || 'b' == 'ab' +EXPLAIN SELECT 1 == 1 OR 'a' || 'b' == 'ab' -- !query 32 schema struct -- !query 32 output @@ -277,7 +277,7 @@ struct -- !query 33 -explain select 'a' || 'c' == 'ac' AND 2 == 3 +EXPLAIN SELECT 'a' || 'c' == 'ac' AND 2 == 3 -- !query 33 schema struct -- !query 33 output diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 8ee075118e10..e628bf23d119 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 3 +-- Number of queries: 4 -- !query 0 @@ -21,8 +21,34 @@ requirement failed: format_string() should take at least 1 argument; line 1 pos -- !query 2 -select 'a' || 'b' || 'c' +SELECT 'a' || 'b' -- !query 2 schema -struct +struct -- !query 2 output -abc +ab + + +-- !query 3 +EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col +FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) +-- !query 3 schema +struct +-- !query 3 output +== Parsed Logical Plan == +'Project [concat(concat(concat('col1, 'col2), 'col3), 'col4) AS col#x] ++- 'Project ['id AS col1#x, 'id AS col2#x, 'id AS col3#x, 'id AS col4#x] + +- 'UnresolvedTableValuedFunction range, [10] + +== Analyzed Logical Plan == +col: string +Project [concat(concat(concat(cast(col1#xL as string), cast(col2#xL as string)), cast(col3#xL as string)), cast(col4#xL as string)) AS col#x] ++- Project [id#xL AS col1#xL, id#xL AS col2#xL, id#xL AS col3#xL, id#xL AS col4#xL] + +- Range (0, 10, step=1, splits=None) + +== Optimized Logical Plan == +Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)) AS col#x] ++- Range (0, 10, step=1, splits=None) + +== Physical Plan == +*Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)) AS col#x] ++- *Range (0, 10, step=1, splits=2) From 96db57530f6af0dd9a9fad211b3838a3aa843fdc Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Thu, 11 May 2017 17:58:36 +0900 Subject: [PATCH 10/11] Revert "Add a new rule to collapse multiple concats in Optimizer" This reverts commit c88652c6d3fe1070cc24109081d72834f576f73f. 
--- .../sql/catalyst/optimizer/Optimizer.scala | 26 -------------- .../resources/sql-tests/inputs/operators.sql | 12 +++---- .../sql-tests/inputs/string-functions.sql | 6 +--- .../sql-tests/results/operators.sql.out | 12 +++---- .../results/string-functions.sql.out | 34 +++---------------- 5 files changed, 17 insertions(+), 73 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 446d9d4d63a6..f2b9764b0f08 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -87,7 +87,6 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) CollapseRepartition, CollapseProject, CollapseWindow, - CollapseConcat, CombineFilters, CombineLimits, CombineUnions, @@ -609,31 +608,6 @@ object CollapseWindow extends Rule[LogicalPlan] { } } -/** - * Collapse nested [[Concat]] expressions. 
- */ -object CollapseConcat extends Rule[LogicalPlan] { - - private def extractConcatExprs(e: Concat): Seq[Expression] = { - e.children.foldLeft(mutable.ArrayBuffer[Expression]()) { case (exprList, e) => - exprList ++= (e match { - case concat: Concat => extractConcatExprs(concat) - case _ => e :: Nil - }) - } - } - - def apply(plan: LogicalPlan): LogicalPlan = plan.transform { - case p @ Project(exprs, _) if exprs.exists(_.collect { case _: Concat => true }.size > 1) => - val projectList = exprs.map { expr => - expr.transformDown { - case concat: Concat => Concat(extractConcatExprs(concat)) - } - }.asInstanceOf[Seq[NamedExpression]] - p.copy(projectList = projectList) - } -} - /** * Generate a list of additional filters from an operator's existing constraint but remove those * that are either already part of the operator's condition or are part of the operator's child diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql index 56efcb0e0b89..18c2c3eb9ebb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/operators.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql @@ -34,9 +34,9 @@ select 5 % 3; select pmod(-7, 3); -- check operator precedence -EXPLAIN SELECT 'a' || 1 + 2; -EXPLAIN SELECT 1 - 2 || 'b'; -EXPLAIN SELECT 2 * 4 + 3 || 'b'; -EXPLAIN SELECT 3 + 1 || 'a' || 4 / 2; -EXPLAIN SELECT 1 == 1 OR 'a' || 'b' == 'ab'; -EXPLAIN SELECT 'a' || 'c' == 'ac' AND 2 == 3; +explain select 'a' || 1 + 2; +explain select 1 - 2 || 'b'; +explain select 2 * 4 + 3 || 'b'; +explain select 3 + 1 || 'a' || 4 / 2; +explain select 1 == 1 OR 'a' || 'b' == 'ab'; +explain select 'a' || 'c' == 'ac' AND 2 == 3; diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 01cf82e04a33..7005cafe35ca 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ 
b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -3,8 +3,4 @@ select concat_ws(); select format_string(); -- A pipe operator for string concatenation -SELECT 'a' || 'b'; - --- Check if catalyst collapses multiple `Concat`s -EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col -FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)); +select 'a' || 'b' || 'c'; diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index 4bbf87d1eda6..e0236f41187e 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -227,7 +227,7 @@ struct -- !query 28 -EXPLAIN SELECT 'a' || 1 + 2 +explain select 'a' || 1 + 2 -- !query 28 schema struct -- !query 28 output @@ -237,7 +237,7 @@ struct -- !query 29 -EXPLAIN SELECT 1 - 2 || 'b' +explain select 1 - 2 || 'b' -- !query 29 schema struct -- !query 29 output @@ -247,7 +247,7 @@ struct -- !query 30 -EXPLAIN SELECT 2 * 4 + 3 || 'b' +explain select 2 * 4 + 3 || 'b' -- !query 30 schema struct -- !query 30 output @@ -257,7 +257,7 @@ struct -- !query 31 -EXPLAIN SELECT 3 + 1 || 'a' || 4 / 2 +explain select 3 + 1 || 'a' || 4 / 2 -- !query 31 schema struct -- !query 31 output @@ -267,7 +267,7 @@ struct -- !query 32 -EXPLAIN SELECT 1 == 1 OR 'a' || 'b' == 'ab' +explain select 1 == 1 OR 'a' || 'b' == 'ab' -- !query 32 schema struct -- !query 32 output @@ -277,7 +277,7 @@ struct -- !query 33 -EXPLAIN SELECT 'a' || 'c' == 'ac' AND 2 == 3 +explain select 'a' || 'c' == 'ac' AND 2 == 3 -- !query 33 schema struct -- !query 33 output diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index e628bf23d119..8ee075118e10 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 4 +-- Number of queries: 3 -- !query 0 @@ -21,34 +21,8 @@ requirement failed: format_string() should take at least 1 argument; line 1 pos -- !query 2 -SELECT 'a' || 'b' +select 'a' || 'b' || 'c' -- !query 2 schema -struct +struct -- !query 2 output -ab - - --- !query 3 -EXPLAIN EXTENDED SELECT (col1 || col2 || col3 || col4) col -FROM (SELECT id col1, id col2, id col3, id col4 FROM range(10)) --- !query 3 schema -struct --- !query 3 output -== Parsed Logical Plan == -'Project [concat(concat(concat('col1, 'col2), 'col3), 'col4) AS col#x] -+- 'Project ['id AS col1#x, 'id AS col2#x, 'id AS col3#x, 'id AS col4#x] - +- 'UnresolvedTableValuedFunction range, [10] - -== Analyzed Logical Plan == -col: string -Project [concat(concat(concat(cast(col1#xL as string), cast(col2#xL as string)), cast(col3#xL as string)), cast(col4#xL as string)) AS col#x] -+- Project [id#xL AS col1#xL, id#xL AS col2#xL, id#xL AS col3#xL, id#xL AS col4#xL] - +- Range (0, 10, step=1, splits=None) - -== Optimized Logical Plan == -Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)) AS col#x] -+- Range (0, 10, step=1, splits=None) - -== Physical Plan == -*Project [concat(cast(id#xL as string), cast(id#xL as string), cast(id#xL as string), cast(id#xL as string)) AS col#x] -+- *Range (0, 10, step=1, splits=2) +abc From de89791d2f531c966571ce8a37049f159856e38d Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 12 May 2017 11:34:25 +0900 Subject: [PATCH 11/11] Add comments --- .../test/resources/sql-tests/inputs/operators.sql | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql index 18c2c3eb9ebb..6339d69ca647 100644 --- 
a/sql/core/src/test/resources/sql-tests/inputs/operators.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql @@ -33,7 +33,20 @@ select 2 * 5; select 5 % 3; select pmod(-7, 3); --- check operator precedence +-- check operator precedence. +-- We follow Oracle operator precedence in the table below that lists the levels of precedence +-- among SQL operators from high to low: +------------------------------------------------------------------------------------------ +-- Operator Operation +------------------------------------------------------------------------------------------ +-- +, - identity, negation +-- *, / multiplication, division +-- +, -, || addition, subtraction, concatenation +-- =, !=, <, >, <=, >=, IS NULL, LIKE, BETWEEN, IN comparison +-- NOT logical negation +-- AND conjunction +-- OR disjunction +------------------------------------------------------------------------------------------ explain select 'a' || 1 + 2; explain select 1 - 2 || 'b'; explain select 2 * 4 + 3 || 'b';