From 807e13e8f8f855ca3891635a4b82b80afd369459 Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Tue, 6 Jan 2015 21:07:22 +0800 Subject: [PATCH] Trims aliases when resolving and checking aggregate expressions --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 8 ++++---- .../apache/spark/sql/catalyst/planning/patterns.scala | 8 ++++---- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 11 +++++++++++ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 72680f37a0b4d..7a294694ff911 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -209,10 +209,10 @@ class Analyzer(catalog: Catalog, aggregateExprs.find { e => !isValidAggregateExpression(e.transform { - // Should trim aliases around `GetField`s. These aliases are introduced while - // resolving struct field accesses, because `GetField` is not a `NamedExpression`. - // (Should we just turn `GetField` into a `NamedExpression`?) - case Alias(g: GetField, _) => g + // Should trim aliases. These aliases can only be introduced while resolving unnamed + // expressions like `GetField` and UDF calls, because GROUP BY clause doesn't allow + // aliasing. + case a: Alias => a.child }) }.foreach { e => throw new TreeNodeException(plan, s"Expression not in GROUP BY: $e") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala index 310d127506d68..ec6f1c116b24e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala @@ -153,11 +153,11 @@ object PartialAggregation { partialEvaluations(new TreeNodeRef(e)).finalEvaluation case e: Expression => - // Should trim aliases around `GetField`s. These aliases are introduced while - // resolving struct field accesses, because `GetField` is not a `NamedExpression`. - // (Should we just turn `GetField` into a `NamedExpression`?) namedGroupingExpressions - .get(e.transform { case Alias(g: GetField, _) => g }) + // Should trim aliases. These aliases can only be introduced while resolving unnamed + // expressions like `GetField` and UDF calls, because GROUP BY clause doesn't allow + // aliasing. + .get(e.transform { case a: Alias => a.child }) .map(_.toAttribute) .getOrElse(e) }).asInstanceOf[Seq[NamedExpression]] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index add4e218a22ee..05fffb6ff36b9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -980,6 +980,17 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { dropTempTable("data") } + test("SPARK-4296 Grouping field with UDF as sub expression") { + registerFunction("triple", (_: Int) * 3) + jsonRDD(sparkContext.makeRDD("""{"a": 1}""" :: Nil)).registerTempTable("data") + checkAnswer(sql("SELECT triple(a) FROM data GROUP BY triple(a)"), 3) + dropTempTable("data") + + jsonRDD(sparkContext.makeRDD("""{"a": 1}""" :: Nil)).registerTempTable("data") + checkAnswer(sql("SELECT triple(a) + 1 FROM data GROUP BY triple(a) + 1"), 4) + dropTempTable("data") + } + test("SPARK-4432 Fix attribute reference resolution error when using ORDER BY") { checkAnswer( sql("SELECT a + b FROM testData2 ORDER BY a"),