Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/sql-migration-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ license: |

- The decimal string representation can be different between Hive 1.2 and Hive 2.3 when using `TRANSFORM` operator in SQL for script transformation, which depends on hive's behavior. In Hive 1.2, the string representation omits trailing zeroes. But in Hive 2.3, it is always padded to 18 digits with trailing zeroes if necessary.

- Since Spark 3.0, group by alias fails if there are name conflicts like `SELECT col + 1 as col FROM t GROUP BY col`. In Spark version 2.4 and earlier, it works and the column will be resolved using child output. To restore the previous behaviour, set `spark.sql.legacy.allowAmbiguousGroupByAlias` to `true`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: like -> such as


## Upgrading from Spark SQL 2.4.4 to 2.4.5

- Since Spark 2.4.5, `TRUNCATE TABLE` command tries to set back original permission and ACLs during re-creating the table/partition paths. To restore the behaviour of earlier versions, set `spark.sql.truncateTable.ignorePermissionAcl.enabled` to `true`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1636,9 +1636,33 @@ class Analyzer(

override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
case agg @ Aggregate(groups, aggs, child)
if conf.groupByAliases && child.resolved && aggs.forall(_.resolved) &&
groups.exists(!_.resolved) =>
agg.copy(groupingExpressions = mayResolveAttrByAggregateExprs(groups, aggs, child))
if conf.groupByAliases && child.resolved && aggs.forall(_.resolved) =>
if (groups.exists(!_.resolved)) {
agg.copy(groupingExpressions = mayResolveAttrByAggregateExprs(groups, aggs, child))
} else {
// If we can resolve the GROUP BY column with both child output or aliased column in
// the SELECT clause, we should fail as it's ambiguous.
// Note: this can't be done in `CheckAnalysis`, as the alias in the grouping expressions
// will be removed at that time.
if (!SQLConf.get.getConf(SQLConf.LEGACY_ALLOW_AMBIGUOUS_GROUP_BY_ALIAS)) {
groups.foreach {
case attr: Attribute =>
val resolvedWithAlias = aggs.find {
case a: Alias => resolver(a.name, attr.name)
case _ => false
}
if (resolvedWithAlias.isDefined) {
throw new AnalysisException(s"GROUP BY column ${attr.name} is ambiguous. It " +
"can refer to a column in the child plan, or an aliased column in the " +
"SELECT clause. Please fix the name conflicts, or set " +
s"${SQLConf.LEGACY_ALLOW_AMBIGUOUS_GROUP_BY_ALIAS.key} to true to resolve " +
"the column with the child plan.")
}
case _ =>
}
}
agg
}

case gs @ GroupingSets(selectedGroups, groups, child, aggs)
if conf.groupByAliases && child.resolved && aggs.forall(_.resolved) &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2453,6 +2453,15 @@ object SQLConf {
.booleanConf
.createWithDefault(false)

val LEGACY_ALLOW_AMBIGUOUS_GROUP_BY_ALIAS =
buildConf("spark.sql.legacy.allowAmbiguousGroupByAlias")
.doc(s"When ${GROUP_BY_ALIASES.key} is enabled and this conf is true, Spark will resolve " +
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: conf -> configuration

"the GROUP BY column using child's output, even though there is an ambiguous alias in " +
"the SELECT clause. Id false, Spark fails the query.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit Id -> if

.version("3.0.0")
.booleanConf
.createWithDefault(false)

/**
* Holds information about keys that have been deprecated.
*
Expand Down
18 changes: 18 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3454,6 +3454,24 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
""".stripMargin)
checkAnswer(df2, Row(1) :: Nil)
}

test("SPARK-31022: group by alias should fail if there are name conflicts") {
withTempView("v") {
spark.range(10).toDF("col").createTempView("v")
checkAnswer(sql("SELECT div(col, 10) AS new_col FROM v GROUP BY new_col"), Row(0))

val e = intercept[AnalysisException] {
sql("SELECT div(col, 10) AS col FROM v GROUP BY col")
}
assert(e.message.contains("ambiguous"))

withSQLConf(SQLConf.LEGACY_ALLOW_AMBIGUOUS_GROUP_BY_ALIAS.key -> "true") {
checkAnswer(
sql("SELECT div(col, 10) AS col FROM v GROUP BY col"),
spark.range(10).select(lit(0).as("col")))
}
}
}
}

case class Foo(bar: Option[String])