From e4f003ac2dc10e71c39632d5e6412a33db59f80d Mon Sep 17 00:00:00 2001
From: Shrikant Prasad
Date: Fri, 3 Mar 2023 01:14:16 +0530
Subject: [PATCH] [SPARK-42655][SQL]: Fix incorrect ambiguous column reference
 error

---
 .../apache/spark/sql/catalyst/expressions/package.scala  | 2 +-
 .../test/scala/org/apache/spark/sql/DataFrameSuite.scala | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 74f0875c28539..67936c36b41a1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -342,7 +342,7 @@ package object expressions {
       // attribute metadata to indicate that they are from metadata columns, but they should not
       // keep any restrictions that may break column resolution for normal attributes.
       // See SPARK-42084 for more details.
-      prunedCandidates.map(_.markAsAllowAnyAccess()) match {
+      prunedCandidates.distinct.map(_.markAsAllowAnyAccess()) match {
         case Seq(a) if nestedFields.nonEmpty =>
           // One match, but we also need to extract the requested nested field.
           // The foldLeft adds ExtractValues for every remaining parts of the identifier,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index d4c4c7c9b16ce..1dc0254a0f9a3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2768,6 +2768,15 @@ class DataFrameSuite extends QueryTest
     checkAnswer(swappedDf.filter($"key"($"map") > "a"), Row(2, Map(2 -> "b")))
   }
 
+  test("SPARK-42655 Fix ambiguous column reference error") {
+    val df1 = sparkContext.parallelize(List((1, 2, 3, 4, 5))).toDF("id", "col2", "col3",
+      "col4", "col5")
+    val op_cols_mixed_case = List("id", "col2", "col3", "col4", "col5", "ID")
+    val df2 = df1.select(op_cols_mixed_case.head, op_cols_mixed_case.tail: _*)
+    // should not throw any error.
+    checkAnswer(df2.select("id"), Row(1))
+  }
+
   test("SPARK-26057: attribute deduplication on already analyzed plans") {
     withTempView("a", "b", "v") {
       val df1 = Seq(("1-1", 6)).toDF("id", "n")
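
Note (outside the patch): a minimal reproduction sketch of the scenario the new test covers, assuming a spark-shell session where spark and spark.implicits._ are available and spark.sql.caseSensitive is left at its default (false). Re-selecting an existing column under a different case yields duplicate candidate attributes for the same underlying column; prunedCandidates.distinct collapses them into one instead of reporting an ambiguous reference.

    import spark.implicits._

    // "ID" re-selects the existing "id" column in a different case; with
    // case-insensitive resolution the output carries duplicate attributes.
    val df1 = Seq((1, 2, 3, 4, 5)).toDF("id", "col2", "col3", "col4", "col5")
    val cols = List("id", "col2", "col3", "col4", "col5", "ID")
    val df2 = df1.select(cols.head, cols.tail: _*)

    // Both candidates are the same attribute, so after deduplication this
    // resolves cleanly and returns a single row containing 1.
    df2.select("id").show()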