From c68e6f17a31f0693bb730c8b79ac2e4bf22cb28d Mon Sep 17 00:00:00 2001 From: Carmen Kwan Date: Tue, 6 Sep 2022 12:03:27 +0200 Subject: [PATCH] [SPARK-40315][SQL] Add deterministic hashCode() for Literal of ArrayBasedMapData --- .../sql/catalyst/expressions/literals.scala | 3 +++ .../expressions/ComplexTypeSuite.scala | 26 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 3a34afc06e1a9..f3ee251a0fb56 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -369,6 +369,9 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { val valueHashCode = value match { case null => 0 case binary: Array[Byte] => util.Arrays.hashCode(binary) + // SPARK-40315: Literals of ArrayBasedMapData should have deterministic hashCode. 
+      case arrayBasedMapData: ArrayBasedMapData =>
+        arrayBasedMapData.keyArray.hashCode() * 37 + arrayBasedMapData.valueArray.hashCode()
       case other => other.hashCode()
     }
     31 * Objects.hashCode(dataType) + valueHashCode
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
index e3afd6a3bb3c2..3d9416fda4596 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala
@@ -517,4 +517,30 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper {
 
     assert(m1.semanticEquals(m2))
   }
+
+  test("SPARK-40315: Literals of ArrayBasedMapData should have deterministic hashCode.") {
+    // One shared key array plus two value arrays whose single elements differ.
+    val keyArr: Array[UTF8String] = Array(UTF8String.fromString("key"))
+    val valArrA: Array[UTF8String] = Array(UTF8String.fromString("value1"))
+    val valArrB: Array[UTF8String] = Array(UTF8String.fromString("value2"))
+    val mapType = MapType(StringType, StringType)
+
+    // sameA/sameB are separate instances over the same arrays; other has another value.
+    val sameA = new ArrayBasedMapData(new GenericArrayData(keyArr), new GenericArrayData(valArrA))
+    val sameB = new ArrayBasedMapData(new GenericArrayData(keyArr), new GenericArrayData(valArrA))
+    val other = new ArrayBasedMapData(new GenericArrayData(keyArr), new GenericArrayData(valArrB))
+
+    val m1 = Literal.create(sameA, mapType)
+    val m2 = Literal.create(sameB, mapType)
+    val m3 = Literal.create(other, mapType)
+
+    // Same contents: the literals must compare equal and hash to the same value.
+    assert(m1 == m2)
+    assert(m1.hashCode() == m2.hashCode())
+
+    // Different contents: the literals must compare unequal and, for this input, are expected
+    // to produce different hash codes.
+    assert(m1 != m3)
+    assert(m1.hashCode() != m3.hashCode())
+  }
 }