From 50f8be3e06186e6bbe1404800db1fb226a9e1c5c Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Wed, 22 Jun 2016 21:12:47 +0900 Subject: [PATCH 1/9] Add tests for UnsafeMapData --- .../catalyst/expressions/UnsafeMapData.java | 15 +++++ .../catalyst/expressions/MapDataSuite.scala | 57 +++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java index 0700148becab..b01f09d0d48a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java @@ -118,4 +118,19 @@ public UnsafeMapData copy() { mapCopy.pointTo(mapDataCopy, Platform.BYTE_ARRAY_OFFSET, sizeInBytes); return mapCopy; } + + @Override + public boolean equals(Object o) { + if (o instanceof UnsafeMapData) { + UnsafeMapData other = (UnsafeMapData) o; + return this.keyArray().equals(other.keyArray()) && + this.valueArray().equals(other.valueArray()); + } + return false; + } + + @Override + public int hashCode() { + return keyArray().hashCode() * 37 + valueArray().hashCode(); + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala new file mode 100644 index 000000000000..1dbcc40bccce --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import scala.collection._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.util.ArrayBasedMapData +import org.apache.spark.sql.types.{DataType, IntegerType, MapType, StringType} +import org.apache.spark.unsafe.types.UTF8String + +class MapDataSuite extends SparkFunSuite { + + test("equality tests") { + def u(str: String): UTF8String = UTF8String.fromString(str) + + // test data + val testMap1 = Map(u("key1") -> 1) + val testMap2 = Map(u("key1") -> 1, u("key2") -> 2) + val testMap3 = Map(u("key1") -> 1) + val testMap4 = Map(u("key1") -> 1, u("key2") -> 2) + + // ArrayBasedMapData + val testArrayMap1 = ArrayBasedMapData(testMap1.toMap) + val testArrayMap2 = ArrayBasedMapData(testMap2.toMap) + val testArrayMap3 = ArrayBasedMapData(testMap3.toMap) + val testArrayMap4 = ArrayBasedMapData(testMap4.toMap) + assert(testArrayMap1 === testArrayMap3) + assert(testArrayMap2 === testArrayMap4) + + // UnsafeMapData + val unsafeConverter = UnsafeProjection.create(Array[DataType](MapType(StringType, IntegerType))) + val row = new GenericMutableRow(1) + def toUnsafeMap(map: ArrayBasedMapData): UnsafeMapData = { + row.update(0, map) + val unsafeRow = unsafeConverter.apply(row) + unsafeRow.getMap(0).copy + } + assert(toUnsafeMap(testArrayMap1) === toUnsafeMap(testArrayMap3)) + assert(toUnsafeMap(testArrayMap2) === toUnsafeMap(testArrayMap4)) + } +} From 30d28bcd45376dc082178e30005728249edd9b4a Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Thu, 23 Jun 2016 00:30:51 +0900 Subject: [PATCH 2/9] Apply comments --- .../sql/catalyst/expressions/UnsafeMapData.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java index b01f09d0d48a..ed6afd1ed036 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java @@ -21,6 +21,8 @@ import org.apache.spark.sql.catalyst.util.MapData; import org.apache.spark.unsafe.Platform; +import org.apache.spark.unsafe.array.ByteArrayMethods; +import org.apache.spark.unsafe.hash.Murmur3_x86_32; /** * An Unsafe implementation of Map which is backed by raw memory instead of Java objects. @@ -120,17 +122,18 @@ public UnsafeMapData copy() { } @Override - public boolean equals(Object o) { - if (o instanceof UnsafeMapData) { - UnsafeMapData other = (UnsafeMapData) o; - return this.keyArray().equals(other.keyArray()) && - this.valueArray().equals(other.valueArray()); + public boolean equals(Object other) { + if (other instanceof UnsafeMapData) { + UnsafeMapData o = (UnsafeMapData) other; + return sizeInBytes == o.sizeInBytes && + ByteArrayMethods.arrayEquals(baseObject, baseOffset, o.baseObject, o.baseOffset, + sizeInBytes); } return false; } @Override public int hashCode() { - return keyArray().hashCode() * 37 + valueArray().hashCode(); + return Murmur3_x86_32.hashUnsafeBytes(baseObject, baseOffset, sizeInBytes, 42); } } From e8eaf7042ceaf6ff1c420be666daad43e24499ce Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Thu, 23 Jun 2016 02:40:55 +0900 Subject: [PATCH 3/9] Add comments --- .../spark/sql/catalyst/expressions/UnsafeArrayData.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java index 02a863b2bb49..c867e5b3cb4c 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java @@ -298,6 +298,10 @@ public UnsafeMapData getMap(int ordinal) { return map; } + // This `hashCode` computation could consume much processor time for large data. + // If the computation becomes a bottleneck, we can use a light-weight logic; the first fixed bytes + // are used to compute `hashCode` (See `Vector.hashCode`). + // The same issue exists in `UnsafeMapData.hashCode`. @Override public int hashCode() { return Murmur3_x86_32.hashUnsafeBytes(baseObject, baseOffset, sizeInBytes, 42); From b6bac43eb2857def9b1e98e47cd30539f0e59945 Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Thu, 23 Jun 2016 02:47:25 +0900 Subject: [PATCH 4/9] Fix comments --- .../apache/spark/sql/catalyst/expressions/UnsafeArrayData.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java index c867e5b3cb4c..24bdcc873bd5 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java @@ -301,7 +301,7 @@ public UnsafeMapData getMap(int ordinal) { // This `hashCode` computation could consume much processor time for large data. // If the computation becomes a bottleneck, we can use a light-weight logic; the first fixed bytes // are used to compute `hashCode` (See `Vector.hashCode`). - // The same issue exists in `UnsafeMapData.hashCode`. + // The same issue exists in `UnsafeRow.hashCode` and `UnsafeMapData.hashCode`. @Override public int hashCode() { return Murmur3_x86_32.hashUnsafeBytes(baseObject, baseOffset, sizeInBytes, 42); From 827785d1dc104ee1eeed93178bdfae4f7df92845 Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Thu, 23 Jun 2016 12:45:16 +0900 Subject: [PATCH 5/9] Apply comments --- .../catalyst/expressions/UnsafeArrayData.java | 2 +- .../sql/catalyst/expressions/UnsafeMapData.java | 16 ---------------- .../sql/catalyst/util/ArrayBasedMapData.scala | 17 ----------------- .../spark/sql/catalyst/util/MapData.scala | 3 +++ 4 files changed, 4 insertions(+), 34 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java index 24bdcc873bd5..6302660548ec 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java @@ -301,7 +301,7 @@ public UnsafeMapData getMap(int ordinal) { // This `hashCode` computation could consume much processor time for large data. // If the computation becomes a bottleneck, we can use a light-weight logic; the first fixed bytes // are used to compute `hashCode` (See `Vector.hashCode`). - // The same issue exists in `UnsafeRow.hashCode` and `UnsafeMapData.hashCode`. + // The same issue exists in `UnsafeRow.hashCode`. @Override public int hashCode() { return Murmur3_x86_32.hashUnsafeBytes(baseObject, baseOffset, sizeInBytes, 42); diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java index ed6afd1ed036..05c87d0921b1 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java @@ -120,20 +120,4 @@ public UnsafeMapData copy() { mapCopy.pointTo(mapDataCopy, Platform.BYTE_ARRAY_OFFSET, sizeInBytes); return mapCopy; } - - @Override - public boolean equals(Object other) { - if (other instanceof UnsafeMapData) { - UnsafeMapData o = (UnsafeMapData) other; - return sizeInBytes == o.sizeInBytes && - ByteArrayMethods.arrayEquals(baseObject, baseOffset, o.baseObject, o.baseOffset, - sizeInBytes); - } - return false; - } - - @Override - public int hashCode() { - return Murmur3_x86_32.hashUnsafeBytes(baseObject, baseOffset, sizeInBytes, 42); - } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapData.scala index d46f03ad8fbb..4449da13c083 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapData.scala @@ -24,23 +24,6 @@ class ArrayBasedMapData(val keyArray: ArrayData, val valueArray: ArrayData) exte override def copy(): MapData = new ArrayBasedMapData(keyArray.copy(), valueArray.copy()) - override def equals(o: Any): Boolean = { - if (!o.isInstanceOf[ArrayBasedMapData]) { - return false - } - - val other = o.asInstanceOf[ArrayBasedMapData] - if (other eq null) { - return false - } - - this.keyArray == other.keyArray && this.valueArray == other.valueArray - } - - override def hashCode: Int = { - keyArray.hashCode() * 37 + valueArray.hashCode() - } - override def toString: String = { s"keys: $keyArray, values: $valueArray" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala index 40db6067adf7..415212418ebf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala @@ -39,4 +39,7 @@ abstract class MapData extends Serializable { i += 1 } } + + // `MapData` should not implement `equals` and `hashCode` because the type cannot be used as join + // keys, grouping keys, or in equality tests. See SPARK-9415 and PR#13847 for the discussions. } From 2feb984a656dabad298d1108478b459d3d320722 Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Fri, 24 Jun 2016 17:47:52 +0900 Subject: [PATCH 6/9] Modify tests --- .../spark/sql/catalyst/expressions/UnsafeMapData.java | 2 -- .../org/apache/spark/sql/catalyst/util/MapData.scala | 2 +- .../sql/catalyst/expressions/CodeGenerationSuite.scala | 2 +- .../catalyst/expressions/ExpressionEvalHelper.scala | 8 ++++++-- .../spark/sql/catalyst/expressions/MapDataSuite.scala | 10 +++++----- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java index 05c87d0921b1..0700148becab 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java @@ -21,8 +21,6 @@ import org.apache.spark.sql.catalyst.util.MapData; import org.apache.spark.unsafe.Platform; -import org.apache.spark.unsafe.array.ByteArrayMethods; -import org.apache.spark.unsafe.hash.Murmur3_x86_32; /** * An Unsafe implementation of Map which is backed by raw memory instead of Java objects. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala index 415212418ebf..3dcbd5ea5175 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala @@ -40,6 +40,6 @@ abstract class MapData extends Serializable { } } - // `MapData` should not implement `equals` and `hashCode` because the type cannot be used as join + // `MapData` should not implement `equals` and `hashCode` because the type cannot be used as join // keys, grouping keys, or in equality tests. See SPARK-9415 and PR#13847 for the discussions. } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 62429a22f02f..113412833840 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -115,7 +115,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { new GenericArrayData(0 until length), new GenericArrayData(Seq.fill(length)(true)))) - if (!checkResult(actual, expected)) { + if (!actual.zip(expected).forall { case (data, answer) => checkResult(data, answer)}) { fail(s"Incorrect Evaluation: expressions: $expressions, actual: $actual, expected: $expected") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 8a9617cfbf5d..e58a0df317fe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, Project} +import org.apache.spark.sql.catalyst.util.MapData import org.apache.spark.sql.types.DataType import org.apache.spark.util.Utils @@ -52,7 +53,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { /** * Check the equality between result of expression and expected value, it will handle - * Array[Byte] and Spread[Double]. + * Array[Byte], Spread[Double], and MapData. */ protected def checkResult(result: Any, expected: Any): Boolean = { (result, expected) match { @@ -60,7 +61,10 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { java.util.Arrays.equals(result, expected) case (result: Double, expected: Spread[Double @unchecked]) => expected.asInstanceOf[Spread[Double]].isWithin(result) - case _ => result == expected + case (result: MapData, expected: MapData) => + result.keyArray() == expected.keyArray() && result.valueArray() == expected.valueArray() + case _ => + result == expected } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala index 1dbcc40bccce..0f1264c7c326 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MapDataSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.unsafe.types.UTF8String class MapDataSuite extends SparkFunSuite { - test("equality tests") { + test("inequality tests") { def u(str: String): UTF8String = UTF8String.fromString(str) // test data @@ -40,8 +40,8 @@ class MapDataSuite extends SparkFunSuite { val testArrayMap2 = ArrayBasedMapData(testMap2.toMap) val testArrayMap3 = ArrayBasedMapData(testMap3.toMap) val testArrayMap4 = ArrayBasedMapData(testMap4.toMap) - assert(testArrayMap1 === testArrayMap3) - assert(testArrayMap2 === testArrayMap4) + assert(testArrayMap1 !== testArrayMap3) + assert(testArrayMap2 !== testArrayMap4) // UnsafeMapData val unsafeConverter = UnsafeProjection.create(Array[DataType](MapType(StringType, IntegerType))) @@ -51,7 +51,7 @@ class MapDataSuite extends SparkFunSuite { val unsafeRow = unsafeConverter.apply(row) unsafeRow.getMap(0).copy } - assert(toUnsafeMap(testArrayMap1) === toUnsafeMap(testArrayMap3)) - assert(toUnsafeMap(testArrayMap2) === toUnsafeMap(testArrayMap4)) + assert(toUnsafeMap(testArrayMap1) !== toUnsafeMap(testArrayMap3)) + assert(toUnsafeMap(testArrayMap2) !== toUnsafeMap(testArrayMap4)) } } From d95824b9e56fe8e1e364dfe4573113a248d52ece Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Fri, 24 Jun 2016 20:55:15 +0900 Subject: [PATCH 7/9] Apply comments --- .../sql/catalyst/expressions/CodeGenerationSuite.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 113412833840..60dd03f5d0c1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -110,12 +110,12 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { case (expr, i) => Seq(Literal(i), expr) })) val plan = GenerateMutableProjection.generate(expressions) - val actual = plan(new GenericMutableRow(length)).toSeq(expressions.map(_.dataType)) - val expected = Seq(new ArrayBasedMapData( - new GenericArrayData(0 until length), - new GenericArrayData(Seq.fill(length)(true)))) + val actual = plan(new GenericMutableRow(length)).toSeq(expressions.map(_.dataType)).map { + case m: ArrayBasedMapData => ArrayBasedMapData.toScalaMap(m) + } + val expected = (0 until length).map((_, true)).toMap :: Nil - if (!actual.zip(expected).forall { case (data, answer) => checkResult(data, answer)}) { + if (!checkResult(actual, expected)) { fail(s"Incorrect Evaluation: expressions: $expressions, actual: $actual, expected: $expected") } } From e4b138403859ff1662a50a88279dd653a7258c62 Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Sat, 25 Jun 2016 07:48:16 +0900 Subject: [PATCH 8/9] Move comments --- .../scala/org/apache/spark/sql/catalyst/util/MapData.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala index 3dcbd5ea5175..d2eb25421474 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala @@ -19,6 +19,10 @@ package org.apache.spark.sql.catalyst.util import org.apache.spark.sql.types.DataType +/** + * `MapData` should not implement `equals` and `hashCode` because the type cannot be used as join + * keys, grouping keys, or in equality tests. See SPARK-9415 and PR#13847 for the discussions. + */ abstract class MapData extends Serializable { def numElements(): Int @@ -39,7 +43,4 @@ abstract class MapData extends Serializable { i += 1 } } - - // `MapData` should not implement `equals` and `hashCode` because the type cannot be used as join - // keys, grouping keys, or in equality tests. See SPARK-9415 and PR#13847 for the discussions. } From 902fe5fdf3ef9379252b7e8f7eaeaeaa6dd8759d Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Sun, 26 Jun 2016 00:46:52 +0900 Subject: [PATCH 9/9] Update comments --- .../scala/org/apache/spark/sql/catalyst/util/MapData.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala index d2eb25421474..94e8824cd18c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MapData.scala @@ -20,8 +20,9 @@ package org.apache.spark.sql.catalyst.util import org.apache.spark.sql.types.DataType /** - * `MapData` should not implement `equals` and `hashCode` because the type cannot be used as join - * keys, grouping keys, or in equality tests. See SPARK-9415 and PR#13847 for the discussions. + * This is an internal data representation for map type in Spark SQL. This should not implement + * `equals` and `hashCode` because the type cannot be used as join keys, grouping keys, or + * in equality tests. See SPARK-9415 and PR#13847 for the discussions. */ abstract class MapData extends Serializable {