File tree: 4 files changed, +17 −3 lines changed

core/src/main/java/org/apache/spark/unsafe/map
@@ -429,7 +429,7 @@ public MapIterator destructiveIterator() {
   }
 
   /**
-   * Iterator for the entries of this map. This is to first iterate over key index array
+   * Iterator for the entries of this map. This is to first iterate over key indices in
    * `longArray` then accessing values in `dataPages`. NOTE: this is different from `MapIterator`
    * in the sense that key index is preserved here
    * (See `UnsafeHashedRelation` for example of usage).
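The reworded comment documents the key-index-preserving iterator. As a rough illustration of the pattern it describes, here is a minimal Scala sketch; the construction details (`TestMemoryManager`, capacity, page size) are illustrative, not taken from this patch:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.memory.{TaskMemoryManager, TestMemoryManager}
import org.apache.spark.unsafe.map.BytesToBytesMap

// Sketch: walk the map with the key index preserved, as the comment
// describes -- key indices come from `longArray`, values from `dataPages`.
val taskMemoryManager = new TaskMemoryManager(new TestMemoryManager(new SparkConf()), 0)
val map = new BytesToBytesMap(taskMemoryManager, 64, 4 * 1024 * 1024)
val iter = map.iteratorWithKeyIndex()
while (iter.hasNext) {
  val loc = iter.next()
  val keyIndex = loc.getKeyIndex  // preserved here, unlike MapIterator
  // read the value via loc.getValueBase / getValueOffset / getValueLength
}
map.free()
```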
catalyst/src/main/scala/org/apache/spark/sql/internal
@@ -329,7 +329,9 @@ object SQLConf {
 
   val PREFER_SORTMERGEJOIN = buildConf("spark.sql.join.preferSortMergeJoin")
     .internal()
-    .doc("When true, prefer sort merge join over shuffle hash join.")
+    .doc("When true, prefer sort merge join over shuffled hash join. " +
+      "Note that shuffled hash join supports all join types (e.g. full outer) " +
+      "that sort merge join supports.")
     .version("2.0.0")
     .booleanConf
     .createWithDefault(true)
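For context, the config is internal but can still be set on a session. A minimal sketch of flipping it so the planner may pick shuffled hash join (the DataFrames here are illustrative, not from the patch):

```scala
// Sketch: with the preference disabled, the planner may select shuffled
// hash join (subject to build-side size checks) instead of sort merge join.
spark.conf.set("spark.sql.join.preferSortMergeJoin", "false")

val left = spark.range(10).selectExpr("id as k1")
val right = spark.range(30).selectExpr("id as k2")

// Full outer is now a legal choice for shuffled hash join, per the new doc text.
left.join(right, left("k1") === right("k2"), "fullouter").explain()
```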
main/scala/org/apache/spark/sql/execution/joins
@@ -137,7 +137,7 @@ private[execution] object HashedRelation {
         0)
     }
 
-    if (!input.hasNext) {
+    if (!input.hasNext && !allowsNullKey) {
       EmptyHashedRelation
     } else if (key.length == 1 && key.head.dataType == LongType && !allowsNullKey) {
       // NOTE: LongHashedRelation does not support NULL keys.
11991199 (spark.range(10 ).selectExpr(" id % 5 as k1" ),
12001200 spark.range(30 ).selectExpr(" id % 5 as k2" ),
12011201 $" k1" === $" k2" ),
1202+ // Test empty build side
1203+ (spark.range(10 ).selectExpr(" id as k1" ).filter(" k1 < -1" ),
1204+ spark.range(30 ).selectExpr(" id as k2" ),
1205+ $" k1" === $" k2" ),
1206+ // Test empty stream side
1207+ (spark.range(10 ).selectExpr(" id as k1" ),
1208+ spark.range(30 ).selectExpr(" id as k2" ).filter(" k2 < -1" ),
1209+ $" k1" === $" k2" ),
1210+ // Test empty build and stream side
1211+ (spark.range(10 ).selectExpr(" id as k1" ).filter(" k1 < -1" ),
1212+ spark.range(30 ).selectExpr(" id as k2" ).filter(" k2 < -1" ),
1213+ $" k1" === $" k2" ),
12021214 // Test string join key
12031215 (spark.range(10 ).selectExpr(" cast(id * 3 as string) as k1" ),
12041216 spark.range(30 ).selectExpr(" cast(id as string) as k2" ),
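For context, triples like these typically feed a loop that runs the join under shuffled-hash-join settings and checks the answer. A hypothetical harness shape (not the suite's exact code), assuming the surrounding `QueryTest` helpers (`checkAnswer`, `withSQLConf`) and an `inputDFs` sequence holding the triples above:

```scala
// Hypothetical harness: join each (left, right, condition) triple full-outer
// with sort merge join de-preferred, and compare against the plain answer.
inputDFs.foreach { case (left, right, cond) =>
  val expected = left.join(right, cond, "fullouter").collect()
  withSQLConf(SQLConf.PREFER_SORTMERGEJOIN.key -> "false") {
    checkAnswer(left.join(right, cond, "fullouter"), expected)
  }
}
```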