From 5b2d942c3f9825eca0b286c42f31a51e6bda0cdd Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Wed, 27 Jan 2016 21:49:45 -0800 Subject: [PATCH 1/7] map column would throw NPE if value is null --- .../sql/catalyst/expressions/complexTypeExtractors.scala | 4 ++-- .../apache/spark/sql/hive/execution/SQLQuerySuite.scala | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 5256baaf432a2..480d4d7c332e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -278,7 +278,7 @@ case class GetMapValue(child: Expression, key: Expression) } } - if (!found) { + if (!found || map.valueArray().isNullAt(i)) { null } else { map.valueArray().get(i, dataType) @@ -307,7 +307,7 @@ case class GetMapValue(child: Expression, key: Expression) } } - if ($found) { + if ($found && !$eval1.valueArray().isNullAt($index)) { ${ev.value} = ${ctx.getValue(eval1 + ".valueArray()", dataType, index)}; } else { ${ev.isNull} = true; diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 683008960aa28..511b22950f634 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1463,4 +1463,11 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test """.stripMargin), Row("value1", "12", BigDecimal("3.14"), "hello")) } + + test("SPARK-13056: Null in map value causes NPE") { + Seq((1, "abc=somestring,cba")).toDF("key", "value").registerTempTable("mapsrc") + sql("""CREATE TABLE maptest AS SELECT str_to_map(value, ",", "=") as col1 FROM mapsrc""") + checkAnswer(sql("SELECT col1['abc'] FROM maptest"), Row("somestring")) + checkAnswer(sql("SELECT col1['cba'] FROM maptest"), Row(null)) + } } From 2244999f871b53567c57ae344d8bcd2739fe9ebb Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Thu, 28 Jan 2016 20:34:47 -0800 Subject: [PATCH 2/7] address comments --- .../expressions/complexTypeExtractors.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 480d4d7c332e6..70b78996875c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -218,7 +218,7 @@ case class GetArrayItem(child: Expression, ordinal: Expression) protected override def nullSafeEval(value: Any, ordinal: Any): Any = { val baseValue = value.asInstanceOf[ArrayData] val index = ordinal.asInstanceOf[Number].intValue() - if (index >= baseValue.numElements() || index < 0) { + if (index >= baseValue.numElements() || index < 0 || baseValue.isNullAt(index)) { null } else { baseValue.get(index, dataType) @@ -267,6 +267,7 @@ case class GetMapValue(child: Expression, key: Expression) val map = value.asInstanceOf[MapData] val length = map.numElements() val keys = map.keyArray() + val values = map.valueArray() var i = 0 var found = false @@ -278,10 +279,10 @@ case class GetMapValue(child: Expression, key: Expression) } } - if (!found || map.valueArray().isNullAt(i)) { + if (!found || values.isNullAt(i)) { null } else { - map.valueArray().get(i, dataType) + values.get(i, dataType) } } @@ -291,10 +292,12 @@ case class GetMapValue(child: Expression, key: Expression) val keys = ctx.freshName("keys") val found = ctx.freshName("found") val key = ctx.freshName("key") + val values = ctx.freshName("values") nullSafeCodeGen(ctx, ev, (eval1, eval2) => { s""" final int $length = $eval1.numElements(); final ArrayData $keys = $eval1.keyArray(); + final ArrayData $values = $eval1.valueArray(); int $index = 0; boolean $found = false; @@ -307,8 +310,8 @@ case class GetMapValue(child: Expression, key: Expression) } } - if ($found && !$eval1.valueArray().isNullAt($index)) { - ${ev.value} = ${ctx.getValue(eval1 + ".valueArray()", dataType, index)}; + if ($found && !$values.isNullAt($index)) { + ${ev.value} = ${ctx.getValue(values, dataType, index)}; } else { ${ev.isNull} = true; } From bf429cadc7168be3dd320bb42b2f0fa32f167fed Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Fri, 29 Jan 2016 00:31:57 -0800 Subject: [PATCH 3/7] address comments --- .../sql/catalyst/expressions/complexTypeExtractors.scala | 6 +++--- .../apache/spark/sql/hive/execution/SQLQuerySuite.scala | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 70b78996875c4..9f2f82d68cca0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -310,10 +310,10 @@ case class GetMapValue(child: Expression, key: Expression) } } - if ($found && !$values.isNullAt($index)) { - ${ev.value} = ${ctx.getValue(values, dataType, index)}; - } else { + if (!$found || $values.isNullAt($index)) { ${ev.isNull} = true; + } else { + ${ev.value} = ${ctx.getValue(values, dataType, index)}; } """ }) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 511b22950f634..fe75b143a78cf 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1465,9 +1465,9 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } test("SPARK-13056: Null in map value causes NPE") { - Seq((1, "abc=somestring,cba")).toDF("key", "value").registerTempTable("mapsrc") - sql("""CREATE TABLE maptest AS SELECT str_to_map(value, ",", "=") as col1 FROM mapsrc""") - checkAnswer(sql("SELECT col1['abc'] FROM maptest"), Row("somestring")) - checkAnswer(sql("SELECT col1['cba'] FROM maptest"), Row(null)) + val df = Seq(1 -> Map("abc" -> "somestring", "cba" -> null)).toDF("key", "value") + df.registerTempTable("maptest") + checkAnswer(sql("SELECT value['abc'] FROM maptest"), Row("somestring")) + checkAnswer(sql("SELECT value['cba'] FROM maptest"), Row(null)) } } From 21f29a59c94f6549db5085ca9839bbb47b28a45e Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Sun, 31 Jan 2016 20:04:22 -0800 Subject: [PATCH 4/7] move test --- .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 7 +++++++ .../apache/spark/sql/hive/execution/SQLQuerySuite.scala | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 47308966e92cb..3972dbe81e922 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2056,4 +2056,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { ) } } + + test("SPARK-13056: Null in map value causes NPE") { + val df = Seq(1 -> Map("abc" -> "somestring", "cba" -> null)).toDF("key", "value") + df.registerTempTable("maptest") + checkAnswer(sql("SELECT value['abc'] FROM maptest"), Row("somestring")) + checkAnswer(sql("SELECT value['cba'] FROM maptest"), Row(null)) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index fe75b143a78cf..683008960aa28 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1463,11 +1463,4 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test """.stripMargin), Row("value1", "12", BigDecimal("3.14"), "hello")) } - - test("SPARK-13056: Null in map value causes NPE") { - val df = Seq(1 -> Map("abc" -> "somestring", "cba" -> null)).toDF("key", "value") - df.registerTempTable("maptest") - checkAnswer(sql("SELECT value['abc'] FROM maptest"), Row("somestring")) - checkAnswer(sql("SELECT value['cba'] FROM maptest"), Row(null)) - } } From 898cf201a97c2405cc3fdd1fa90be38e65b9c023 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Sun, 31 Jan 2016 21:16:28 -0800 Subject: [PATCH 5/7] use withTempTable --- .../org/apache/spark/sql/SQLQuerySuite.scala | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 3972dbe81e922..e7dfa144fd33c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2046,6 +2046,15 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { ) } + test("SPARK-13056: Null in map value causes NPE") { + val df = Seq(1 -> Map("abc" -> "somestring", "cba" -> null)).toDF("key", "value") + withTempTable("maptest") { + df.registerTempTable("maptest") + checkAnswer(sql("SELECT value['abc'] FROM maptest"), Row("somestring")) + checkAnswer(sql("SELECT value['cba'] FROM maptest"), Row(null)) + } + } + test("hash function") { val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j") withTempTable("tbl") { @@ -2056,11 +2065,4 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { ) } } - - test("SPARK-13056: Null in map value causes NPE") { - val df = Seq(1 -> Map("abc" -> "somestring", "cba" -> null)).toDF("key", "value") - df.registerTempTable("maptest") - checkAnswer(sql("SELECT value['abc'] FROM maptest"), Row("somestring")) - checkAnswer(sql("SELECT value['cba'] FROM maptest"), Row(null)) - } } From d77013f4900a259f34e6f6a2010ccbbb9a8527ac Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Sun, 31 Jan 2016 21:56:54 -0800 Subject: [PATCH 6/7] modify test case --- .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index e7dfa144fd33c..2feb3f4d55744 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2050,8 +2050,8 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { val df = Seq(1 -> Map("abc" -> "somestring", "cba" -> null)).toDF("key", "value") withTempTable("maptest") { df.registerTempTable("maptest") - checkAnswer(sql("SELECT value['abc'] FROM maptest"), Row("somestring")) - checkAnswer(sql("SELECT value['cba'] FROM maptest"), Row(null)) + checkAnswer(sql("SELECT value['abc'] FROM maptest where key = 1"), Row("somestring")) + checkAnswer(sql("SELECT value['cba'] FROM maptest where key = 1"), Row(null)) } } From 5b83626ec613a7bcc0899794381648a1cd2514fa Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Mon, 1 Feb 2016 00:57:26 -0800 Subject: [PATCH 7/7] add comment --- sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 2feb3f4d55744..d9de619553718 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2050,6 +2050,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { val df = Seq(1 -> Map("abc" -> "somestring", "cba" -> null)).toDF("key", "value") withTempTable("maptest") { df.registerTempTable("maptest") + // local optimization will by pass codegen code, so we should keep the filter `key=1` checkAnswer(sql("SELECT value['abc'] FROM maptest where key = 1"), Row("somestring")) checkAnswer(sql("SELECT value['cba'] FROM maptest where key = 1"), Row(null)) }