From b6a79e7fe73b5a1cabbc39a50fa4e47dd4f2a079 Mon Sep 17 00:00:00 2001
From: pierre-borckmans
Date: Tue, 22 Dec 2015 09:43:55 +0100
Subject: [PATCH 1/5] CHECK if element in array field is null

---
 .../spark/sql/catalyst/expressions/complexTypeExtractors.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
index c5ed173eeb9d..91c275b1aa1c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -227,7 +227,7 @@ case class GetArrayItem(child: Expression, ordinal: Expression)
     nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
       s"""
         final int index = (int) $eval2;
-        if (index >= $eval1.numElements() || index < 0) {
+        if (index >= $eval1.numElements() || index < 0 || $eval1.isNullAt(index)) {
           ${ev.isNull} = true;
         } else {
           ${ev.value} = ${ctx.getValue(eval1, dataType, "index")};

From 3c8a7955dbb62648de83b9cd5595f7687092f55e Mon Sep 17 00:00:00 2001
From: pierre-borckmans
Date: Tue, 22 Dec 2015 10:10:58 +0100
Subject: [PATCH 2/5] ADD unit test for accessing null elements in array fields

---
 .../org/apache/spark/sql/DataFrameComplexTypeSuite.scala | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
index 09f7b507670c..40a637b7d13e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
@@ -43,4 +43,12 @@ class DataFrameComplexTypeSuite extends QueryTest with SharedSQLContext {
     val df = sparkContext.parallelize(Seq((1, 1))).toDF("a", "b")
     df.select(array($"a").as("s")).select(f(expr("s[0]"))).collect()
   }
+
+  test("Accessing null element in array field") {
+    val df = sc.parallelize(Seq((Seq("val1",null,"val2"),Seq(Some(1),None,Some(2))))).toDF("s","i")
+    val nullStringRow = df.selectExpr("s[1]").collect()(0)
+    assert(nullStringRow == org.apache.spark.sql.Row(null))
+    val nullIntRow = df.selectExpr("i[1]").collect()(0)
+    assert(nullIntRow == org.apache.spark.sql.Row(null))
+  }
 }

From 3519ace51fd45739862ec1ed8f2a2469ab57a7c7 Mon Sep 17 00:00:00 2001
From: pierre-borckmans
Date: Tue, 22 Dec 2015 22:36:45 +0100
Subject: [PATCH 3/5] ADD Jira ticket number in test title

---
 .../scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
index 40a637b7d13e..06381dc35c01 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
@@ -44,8 +44,8 @@ class DataFrameComplexTypeSuite extends QueryTest with SharedSQLContext {
     df.select(array($"a").as("s")).select(f(expr("s[0]"))).collect()
   }
 
-  test("Accessing null element in array field") {
     val df = sc.parallelize(Seq((Seq("val1",null,"val2"),Seq(Some(1),None,Some(2))))).toDF("s","i")
+  test("SPARK-12477 accessing null element in array field") {
     val nullStringRow = df.selectExpr("s[1]").collect()(0)
     assert(nullStringRow == org.apache.spark.sql.Row(null))
     val nullIntRow = df.selectExpr("i[1]").collect()(0)

From b1fc7e5d7d120411342cd2234ab1b65b096dd524 Mon Sep 17 00:00:00 2001
From: pierre-borckmans
Date: Tue, 22 Dec 2015 22:37:05 +0100
Subject: [PATCH 4/5] FIX scalastyle in test

Missing spaces after commas
Line length exceeds 100 characters
---
 .../scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
index 06381dc35c01..2167d2504260 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
@@ -44,8 +44,9 @@ class DataFrameComplexTypeSuite extends QueryTest with SharedSQLContext {
     df.select(array($"a").as("s")).select(f(expr("s[0]"))).collect()
   }
 
-    val df = sc.parallelize(Seq((Seq("val1",null,"val2"),Seq(Some(1),None,Some(2))))).toDF("s","i")
   test("SPARK-12477 accessing null element in array field") {
+    val df = sc.parallelize(Seq((Seq("val1", null, "val2"),
+      Seq(Some(1), None, Some(2))))).toDF("s", "i")
     val nullStringRow = df.selectExpr("s[1]").collect()(0)
     assert(nullStringRow == org.apache.spark.sql.Row(null))
     val nullIntRow = df.selectExpr("i[1]").collect()(0)

From 64f95ec3b307d44af42ee021f707904eae3a7076 Mon Sep 17 00:00:00 2001
From: pierre-borckmans
Date: Tue, 22 Dec 2015 23:00:30 +0100
Subject: [PATCH 5/5] FIX test sc => sparkContext

---
 .../scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
index 2167d2504260..b76fc73b7fa0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
@@ -45,7 +45,7 @@ class DataFrameComplexTypeSuite extends QueryTest with SharedSQLContext {
   }
 
   test("SPARK-12477 accessing null element in array field") {
-    val df = sc.parallelize(Seq((Seq("val1", null, "val2"),
+    val df = sparkContext.parallelize(Seq((Seq("val1", null, "val2"),
       Seq(Some(1), None, Some(2))))).toDF("s", "i")
     val nullStringRow = df.selectExpr("s[1]").collect()(0)
     assert(nullStringRow == org.apache.spark.sql.Row(null))