From f69c693e7a171c7d36f92ad2718f8d4422a17fac Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Thu, 30 Aug 2018 19:59:30 -0700 Subject: [PATCH 1/5] [SPARK-25308] ArrayContains function may return a error in the code generation phase. --- .../expressions/collectionOperations.scala | 31 +++++++++++++------ .../CollectionExpressionsSuite.scala | 3 ++ 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index cf9796ef1948..735084440c74 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1464,17 +1464,28 @@ case class ArrayContains(left: Expression, right: Expression) nullSafeCodeGen(ctx, ev, (arr, value) => { val i = ctx.freshName("i") val getValue = CodeGenerator.getValue(arr, right.dataType, i) - s""" - for (int $i = 0; $i < $arr.numElements(); $i ++) { - if ($arr.isNullAt($i)) { - ${ev.isNull} = true; - } else if (${ctx.genEqual(right.dataType, value, getValue)}) { - ${ev.isNull} = false; - ${ev.value} = true; - break; - } + val checkAndSetIsNullCode = if (nullable) { + s""" + |if ($arr.isNullAt($i)) { + | ${ev.isNull} = true; + |} else + | + """.stripMargin + } else { + "" } - """ + val unsetIsNullCode = if (nullable) s"${ev.isNull} = false;" else "" + + s""" + |for (int $i = 0; $i < $arr.numElements(); $i ++) { + | $checkAndSetIsNullCode + | if (${ctx.genEqual(right.dataType, value, getValue)}) { + | $unsetIsNullCode + | ${ev.value} = true; + | break; + | } + |} + """.stripMargin }) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 7b345aabd19c..bac8eb5f6bec 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -383,10 +383,13 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper val a3 = Literal.create(null, ArrayType(StringType)) val a4 = Literal.create(Seq(create_row(1)), ArrayType(StructType(Seq( StructField("a", IntegerType, true))))) + // Explicitly mark the array type not nullable (spark-xxxxx) + val a5 = Literal.create(Seq(1, 2, 3), ArrayType(IntegerType, false)) checkEvaluation(ArrayContains(a0, Literal(1)), true) checkEvaluation(ArrayContains(a0, Literal(0)), false) checkEvaluation(ArrayContains(a0, Literal.create(null, IntegerType)), null) + checkEvaluation(ArrayContains(a5, Literal(1)), true) checkEvaluation(ArrayContains(a1, Literal("")), true) checkEvaluation(ArrayContains(a1, Literal("a")), null) From db3c3c002bb8c4439091a26afb70b23a10cf952b Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Sun, 2 Sep 2018 03:27:08 -0700 Subject: [PATCH 2/5] Code review --- .../spark/sql/catalyst/expressions/collectionOperations.scala | 1 - .../sql/catalyst/expressions/CollectionExpressionsSuite.scala | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 735084440c74..fa96db1168ef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1469,7 +1469,6 @@ case class ArrayContains(left: Expression, right: Expression) |if ($arr.isNullAt($i)) { | ${ev.isNull} = true; |} else - | """.stripMargin } else { "" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index bac8eb5f6bec..58c71cadd009 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -383,7 +383,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper val a3 = Literal.create(null, ArrayType(StringType)) val a4 = Literal.create(Seq(create_row(1)), ArrayType(StructType(Seq( StructField("a", IntegerType, true))))) - // Explicitly mark the array type not nullable (spark-xxxxx) + // Explicitly mark the array type not nullable (spark-25308) val a5 = Literal.create(Seq(1, 2, 3), ArrayType(IntegerType, false)) checkEvaluation(ArrayContains(a0, Literal(1)), true) From e47a442c3261a8e415801ba822015ecad0db42f1 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Sun, 2 Sep 2018 12:15:11 -0700 Subject: [PATCH 3/5] Code review --- .../expressions/collectionOperations.scala | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index fa96db1168ef..bb2f92ef6931 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1464,25 +1464,33 @@ case class ArrayContains(left: Expression, right: Expression) nullSafeCodeGen(ctx, ev, (arr, value) => { val i = ctx.freshName("i") val getValue = CodeGenerator.getValue(arr, right.dataType, i) - val checkAndSetIsNullCode = if (nullable) { - s""" - |if ($arr.isNullAt($i)) { - | ${ev.isNull} = true; - |} else + def checkAndSetIsNullCode(body: String) = { + if (nullable) { + s""" + |if ($arr.isNullAt($i)) { + | ${ev.isNull} = true; + |} else { + | $body + |} """.stripMargin - } else { - "" + } else { + body + } } val unsetIsNullCode = if (nullable) s"${ev.isNull} = false;" else "" + val code = checkAndSetIsNullCode( + s""" + |if (${ctx.genEqual(right.dataType, value, getValue)}) { + | $unsetIsNullCode + | ${ev.value} = true; + | break; + |} + """.stripMargin + ) s""" |for (int $i = 0; $i < $arr.numElements(); $i ++) { - | $checkAndSetIsNullCode - | if (${ctx.genEqual(right.dataType, value, getValue)}) { - | $unsetIsNullCode - | ${ev.value} = true; - | break; - | } + | $code |} """.stripMargin }) From e17bb4627033f4bb94adfd99bddeda04a7b938ca Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Mon, 3 Sep 2018 00:43:27 -0700 Subject: [PATCH 4/5] Code review --- .../expressions/collectionOperations.scala | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index bb2f92ef6931..348a24728a81 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1464,18 +1464,16 @@ case class ArrayContains(left: Expression, right: Expression) nullSafeCodeGen(ctx, ev, (arr, value) => { val i = ctx.freshName("i") val getValue = CodeGenerator.getValue(arr, right.dataType, i) - def checkAndSetIsNullCode(body: String) = { - if (nullable) { - s""" - |if ($arr.isNullAt($i)) { - | ${ev.isNull} = true; - |} else { - | $body - |} + def checkAndSetIsNullCode(body: String) = if (nullable) { + s""" + |if ($arr.isNullAt($i)) { + | ${ev.isNull} = true; + |} else { + | $body + |} """.stripMargin - } else { - body - } + } else { + body } val unsetIsNullCode = if (nullable) s"${ev.isNull} = false;" else "" val code = checkAndSetIsNullCode( From 59ddb993790f4bb0ec920a2b0d897d8052c9f108 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Mon, 3 Sep 2018 10:18:46 -0700 Subject: [PATCH 5/5] Code review --- .../expressions/collectionOperations.scala | 20 ++++++++----------- .../CollectionExpressionsSuite.scala | 2 +- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 348a24728a81..17c683cc8ff5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1464,31 +1464,27 @@ case class ArrayContains(left: Expression, right: Expression) nullSafeCodeGen(ctx, ev, (arr, value) => { val i = ctx.freshName("i") val getValue = CodeGenerator.getValue(arr, right.dataType, i) - def checkAndSetIsNullCode(body: String) = if (nullable) { + val loopBodyCode = if (nullable) { s""" |if ($arr.isNullAt($i)) { - | ${ev.isNull} = true; - |} else { - | $body + | ${ev.isNull} = true; + |} else if (${ctx.genEqual(right.dataType, value, getValue)}) { + | ${ev.isNull} = false; + | ${ev.value} = true; + | break; |} """.stripMargin } else { - body - } - val unsetIsNullCode = if (nullable) s"${ev.isNull} = false;" else "" - val code = checkAndSetIsNullCode( s""" |if (${ctx.genEqual(right.dataType, value, getValue)}) { - | $unsetIsNullCode | ${ev.value} = true; | break; |} """.stripMargin - ) - + } s""" |for (int $i = 0; $i < $arr.numElements(); $i ++) { - | $code + | $loopBodyCode |} """.stripMargin }) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 58c71cadd009..a9fc3e9c7b37 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -384,7 +384,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper val a4 = Literal.create(Seq(create_row(1)), ArrayType(StructType(Seq( StructField("a", IntegerType, true))))) // Explicitly mark the array type not nullable (spark-25308) - val a5 = Literal.create(Seq(1, 2, 3), ArrayType(IntegerType, false)) + val a5 = Literal.create(Seq(1, 2, 3), ArrayType(IntegerType, containsNull = false)) checkEvaluation(ArrayContains(a0, Literal(1)), true) checkEvaluation(ArrayContains(a0, Literal(0)), false)