From dabd8105e6646078f07d104af3d02278445e7185 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 8 Jun 2022 16:12:13 -0700 Subject: [PATCH 1/4] Fix ArraySort to throw an exception when the comparator returns null. --- .../expressions/higherOrderFunctions.scala | 26 +++++++++++++++---- .../sql/errors/QueryExecutionErrors.scala | 8 ++++++ .../apache/spark/sql/internal/SQLConf.scala | 9 +++++++ .../HigherOrderFunctionsSuite.scala | 17 ++++++++++++ 4 files changed, 55 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 79b76f799d94..83d05f73bf17 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -357,9 +357,9 @@ case class ArrayTransform( Since 3.0.0 this function also sorts and returns the array based on the given comparator function. The comparator will take two arguments representing two elements of the array. - It returns -1, 0, or 1 as the first element is less than, equal to, or greater - than the second element. If the comparator function returns other - values (including null), the function will fail and raise an error. + It returns a negative integer, 0, or a positive integer as the first element is less than, + equal to, or greater than the second element. If the comparator function returns null, + the function will fail and raise an error. """, examples = """ Examples: @@ -375,9 +375,17 @@ case class ArrayTransform( // scalastyle:on line.size.limit case class ArraySort( argument: Expression, - function: Expression) + function: Expression, + handleComparisonResultNullAsZero: Boolean) extends ArrayBasedSimpleHigherOrderFunction with CodegenFallback { + def this(argument: Expression, function: Expression) = { + this( + argument, + function, + SQLConf.get.getConf(SQLConf.LEGACY_ARRAY_SORT_HANDLES_COMPARISON_RESULT_NULL_VALUE_AS_ZERO)) + } + def this(argument: Expression) = this(argument, ArraySort.defaultComparator) @transient lazy val elementType: DataType = @@ -416,7 +424,11 @@ case class ArraySort( (o1: Any, o2: Any) => { firstElemVar.value.set(o1) secondElemVar.value.set(o2) - f.eval(inputRow).asInstanceOf[Int] + val cmp = f.eval(inputRow) + if (!handleComparisonResultNullAsZero && cmp == null) { + throw QueryExecutionErrors.comparisonResultIsNullError() + } + cmp.asInstanceOf[Int] } } @@ -437,6 +449,10 @@ case class ArraySort( object ArraySort { + def apply(argument: Expression, function: Expression): ArraySort = { + new ArraySort(argument, function) + } + def comparator(left: Expression, right: Expression): Expression = { val lit0 = Literal(0) val lit1 = Literal(1) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index c8feb4c07025..1f248d5f5189 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2012,4 +2012,12 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { new SparkException( errorClass = "MULTI_VALUE_SUBQUERY_ERROR", messageParameters = Array(plan), cause = null) } + + def comparisonResultIsNullError(): Throwable = { + new NullPointerException( + "The comparison result is null. " + + "If you want to handle null as 0 (equal), you can set " + + SQLConf.LEGACY_ARRAY_SORT_HANDLES_COMPARISON_RESULT_NULL_VALUE_AS_ZERO.key + + " to \"true\".") + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 4b0d110b077f..6d4590b8aeeb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3828,6 +3828,15 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_ARRAY_SORT_HANDLES_COMPARISON_RESULT_NULL_VALUE_AS_ZERO = + buildConf("spark.sql.legacy.arraySortHandlesComparisonResultNullAsZero") + .internal() + .doc("When set to true, it throws an error if the comparator function returns null. " + + "If set to true, it restores the legacy behavior that handles null as zero (equal).") + .version("3.4.0") + .booleanConf + .createWithDefault(false) + /** * Holds information about keys that have been deprecated. * diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala index c0db6d8dc29f..88f1c9ae309e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala @@ -838,4 +838,21 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper Literal.create(Seq(Double.NaN, 1d, 2d, null), ArrayType(DoubleType))), Seq(1d, 2d, Double.NaN, null)) } + + test("SPARK-39419: ArraySort should throw an exception when the comparison result is null") { + val comparator = { + val comp = ArraySort.comparator _ + (left: Expression, right: Expression) => + If(comp(left, right) === 0, Literal.create(null, IntegerType), comp(left, right)) + } + + checkExceptionInExpression[NullPointerException]( + arraySort(Literal.create(Seq(3, 1, 1, 2)), comparator), "The comparison result is null") + + withSQLConf( + SQLConf.LEGACY_ARRAY_SORT_HANDLES_COMPARISON_RESULT_NULL_VALUE_AS_ZERO.key -> "true") { + checkEvaluation(arraySort(Literal.create(Seq(3, 1, 1, 2)), comparator), + Seq(1, 1, 2, 3)) + } + } } From 451cf6eeaa44c8673320afc7e7ba40a61c5ab314 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 8 Jun 2022 16:25:25 -0700 Subject: [PATCH 2/4] Fix. --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- .../sql/catalyst/expressions/HigherOrderFunctionsSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 6d4590b8aeeb..80ab71626acb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3831,7 +3831,7 @@ object SQLConf { val LEGACY_ARRAY_SORT_HANDLES_COMPARISON_RESULT_NULL_VALUE_AS_ZERO = buildConf("spark.sql.legacy.arraySortHandlesComparisonResultNullAsZero") .internal() - .doc("When set to true, it throws an error if the comparator function returns null. " + + .doc("When set to false, it throws an error if the comparator function returns null. " + "If set to true, it restores the legacy behavior that handles null as zero (equal).") .version("3.4.0") .booleanConf diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala index 88f1c9ae309e..f2c23dbc053d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala @@ -839,7 +839,7 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper Seq(1d, 2d, Double.NaN, null)) } - test("SPARK-39419: ArraySort should throw an exception when the comparison result is null") { + test("SPARK-39419: ArraySort should throw an exception when the comparator returns null") { val comparator = { val comp = ArraySort.comparator _ (left: Expression, right: Expression) => From 4d65abbafa9189008e323778acb23f477a26c11b Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 8 Jun 2022 20:17:49 -0700 Subject: [PATCH 3/4] Fix. --- core/src/main/resources/error/error-classes.json | 5 +++++ .../catalyst/expressions/higherOrderFunctions.scala | 10 ++++++---- .../spark/sql/errors/QueryExecutionErrors.scala | 9 +++------ .../org/apache/spark/sql/internal/SQLConf.scala | 12 ++++++------ .../expressions/HigherOrderFunctionsSuite.scala | 11 +++++++---- 5 files changed, 27 insertions(+), 20 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 59553e78d74a..ffecf258205c 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -551,5 +551,10 @@ "Writing job aborted" ], "sqlState" : "40000" + }, + "NULL_COMPARISON_RESULT" : { + "message" : [ + "The comparison result is null. If you want to handle null as 0 (equal), you can set \"\" to \"\"." + ] } } \ No newline at end of file diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 83d05f73bf17..bb21c44485c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -376,14 +376,14 @@ case class ArrayTransform( case class ArraySort( argument: Expression, function: Expression, - handleComparisonResultNullAsZero: Boolean) + failOnNullComparisonResult: Boolean) extends ArrayBasedSimpleHigherOrderFunction with CodegenFallback { def this(argument: Expression, function: Expression) = { this( argument, function, - SQLConf.get.getConf(SQLConf.LEGACY_ARRAY_SORT_HANDLES_COMPARISON_RESULT_NULL_VALUE_AS_ZERO)) + SQLConf.get.getConf(SQLConf.LEGACY_ARRAY_SORT_FAILS_ON_NULL_COMPARISON_RESULT)) } def this(argument: Expression) = this(argument, ArraySort.defaultComparator) @@ -425,8 +425,10 @@ case class ArraySort( firstElemVar.value.set(o1) secondElemVar.value.set(o2) val cmp = f.eval(inputRow) - if (!handleComparisonResultNullAsZero && cmp == null) { - throw QueryExecutionErrors.comparisonResultIsNullError() + if (failOnNullComparisonResult && cmp == null) { + throw QueryExecutionErrors.nullComparisonResultError( + SQLConf.LEGACY_ARRAY_SORT_FAILS_ON_NULL_COMPARISON_RESULT.key, "false" + ) } cmp.asInstanceOf[Int] } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 1f248d5f5189..cdf6f0e1ae91 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2013,11 +2013,8 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { errorClass = "MULTI_VALUE_SUBQUERY_ERROR", messageParameters = Array(plan), cause = null) } - def comparisonResultIsNullError(): Throwable = { - new NullPointerException( - "The comparison result is null. " + - "If you want to handle null as 0 (equal), you can set " + - SQLConf.LEGACY_ARRAY_SORT_HANDLES_COMPARISON_RESULT_NULL_VALUE_AS_ZERO.key + - " to \"true\".") + def nullComparisonResultError(config: String, value: String): Throwable = { + new SparkException(errorClass = "NULL_COMPARISON_RESULT", + messageParameters = Array(config, value), cause = null) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 80ab71626acb..b06cc8cf059d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3828,14 +3828,14 @@ object SQLConf { .booleanConf .createWithDefault(false) - val LEGACY_ARRAY_SORT_HANDLES_COMPARISON_RESULT_NULL_VALUE_AS_ZERO = - buildConf("spark.sql.legacy.arraySortHandlesComparisonResultNullAsZero") + val LEGACY_ARRAY_SORT_FAILS_ON_NULL_COMPARISON_RESULT = + buildConf("spark.sql.legacy.arraySortFailsOnNullComparisonResult") .internal() - .doc("When set to false, it throws an error if the comparator function returns null. " + - "If set to true, it restores the legacy behavior that handles null as zero (equal).") - .version("3.4.0") + .doc("When set to true, it throws an error if the comparator function returns null. " + + "If set to false, it restores the legacy behavior that handles null as zero (equal).") + .version("3.2.2") .booleanConf - .createWithDefault(false) + .createWithDefault(true) /** * Holds information about keys that have been deprecated. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala index f2c23dbc053d..903a46126562 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -846,11 +846,14 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper If(comp(left, right) === 0, Literal.create(null, IntegerType), comp(left, right)) } - checkExceptionInExpression[NullPointerException]( - arraySort(Literal.create(Seq(3, 1, 1, 2)), comparator), "The comparison result is null") + withSQLConf( + SQLConf.LEGACY_ARRAY_SORT_FAILS_ON_NULL_COMPARISON_RESULT.key -> "true") { + checkExceptionInExpression[SparkException]( + arraySort(Literal.create(Seq(3, 1, 1, 2)), comparator), "The comparison result is null") + } withSQLConf( - SQLConf.LEGACY_ARRAY_SORT_HANDLES_COMPARISON_RESULT_NULL_VALUE_AS_ZERO.key -> "true") { + SQLConf.LEGACY_ARRAY_SORT_FAILS_ON_NULL_COMPARISON_RESULT.key -> "false") { checkEvaluation(arraySort(Literal.create(Seq(3, 1, 1, 2)), comparator), Seq(1, 1, 2, 3)) } From fb243b43ddf34d616ccf26883796299a8b6a03ce Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 9 Jun 2022 10:48:31 -0700 Subject: [PATCH 4/4] Fix. --- core/src/main/resources/error/error-classes.json | 10 +++++----- .../catalyst/expressions/higherOrderFunctions.scala | 10 ++++------ .../spark/sql/errors/QueryExecutionErrors.scala | 4 ++-- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 ++++++----- .../expressions/HigherOrderFunctionsSuite.scala | 4 ++-- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index ffecf258205c..5dffc04aa48e 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -296,6 +296,11 @@ "UDF class doesn't implement any UDF interface" ] }, + "NULL_COMPARISON_RESULT" : { + "message" : [ + "The comparison result is null. If you want to handle null as 0 (equal), you can set \"spark.sql.legacy.allowNullComparisonResultInArraySort\" to \"true\"." + ] + }, "PARSE_CHAR_MISSING_LENGTH" : { "message" : [ "DataType requires a length parameter, for example (10). Please specify the length." @@ -551,10 +556,5 @@ "Writing job aborted" ], "sqlState" : "40000" - }, - "NULL_COMPARISON_RESULT" : { - "message" : [ - "The comparison result is null. If you want to handle null as 0 (equal), you can set \"\" to \"\"." - ] } } \ No newline at end of file diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index bb21c44485c6..135a423b38a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -376,14 +376,14 @@ case class ArrayTransform( case class ArraySort( argument: Expression, function: Expression, - failOnNullComparisonResult: Boolean) + allowNullComparisonResult: Boolean) extends ArrayBasedSimpleHigherOrderFunction with CodegenFallback { def this(argument: Expression, function: Expression) = { this( argument, function, - SQLConf.get.getConf(SQLConf.LEGACY_ARRAY_SORT_FAILS_ON_NULL_COMPARISON_RESULT)) + SQLConf.get.getConf(SQLConf.LEGACY_ALLOW_NULL_COMPARISON_RESULT_IN_ARRAY_SORT)) } def this(argument: Expression) = this(argument, ArraySort.defaultComparator) @@ -425,10 +425,8 @@ case class ArraySort( firstElemVar.value.set(o1) secondElemVar.value.set(o2) val cmp = f.eval(inputRow) - if (failOnNullComparisonResult && cmp == null) { - throw QueryExecutionErrors.nullComparisonResultError( - SQLConf.LEGACY_ARRAY_SORT_FAILS_ON_NULL_COMPARISON_RESULT.key, "false" - ) + if (!allowNullComparisonResult && cmp == null) { + throw QueryExecutionErrors.nullComparisonResultError() } cmp.asInstanceOf[Int] } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index cdf6f0e1ae91..67a70d39a4e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2013,8 +2013,8 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { errorClass = "MULTI_VALUE_SUBQUERY_ERROR", messageParameters = Array(plan), cause = null) } - def nullComparisonResultError(config: String, value: String): Throwable = { + def nullComparisonResultError(): Throwable = { new SparkException(errorClass = "NULL_COMPARISON_RESULT", - messageParameters = Array(config, value), cause = null) + messageParameters = Array(), cause = null) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index b06cc8cf059d..b006b76b8ac7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3828,14 +3828,15 @@ object SQLConf { .booleanConf .createWithDefault(false) - val LEGACY_ARRAY_SORT_FAILS_ON_NULL_COMPARISON_RESULT = - buildConf("spark.sql.legacy.arraySortFailsOnNullComparisonResult") + val LEGACY_ALLOW_NULL_COMPARISON_RESULT_IN_ARRAY_SORT = + buildConf("spark.sql.legacy.allowNullComparisonResultInArraySort") .internal() - .doc("When set to true, it throws an error if the comparator function returns null. " + - "If set to false, it restores the legacy behavior that handles null as zero (equal).") + .doc("When set to false, `array_sort` function throws an error " + + "if the comparator function returns null. " + + "If set to true, it restores the legacy behavior that handles null as zero (equal).") .version("3.2.2") .booleanConf - .createWithDefault(true) + .createWithDefault(false) /** * Holds information about keys that have been deprecated. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala index 903a46126562..b1c4c4414274 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala @@ -847,13 +847,13 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper } withSQLConf( - SQLConf.LEGACY_ARRAY_SORT_FAILS_ON_NULL_COMPARISON_RESULT.key -> "true") { + SQLConf.LEGACY_ALLOW_NULL_COMPARISON_RESULT_IN_ARRAY_SORT.key -> "false") { checkExceptionInExpression[SparkException]( arraySort(Literal.create(Seq(3, 1, 1, 2)), comparator), "The comparison result is null") } withSQLConf( - SQLConf.LEGACY_ARRAY_SORT_FAILS_ON_NULL_COMPARISON_RESULT.key -> "false") { + SQLConf.LEGACY_ALLOW_NULL_COMPARISON_RESULT_IN_ARRAY_SORT.key -> "true") { checkEvaluation(arraySort(Literal.create(Seq(3, 1, 1, 2)), comparator), Seq(1, 1, 2, 3)) }