From 649b45875e81e47ba3282150b669f766fbe806ba Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Tue, 11 Sep 2018 17:50:56 +0200 Subject: [PATCH 1/8] [SPARK-16323][SQ] Add IntegerDivide expression --- .../spark/sql/catalyst/dsl/package.scala | 1 + .../sql/catalyst/expressions/arithmetic.scala | 21 +++++++++++++++++++ .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../ArithmeticExpressionSuite.scala | 18 +++++++--------- .../parser/ExpressionParserSuite.scala | 4 ++-- 5 files changed, 33 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index d3ccd18d0245e..176ea823b1fcd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -72,6 +72,7 @@ package object dsl { def - (other: Expression): Expression = Subtract(expr, other) def * (other: Expression): Expression = Multiply(expr, other) def / (other: Expression): Expression = Divide(expr, other) + def div (other: Expression): Expression = IntegralDivide(expr, other) def % (other: Expression): Expression = Remainder(expr, other) def & (other: Expression): Expression = BitwiseAnd(expr, other) def | (other: Expression): Expression = BitwiseOr(expr, other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index c827226d58420..35c30cfaffb2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -314,6 +314,27 @@ case class Divide(left: Expression, right: Expression) extends DivModLike { override def evalOperation(left: Any, right: Any): Any = div(left, right) } +@ExpressionDescription( + usage = "a _FUNC_ b - Divides a by b.", + examples = """ + Examples: + > SELECT 3 _FUNC_ 2; + 1 + """, + since = "3.0.0") +case class IntegralDivide(left: Expression, right: Expression) extends DivModLike { + + override def inputType: AbstractDataType = IntegralType + + override def symbol: String = "/" + override def sqlOperator: String = "div" + + private lazy val div: (Any, Any) => Any = dataType match { + case i: IntegralType => i.integral.asInstanceOf[Integral[Any]].quot + } + override def evalOperation(left: Any, right: Any): Any = div(left, right) +} + @ExpressionDescription( usage = "expr1 _FUNC_ expr2 - Returns the remainder after `expr1`/`expr2`.", examples = """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 7bc1f63e30540..5cfb5dc871041 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1157,7 +1157,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging case SqlBaseParser.PERCENT => Remainder(left, right) case SqlBaseParser.DIV => - Cast(Divide(left, right), LongType) + IntegralDivide(left, right) case SqlBaseParser.PLUS => Add(left, right) case SqlBaseParser.MINUS => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index 9a752af523ffc..9c9d28f75189d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -143,16 +143,14 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper } } - // By fixing SPARK-15776, Divide's inputType is required to be DoubleType of DecimalType. - // TODO: in future release, we should add a IntegerDivide to support integral types. - ignore("/ (Divide) for integral type") { - checkEvaluation(Divide(Literal(1.toByte), Literal(2.toByte)), 0.toByte) - checkEvaluation(Divide(Literal(1.toShort), Literal(2.toShort)), 0.toShort) - checkEvaluation(Divide(Literal(1), Literal(2)), 0) - checkEvaluation(Divide(Literal(1.toLong), Literal(2.toLong)), 0.toLong) - checkEvaluation(Divide(positiveShortLit, negativeShortLit), 0.toShort) - checkEvaluation(Divide(positiveIntLit, negativeIntLit), 0) - checkEvaluation(Divide(positiveLongLit, negativeLongLit), 0L) + test("/ (Divide) for integral type") { + checkEvaluation(IntegralDivide(Literal(1.toByte), Literal(2.toByte)), 0.toByte) + checkEvaluation(IntegralDivide(Literal(1.toShort), Literal(2.toShort)), 0.toShort) + checkEvaluation(IntegralDivide(Literal(1), Literal(2)), 0) + checkEvaluation(IntegralDivide(Literal(1.toLong), Literal(2.toLong)), 0.toLong) + checkEvaluation(IntegralDivide(positiveShortLit, negativeShortLit), 0.toShort) + checkEvaluation(IntegralDivide(positiveIntLit, negativeIntLit), 0) + checkEvaluation(IntegralDivide(positiveLongLit, negativeLongLit), 0L) } test("% (Remainder)") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 781fc1e957ae0..b4df22c5b29fa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -203,7 +203,7 @@ class ExpressionParserSuite extends PlanTest { // Simple operations assertEqual("a * b", 'a * 'b) assertEqual("a / b", 'a / 'b) - assertEqual("a DIV b", ('a / 'b).cast(LongType)) + assertEqual("a DIV b", 'a div 'b) assertEqual("a % b", 'a % 'b) assertEqual("a + b", 'a + 'b) assertEqual("a - b", 'a - 'b) @@ -214,7 +214,7 @@ class ExpressionParserSuite extends PlanTest { // Check precedences assertEqual( "a * t | b ^ c & d - e + f % g DIV h / i * k", - 'a * 't | ('b ^ ('c & ('d - 'e + (('f % 'g / 'h).cast(LongType) / 'i * 'k))))) + 'a * 't | ('b ^ ('c & ('d - 'e + (('f % 'g div 'h) / 'i * 'k))))) } test("unary arithmetic expressions") { From a0c0849aaf9b1fd0a3f3e5fa079c51d89005f68c Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Wed, 12 Sep 2018 10:09:30 +0200 Subject: [PATCH 2/8] fix UT failure --- .../test/resources/sql-tests/results/operators.sql.out | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index 840655b7a6447..77c94265e8b03 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -157,7 +157,7 @@ NULL -- !query 19 select 5 div 2 -- !query 19 schema -struct +struct<(5 div 2):int> -- !query 19 output 2 @@ -165,7 +165,7 @@ struct -- !query 20 select 5 div 0 -- !query 20 schema -struct +struct<(5 div 0):int> -- !query 20 output NULL @@ -173,7 +173,7 @@ NULL -- !query 21 select 5 div null -- !query 21 schema -struct +struct<(5 div CAST(NULL AS INT)):int> -- !query 21 output NULL @@ -181,7 +181,7 @@ NULL -- !query 22 select null div 5 -- !query 22 schema -struct +struct<(CAST(NULL AS INT) div 5):int> -- !query 22 output NULL From 315bb86dcbd97bd4b1371896f213b52cf7aee16f Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Wed, 12 Sep 2018 18:17:09 +0200 Subject: [PATCH 3/8] address comment --- .../org/apache/spark/sql/catalyst/expressions/arithmetic.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 35c30cfaffb2e..0015521a56502 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -315,7 +315,7 @@ case class Divide(left: Expression, right: Expression) extends DivModLike { } @ExpressionDescription( - usage = "a _FUNC_ b - Divides a by b.", + usage = "expr1 _FUNC_ expr2 - Returns `expr1`/`expr2`. It performs integral division.", examples = """ Examples: > SELECT 3 _FUNC_ 2; From 02a2369967b3b842b85cbf4ae1ce2bbbfb8817da Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Thu, 13 Sep 2018 15:52:40 +0200 Subject: [PATCH 4/8] address comment --- .../apache/spark/sql/catalyst/analysis/FunctionRegistry.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 77860e1584f42..8b69a47036962 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -267,6 +267,7 @@ object FunctionRegistry { expression[Subtract]("-"), expression[Multiply]("*"), expression[Divide]("/"), + expression[IntegralDivide]("div"), expression[Remainder]("%"), // aggregate functions From fca5e62d5b0b8c7a9123c8bab638f955a754e197 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Fri, 14 Sep 2018 12:00:47 +0200 Subject: [PATCH 5/8] return long --- .../spark/sql/catalyst/expressions/arithmetic.scala | 9 +++++++-- .../expressions/ArithmeticExpressionSuite.scala | 12 ++++++------ .../resources/sql-tests/results/operators.sql.out | 8 ++++---- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 0015521a56502..8603320732e57 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -325,13 +325,18 @@ case class Divide(left: Expression, right: Expression) extends DivModLike { case class IntegralDivide(left: Expression, right: Expression) extends DivModLike { override def inputType: AbstractDataType = IntegralType + override def dataType: DataType = LongType override def symbol: String = "/" override def sqlOperator: String = "div" - private lazy val div: (Any, Any) => Any = dataType match { - case i: IntegralType => i.integral.asInstanceOf[Integral[Any]].quot + private lazy val div: (Any, Any) => Long = left.dataType match { + case i: IntegralType => + val divide = i.integral.asInstanceOf[Integral[Any]].quot _ + val toLong = i.integral.asInstanceOf[Integral[Any]].toLong _ + (x, y) => toLong(divide(x, y)) } + override def evalOperation(left: Any, right: Any): Any = div(left, right) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index 9c9d28f75189d..c3c4d9ee6b702 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -144,12 +144,12 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper } test("/ (Divide) for integral type") { - checkEvaluation(IntegralDivide(Literal(1.toByte), Literal(2.toByte)), 0.toByte) - checkEvaluation(IntegralDivide(Literal(1.toShort), Literal(2.toShort)), 0.toShort) - checkEvaluation(IntegralDivide(Literal(1), Literal(2)), 0) - checkEvaluation(IntegralDivide(Literal(1.toLong), Literal(2.toLong)), 0.toLong) - checkEvaluation(IntegralDivide(positiveShortLit, negativeShortLit), 0.toShort) - checkEvaluation(IntegralDivide(positiveIntLit, negativeIntLit), 0) + checkEvaluation(IntegralDivide(Literal(1.toByte), Literal(2.toByte)), 0L) + checkEvaluation(IntegralDivide(Literal(1.toShort), Literal(2.toShort)), 0L) + checkEvaluation(IntegralDivide(Literal(1), Literal(2)), 0L) + checkEvaluation(IntegralDivide(Literal(1.toLong), Literal(2.toLong)), 0L) + checkEvaluation(IntegralDivide(positiveShortLit, negativeShortLit), 0L) + checkEvaluation(IntegralDivide(positiveIntLit, negativeIntLit), 0L) checkEvaluation(IntegralDivide(positiveLongLit, negativeLongLit), 0L) } diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index 77c94265e8b03..2555734756fc4 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -157,7 +157,7 @@ NULL -- !query 19 select 5 div 2 -- !query 19 schema -struct<(5 div 2):int> +struct<(5 div 2):bigint> -- !query 19 output 2 @@ -165,7 +165,7 @@ struct<(5 div 2):int> -- !query 20 select 5 div 0 -- !query 20 schema -struct<(5 div 0):int> +struct<(5 div 0):bigint> -- !query 20 output NULL @@ -173,7 +173,7 @@ NULL -- !query 21 select 5 div null -- !query 21 schema -struct<(5 div CAST(NULL AS INT)):int> +struct<(5 div CAST(NULL AS INT)):bigint> -- !query 21 output NULL @@ -181,7 +181,7 @@ NULL -- !query 22 select null div 5 -- !query 22 schema -struct<(CAST(NULL AS INT) div 5):int> +struct<(CAST(NULL AS INT) div 5):bigint> -- !query 22 output NULL From 71255a1787012baf2d5188991421e8197ec44733 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Fri, 14 Sep 2018 17:17:49 +0200 Subject: [PATCH 6/8] update description --- .../org/apache/spark/sql/catalyst/expressions/arithmetic.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 8603320732e57..b5aeef89dc0b0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -315,7 +315,7 @@ case class Divide(left: Expression, right: Expression) extends DivModLike { } @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Returns `expr1`/`expr2`. It performs integral division.", + usage = "expr1 _FUNC_ expr2 - Divide `expr1` by `expr2` rounded to the long integer.", examples = """ Examples: > SELECT 3 _FUNC_ 2; From c471bef5292016e942ffe2788760eda08e48783e Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Mon, 17 Sep 2018 10:27:26 +0200 Subject: [PATCH 7/8] update doc with null handling --- .../apache/spark/sql/catalyst/expressions/arithmetic.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index b5aeef89dc0b0..ba0553e7cf96b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -314,14 +314,16 @@ case class Divide(left: Expression, right: Expression) extends DivModLike { override def evalOperation(left: Any, right: Any): Any = div(left, right) } +// scalastyle:off line.size.limit @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Divide `expr1` by `expr2` rounded to the long integer.", + usage = "expr1 _FUNC_ expr2 - Divide `expr1` by `expr2` rounded to the long integer. It returns NULL if an operand is NULL or `expr2` is 0.", examples = """ Examples: > SELECT 3 _FUNC_ 2; 1 """, since = "3.0.0") +// scalastyle:on line.size.limit case class IntegralDivide(left: Expression, right: Expression) extends DivModLike { override def inputType: AbstractDataType = IntegralType From 3550d29f74e437ebc57b846d7b22f0cea254bd18 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Mon, 17 Sep 2018 15:20:03 +0200 Subject: [PATCH 8/8] 3.0.0 -> 2.5.0 --- .../org/apache/spark/sql/catalyst/expressions/arithmetic.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index ba0553e7cf96b..1b1808f8366d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -322,7 +322,7 @@ case class Divide(left: Expression, right: Expression) extends DivModLike { > SELECT 3 _FUNC_ 2; 1 """, - since = "3.0.0") + since = "2.5.0") // scalastyle:on line.size.limit case class IntegralDivide(left: Expression, right: Expression) extends DivModLike {