From f93c420a7bc4cdd009bf0d8087d46f756e25ace5 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 2 Jun 2015 13:22:43 +0800 Subject: [PATCH 1/4] compare literal --- .../catalyst/analysis/HiveTypeCoercion.scala | 10 ++++++---- .../analysis/HiveTypeCoercionSuite.scala | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 9b8a08a88dcb..0582d1a96e7c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -480,13 +480,15 @@ trait HiveTypeCoercion { * Changes numeric values to booleans so that expressions like true = 1 can be evaluated. */ object BooleanEqualization extends Rule[LogicalPlan] { - private val trueValues = Seq(1.toByte, 1.toShort, 1, 1L, new java.math.BigDecimal(1)) - private val falseValues = Seq(0.toByte, 0.toShort, 0, 0L, new java.math.BigDecimal(0)) + private val trueValues = + Seq(1.toByte, 1.toShort, 1, 1L, new java.math.BigDecimal(1)).map(Literal(_)) + private val falseValues = + Seq(0.toByte, 0.toShort, 0, 0L, new java.math.BigDecimal(0)).map(Literal(_)) private def buildCaseKeyWhen(booleanExpr: Expression, numericExpr: Expression) = { CaseKeyWhen(numericExpr, Seq( - Literal(trueValues.head), booleanExpr, - Literal(falseValues.head), Not(booleanExpr), + trueValues.head, booleanExpr, + falseValues.head, Not(booleanExpr), Literal(false))) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala index 0df446636ea8..5c430d1c5f1c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala @@ -148,6 +148,7 @@ class HiveTypeCoercionSuite extends PlanTest { test("type coercion simplification for equal to") { val be = new HiveTypeCoercion {}.BooleanEqualization + ruleTest(be, EqualTo(Literal(true), Literal(1)), Literal(true) @@ -164,5 +165,22 @@ class HiveTypeCoercionSuite extends PlanTest { EqualNullSafe(Literal(true), Literal(0)), And(IsNotNull(Literal(true)), Not(Literal(true))) ) + + ruleTest(be, + EqualTo(Literal(true), Literal(1L)), + Literal(true) + ) + ruleTest(be, + EqualTo(Literal(new java.math.BigDecimal(1)), Literal(true)), + Literal(true) + ) + ruleTest(be, + EqualTo(Literal(BigDecimal(0)), Literal(true)), + Not(Literal(true)) + ) + ruleTest(be, + EqualTo(Literal(Decimal(1)), Literal(true)), + Literal(true) + ) } } From 1987b3785adb7250bc5907739035970c5b7c7644 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 2 Jun 2015 13:53:02 +0800 Subject: [PATCH 2/4] use Decimal instead of java.math.BigDecimal --- .../catalyst/analysis/HiveTypeCoercion.scala | 42 +++++++++---------- .../analysis/HiveTypeCoercionSuite.scala | 4 ++ 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 0582d1a96e7c..a65576c1493c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -480,15 +480,13 @@ trait HiveTypeCoercion { * Changes numeric values to booleans so that expressions like true = 1 can be evaluated. */ object BooleanEqualization extends Rule[LogicalPlan] { - private val trueValues = - Seq(1.toByte, 1.toShort, 1, 1L, new java.math.BigDecimal(1)).map(Literal(_)) - private val falseValues = - Seq(0.toByte, 0.toShort, 0, 0L, new java.math.BigDecimal(0)).map(Literal(_)) + private val trueValues = Seq(1.toByte, 1.toShort, 1, 1L, Decimal(1)) + private val falseValues = Seq(0.toByte, 0.toShort, 0, 0L, Decimal(0)) private def buildCaseKeyWhen(booleanExpr: Expression, numericExpr: Expression) = { CaseKeyWhen(numericExpr, Seq( - trueValues.head, booleanExpr, - falseValues.head, Not(booleanExpr), + Literal(trueValues.head), booleanExpr, + Literal(falseValues.head), Not(booleanExpr), Literal(false))) } @@ -514,22 +512,22 @@ trait HiveTypeCoercion { // all other cases are considered as false. // We may simplify the expression if one side is literal numeric values - case EqualTo(left @ BooleanType(), Literal(value, _: NumericType)) - if trueValues.contains(value) => left - case EqualTo(left @ BooleanType(), Literal(value, _: NumericType)) - if falseValues.contains(value) => Not(left) - case EqualTo(Literal(value, _: NumericType), right @ BooleanType()) - if trueValues.contains(value) => right - case EqualTo(Literal(value, _: NumericType), right @ BooleanType()) - if falseValues.contains(value) => Not(right) - case EqualNullSafe(left @ BooleanType(), Literal(value, _: NumericType)) - if trueValues.contains(value) => And(IsNotNull(left), left) - case EqualNullSafe(left @ BooleanType(), Literal(value, _: NumericType)) - if falseValues.contains(value) => And(IsNotNull(left), Not(left)) - case EqualNullSafe(Literal(value, _: NumericType), right @ BooleanType()) - if trueValues.contains(value) => And(IsNotNull(right), right) - case EqualNullSafe(Literal(value, _: NumericType), right @ BooleanType()) - if falseValues.contains(value) => And(IsNotNull(right), Not(right)) + case EqualTo(b @ BooleanType(), Literal(value, _: NumericType)) + if trueValues.contains(value) => b + case EqualTo(b @ BooleanType(), Literal(value, _: NumericType)) + if falseValues.contains(value) => Not(b) + case EqualTo(Literal(value, _: NumericType), b @ BooleanType()) + if trueValues.contains(value) => b + case EqualTo(Literal(value, _: NumericType), b @ BooleanType()) + if falseValues.contains(value) => Not(b) + case EqualNullSafe(b @ BooleanType(), Literal(value, _: NumericType)) + if trueValues.contains(value) => And(IsNotNull(b), b) + case EqualNullSafe(b @ BooleanType(), Literal(value, _: NumericType)) + if falseValues.contains(value) => And(IsNotNull(b), Not(b)) + case EqualNullSafe(Literal(value, _: NumericType), b @ BooleanType()) + if trueValues.contains(value) => And(IsNotNull(b), b) + case EqualNullSafe(Literal(value, _: NumericType), b @ BooleanType()) + if falseValues.contains(value) => And(IsNotNull(b), Not(b)) case EqualTo(left @ BooleanType(), right @ NumericType()) => transform(left , right) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala index 5c430d1c5f1c..9895ebe68971 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala @@ -182,5 +182,9 @@ class HiveTypeCoercionSuite extends PlanTest { EqualTo(Literal(Decimal(1)), Literal(true)), Literal(true) ) + ruleTest(be, + EqualTo(Literal.create(Decimal(1), DecimalType(8, 0)), Literal(true)), + Literal(true) + ) } } From b0e35496cacf141ff3a46df7843caf553f55918e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 2 Jun 2015 14:41:36 +0800 Subject: [PATCH 3/4] rename to BooleanEquality --- .../catalyst/analysis/HiveTypeCoercion.scala | 36 +++++++++---------- .../analysis/HiveTypeCoercionSuite.scala | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index a65576c1493c..a42ffce0d26f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -87,7 +87,7 @@ trait HiveTypeCoercion { WidenTypes :: PromoteStrings :: DecimalPrecision :: - BooleanEqualization :: + BooleanEquality :: StringToIntegralCasts :: FunctionArgumentConversion :: CaseWhenCoercion :: @@ -479,7 +479,7 @@ trait HiveTypeCoercion { /** * Changes numeric values to booleans so that expressions like true = 1 can be evaluated. */ - object BooleanEqualization extends Rule[LogicalPlan] { + object BooleanEquality extends Rule[LogicalPlan] { private val trueValues = Seq(1.toByte, 1.toShort, 1, 1L, Decimal(1)) private val falseValues = Seq(0.toByte, 0.toShort, 0, 0L, Decimal(0)) @@ -512,22 +512,22 @@ trait HiveTypeCoercion { // all other cases are considered as false. // We may simplify the expression if one side is literal numeric values - case EqualTo(b @ BooleanType(), Literal(value, _: NumericType)) - if trueValues.contains(value) => b - case EqualTo(b @ BooleanType(), Literal(value, _: NumericType)) - if falseValues.contains(value) => Not(b) - case EqualTo(Literal(value, _: NumericType), b @ BooleanType()) - if trueValues.contains(value) => b - case EqualTo(Literal(value, _: NumericType), b @ BooleanType()) - if falseValues.contains(value) => Not(b) - case EqualNullSafe(b @ BooleanType(), Literal(value, _: NumericType)) - if trueValues.contains(value) => And(IsNotNull(b), b) - case EqualNullSafe(b @ BooleanType(), Literal(value, _: NumericType)) - if falseValues.contains(value) => And(IsNotNull(b), Not(b)) - case EqualNullSafe(Literal(value, _: NumericType), b @ BooleanType()) - if trueValues.contains(value) => And(IsNotNull(b), b) - case EqualNullSafe(Literal(value, _: NumericType), b @ BooleanType()) - if falseValues.contains(value) => And(IsNotNull(b), Not(b)) + case EqualTo(bool @ BooleanType(), Literal(value, _: NumericType)) + if trueValues.contains(value) => bool + case EqualTo(bool @ BooleanType(), Literal(value, _: NumericType)) + if falseValues.contains(value) => Not(bool) + case EqualTo(Literal(value, _: NumericType), bool @ BooleanType()) + if trueValues.contains(value) => bool + case EqualTo(Literal(value, _: NumericType), bool @ BooleanType()) + if falseValues.contains(value) => Not(bool) + case EqualNullSafe(bool @ BooleanType(), Literal(value, _: NumericType)) + if trueValues.contains(value) => And(IsNotNull(bool), bool) + case EqualNullSafe(bool @ BooleanType(), Literal(value, _: NumericType)) + if falseValues.contains(value) => And(IsNotNull(bool), Not(bool)) + case EqualNullSafe(Literal(value, _: NumericType), bool @ BooleanType()) + if trueValues.contains(value) => And(IsNotNull(bool), bool) + case EqualNullSafe(Literal(value, _: NumericType), bool @ BooleanType()) + if falseValues.contains(value) => And(IsNotNull(bool), Not(bool)) case EqualTo(left @ BooleanType(), right @ NumericType()) => transform(left , right) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala index 9895ebe68971..9977f7af00f6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala @@ -147,7 +147,7 @@ class HiveTypeCoercionSuite extends PlanTest { } test("type coercion simplification for equal to") { - val be = new HiveTypeCoercion {}.BooleanEqualization + val be = new HiveTypeCoercion {}.BooleanEquality ruleTest(be, EqualTo(Literal(true), Literal(1)), From d7b60c897b3295c5fd96f06435552926ab289db9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 8 Jun 2015 11:15:37 +0800 Subject: [PATCH 4/4] another bug --- .../apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index a42ffce0d26f..a0abede7f8db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -445,10 +445,10 @@ trait HiveTypeCoercion { e2 @ DecimalType.Expression(p2, s2)) if p1 != p2 || s1 != s2 => val resultType = DecimalType(max(p1, p2), max(s1, s2)) b.makeCopy(Array(Cast(e1, resultType), Cast(e2, resultType))) - case b @ BinaryComparison(e1 @ DecimalType.Fixed(_, _), e2) + case b @ BinaryComparison(e1 @ DecimalType.Expression(_, _), e2) if e2.dataType == DecimalType.Unlimited => b.makeCopy(Array(Cast(e1, DecimalType.Unlimited), e2)) - case b @ BinaryComparison(e1, e2 @ DecimalType.Fixed(_, _)) + case b @ BinaryComparison(e1, e2 @ DecimalType.Expression(_, _)) if e1.dataType == DecimalType.Unlimited => b.makeCopy(Array(e1, Cast(e2, DecimalType.Unlimited)))