From e99a26caaf95118c1de02849c69ed16f4a502e7e Mon Sep 17 00:00:00 2001 From: wangfei Date: Fri, 26 Dec 2014 18:19:48 +0800 Subject: [PATCH 1/8] refactory And/Or optimization to make it more readable and clean --- pom.xml | 5 + sql/catalyst/pom.xml | 5 +- .../sql/catalyst/expressions/predicates.scala | 8 + .../sql/catalyst/optimizer/Optimizer.scala | 113 +++++++++++++ .../ConditionSimplificationSuite.scala | 158 ++++++++++++++++++ 5 files changed, 288 insertions(+), 1 deletion(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala diff --git a/pom.xml b/pom.xml index e4db1393ba9c..052ce02e125d 100644 --- a/pom.xml +++ b/pom.xml @@ -824,6 +824,11 @@ jackson-mapper-asl 1.8.8 + + org.spire-math + spire_2.10 + 0.9.0 + diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 1caa297e24e3..43f12a6b9364 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -44,7 +44,10 @@ org.scala-lang scala-reflect - + + org.spire-math + spire_2.10 + org.apache.spark spark-core_${scala.binary.version} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 94b6fb084d38..c0fddf2af425 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -48,6 +48,14 @@ trait PredicateHelper { } } + protected def splitDisjunctivePredicates(condition: Expression): Seq[Expression] = { + condition match { + case Or(cond1, cond2) => + splitDisjunctivePredicates(cond1) ++ splitDisjunctivePredicates(cond2) + case other => other :: Nil + } + } + /** * Returns true if `expr` can be evaluated using only the output of `plan`. This method * can be used to determine when is is acceptable to move expression evaluation within a query diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 806c1394eb15..5fb6641686bd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -18,6 +18,9 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.immutable.HashSet +import spire.implicits._ +import spire.math._ + import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.Inner import org.apache.spark.sql.catalyst.plans.FullOuter @@ -293,6 +296,116 @@ object OptimizeIn extends Rule[LogicalPlan] { } } +/** + * Simplifies Conditions(And, Or) expressions when the conditions can by optimized. + */ +object ConditionSimplification extends Rule[LogicalPlan] with PredicateHelper { + + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case q: LogicalPlan => q transformExpressionsDown { + // a && a => a + case And(left, right) if left.fastEquals(right) => + left + + // a || a => a + case Or(left, right) if left.fastEquals(right) => + left + + // a < 2 && a > 2 => false, a > 3 && a > 5 => a > 5 + case and @ And( + e1 @ NumericLiteralBinaryComparison(n1, i1), + e2 @ NumericLiteralBinaryComparison(n2, i2)) if n1 == n2 => + if (!i1.intersects(i2)) Literal(false) + else if (i1.isSubsetOf(i2)) e1 + else if (i1.isSupersetOf(i2)) e2 + else and + + // a < 2 || a >= 2 => true, a > 3 || a > 5 => a > 3 + case or @ Or( + e1 @ NumericLiteralBinaryComparison(n1, i1), + e2 @ NumericLiteralBinaryComparison(n2, i2)) if n1 == n2 => + if (i1.intersects(i2)) Literal(true) + else if (i1.isSubsetOf(i2)) e2 + else if (i1.isSupersetOf(i2)) e1 + else or + + // (a < 3 && b > 5) || a > 2 => b > 5 || a > 2 + case Or(left1 @ And(left2, right2), right1) => + And(Or(left2, right1), Or(right2, right1)) + + // (a < 3 || b > 5) || a > 2 => true, (b > 5 || a < 3) || a > 2 => true + case Or( Or( + e1 @ NumericLiteralBinaryComparison(n1, i1), e2 @ NumericLiteralBinaryComparison(n2, i2)), + right @ NumericLiteralBinaryComparison(n3, i3)) => + if (n3 fastEquals n1) { + Or(Or(e1, right), e2) + } else { + Or(Or(e2, right), e1) + } + + // (b > 5 && a < 2) && a > 3 => false, (a < 2 && b > 5) && a > 3 => false + case And(And( + e1 @ NumericLiteralBinaryComparison(n1, i1), e2 @ NumericLiteralBinaryComparison(n2, i2)), + right @ NumericLiteralBinaryComparison(n3, i3)) => + if (n3 fastEquals n1) { + And(And(e1, right), e2) + } else { + And(And(e2, right), e1) + } + + // (a < 2 || b > 5) && a > 3 => b > 5 && a > 3 + case And(left1@Or(left2, right2), right1) => + Or(And(left2, right1), And(right2, right1)) + + // (a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ... => + // a && b && ((c && ...) || (d && ...) || (e && ...) || ...) + case or @ Or(left, right) => + val lhsSet = splitConjunctivePredicates(left).toSet + val rhsSet = splitConjunctivePredicates(right).toSet + val common = lhsSet.intersect(rhsSet) + (lhsSet.diff(common).reduceOption(And) ++ rhsSet.diff(common).reduceOption(And)) + .reduceOption(Or) + .map(_ :: common.toList) + .getOrElse(common.toList) + .reduce(And) + + // (a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ... => + // (a || b) || ((c || ...) && (f || ...) && (e || ...) && ...) + case and @ And(left, right) => + val lhsSet = splitDisjunctivePredicates(left).toSet + val rhsSet = splitDisjunctivePredicates(right).toSet + val common = lhsSet.intersect(rhsSet) + (lhsSet.diff(common).reduceOption(Or) ++ rhsSet.diff(common).reduceOption(Or)) + .reduceOption(And) + .map(_ :: common.toList) + .getOrElse(common.toList) + .reduce(Or) + } + } + + private implicit class NumericLiteral(e: Literal) { + def toDouble = Cast(e, DoubleType).eval().asInstanceOf[Double] + } + + object NumericLiteralBinaryComparison { + def unapply(e: Expression): Option[(NamedExpression, Interval[Double])] = e match { + case LessThan(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.below(l.toDouble))) + case LessThan(l @ Literal(_, _: NumericType), n: NamedExpression) => Some((n, Interval.atOrAbove(l.toDouble))) + + case GreaterThan(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.above(l.toDouble))) + case GreaterThan(l @ Literal(_, dt: NumericType), n: NamedExpression) => Some((n, Interval.atOrBelow(l.toDouble))) + + case LessThanOrEqual(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.atOrBelow(l.toDouble))) + case LessThanOrEqual(l @ Literal(_, _: NumericType), n: NamedExpression) => Some((n, Interval.above(l.toDouble))) + + case GreaterThanOrEqual(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.atOrAbove(l.toDouble))) + case GreaterThanOrEqual(l @ Literal(_, _: NumericType), n: NamedExpression) => Some((n, Interval.below(l.toDouble))) + + case EqualTo(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.point(l.toDouble))) + } + } +} + /** * Simplifies boolean expressions where the answer can be determined without evaluating both sides. * Note that this rule can eliminate expressions that might otherwise have been evaluated and thus diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala new file mode 100644 index 000000000000..743b0338fee8 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.analysis.EliminateAnalysisOperators +import org.apache.spark.sql.catalyst.expressions.{Literal, Expression} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.dsl.expressions._ + +class ConditionSimplificationSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("AnalysisNodes", Once, + EliminateAnalysisOperators) :: + Batch("Constant Folding", FixedPoint(10), + NullPropagation, + ConstantFolding, + ConditionSimplification, + BooleanSimplification, + SimplifyFilters) :: Nil + } + + val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.string) + + def checkCondition(originCondition: Expression, optimizedCondition: Expression): Unit = { + val originQuery = testRelation.where(originCondition).analyze + val optimized = Optimize(originQuery) + val expected = testRelation.where(optimizedCondition).analyze + comparePlans(optimized, expected) + } + + def checkCondition(originCondition: Expression): Unit = { + val originQuery = testRelation.where(originCondition).analyze + val optimized = Optimize(originQuery) + val expected = testRelation + comparePlans(optimized, expected) + } + + test("literal in front of attribute") { + checkCondition(Literal(1) < 'a || Literal(2) < 'a, 'a > 1) + } + + test("combine the same condition") { + checkCondition('a < 1 || 'a < 1, 'a < 1) + checkCondition('a < 1 || 'a < 1 || 'a < 1 || 'a < 1, 'a < 1) + checkCondition('a > 2 && 'a > 2, 'a > 2) + checkCondition('a > 2 && 'a > 2 && 'a > 2 && 'a > 2, 'a > 2) + checkCondition(('a < 1 && 'a < 2) || ('a < 1 && 'a < 2), 'a < 1) + } + + test("combine literal binary comparison") { + checkCondition('a === 1 && 'a < 1) + checkCondition('a === 1 || 'a < 1, 'a <= 1) + + checkCondition('a === 1 && 'a === 2) + checkCondition('a === 1 || 'a === 2, 'a === 1 || 'a === 2) + + checkCondition('a <= 1 && 'a > 1) + checkCondition('a <= 1 || 'a > 1) + + checkCondition('a < 1 && 'a >= 1) + checkCondition('a < 1 || 'a >= 1) + + checkCondition('a > 3 && 'a > 2, 'a > 3) + checkCondition('a > 3 || 'a > 2, 'a > 2) + + checkCondition('a >= 1 && 'a <= 1, 'a === 1) + + } + + test("different data type comparison") { + checkCondition('a > "abc") + checkCondition('a > "a" && 'a < "b") + + checkCondition('a > "a" || 'a < "b") + + checkCondition('a > "9" || 'a < "0", 'a > 9.0 || 'a < 0.0) + checkCondition('d > 9 && 'd < 1, 'd > 9.0 && 'd < 1.0 ) + + checkCondition('a > "9" || 'a < "0", 'a > 9.0 || 'a < 0.0) + } + + test("combine predicate : 2 same combine") { + checkCondition('a < 1 || 'b > 2 || 'a >= 1) + checkCondition('a < 1 && 'b > 2 && 'a >= 1) + + checkCondition('a < 2 || 'b > 3 || 'b > 2, 'a < 2 || 'b > 2) + checkCondition('a < 2 && 'b > 3 && 'b > 2, 'a < 2 && 'b > 3) + + checkCondition('a < 2 || ('b > 3 || 'b > 2), 'b > 2 || 'a < 2) + checkCondition('a < 2 && ('b > 3 && 'b > 2), 'b > 3 && 'a < 2) + + checkCondition('a < 2 || 'a === 3 || 'a > 5, 'a < 2 || 'a === 3 || 'a > 5) + } + + test("combine predicate : 2 difference combine") { + checkCondition(('a < 2 || 'a > 3) && 'a > 4, 'a > 4) + checkCondition(('a < 2 || 'b > 3) && 'a < 2, 'a < 2) + + checkCondition('a < 2 || ('a >= 2 && 'b > 1), 'b > 1 || 'a < 2) + checkCondition('a < 2 || ('a === 2 && 'b > 1), 'a < 2 || ('a === 2 && 'b > 1)) + + checkCondition('a > 3 || ('a > 2 && 'a < 4), 'a > 2) + } + + test("multi left, single right") { + checkCondition(('a < 2 || 'a > 3 || 'b > 5) && 'a < 2, 'a < 2) + } + + test("multi left, multi right") { + checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), 'a < 2 || ('b > 3 && 'c > 5)) + + var input: Expression = ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a < 5) + var expected: Expression = 'a === 'b || ('b > 3 && 'a > 3 && 'a < 5) + checkCondition(input, expected) + + input = ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a > 1) + expected = 'a === 'b || ('b > 3 && 'a > 3) + checkCondition(input, expected) + + input = ('a === 'b && 'b > 3 && 'c > 2) || + ('a === 'b && 'c < 1 && 'a === 5) || + ('a === 'b && 'b < 5 && 'a > 1) + + expected = ('a === 'b) && + (((('b > 3) && ('c > 2)) || + (('c < 1) && ('a === 5))) || + (('b < 5) && ('a > 1))) + checkCondition(input, expected) + + input = ('a < 2 || 'b > 5 || 'a < 2 || 'b > 1) && ('a < 2 || 'b > 1) + expected = 'a < 2 || 'b > 1 + checkCondition(input, expected) + + input = ('a === 'b || 'b > 5) && ('a === 'b || 'c > 3) && ('a === 'b || 'b > 1) + expected = ('a === 'b) || ('c > 3 && 'b > 5) + checkCondition(input, expected) + } +} \ No newline at end of file From 32a595b855d7d0db67f7ba295079341081d532fe Mon Sep 17 00:00:00 2001 From: scwf Date: Sat, 27 Dec 2014 00:42:39 +0800 Subject: [PATCH 2/8] improvement and test fix --- pom.xml | 14 +- .../sql/catalyst/optimizer/Optimizer.scala | 127 +++++++++++++----- .../ConditionSimplificationSuite.scala | 41 +++--- 3 files changed, 120 insertions(+), 62 deletions(-) diff --git a/pom.xml b/pom.xml index 052ce02e125d..560752aee936 100644 --- a/pom.xml +++ b/pom.xml @@ -156,7 +156,7 @@ central Maven Repository - https://repo1.maven.org/maven2 + http://repo1.maven.org/maven2 true @@ -167,7 +167,7 @@ apache-repo Apache Repository - https://repository.apache.org/content/repositories/releases + http://repository.apache.org/content/repositories/releases true @@ -178,7 +178,7 @@ jboss-repo JBoss Repository - https://repository.jboss.org/nexus/content/repositories/releases + http://repository.jboss.org/nexus/content/repositories/releases true @@ -189,7 +189,7 @@ mqtt-repo MQTT Repository - https://repo.eclipse.org/content/repositories/paho-releases + http://repo.eclipse.org/content/repositories/paho-releases true @@ -200,7 +200,7 @@ cloudera-repo Cloudera Repository - https://repository.cloudera.com/artifactory/cloudera-repos + http://repository.cloudera.com/artifactory/cloudera-repos true @@ -222,7 +222,7 @@ spring-releases Spring Release Repository - https://repo.spring.io/libs-release + http://repo.spring.io/libs-release true @@ -234,7 +234,7 @@ central - https://repo1.maven.org/maven2 + http://repo1.maven.org/maven2 true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 5fb6641686bd..cbc5a7265444 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -42,6 +42,7 @@ object DefaultOptimizer extends Optimizer { NullPropagation, ConstantFolding, LikeSimplification, + ConditionSimplification, BooleanSimplification, SimplifyFilters, SimplifyCasts, @@ -302,7 +303,8 @@ object OptimizeIn extends Rule[LogicalPlan] { object ConditionSimplification extends Rule[LogicalPlan] with PredicateHelper { def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case q: LogicalPlan => q transformExpressionsDown { + case q: LogicalPlan => q transformExpressionsUp { + /** 1. one And/Or with same condition. */ // a && a => a case And(left, right) if left.fastEquals(right) => left @@ -311,75 +313,129 @@ object ConditionSimplification extends Rule[LogicalPlan] with PredicateHelper { case Or(left, right) if left.fastEquals(right) => left + /** 2. one And/Or with literal conditions that can be merged. */ // a < 2 && a > 2 => false, a > 3 && a > 5 => a > 5 case and @ And( - e1 @ NumericLiteralBinaryComparison(n1, i1), - e2 @ NumericLiteralBinaryComparison(n2, i2)) if n1 == n2 => + e1 @ NumLitBinComparison(n1, i1), + e2 @ NumLitBinComparison(n2, i2)) if n1 == n2 => if (!i1.intersects(i2)) Literal(false) else if (i1.isSubsetOf(i2)) e1 else if (i1.isSupersetOf(i2)) e2 + else if (i1.intersect(i2).isPoint) + EqualTo(n1, Literal(i1.intersect(i2).asInstanceOf[Point[Double]].value, n1.dataType)) else and // a < 2 || a >= 2 => true, a > 3 || a > 5 => a > 3 case or @ Or( - e1 @ NumericLiteralBinaryComparison(n1, i1), - e2 @ NumericLiteralBinaryComparison(n2, i2)) if n1 == n2 => - if (i1.intersects(i2)) Literal(true) + e1 @ NumLitBinComparison(n1, i1), + e2 @ NumLitBinComparison(n2, i2)) if n1 == n2 => + // a hack to avoid bug of spire + val op = Interval.all[Double] -- i1 + if (i1.intersects(i2) && i1.union(i2) == Interval.all[Double]) Literal(true) + else if (op(op.size - 1) == i2) Literal(true) else if (i1.isSubsetOf(i2)) e2 else if (i1.isSupersetOf(i2)) e1 else or - // (a < 3 && b > 5) || a > 2 => b > 5 || a > 2 - case Or(left1 @ And(left2, right2), right1) => - And(Or(left2, right1), Or(right2, right1)) - + /** 3. Two And/Or with literal condition that can be merged, do a transformation to reuse 2. */ // (a < 3 || b > 5) || a > 2 => true, (b > 5 || a < 3) || a > 2 => true - case Or( Or( - e1 @ NumericLiteralBinaryComparison(n1, i1), e2 @ NumericLiteralBinaryComparison(n2, i2)), - right @ NumericLiteralBinaryComparison(n3, i3)) => + case or @ Or( + Or(e1 @ NumLitBinComparison(n1, i1), e2 @ NumLitBinComparison(n2, i2)), + right @ NumLitBinComparison(n3, i3)) => if (n3 fastEquals n1) { Or(Or(e1, right), e2) - } else { + } else if (n3 fastEquals n2) { Or(Or(e2, right), e1) + } else { + or } // (b > 5 && a < 2) && a > 3 => false, (a < 2 && b > 5) && a > 3 => false - case And(And( - e1 @ NumericLiteralBinaryComparison(n1, i1), e2 @ NumericLiteralBinaryComparison(n2, i2)), - right @ NumericLiteralBinaryComparison(n3, i3)) => + case and @ And( + And(e1 @ NumLitBinComparison(n1, i1), e2 @ NumLitBinComparison(n2, i2)), + right @ NumLitBinComparison(n3, i3)) => if (n3 fastEquals n1) { And(And(e1, right), e2) - } else { + } else if (n3 fastEquals n2) { And(And(e2, right), e1) + } else { + and } // (a < 2 || b > 5) && a > 3 => b > 5 && a > 3 - case And(left1@Or(left2, right2), right1) => - Or(And(left2, right1), And(right2, right1)) - - // (a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ... => + // using formula: a && (b || c) = (a && b) || (a && c) + case And( + left1 @ Or(left2 @ NumLitBinComparison(n1, i1), right2 @ NumLitBinComparison(n2, i2)), + right1 @ NumLitBinComparison(n3, i3)) + if ((n3.fastEquals(n1) && i3 != i1) || (n3.fastEquals(n2) && i3 != i2)) => + Or(And(left2, right1), And(right2, right1)) + + // (a < 3 && b > 5) || a > 2 => b > 5 || a > 2. + // using formula: a || (b && c) = (a || b) && (a || c) + case Or( + left1 @ And(left2 @ NumLitBinComparison(n1, i1), right2 @ NumLitBinComparison(n2, i2)), + right1 @ NumLitBinComparison(n3, i3)) + if ((n3.fastEquals(n1) && i3 != i1) || (n3.fastEquals(n2) && i3 != i2)) => + Or(And(left2, right1), And(right2, right1)) + + /** 4. And/Or whose one child is literal condition, the other is Or/And */ + // (a < 2 || b > 5) && a < 2 => a < 2 + case And( + left1 @ Or(left2 @ NumLitBinComparison(_, _), right2 @ NumLitBinComparison(_, _)), + right1 @ NumLitBinComparison(_, _)) + if (right1 fastEquals left2) || (right1 fastEquals right2) => + right1 + + // (a < 3 && b > 5) || a < 3 => a < 3 + case Or( + left1 @ And(left2 @ NumLitBinComparison(_, _), right2 @ NumLitBinComparison(_, _)), + right1 @ NumLitBinComparison(_, _)) + if (right1 fastEquals left2) || (right1 fastEquals right2) => + right1 + + // 5. (a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ... => // a && b && ((c && ...) || (d && ...) || (e && ...) || ...) case or @ Or(left, right) => val lhsSet = splitConjunctivePredicates(left).toSet val rhsSet = splitConjunctivePredicates(right).toSet val common = lhsSet.intersect(rhsSet) - (lhsSet.diff(common).reduceOption(And) ++ rhsSet.diff(common).reduceOption(And)) - .reduceOption(Or) - .map(_ :: common.toList) - .getOrElse(common.toList) - .reduce(And) + val ldiff = lhsSet.diff(common) + val rdiff = rhsSet.diff(common) + if (common.size == 0) { + or + }else if ( ldiff.size == 0 || rdiff == 0) { + common.reduce(And) + } else { + (ldiff.reduceOption(And) ++ rdiff.reduceOption(And)) + .reduceOption(Or) + .map(_ :: common.toList) + .getOrElse(common.toList) + .reduce(And) + } - // (a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ... => + // 6. (a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ... => // (a || b) || ((c || ...) && (f || ...) && (e || ...) && ...) case and @ And(left, right) => val lhsSet = splitDisjunctivePredicates(left).toSet val rhsSet = splitDisjunctivePredicates(right).toSet val common = lhsSet.intersect(rhsSet) - (lhsSet.diff(common).reduceOption(Or) ++ rhsSet.diff(common).reduceOption(Or)) - .reduceOption(And) - .map(_ :: common.toList) - .getOrElse(common.toList) - .reduce(Or) + val ldiff = lhsSet.diff(common) + val rdiff = rhsSet.diff(common) + + if (common.size == 0) { + and + }else if (ldiff.size == 0 || rdiff.size == 0) { + common.reduce(Or) + } else { + val x = (ldiff.reduceOption(Or) ++ rdiff.reduceOption(Or)) + .reduceOption(And) + .map(_ :: common.toList) + .getOrElse(common.toList) + .reduce(Or) + x + } + + case other => other } } @@ -387,9 +443,9 @@ object ConditionSimplification extends Rule[LogicalPlan] with PredicateHelper { def toDouble = Cast(e, DoubleType).eval().asInstanceOf[Double] } - object NumericLiteralBinaryComparison { + object NumLitBinComparison { def unapply(e: Expression): Option[(NamedExpression, Interval[Double])] = e match { - case LessThan(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.below(l.toDouble))) + case LessThan(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.below(l.toDouble))) case LessThan(l @ Literal(_, _: NumericType), n: NamedExpression) => Some((n, Interval.atOrAbove(l.toDouble))) case GreaterThan(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.above(l.toDouble))) @@ -402,6 +458,7 @@ object ConditionSimplification extends Rule[LogicalPlan] with PredicateHelper { case GreaterThanOrEqual(l @ Literal(_, _: NumericType), n: NamedExpression) => Some((n, Interval.below(l.toDouble))) case EqualTo(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.point(l.toDouble))) + case other => None } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala index 743b0338fee8..940be76ed0d0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala @@ -31,7 +31,7 @@ class ConditionSimplificationSuite extends PlanTest { val batches = Batch("AnalysisNodes", Once, EliminateAnalysisOperators) :: - Batch("Constant Folding", FixedPoint(10), + Batch("Constant Folding", FixedPoint(50), NullPropagation, ConstantFolding, ConditionSimplification, @@ -55,11 +55,12 @@ class ConditionSimplificationSuite extends PlanTest { comparePlans(optimized, expected) } - test("literal in front of attribute") { - checkCondition(Literal(1) < 'a || Literal(2) < 'a, 'a > 1) + test("literals in front of attribute") { + checkCondition(Literal(1) < 'a || Literal(2) < 'a, Literal(1) < 'a) + checkCondition(Literal(1) < 'a && Literal(2) < 'a, Literal(2) < 'a) } - test("combine the same condition") { + test("And/Or with the same conditions") { checkCondition('a < 1 || 'a < 1, 'a < 1) checkCondition('a < 1 || 'a < 1 || 'a < 1 || 'a < 1, 'a < 1) checkCondition('a > 2 && 'a > 2, 'a > 2) @@ -69,7 +70,7 @@ class ConditionSimplificationSuite extends PlanTest { test("combine literal binary comparison") { checkCondition('a === 1 && 'a < 1) - checkCondition('a === 1 || 'a < 1, 'a <= 1) + checkCondition('a === 1 || 'a < 1, 'a === 1 || 'a < 1) checkCondition('a === 1 && 'a === 2) checkCondition('a === 1 || 'a === 2, 'a === 1 || 'a === 2) @@ -83,6 +84,10 @@ class ConditionSimplificationSuite extends PlanTest { checkCondition('a > 3 && 'a > 2, 'a > 3) checkCondition('a > 3 || 'a > 2, 'a > 2) + checkCondition('a < 2 || 'a === 3 , 'a < 2 || 'a === 3) + checkCondition('a === 3 || 'a > 5, 'a === 3 || 'a > 5) + checkCondition('a < 2 || 'a > 5, 'a < 2 || 'a > 5) + checkCondition('a >= 1 && 'a <= 1, 'a === 1) } @@ -103,11 +108,11 @@ class ConditionSimplificationSuite extends PlanTest { checkCondition('a < 1 || 'b > 2 || 'a >= 1) checkCondition('a < 1 && 'b > 2 && 'a >= 1) - checkCondition('a < 2 || 'b > 3 || 'b > 2, 'a < 2 || 'b > 2) - checkCondition('a < 2 && 'b > 3 && 'b > 2, 'a < 2 && 'b > 3) + checkCondition('a < 2 || 'b > 3 || 'b > 2, 'b > 2 || 'a < 2) + checkCondition('a < 2 && 'b > 3 && 'b > 2, 'b > 3 && 'a < 2) - checkCondition('a < 2 || ('b > 3 || 'b > 2), 'b > 2 || 'a < 2) - checkCondition('a < 2 && ('b > 3 && 'b > 2), 'b > 3 && 'a < 2) + checkCondition('a < 2 || ('b > 3 || 'b > 2), 'a < 2 || 'b > 2) + checkCondition('a < 2 && ('b > 3 && 'b > 2), 'a < 2 && 'b > 3) checkCondition('a < 2 || 'a === 3 || 'a > 5, 'a < 2 || 'a === 3 || 'a > 5) } @@ -115,11 +120,7 @@ class ConditionSimplificationSuite extends PlanTest { test("combine predicate : 2 difference combine") { checkCondition(('a < 2 || 'a > 3) && 'a > 4, 'a > 4) checkCondition(('a < 2 || 'b > 3) && 'a < 2, 'a < 2) - - checkCondition('a < 2 || ('a >= 2 && 'b > 1), 'b > 1 || 'a < 2) - checkCondition('a < 2 || ('a === 2 && 'b > 1), 'a < 2 || ('a === 2 && 'b > 1)) - - checkCondition('a > 3 || ('a > 2 && 'a < 4), 'a > 2) + checkCondition(('a < 2 && 'b > 3) || 'a < 2, 'a < 2) } test("multi left, single right") { @@ -127,24 +128,24 @@ class ConditionSimplificationSuite extends PlanTest { } test("multi left, multi right") { - checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), 'a < 2 || ('b > 3 && 'c > 5)) + checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), ('b > 3 && 'c > 5) || 'a < 2) var input: Expression = ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a < 5) - var expected: Expression = 'a === 'b || ('b > 3 && 'a > 3 && 'a < 5) + var expected: Expression = ('a > 3 && 'a < 5 && 'b > 3) || 'a === 'b checkCondition(input, expected) input = ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a > 1) - expected = 'a === 'b || ('b > 3 && 'a > 3) + expected = ('a > 3 && 'b > 3) || 'a === 'b checkCondition(input, expected) input = ('a === 'b && 'b > 3 && 'c > 2) || ('a === 'b && 'c < 1 && 'a === 5) || ('a === 'b && 'b < 5 && 'a > 1) - expected = ('a === 'b) && + expected = (((('b > 3) && ('c > 2)) || (('c < 1) && ('a === 5))) || - (('b < 5) && ('a > 1))) + (('b < 5) && ('a > 1))) && ('a === 'b) checkCondition(input, expected) input = ('a < 2 || 'b > 5 || 'a < 2 || 'b > 1) && ('a < 2 || 'b > 1) @@ -152,7 +153,7 @@ class ConditionSimplificationSuite extends PlanTest { checkCondition(input, expected) input = ('a === 'b || 'b > 5) && ('a === 'b || 'c > 3) && ('a === 'b || 'b > 1) - expected = ('a === 'b) || ('c > 3 && 'b > 5) + expected = ('b > 5 && 'c > 3) || ('a === 'b) checkCondition(input, expected) } } \ No newline at end of file From a001e8c1f856ad7a17417255019c18251fc5098f Mon Sep 17 00:00:00 2001 From: wangfei Date: Sat, 27 Dec 2014 15:35:35 +0800 Subject: [PATCH 3/8] revert pom changes --- pom.xml | 14 +++++++------- .../optimizer/ConditionSimplificationSuite.scala | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pom.xml b/pom.xml index 560752aee936..052ce02e125d 100644 --- a/pom.xml +++ b/pom.xml @@ -156,7 +156,7 @@ central Maven Repository - http://repo1.maven.org/maven2 + https://repo1.maven.org/maven2 true @@ -167,7 +167,7 @@ apache-repo Apache Repository - http://repository.apache.org/content/repositories/releases + https://repository.apache.org/content/repositories/releases true @@ -178,7 +178,7 @@ jboss-repo JBoss Repository - http://repository.jboss.org/nexus/content/repositories/releases + https://repository.jboss.org/nexus/content/repositories/releases true @@ -189,7 +189,7 @@ mqtt-repo MQTT Repository - http://repo.eclipse.org/content/repositories/paho-releases + https://repo.eclipse.org/content/repositories/paho-releases true @@ -200,7 +200,7 @@ cloudera-repo Cloudera Repository - http://repository.cloudera.com/artifactory/cloudera-repos + https://repository.cloudera.com/artifactory/cloudera-repos true @@ -222,7 +222,7 @@ spring-releases Spring Release Repository - http://repo.spring.io/libs-release + https://repo.spring.io/libs-release true @@ -234,7 +234,7 @@ central - http://repo1.maven.org/maven2 + https://repo1.maven.org/maven2 true diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala index 940be76ed0d0..99b6af0cac5c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala @@ -156,4 +156,4 @@ class ConditionSimplificationSuite extends PlanTest { expected = ('b > 5 && 'c > 3) || ('a === 'b) checkCondition(input, expected) } -} \ No newline at end of file +} From 825fa6924d54c6b52ccea1618428bdacd3823c81 Mon Sep 17 00:00:00 2001 From: wangfei Date: Sat, 27 Dec 2014 16:51:09 +0800 Subject: [PATCH 4/8] adding more tests --- .../optimizer/ConditionSimplificationSuite.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala index 99b6af0cac5c..233efe60e215 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala @@ -68,7 +68,7 @@ class ConditionSimplificationSuite extends PlanTest { checkCondition(('a < 1 && 'a < 2) || ('a < 1 && 'a < 2), 'a < 1) } - test("combine literal binary comparison") { + test("one And/Or with literal conditions") { checkCondition('a === 1 && 'a < 1) checkCondition('a === 1 || 'a < 1, 'a === 1 || 'a < 1) @@ -104,7 +104,7 @@ class ConditionSimplificationSuite extends PlanTest { checkCondition('a > "9" || 'a < "0", 'a > 9.0 || 'a < 0.0) } - test("combine predicate : 2 same combine") { + test("two same And/Or with literal conditions") { checkCondition('a < 1 || 'b > 2 || 'a >= 1) checkCondition('a < 1 && 'b > 2 && 'a >= 1) @@ -117,17 +117,19 @@ class ConditionSimplificationSuite extends PlanTest { checkCondition('a < 2 || 'a === 3 || 'a > 5, 'a < 2 || 'a === 3 || 'a > 5) } - test("combine predicate : 2 difference combine") { + test("two different And/Or with literal conditions") { checkCondition(('a < 2 || 'a > 3) && 'a > 4, 'a > 4) checkCondition(('a < 2 || 'b > 3) && 'a < 2, 'a < 2) checkCondition(('a < 2 && 'b > 3) || 'a < 2, 'a < 2) } - test("multi left, single right") { + test("more than two And/Or: one child is literal condition") { checkCondition(('a < 2 || 'a > 3 || 'b > 5) && 'a < 2, 'a < 2) + checkCondition(('a < 2 && 'a > 3 && 'b > 5) && 'a < 2) + checkCondition(('a < 2 && 'a > 3 && 'b > 5) || 'a < 2, 'a < 2) } - test("multi left, multi right") { + test("more than two And/Or") { checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), ('b > 3 && 'c > 5) || 'a < 2) var input: Expression = ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a < 5) From 546a82bfe463ad7f9ed15f6e026a81373735fa9a Mon Sep 17 00:00:00 2001 From: wangfei Date: Sat, 27 Dec 2014 17:10:34 +0800 Subject: [PATCH 5/8] style fix --- .../sql/catalyst/optimizer/Optimizer.scala | 65 ++++++++++++------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index cbc5a7265444..8ac23d474438 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -318,26 +318,35 @@ object ConditionSimplification extends Rule[LogicalPlan] with PredicateHelper { case and @ And( e1 @ NumLitBinComparison(n1, i1), e2 @ NumLitBinComparison(n2, i2)) if n1 == n2 => - if (!i1.intersects(i2)) Literal(false) - else if (i1.isSubsetOf(i2)) e1 - else if (i1.isSupersetOf(i2)) e2 - else if (i1.intersect(i2).isPoint) + if (!i1.intersects(i2)) { + Literal(false) + } else if (i1.isSubsetOf(i2)) { + e1 + } else if (i1.isSupersetOf(i2)) { + e2 + } else if (i1.intersect(i2).isPoint) { EqualTo(n1, Literal(i1.intersect(i2).asInstanceOf[Point[Double]].value, n1.dataType)) - else and + } else { + and + } // a < 2 || a >= 2 => true, a > 3 || a > 5 => a > 3 case or @ Or( e1 @ NumLitBinComparison(n1, i1), e2 @ NumLitBinComparison(n2, i2)) if n1 == n2 => - // a hack to avoid bug of spire + // hack to avoid bug of spire: a < 2 || a > 5 => all in spire val op = Interval.all[Double] -- i1 - if (i1.intersects(i2) && i1.union(i2) == Interval.all[Double]) Literal(true) - else if (op(op.size - 1) == i2) Literal(true) - else if (i1.isSubsetOf(i2)) e2 - else if (i1.isSupersetOf(i2)) e1 - else or - - /** 3. Two And/Or with literal condition that can be merged, do a transformation to reuse 2. */ + if (i1.intersects(i2) && i1.union(i2) == Interval.all[Double]) { + Literal(true) + } else if (op(op.size - 1) == i2) { + Literal(true) + } else if (i1.isSubsetOf(i2)) { + e2 + } else if (i1.isSupersetOf(i2)) { + e1 + } else or + + /** 3. Two And/Or with literal condition that can be merged, transform it to reuse 2. */ // (a < 3 || b > 5) || a > 2 => true, (b > 5 || a < 3) || a > 2 => true case or @ Or( Or(e1 @ NumLitBinComparison(n1, i1), e2 @ NumLitBinComparison(n2, i2)), @@ -371,7 +380,7 @@ object ConditionSimplification extends Rule[LogicalPlan] with PredicateHelper { Or(And(left2, right1), And(right2, right1)) // (a < 3 && b > 5) || a > 2 => b > 5 || a > 2. - // using formula: a || (b && c) = (a || b) && (a || c) + // using formula: a || (b && c) = (a || b) && (a || c) case Or( left1 @ And(left2 @ NumLitBinComparison(n1, i1), right2 @ NumLitBinComparison(n2, i2)), right1 @ NumLitBinComparison(n3, i3)) @@ -445,19 +454,29 @@ object ConditionSimplification extends Rule[LogicalPlan] with PredicateHelper { object NumLitBinComparison { def unapply(e: Expression): Option[(NamedExpression, Interval[Double])] = e match { - case LessThan(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.below(l.toDouble))) - case LessThan(l @ Literal(_, _: NumericType), n: NamedExpression) => Some((n, Interval.atOrAbove(l.toDouble))) + case LessThan(n: NamedExpression, l @ Literal(_, _: NumericType)) => + Some((n, Interval.below(l.toDouble))) + case LessThan(l @ Literal(_, _: NumericType), n: NamedExpression) => + Some((n, Interval.atOrAbove(l.toDouble))) + + case GreaterThan(n: NamedExpression, l @ Literal(_, _: NumericType)) => + Some((n, Interval.above(l.toDouble))) + case GreaterThan(l @ Literal(_, dt: NumericType), n: NamedExpression) => + Some((n, Interval.atOrBelow(l.toDouble))) - case GreaterThan(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.above(l.toDouble))) - case GreaterThan(l @ Literal(_, dt: NumericType), n: NamedExpression) => Some((n, Interval.atOrBelow(l.toDouble))) + case LessThanOrEqual(n: NamedExpression, l @ Literal(_, _: NumericType)) => + Some((n, Interval.atOrBelow(l.toDouble))) + case LessThanOrEqual(l @ Literal(_, _: NumericType), n: NamedExpression) => + Some((n, Interval.above(l.toDouble))) - case LessThanOrEqual(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.atOrBelow(l.toDouble))) - case LessThanOrEqual(l @ Literal(_, _: NumericType), n: NamedExpression) => Some((n, Interval.above(l.toDouble))) + case GreaterThanOrEqual(n: NamedExpression, l @ Literal(_, _: NumericType)) => + Some((n, Interval.atOrAbove(l.toDouble))) + case GreaterThanOrEqual(l @ Literal(_, _: NumericType), n: NamedExpression) => + Some((n, Interval.below(l.toDouble))) - case GreaterThanOrEqual(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.atOrAbove(l.toDouble))) - case GreaterThanOrEqual(l @ Literal(_, _: NumericType), n: NamedExpression) => Some((n, Interval.below(l.toDouble))) + case EqualTo(n: NamedExpression, l @ Literal(_, _: NumericType)) => + Some((n, Interval.point(l.toDouble))) - case EqualTo(n: NamedExpression, l @ Literal(_, _: NumericType)) => Some((n, Interval.point(l.toDouble))) case other => None } } From 5c6f134474c93d69b9fd6dcee172d78e56d38b88 Mon Sep 17 00:00:00 2001 From: scwf Date: Sun, 28 Dec 2014 18:21:12 +0800 Subject: [PATCH 6/8] remove numeric optimizations and move to BooleanSimplification --- pom.xml | 5 - sql/catalyst/pom.xml | 5 +- .../sql/catalyst/optimizer/Optimizer.scala | 243 ++++-------------- .../BooleanSimplificationSuite.scala | 92 +++++++ .../ConditionSimplificationSuite.scala | 161 ------------ 5 files changed, 142 insertions(+), 364 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala delete mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala diff --git a/pom.xml b/pom.xml index 052ce02e125d..e4db1393ba9c 100644 --- a/pom.xml +++ b/pom.xml @@ -824,11 +824,6 @@ jackson-mapper-asl 1.8.8 - - org.spire-math - spire_2.10 - 0.9.0 - diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 43f12a6b9364..aa833c279a2a 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -44,10 +44,7 @@ org.scala-lang scala-reflect - - org.spire-math - spire_2.10 - + org.apache.spark spark-core_${scala.binary.version} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 8ac23d474438..ffffaf6cc590 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -18,8 +18,6 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.immutable.HashSet -import spire.implicits._ -import spire.math._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.Inner @@ -42,7 +40,6 @@ object DefaultOptimizer extends Optimizer { NullPropagation, ConstantFolding, LikeSimplification, - ConditionSimplification, BooleanSimplification, SimplifyFilters, SimplifyCasts, @@ -298,196 +295,13 @@ object OptimizeIn extends Rule[LogicalPlan] { } /** - * Simplifies Conditions(And, Or) expressions when the conditions can by optimized. + * Simplifies boolean expressions: + * 1. Simplifies expressions whose answer can be determined without evaluating both sides. + * 2. Eliminates / extracts common factors. + * 3. Merge same expressions + * 4. Removes `Not` operator. */ -object ConditionSimplification extends Rule[LogicalPlan] with PredicateHelper { - - def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case q: LogicalPlan => q transformExpressionsUp { - /** 1. one And/Or with same condition. */ - // a && a => a - case And(left, right) if left.fastEquals(right) => - left - - // a || a => a - case Or(left, right) if left.fastEquals(right) => - left - - /** 2. one And/Or with literal conditions that can be merged. */ - // a < 2 && a > 2 => false, a > 3 && a > 5 => a > 5 - case and @ And( - e1 @ NumLitBinComparison(n1, i1), - e2 @ NumLitBinComparison(n2, i2)) if n1 == n2 => - if (!i1.intersects(i2)) { - Literal(false) - } else if (i1.isSubsetOf(i2)) { - e1 - } else if (i1.isSupersetOf(i2)) { - e2 - } else if (i1.intersect(i2).isPoint) { - EqualTo(n1, Literal(i1.intersect(i2).asInstanceOf[Point[Double]].value, n1.dataType)) - } else { - and - } - - // a < 2 || a >= 2 => true, a > 3 || a > 5 => a > 3 - case or @ Or( - e1 @ NumLitBinComparison(n1, i1), - e2 @ NumLitBinComparison(n2, i2)) if n1 == n2 => - // hack to avoid bug of spire: a < 2 || a > 5 => all in spire - val op = Interval.all[Double] -- i1 - if (i1.intersects(i2) && i1.union(i2) == Interval.all[Double]) { - Literal(true) - } else if (op(op.size - 1) == i2) { - Literal(true) - } else if (i1.isSubsetOf(i2)) { - e2 - } else if (i1.isSupersetOf(i2)) { - e1 - } else or - - /** 3. Two And/Or with literal condition that can be merged, transform it to reuse 2. */ - // (a < 3 || b > 5) || a > 2 => true, (b > 5 || a < 3) || a > 2 => true - case or @ Or( - Or(e1 @ NumLitBinComparison(n1, i1), e2 @ NumLitBinComparison(n2, i2)), - right @ NumLitBinComparison(n3, i3)) => - if (n3 fastEquals n1) { - Or(Or(e1, right), e2) - } else if (n3 fastEquals n2) { - Or(Or(e2, right), e1) - } else { - or - } - - // (b > 5 && a < 2) && a > 3 => false, (a < 2 && b > 5) && a > 3 => false - case and @ And( - And(e1 @ NumLitBinComparison(n1, i1), e2 @ NumLitBinComparison(n2, i2)), - right @ NumLitBinComparison(n3, i3)) => - if (n3 fastEquals n1) { - And(And(e1, right), e2) - } else if (n3 fastEquals n2) { - And(And(e2, right), e1) - } else { - and - } - - // (a < 2 || b > 5) && a > 3 => b > 5 && a > 3 - // using formula: a && (b || c) = (a && b) || (a && c) - case And( - left1 @ Or(left2 @ NumLitBinComparison(n1, i1), right2 @ NumLitBinComparison(n2, i2)), - right1 @ NumLitBinComparison(n3, i3)) - if ((n3.fastEquals(n1) && i3 != i1) || (n3.fastEquals(n2) && i3 != i2)) => - Or(And(left2, right1), And(right2, right1)) - - // (a < 3 && b > 5) || a > 2 => b > 5 || a > 2. - // using formula: a || (b && c) = (a || b) && (a || c) - case Or( - left1 @ And(left2 @ NumLitBinComparison(n1, i1), right2 @ NumLitBinComparison(n2, i2)), - right1 @ NumLitBinComparison(n3, i3)) - if ((n3.fastEquals(n1) && i3 != i1) || (n3.fastEquals(n2) && i3 != i2)) => - Or(And(left2, right1), And(right2, right1)) - - /** 4. And/Or whose one child is literal condition, the other is Or/And */ - // (a < 2 || b > 5) && a < 2 => a < 2 - case And( - left1 @ Or(left2 @ NumLitBinComparison(_, _), right2 @ NumLitBinComparison(_, _)), - right1 @ NumLitBinComparison(_, _)) - if (right1 fastEquals left2) || (right1 fastEquals right2) => - right1 - - // (a < 3 && b > 5) || a < 3 => a < 3 - case Or( - left1 @ And(left2 @ NumLitBinComparison(_, _), right2 @ NumLitBinComparison(_, _)), - right1 @ NumLitBinComparison(_, _)) - if (right1 fastEquals left2) || (right1 fastEquals right2) => - right1 - - // 5. (a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ... => - // a && b && ((c && ...) || (d && ...) || (e && ...) || ...) - case or @ Or(left, right) => - val lhsSet = splitConjunctivePredicates(left).toSet - val rhsSet = splitConjunctivePredicates(right).toSet - val common = lhsSet.intersect(rhsSet) - val ldiff = lhsSet.diff(common) - val rdiff = rhsSet.diff(common) - if (common.size == 0) { - or - }else if ( ldiff.size == 0 || rdiff == 0) { - common.reduce(And) - } else { - (ldiff.reduceOption(And) ++ rdiff.reduceOption(And)) - .reduceOption(Or) - .map(_ :: common.toList) - .getOrElse(common.toList) - .reduce(And) - } - - // 6. (a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ... => - // (a || b) || ((c || ...) && (f || ...) && (e || ...) && ...) - case and @ And(left, right) => - val lhsSet = splitDisjunctivePredicates(left).toSet - val rhsSet = splitDisjunctivePredicates(right).toSet - val common = lhsSet.intersect(rhsSet) - val ldiff = lhsSet.diff(common) - val rdiff = rhsSet.diff(common) - - if (common.size == 0) { - and - }else if (ldiff.size == 0 || rdiff.size == 0) { - common.reduce(Or) - } else { - val x = (ldiff.reduceOption(Or) ++ rdiff.reduceOption(Or)) - .reduceOption(And) - .map(_ :: common.toList) - .getOrElse(common.toList) - .reduce(Or) - x - } - - case other => other - } - } - - private implicit class NumericLiteral(e: Literal) { - def toDouble = Cast(e, DoubleType).eval().asInstanceOf[Double] - } - - object NumLitBinComparison { - def unapply(e: Expression): Option[(NamedExpression, Interval[Double])] = e match { - case LessThan(n: NamedExpression, l @ Literal(_, _: NumericType)) => - Some((n, Interval.below(l.toDouble))) - case LessThan(l @ Literal(_, _: NumericType), n: NamedExpression) => - Some((n, Interval.atOrAbove(l.toDouble))) - - case GreaterThan(n: NamedExpression, l @ Literal(_, _: NumericType)) => - Some((n, Interval.above(l.toDouble))) - case GreaterThan(l @ Literal(_, dt: NumericType), n: NamedExpression) => - Some((n, Interval.atOrBelow(l.toDouble))) - - case LessThanOrEqual(n: NamedExpression, l @ Literal(_, _: NumericType)) => - Some((n, Interval.atOrBelow(l.toDouble))) - case LessThanOrEqual(l @ Literal(_, _: NumericType), n: NamedExpression) => - Some((n, Interval.above(l.toDouble))) - - case GreaterThanOrEqual(n: NamedExpression, l @ Literal(_, _: NumericType)) => - Some((n, Interval.atOrAbove(l.toDouble))) - case GreaterThanOrEqual(l @ Literal(_, _: NumericType), n: NamedExpression) => - Some((n, Interval.below(l.toDouble))) - - case EqualTo(n: NamedExpression, l @ Literal(_, _: NumericType)) => - Some((n, Interval.point(l.toDouble))) - - case other => None - } - } -} - -/** - * Simplifies boolean expressions where the answer can be determined without evaluating both sides. - * Note that this rule can eliminate expressions that might otherwise have been evaluated and thus - * is only safe when evaluations of expressions does not result in side effects. - */ -object BooleanSimplification extends Rule[LogicalPlan] { +object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsUp { case and @ And(left, right) => @@ -496,7 +310,28 @@ object BooleanSimplification extends Rule[LogicalPlan] { case (l, Literal(true, BooleanType)) => l case (Literal(false, BooleanType), _) => Literal(false) case (_, Literal(false, BooleanType)) => Literal(false) - case (_, _) => and + // a && a => a + case (l, r) if l fastEquals r => l + case (_, _) => + // (a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ... => + // a && b && ((c && ...) || (d && ...) || (e && ...) || ...) + val lhsSet = splitDisjunctivePredicates(left).toSet + val rhsSet = splitDisjunctivePredicates(right).toSet + val common = lhsSet.intersect(rhsSet) + val ldiff = lhsSet.diff(common) + val rdiff = rhsSet.diff(common) + + if (common.size == 0) { + and + }else if (ldiff.size == 0 || rdiff.size == 0) { + common.reduce(Or) + } else { + (ldiff.reduceOption(Or) ++ rdiff.reduceOption(Or)) + .reduceOption(And) + .map(_ :: common.toList) + .getOrElse(common.toList) + .reduce(Or) + } } case or @ Or(left, right) => @@ -505,7 +340,27 @@ object BooleanSimplification extends Rule[LogicalPlan] { case (_, Literal(true, BooleanType)) => Literal(true) case (Literal(false, BooleanType), r) => r case (l, Literal(false, BooleanType)) => l - case (_, _) => or + // a || a => a + case (l, r) if l fastEquals r => l + case (_, _) => + // (a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ... => + // (a || b) || ((c || ...) && (f || ...) && (e || ...) && ...) + val lhsSet = splitConjunctivePredicates(left).toSet + val rhsSet = splitConjunctivePredicates(right).toSet + val common = lhsSet.intersect(rhsSet) + val ldiff = lhsSet.diff(common) + val rdiff = rhsSet.diff(common) + if (common.size == 0) { + or + }else if ( ldiff.size == 0 || rdiff.size == 0) { + common.reduce(And) + } else { + (ldiff.reduceOption(And) ++ rdiff.reduceOption(And)) + .reduceOption(Or) + .map(_ :: common.toList) + .getOrElse(common.toList) + .reduce(And) + } } case not @ Not(exp) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala new file mode 100644 index 000000000000..87aea3285402 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.analysis.EliminateAnalysisOperators +import org.apache.spark.sql.catalyst.expressions.{Literal, Expression} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.dsl.expressions._ + +class BooleanSimplificationSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("AnalysisNodes", Once, + EliminateAnalysisOperators) :: + Batch("Constant Folding", FixedPoint(50), + NullPropagation, + ConstantFolding, + BooleanSimplification, + SimplifyFilters) :: Nil + } + + val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.string) + + def checkCondition(originCondition: Expression, optimizedCondition: Expression): Unit = { + val originQuery = testRelation.where(originCondition).analyze + val optimized = Optimize(originQuery) + val expected = testRelation.where(optimizedCondition).analyze + comparePlans(optimized, expected) + } + + test("a && a => a") { + checkCondition(Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a) + checkCondition(Literal(1) < 'a && Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a) + } + + test("a || a => a") { + checkCondition(Literal(1) < 'a || Literal(1) < 'a, Literal(1) < 'a) + checkCondition(Literal(1) < 'a || Literal(1) < 'a || Literal(1) < 'a, Literal(1) < 'a) + } + + test("(a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ...") { + checkCondition('b > 3 || 'c > 5, 'b > 3 || 'c > 5) + + checkCondition(('a < 2 && 'a > 3 && 'b > 5) || 'a < 2, 'a < 2) + + checkCondition('a < 2 || ('a < 2 && 'a > 3 && 'b > 5), 'a < 2) + + val input = ('a === 'b && 'b > 3 && 'c > 2) || + ('a === 'b && 'c < 1 && 'a === 5) || + ('a === 'b && 'b < 5 && 'a > 1) + + val expected = + (((('b > 3) && ('c > 2)) || + (('c < 1) && ('a === 5))) || + (('b < 5) && ('a > 1))) && ('a === 'b) + checkCondition(input, expected) + + } + + test("(a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ...") { + checkCondition('b > 3 && 'c > 5, 'b > 3 && 'c > 5) + + checkCondition(('a < 2 || 'a > 3 || 'b > 5) && 'a < 2, 'a < 2) + + checkCondition('a < 2 && ('a < 2 || 'a > 3 || 'b > 5) , 'a < 2) + + checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), ('b > 3 && 'c > 5) || 'a < 2) + + var input: Expression = ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a < 5) + var expected: Expression = ('b > 3 && 'a > 3 && 'a < 5) || 'a === 'b + checkCondition(input, expected) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala deleted file mode 100644 index 233efe60e215..000000000000 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConditionSimplificationSuite.scala +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.optimizer - -import org.apache.spark.sql.catalyst.analysis.EliminateAnalysisOperators -import org.apache.spark.sql.catalyst.expressions.{Literal, Expression} -import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.dsl.expressions._ - -class ConditionSimplificationSuite extends PlanTest { - - object Optimize extends RuleExecutor[LogicalPlan] { - val batches = - Batch("AnalysisNodes", Once, - EliminateAnalysisOperators) :: - Batch("Constant Folding", FixedPoint(50), - NullPropagation, - ConstantFolding, - ConditionSimplification, - BooleanSimplification, - SimplifyFilters) :: Nil - } - - val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.string) - - def checkCondition(originCondition: Expression, optimizedCondition: Expression): Unit = { - val originQuery = testRelation.where(originCondition).analyze - val optimized = Optimize(originQuery) - val expected = testRelation.where(optimizedCondition).analyze - comparePlans(optimized, expected) - } - - def checkCondition(originCondition: Expression): Unit = { - val originQuery = testRelation.where(originCondition).analyze - val optimized = Optimize(originQuery) - val expected = testRelation - comparePlans(optimized, expected) - } - - test("literals in front of attribute") { - checkCondition(Literal(1) < 'a || Literal(2) < 'a, Literal(1) < 'a) - checkCondition(Literal(1) < 'a && Literal(2) < 'a, Literal(2) < 'a) - } - - test("And/Or with the same conditions") { - checkCondition('a < 1 || 'a < 1, 'a < 1) - checkCondition('a < 1 || 'a < 1 || 'a < 1 || 'a < 1, 'a < 1) - checkCondition('a > 2 && 'a > 2, 'a > 2) - checkCondition('a > 2 && 'a > 2 && 'a > 2 && 'a > 2, 'a > 2) - checkCondition(('a < 1 && 'a < 2) || ('a < 1 && 'a < 2), 'a < 1) - } - - test("one And/Or with literal conditions") { - checkCondition('a === 1 && 'a < 1) - checkCondition('a === 1 || 'a < 1, 'a === 1 || 'a < 1) - - checkCondition('a === 1 && 'a === 2) - checkCondition('a === 1 || 'a === 2, 'a === 1 || 'a === 2) - - checkCondition('a <= 1 && 'a > 1) - checkCondition('a <= 1 || 'a > 1) - - checkCondition('a < 1 && 'a >= 1) - checkCondition('a < 1 || 'a >= 1) - - checkCondition('a > 3 && 'a > 2, 'a > 3) - checkCondition('a > 3 || 'a > 2, 'a > 2) - - checkCondition('a < 2 || 'a === 3 , 'a < 2 || 'a === 3) - checkCondition('a === 3 || 'a > 5, 'a === 3 || 'a > 5) - checkCondition('a < 2 || 'a > 5, 'a < 2 || 'a > 5) - - checkCondition('a >= 1 && 'a <= 1, 'a === 1) - - } - - test("different data type comparison") { - checkCondition('a > "abc") - checkCondition('a > "a" && 'a < "b") - - checkCondition('a > "a" || 'a < "b") - - checkCondition('a > "9" || 'a < "0", 'a > 9.0 || 'a < 0.0) - checkCondition('d > 9 && 'd < 1, 'd > 9.0 && 'd < 1.0 ) - - checkCondition('a > "9" || 'a < "0", 'a > 9.0 || 'a < 0.0) - } - - test("two same And/Or with literal conditions") { - checkCondition('a < 1 || 'b > 2 || 'a >= 1) - checkCondition('a < 1 && 'b > 2 && 'a >= 1) - - checkCondition('a < 2 || 'b > 3 || 'b > 2, 'b > 2 || 'a < 2) - checkCondition('a < 2 && 'b > 3 && 'b > 2, 'b > 3 && 'a < 2) - - checkCondition('a < 2 || ('b > 3 || 'b > 2), 'a < 2 || 'b > 2) - checkCondition('a < 2 && ('b > 3 && 'b > 2), 'a < 2 && 'b > 3) - - checkCondition('a < 2 || 'a === 3 || 'a > 5, 'a < 2 || 'a === 3 || 'a > 5) - } - - test("two different And/Or with literal conditions") { - checkCondition(('a < 2 || 'a > 3) && 'a > 4, 'a > 4) - checkCondition(('a < 2 || 'b > 3) && 'a < 2, 'a < 2) - checkCondition(('a < 2 && 'b > 3) || 'a < 2, 'a < 2) - } - - test("more than two And/Or: one child is literal condition") { - checkCondition(('a < 2 || 'a > 3 || 'b > 5) && 'a < 2, 'a < 2) - checkCondition(('a < 2 && 'a > 3 && 'b > 5) && 'a < 2) - checkCondition(('a < 2 && 'a > 3 && 'b > 5) || 'a < 2, 'a < 2) - } - - test("more than two And/Or") { - checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), ('b > 3 && 'c > 5) || 'a < 2) - - var input: Expression = ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a < 5) - var expected: Expression = ('a > 3 && 'a < 5 && 'b > 3) || 'a === 'b - checkCondition(input, expected) - - input = ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a > 1) - expected = ('a > 3 && 'b > 3) || 'a === 'b - checkCondition(input, expected) - - input = ('a === 'b && 'b > 3 && 'c > 2) || - ('a === 'b && 'c < 1 && 'a === 5) || - ('a === 'b && 'b < 5 && 'a > 1) - - expected = - (((('b > 3) && ('c > 2)) || - (('c < 1) && ('a === 5))) || - (('b < 5) && ('a > 1))) && ('a === 'b) - checkCondition(input, expected) - - input = ('a < 2 || 'b > 5 || 'a < 2 || 'b > 1) && ('a < 2 || 'b > 1) - expected = 'a < 2 || 'b > 1 - checkCondition(input, expected) - - input = ('a === 'b || 'b > 5) && ('a === 'b || 'c > 3) && ('a === 'b || 'b > 1) - expected = ('b > 5 && 'c > 3) || ('a === 'b) - checkCondition(input, expected) - } -} From 527e6cee23dca18c2f035c772f659394c3c700d5 Mon Sep 17 00:00:00 2001 From: scwf Date: Sun, 28 Dec 2014 18:24:15 +0800 Subject: [PATCH 7/8] minor comment improvements --- sql/catalyst/pom.xml | 2 +- .../spark/sql/catalyst/optimizer/Optimizer.scala | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index aa833c279a2a..1caa297e24e3 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -44,7 +44,7 @@ org.scala-lang scala-reflect - + org.apache.spark spark-core_${scala.binary.version} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index ffffaf6cc590..39c34d03bf4e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.immutable.HashSet - import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.Inner import org.apache.spark.sql.catalyst.plans.FullOuter @@ -313,19 +312,20 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { // a && a => a case (l, r) if l fastEquals r => l case (_, _) => - // (a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ... => - // a && b && ((c && ...) || (d && ...) || (e && ...) || ...) val lhsSet = splitDisjunctivePredicates(left).toSet val rhsSet = splitDisjunctivePredicates(right).toSet val common = lhsSet.intersect(rhsSet) val ldiff = lhsSet.diff(common) val rdiff = rhsSet.diff(common) - if (common.size == 0) { + // a && b and }else if (ldiff.size == 0 || rdiff.size == 0) { + // a && (a || b) common.reduce(Or) } else { + // (a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ... => + // (a || b) || ((c || ...) && (f || ...) && (e || ...) && ...) (ldiff.reduceOption(Or) ++ rdiff.reduceOption(Or)) .reduceOption(And) .map(_ :: common.toList) @@ -343,18 +343,20 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { // a || a => a case (l, r) if l fastEquals r => l case (_, _) => - // (a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ... => - // (a || b) || ((c || ...) && (f || ...) && (e || ...) && ...) val lhsSet = splitConjunctivePredicates(left).toSet val rhsSet = splitConjunctivePredicates(right).toSet val common = lhsSet.intersect(rhsSet) val ldiff = lhsSet.diff(common) val rdiff = rhsSet.diff(common) if (common.size == 0) { + // a || b or }else if ( ldiff.size == 0 || rdiff.size == 0) { + // a || (b && a) common.reduce(And) } else { + // (a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ... => + // a && b && ((c && ...) || (d && ...) || (e && ...) || ...) (ldiff.reduceOption(And) ++ rdiff.reduceOption(And)) .reduceOption(Or) .map(_ :: common.toList) From 58bcbc28f6fe085058d47ed52e1c2e7b4efb6e04 Mon Sep 17 00:00:00 2001 From: scwf Date: Sun, 11 Jan 2015 09:24:25 +0800 Subject: [PATCH 8/8] minor format fix --- .../optimizer/BooleanSimplificationSuite.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index 87aea3285402..a0863dad96eb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -31,11 +31,11 @@ class BooleanSimplificationSuite extends PlanTest { val batches = Batch("AnalysisNodes", Once, EliminateAnalysisOperators) :: - Batch("Constant Folding", FixedPoint(50), - NullPropagation, - ConstantFolding, - BooleanSimplification, - SimplifyFilters) :: Nil + Batch("Constant Folding", FixedPoint(50), + NullPropagation, + ConstantFolding, + BooleanSimplification, + SimplifyFilters) :: Nil } val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.string)