Skip to content

Commit 2f6a835

Browse files
committed
[SPARK-2218] rename Equals to EqualTo in Spark SQL expressions.
Due to the existence of scala.Equals, it is very error prone to name the expression Equals, especially because we use a lot of partial functions and pattern matching in the optimizer. Note that this sits on top of apache#1144. Author: Reynold Xin <[email protected]> Closes apache#1146 from rxin/equals and squashes the following commits: f8583fd [Reynold Xin] Merge branch 'master' of github.com:apache/spark into equals 326b388 [Reynold Xin] Merge branch 'master' of github.com:apache/spark into equals bd19807 [Reynold Xin] Rename EqualsTo to EqualTo. 81148d1 [Reynold Xin] [SPARK-2218] rename Equals to EqualsTo in Spark SQL expressions. c4e543d [Reynold Xin] [SPARK-2210] boolean cast on boolean value should be removed.
1 parent 3249528 commit 2f6a835

File tree

11 files changed

+38
-40
lines changed

11 files changed

+38
-40
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -258,13 +258,13 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
258258
comparisonExpression * (AND ^^^ { (e1: Expression, e2: Expression) => And(e1,e2) })
259259

260260
protected lazy val comparisonExpression: Parser[Expression] =
261-
termExpression ~ "=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Equals(e1, e2) } |
261+
termExpression ~ "=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => EqualTo(e1, e2) } |
262262
termExpression ~ "<" ~ termExpression ^^ { case e1 ~ _ ~ e2 => LessThan(e1, e2) } |
263263
termExpression ~ "<=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => LessThanOrEqual(e1, e2) } |
264264
termExpression ~ ">" ~ termExpression ^^ { case e1 ~ _ ~ e2 => GreaterThan(e1, e2) } |
265265
termExpression ~ ">=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => GreaterThanOrEqual(e1, e2) } |
266-
termExpression ~ "!=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(Equals(e1, e2)) } |
267-
termExpression ~ "<>" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(Equals(e1, e2)) } |
266+
termExpression ~ "!=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(EqualTo(e1, e2)) } |
267+
termExpression ~ "<>" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(EqualTo(e1, e2)) } |
268268
termExpression ~ RLIKE ~ termExpression ^^ { case e1 ~ _ ~ e2 => RLike(e1, e2) } |
269269
termExpression ~ REGEXP ~ termExpression ^^ { case e1 ~ _ ~ e2 => RLike(e1, e2) } |
270270
termExpression ~ LIKE ~ termExpression ^^ { case e1 ~ _ ~ e2 => Like(e1, e2) } |

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,8 +234,8 @@ trait HiveTypeCoercion {
234234
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
235235
// Skip nodes whose children have not been resolved yet.
236236
case e if !e.childrenResolved => e
237-
// No need to change Equals operators as that actually makes sense for boolean types.
238-
case e: Equals => e
237+
// No need to change EqualTo operators as that actually makes sense for boolean types.
238+
case e: EqualTo => e
239239
// Otherwise turn them to Byte types so that there exists an ordering.
240240
case p: BinaryComparison
241241
if p.left.dataType == BooleanType && p.right.dataType == BooleanType =>
@@ -254,7 +254,10 @@ trait HiveTypeCoercion {
254254
// Skip if the type is boolean type already. Note that this extra cast should be removed
255255
// by optimizer.SimplifyCasts.
256256
case Cast(e, BooleanType) if e.dataType == BooleanType => e
257-
case Cast(e, BooleanType) => Not(Equals(e, Literal(0)))
257+
// If the data type is not boolean and is being cast to boolean, turn it into a comparison
258+
// with the numeric value, i.e. x != 0. This will coerce the type into a numeric type.
259+
case Cast(e, BooleanType) if e.dataType != BooleanType => Not(EqualTo(e, Literal(0)))
260+
// Turn true into 1, and false into 0 if casting boolean into other types.
258261
case Cast(e, dataType) if e.dataType == BooleanType =>
259262
Cast(If(e, Literal(1), Literal(0)), dataType)
260263
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ import org.apache.spark.sql.catalyst.types._
4444
*
4545
* // These unresolved attributes can be used to create more complicated expressions.
4646
* scala> 'a === 'b
47-
* res2: org.apache.spark.sql.catalyst.expressions.Equals = ('a = 'b)
47+
* res2: org.apache.spark.sql.catalyst.expressions.EqualTo = ('a = 'b)
4848
*
4949
* // SQL verbs can be used to construct logical query plans.
5050
* scala> import org.apache.spark.sql.catalyst.plans.logical._
@@ -76,8 +76,8 @@ package object dsl {
7676
def <= (other: Expression) = LessThanOrEqual(expr, other)
7777
def > (other: Expression) = GreaterThan(expr, other)
7878
def >= (other: Expression) = GreaterThanOrEqual(expr, other)
79-
def === (other: Expression) = Equals(expr, other)
80-
def !== (other: Expression) = Not(Equals(expr, other))
79+
def === (other: Expression) = EqualTo(expr, other)
80+
def !== (other: Expression) = Not(EqualTo(expr, other))
8181

8282
def like(other: Expression) = Like(expr, other)
8383
def rlike(other: Expression) = RLike(expr, other)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ package org.apache.spark.sql.catalyst
2424
* expression, a [[NamedExpression]] in addition to the standard collection of expressions.
2525
*
2626
* ==Standard Expressions==
27-
* A library of standard expressions (e.g., [[Add]], [[Equals]]), aggregates (e.g., SUM, COUNT),
27+
* A library of standard expressions (e.g., [[Add]], [[EqualTo]]), aggregates (e.g., SUM, COUNT),
2828
* and other computations (e.g. UDFs). Each expression type is capable of determining its output
2929
* schema as a function of its children's output schema.
3030
*

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ trait PredicateHelper {
5252
*
5353
* For example consider a join between two relations R(a, b) and S(c, d).
5454
*
55-
* `canEvaluate(Equals(a,b), R)` returns `true` where as `canEvaluate(Equals(a,c), R)` returns
55+
* `canEvaluate(EqualTo(a,b), R)` returns `true` whereas `canEvaluate(EqualTo(a,c), R)` returns
5656
* `false`.
5757
*/
5858
protected def canEvaluate(expr: Expression, plan: LogicalPlan): Boolean =
@@ -140,7 +140,7 @@ abstract class BinaryComparison extends BinaryPredicate {
140140
self: Product =>
141141
}
142142

143-
case class Equals(left: Expression, right: Expression) extends BinaryComparison {
143+
case class EqualTo(left: Expression, right: Expression) extends BinaryComparison {
144144
def symbol = "="
145145
override def eval(input: Row): Any = {
146146
val l = left.eval(input)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,14 +136,14 @@ object HashFilteredJoin extends Logging with PredicateHelper {
136136
val Join(left, right, joinType, _) = join
137137
val (joinPredicates, otherPredicates) =
138138
allPredicates.flatMap(splitConjunctivePredicates).partition {
139-
case Equals(l, r) if (canEvaluate(l, left) && canEvaluate(r, right)) ||
139+
case EqualTo(l, r) if (canEvaluate(l, left) && canEvaluate(r, right)) ||
140140
(canEvaluate(l, right) && canEvaluate(r, left)) => true
141141
case _ => false
142142
}
143143

144144
val joinKeys = joinPredicates.map {
145-
case Equals(l, r) if canEvaluate(l, left) && canEvaluate(r, right) => (l, r)
146-
case Equals(l, r) if canEvaluate(l, right) && canEvaluate(r, left) => (r, l)
145+
case EqualTo(l, r) if canEvaluate(l, left) && canEvaluate(r, right) => (l, r)
146+
case EqualTo(l, r) if canEvaluate(l, right) && canEvaluate(r, left) => (r, l)
147147
}
148148

149149
// Do not consider this strategy if there are no join keys.

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,8 @@ class ConstantFoldingSuite extends PlanTest {
195195
Add(Literal(null, IntegerType), 1) as 'c9,
196196
Add(1, Literal(null, IntegerType)) as 'c10,
197197

198-
Equals(Literal(null, IntegerType), 1) as 'c11,
199-
Equals(1, Literal(null, IntegerType)) as 'c12,
198+
EqualTo(Literal(null, IntegerType), 1) as 'c11,
199+
EqualTo(1, Literal(null, IntegerType)) as 'c12,
200200

201201
Like(Literal(null, StringType), "abc") as 'c13,
202202
Like("abc", Literal(null, StringType)) as 'c14,

sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,9 @@ object ParquetFilters {
205205
Some(new AndFilter(leftFilter.get, rightFilter.get))
206206
}
207207
}
208-
case p @ Equals(left: Literal, right: NamedExpression) if !right.nullable =>
208+
case p @ EqualTo(left: Literal, right: NamedExpression) if !right.nullable =>
209209
Some(createEqualityFilter(right.name, left, p))
210-
case p @ Equals(left: NamedExpression, right: Literal) if !left.nullable =>
210+
case p @ EqualTo(left: NamedExpression, right: Literal) if !left.nullable =>
211211
Some(createEqualityFilter(left.name, right, p))
212212
case p @ LessThan(left: Literal, right: NamedExpression) if !right.nullable =>
213213
Some(createLessThanFilter(right.name, left, p))

sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,23 @@ package org.apache.spark.sql.parquet
1919

2020
import org.scalatest.{BeforeAndAfterAll, FunSuiteLike}
2121

22-
import org.apache.hadoop.fs.{Path, FileSystem}
23-
import org.apache.hadoop.mapreduce.Job
24-
2522
import parquet.hadoop.ParquetFileWriter
2623
import parquet.hadoop.util.ContextUtil
2724
import parquet.schema.MessageTypeParser
2825

26+
import org.apache.hadoop.fs.{FileSystem, Path}
27+
import org.apache.hadoop.mapreduce.Job
2928
import org.apache.spark.SparkContext
3029
import org.apache.spark.sql._
30+
import org.apache.spark.sql.catalyst.{SqlLexical, SqlParser}
31+
import org.apache.spark.sql.catalyst.analysis.{Star, UnresolvedAttribute}
3132
import org.apache.spark.sql.catalyst.expressions._
3233
import org.apache.spark.sql.catalyst.types.{BooleanType, IntegerType}
33-
import org.apache.spark.sql.test.TestSQLContext
34-
import org.apache.spark.sql.TestData
35-
import org.apache.spark.sql.SchemaRDD
3634
import org.apache.spark.sql.catalyst.util.getTempFilePath
37-
import org.apache.spark.sql.catalyst.{SqlLexical, SqlParser}
38-
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, Star}
35+
import org.apache.spark.sql.test.TestSQLContext
36+
import org.apache.spark.sql.test.TestSQLContext._
3937
import org.apache.spark.util.Utils
4038

41-
// Implicits
42-
import org.apache.spark.sql.test.TestSQLContext._
4339

4440
case class TestRDDEntry(key: Int, value: String)
4541

@@ -72,7 +68,6 @@ case class AllDataTypes(
7268
booleanField: Boolean)
7369

7470
class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterAll {
75-
import TestData._
7671
TestData // Load test data tables.
7772

7873
var testRDD: SchemaRDD = null
@@ -319,7 +314,7 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA
319314

320315
test("create RecordFilter for simple predicates") {
321316
val attribute1 = new AttributeReference("first", IntegerType, false)()
322-
val predicate1 = new Equals(attribute1, new Literal(1, IntegerType))
317+
val predicate1 = new EqualTo(attribute1, new Literal(1, IntegerType))
323318
val filter1 = ParquetFilters.createFilter(predicate1)
324319
assert(filter1.isDefined)
325320
assert(filter1.get.predicate == predicate1, "predicates do not match")

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ private[hive] object HiveQl {
698698

699699
val joinConditions = joinExpressions.sliding(2).map {
700700
case Seq(c1, c2) =>
701-
val predicates = (c1, c2).zipped.map { case (e1, e2) => Equals(e1, e2): Expression }
701+
val predicates = (c1, c2).zipped.map { case (e1, e2) => EqualTo(e1, e2): Expression }
702702
predicates.reduceLeft(And)
703703
}.toBuffer
704704

@@ -924,9 +924,9 @@ private[hive] object HiveQl {
924924
case Token("%", left :: right:: Nil) => Remainder(nodeToExpr(left), nodeToExpr(right))
925925

926926
/* Comparisons */
927-
case Token("=", left :: right:: Nil) => Equals(nodeToExpr(left), nodeToExpr(right))
928-
case Token("!=", left :: right:: Nil) => Not(Equals(nodeToExpr(left), nodeToExpr(right)))
929-
case Token("<>", left :: right:: Nil) => Not(Equals(nodeToExpr(left), nodeToExpr(right)))
927+
case Token("=", left :: right:: Nil) => EqualTo(nodeToExpr(left), nodeToExpr(right))
928+
case Token("!=", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
929+
case Token("<>", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), nodeToExpr(right)))
930930
case Token(">", left :: right:: Nil) => GreaterThan(nodeToExpr(left), nodeToExpr(right))
931931
case Token(">=", left :: right:: Nil) => GreaterThanOrEqual(nodeToExpr(left), nodeToExpr(right))
932932
case Token("<", left :: right:: Nil) => LessThan(nodeToExpr(left), nodeToExpr(right))
@@ -966,7 +966,7 @@ private[hive] object HiveQl {
966966
// FIXME (SPARK-2155): the key will get evaluated multiple times in CaseWhen's eval().
967967
// Hence effectful / non-deterministic key expressions are *not* supported at the moment.
968968
// We should consider adding new Expressions to get around this.
969-
Seq(Equals(nodeToExpr(branches(0)), nodeToExpr(condVal)),
969+
Seq(EqualTo(nodeToExpr(branches(0)), nodeToExpr(condVal)),
970970
nodeToExpr(value))
971971
case Seq(elseVal) => Seq(nodeToExpr(elseVal))
972972
}.toSeq.reduce(_ ++ _)

0 commit comments

Comments
 (0)