From 16b5c906ff6d38e333a20e4f5cb4244c49254c6b Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 15 Jul 2015 16:56:05 -0700 Subject: [PATCH 1/5] [SPARK-9085][SQL] Remove LeafNode, UnaryNode, BinaryNode from TreeNode. --- .../sql/catalyst/analysis/unresolved.scala | 10 +++---- .../catalyst/expressions/BoundAttribute.scala | 2 +- .../sql/catalyst/expressions/SortOrder.scala | 3 +- .../sql/catalyst/expressions/aggregates.scala | 29 +++++++++---------- .../sql/catalyst/expressions/generators.scala | 8 ++--- .../expressions/namedExpressions.scala | 16 +++++----- .../catalyst/plans/logical/LogicalPlan.scala | 10 +++++-- .../spark/sql/catalyst/trees/TreeNode.scala | 16 ---------- .../spark/sql/execution/SparkPlan.scala | 11 +++++-- 9 files changed, 50 insertions(+), 55 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index f2e579afe833..7089f079b6dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -50,8 +50,7 @@ case class UnresolvedRelation( /** * Holds the name of an attribute that has yet to be resolved. */ -case class UnresolvedAttribute(nameParts: Seq[String]) - extends Attribute with trees.LeafNode[Expression] { +case class UnresolvedAttribute(nameParts: Seq[String]) extends Attribute { def name: String = nameParts.map(n => if (n.contains(".")) s"`$n`" else n).mkString(".") @@ -96,7 +95,7 @@ case class UnresolvedFunction(name: String, children: Seq[Expression]) extends E * Represents all of the input attributes to a given relational operator, for example in * "SELECT * FROM ...". A [[Star]] gets automatically expanded during analysis. */ -trait Star extends NamedExpression with trees.LeafNode[Expression] { +abstract class Star extends LeafExpression with NamedExpression { self: Product => override def name: String = throw new UnresolvedException(this, "name") @@ -151,7 +150,7 @@ case class UnresolvedStar(table: Option[String]) extends Star { * @param names the names to be associated with each output of computing [[child]]. */ case class MultiAlias(child: Expression, names: Seq[String]) - extends NamedExpression with trees.UnaryNode[Expression] { + extends UnaryExpression with NamedExpression { override def name: String = throw new UnresolvedException(this, "name") @@ -210,8 +209,7 @@ case class UnresolvedExtractValue(child: Expression, extraction: Expression) /** * Holds the expression that has yet to be aliased. */ -case class UnresolvedAlias(child: Expression) extends NamedExpression - with trees.UnaryNode[Expression] { +case class UnresolvedAlias(child: Expression) extends UnaryExpression with NamedExpression { override def toAttribute: Attribute = throw new UnresolvedException(this, "toAttribute") override def qualifiers: Seq[String] = throw new UnresolvedException(this, "qualifiers") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala index 3f0d7b803125..b09aea03318d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.types._ * the layout of intermediate tuples, BindReferences should be run after all such transformations. */ case class BoundReference(ordinal: Int, dataType: DataType, nullable: Boolean) - extends NamedExpression with trees.LeafNode[Expression] { + extends LeafExpression with NamedExpression { override def toString: String = s"input[$ordinal]" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 8ab4ef060b68..6a8ee16b5954 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -30,8 +30,7 @@ case object Descending extends SortDirection * An expression that can be used to sort a tuple. This class extends expression primarily so that * transformations over expression will descend into its child. */ -case class SortOrder(child: Expression, direction: SortDirection) extends Expression - with trees.UnaryNode[Expression] { +case class SortOrder(child: Expression, direction: SortDirection) extends UnaryExpression { override def dataType: DataType = child.dataType override def nullable: Boolean = child.nullable diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala index c0e17f97e9b3..19d74e527084 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ import org.apache.spark.util.collection.OpenHashSet -abstract class AggregateExpression extends Expression { +trait AggregateExpression extends Expression { self: Product => /** @@ -60,7 +60,7 @@ case class SplitEvaluation( * An [[AggregateExpression]] that can be partially computed without seeing all relevant tuples. * These partial evaluations can then be combined to compute the actual answer. */ -abstract class PartialAggregate extends AggregateExpression { +trait PartialAggregate extends AggregateExpression { self: Product => /** @@ -74,7 +74,7 @@ abstract class PartialAggregate extends AggregateExpression { * [[AggregateExpression]] with an algorithm that will be used to compute one specific result. */ abstract class AggregateFunction - extends AggregateExpression with Serializable with trees.LeafNode[Expression] { + extends LeafExpression with AggregateExpression with Serializable { self: Product => /** Base should return the generic aggregate expression that this function is computing */ @@ -91,7 +91,7 @@ abstract class AggregateFunction } } -case class Min(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { +case class Min(child: Expression) extends UnaryExpression with PartialAggregate { override def nullable: Boolean = true override def dataType: DataType = child.dataType @@ -124,7 +124,7 @@ case class MinFunction(expr: Expression, base: AggregateExpression) extends Aggr override def eval(input: InternalRow): Any = currentMin.value } -case class Max(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { +case class Max(child: Expression) extends UnaryExpression with PartialAggregate { override def nullable: Boolean = true override def dataType: DataType = child.dataType @@ -157,7 +157,7 @@ case class MaxFunction(expr: Expression, base: AggregateExpression) extends Aggr override def eval(input: InternalRow): Any = currentMax.value } -case class Count(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { +case class Count(child: Expression) extends UnaryExpression with PartialAggregate { override def nullable: Boolean = false override def dataType: LongType.type = LongType @@ -310,7 +310,7 @@ private[sql] case object HyperLogLogUDT extends UserDefinedType[HyperLogLog] { } case class ApproxCountDistinctPartition(child: Expression, relativeSD: Double) - extends AggregateExpression with trees.UnaryNode[Expression] { + extends UnaryExpression with AggregateExpression { override def nullable: Boolean = false override def dataType: DataType = HyperLogLogUDT @@ -340,7 +340,7 @@ case class ApproxCountDistinctPartitionFunction( } case class ApproxCountDistinctMerge(child: Expression, relativeSD: Double) - extends AggregateExpression with trees.UnaryNode[Expression] { + extends UnaryExpression with AggregateExpression { override def nullable: Boolean = false override def dataType: LongType.type = LongType @@ -368,7 +368,7 @@ case class ApproxCountDistinctMergeFunction( } case class ApproxCountDistinct(child: Expression, relativeSD: Double = 0.05) - extends PartialAggregate with trees.UnaryNode[Expression] { + extends UnaryExpression with PartialAggregate { override def nullable: Boolean = false override def dataType: LongType.type = LongType @@ -386,7 +386,7 @@ case class ApproxCountDistinct(child: Expression, relativeSD: Double = 0.05) override def newInstance(): CountDistinctFunction = new CountDistinctFunction(child :: Nil, this) } -case class Average(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { +case class Average(child: Expression) extends UnaryExpression with PartialAggregate { override def prettyName: String = "avg" @@ -479,7 +479,7 @@ case class AverageFunction(expr: Expression, base: AggregateExpression) } } -case class Sum(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { +case class Sum(child: Expression) extends UnaryExpression with PartialAggregate { override def nullable: Boolean = true @@ -606,8 +606,7 @@ case class CombineSumFunction(expr: Expression, base: AggregateExpression) } } -case class SumDistinct(child: Expression) - extends PartialAggregate with trees.UnaryNode[Expression] { +case class SumDistinct(child: Expression) extends UnaryExpression with PartialAggregate { def this() = this(null) override def nullable: Boolean = true @@ -701,7 +700,7 @@ case class CombineSetsAndSumFunction( } } -case class First(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { +case class First(child: Expression) extends UnaryExpression with PartialAggregate { override def nullable: Boolean = true override def dataType: DataType = child.dataType override def toString: String = s"FIRST($child)" @@ -729,7 +728,7 @@ case class FirstFunction(expr: Expression, base: AggregateExpression) extends Ag override def eval(input: InternalRow): Any = result } -case class Last(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { +case class Last(child: Expression) extends UnaryExpression with PartialAggregate { override def references: AttributeSet = child.references override def nullable: Boolean = true override def dataType: DataType = child.dataType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index b68d30a26abd..05c695be4393 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -40,8 +40,7 @@ import org.apache.spark.sql.types._ * requested. The attributes produced by this function will be automatically copied anytime rules * result in changes to the Generator or its children. */ -abstract class Generator extends Expression { - self: Product => +trait Generator extends Expression { self: Product => // TODO ideally we should return the type of ArrayType(StructType), // however, we don't keep the output field names in the Generator. @@ -99,8 +98,9 @@ case class UserDefinedGenerator( /** * Given an input array produces a sequence of rows for each value in the array. */ -case class Explode(child: Expression) - extends Generator with trees.UnaryNode[Expression] { +case class Explode(child: Expression) extends UnaryExpression with Generator { + + override def children: Seq[Expression] = child :: Nil override def checkInputDataTypes(): TypeCheckResult = { if (child.dataType.isInstanceOf[ArrayType] || child.dataType.isInstanceOf[MapType]) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 6181c60c0e45..718f291173c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -37,8 +37,10 @@ object NamedExpression { */ case class ExprId(id: Long) -abstract class NamedExpression extends Expression { - self: Product => +/** + * An [[Expression]] that is named. + */ +trait NamedExpression extends Expression { self: Product => def name: String def exprId: ExprId @@ -78,8 +80,7 @@ abstract class NamedExpression extends Expression { } } -abstract class Attribute extends NamedExpression { - self: Product => +abstract class Attribute extends LeafExpression with NamedExpression { self: Product => override def references: AttributeSet = AttributeSet(this) @@ -110,7 +111,7 @@ case class Alias(child: Expression, name: String)( val exprId: ExprId = NamedExpression.newExprId, val qualifiers: Seq[String] = Nil, val explicitMetadata: Option[Metadata] = None) - extends NamedExpression with trees.UnaryNode[Expression] { + extends UnaryExpression with NamedExpression { // Alias(Generator, xx) need to be transformed into Generate(generator, ...) override lazy val resolved = @@ -172,7 +173,8 @@ case class AttributeReference( nullable: Boolean = true, override val metadata: Metadata = Metadata.empty)( val exprId: ExprId = NamedExpression.newExprId, - val qualifiers: Seq[String] = Nil) extends Attribute with trees.LeafNode[Expression] { + val qualifiers: Seq[String] = Nil) + extends Attribute { /** * Returns true iff the expression id is the same for both attributes. @@ -242,7 +244,7 @@ case class AttributeReference( * A place holder used when printing expressions without debugging information such as the * expression id or the unresolved indicator. */ -case class PrettyAttribute(name: String) extends Attribute with trees.LeafNode[Expression] { +case class PrettyAttribute(name: String) extends Attribute { override def toString: String = name diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index d7077a0ec907..f8a1f12c352f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -277,15 +277,21 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { /** * A logical plan node with no children. */ -abstract class LeafNode extends LogicalPlan with trees.LeafNode[LogicalPlan] { +abstract class LeafNode extends LogicalPlan { self: Product => + + override def children: Seq[LogicalPlan] = Nil } /** * A logical plan node with single child. */ -abstract class UnaryNode extends LogicalPlan with trees.UnaryNode[LogicalPlan] { +abstract class UnaryNode extends LogicalPlan { self: Product => + + def child: LogicalPlan + + override def children: Seq[LogicalPlan] = child :: Nil } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 16844b2f4b68..0f95ca688a7a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -452,19 +452,3 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] { s"$nodeName(${args.mkString(",")})" } } - - -/** - * A [[TreeNode]] with no children. - */ -trait LeafNode[BaseType <: TreeNode[BaseType]] { - def children: Seq[BaseType] = Nil -} - -/** - * A [[TreeNode]] with a single [[child]]. - */ -trait UnaryNode[BaseType <: TreeNode[BaseType]] { - def child: BaseType - def children: Seq[BaseType] = child :: Nil -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index 9dc7879fa4a1..2ebce9d1c5c3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -238,12 +238,19 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ } } -private[sql] trait LeafNode extends SparkPlan with trees.LeafNode[SparkPlan] { +private[sql] trait LeafNode extends SparkPlan { self: Product => + + override def children: Seq[SparkPlan] = Nil } -private[sql] trait UnaryNode extends SparkPlan with trees.UnaryNode[SparkPlan] { +private[sql] trait UnaryNode extends SparkPlan { self: Product => + + def child: SparkPlan + + override def children: Seq[SparkPlan] = child :: Nil + override def outputPartitioning: Partitioning = child.outputPartitioning } From 2225331ea36e4a39f097e53afead6930e3cb0ed5 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 15 Jul 2015 18:19:08 -0700 Subject: [PATCH 2/5] Aggregate expressions should not be foldable. --- .../spark/sql/catalyst/expressions/Expression.scala | 1 - .../spark/sql/catalyst/expressions/aggregates.scala | 6 +++++- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 4 ++-- .../spark/sql/catalyst/plans/logical/LogicalPlan.scala | 1 - .../scala/org/apache/spark/sql/execution/SparkPlan.scala | 8 ++++---- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 87667316aca6..d60fcb989a20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedAttribute} import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode} -import org.apache.spark.sql.catalyst.trees import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.types._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala index 19d74e527084..71c943dc79e9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.expressions import com.clearspring.analytics.stream.cardinality.HyperLogLog import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.trees import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.util.TypeUtils @@ -30,6 +29,11 @@ import org.apache.spark.util.collection.OpenHashSet trait AggregateExpression extends Expression { self: Product => + /** + * Aggregate expressions should not be foldable. + */ + override def foldable: Boolean = false + /** * Creates a new instance that can be used to compute this aggregate expression for a group * of input rows/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 5d80214abf14..2f94b457f4cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -342,7 +342,7 @@ object ConstantFolding extends Rule[LogicalPlan] { case l: Literal => l // Fold expressions that are foldable. - case e if e.foldable => Literal.create(e.eval(null), e.dataType) + case e if e.foldable => Literal.create(e.eval(EmptyRow), e.dataType) // Fold "literal in (item1, item2, ..., literal, ...)" into true directly. case In(Literal(v, _), list) if list.exists { @@ -361,7 +361,7 @@ object OptimizeIn extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsDown { case In(v, list) if !list.exists(!_.isInstanceOf[Literal]) => - val hSet = list.map(e => e.eval(null)) + val hSet = list.map(e => e.eval(EmptyRow)) InSet(v, HashSet() ++ hSet) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index f8a1f12c352f..adac37231cc4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -23,7 +23,6 @@ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.trees.TreeNode -import org.apache.spark.sql.catalyst.trees abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index 2ebce9d1c5c3..632f633d82a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -17,20 +17,20 @@ package org.apache.spark.sql.execution -import org.apache.spark.annotation.DeveloperApi +import scala.collection.mutable.ArrayBuffer + import org.apache.spark.Logging +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.rdd.{RDD, RDDOperationScope} import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, trees} +import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical._ -import scala.collection.mutable.ArrayBuffer - object SparkPlan { protected[sql] val currentContext = new ThreadLocal[SQLContext]() } From 9c589cf216ff5eb46031ed332a35e6e23d91d2fe Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 15 Jul 2015 19:01:47 -0700 Subject: [PATCH 3/5] Fixed one more test case... --- .../spark/sql/catalyst/expressions/namedExpressions.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 718f291173c9..8bf7a7ce4e64 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -42,6 +42,9 @@ case class ExprId(id: Long) */ trait NamedExpression extends Expression { self: Product => + /** We should never fold named expressions in order to not remove the alias. */ + override def foldable: Boolean = false + def name: String def exprId: ExprId From 3135a8b9edaf52aba17c9028f1672334e793456d Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 15 Jul 2015 19:12:07 -0700 Subject: [PATCH 4/5] SortOrder should not be foldable. --- .../org/apache/spark/sql/catalyst/expressions/SortOrder.scala | 3 +++ .../org/apache/spark/sql/catalyst/expressions/generators.scala | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 6a8ee16b5954..b8f7068c9e5e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -32,6 +32,9 @@ case object Descending extends SortDirection */ case class SortOrder(child: Expression, direction: SortDirection) extends UnaryExpression { + /** Sort order is not foldable because we don't have an eval for it. */ + override def foldable: Boolean = false + override def dataType: DataType = child.dataType override def nullable: Boolean = child.nullable diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index 05c695be4393..a35b3cebf7f7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -127,6 +127,4 @@ case class Explode(child: Expression) extends UnaryExpression with Generator { else inputMap.map { case (k, v) => InternalRow(k, v) } } } - - override def toString: String = s"explode($child)" } From 9e8a4def6f02e03899fa2fafdd2841c513d280af Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 15 Jul 2015 21:03:20 -0700 Subject: [PATCH 5/5] Generator should not be foldable. --- .../org/apache/spark/sql/catalyst/expressions/generators.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index a35b3cebf7f7..51dc77ee3fc5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -46,6 +46,8 @@ trait Generator extends Expression { self: Product => // however, we don't keep the output field names in the Generator. override def dataType: DataType = throw new UnsupportedOperationException + override def foldable: Boolean = false + override def nullable: Boolean = false /**