-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-9085][SQL] Remove LeafNode, UnaryNode, BinaryNode from TreeNode. #7434
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
16b5c90
2225331
9c589cf
3135a8b
9e8a4de
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,16 +20,20 @@ package org.apache.spark.sql.catalyst.expressions | |
| import com.clearspring.analytics.stream.cardinality.HyperLogLog | ||
|
|
||
| import org.apache.spark.sql.catalyst.InternalRow | ||
| import org.apache.spark.sql.catalyst.trees | ||
| import org.apache.spark.sql.catalyst.errors.TreeNodeException | ||
| import org.apache.spark.sql.catalyst.analysis.TypeCheckResult | ||
| import org.apache.spark.sql.catalyst.util.TypeUtils | ||
| import org.apache.spark.sql.types._ | ||
| import org.apache.spark.util.collection.OpenHashSet | ||
|
|
||
| abstract class AggregateExpression extends Expression { | ||
| trait AggregateExpression extends Expression { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how about: trait AggregateExpression {
self: Expression =>
}
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it'd result in a lot more code changes to get it working. e.g. abstract class AggregateFunction
extends LeafExpression with AggregateExpression with Serializable {
self: Product =>
/** Base should return the generic aggregate expression that this function is computing */
val base: AggregateExpression
override def nullable: Boolean = base.nullable <--- this won't work anymore
override def dataType: DataType = base.dataType |
||
| self: Product => | ||
|
|
||
| /** | ||
| * Aggregate expressions should not be foldable. | ||
| */ | ||
| override def foldable: Boolean = false | ||
|
|
||
| /** | ||
| * Creates a new instance that can be used to compute this aggregate expression for a group | ||
| * of input rows/ | ||
|
|
@@ -60,7 +64,7 @@ case class SplitEvaluation( | |
| * An [[AggregateExpression]] that can be partially computed without seeing all relevant tuples. | ||
| * These partial evaluations can then be combined to compute the actual answer. | ||
| */ | ||
| abstract class PartialAggregate extends AggregateExpression { | ||
| trait PartialAggregate extends AggregateExpression { | ||
| self: Product => | ||
|
|
||
| /** | ||
|
|
@@ -74,7 +78,7 @@ abstract class PartialAggregate extends AggregateExpression { | |
| * [[AggregateExpression]] with an algorithm that will be used to compute one specific result. | ||
| */ | ||
| abstract class AggregateFunction | ||
| extends AggregateExpression with Serializable with trees.LeafNode[Expression] { | ||
| extends LeafExpression with AggregateExpression with Serializable { | ||
| self: Product => | ||
|
|
||
| /** Base should return the generic aggregate expression that this function is computing */ | ||
|
|
@@ -91,7 +95,7 @@ abstract class AggregateFunction | |
| } | ||
| } | ||
|
|
||
| case class Min(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { | ||
| case class Min(child: Expression) extends UnaryExpression with PartialAggregate { | ||
|
|
||
| override def nullable: Boolean = true | ||
| override def dataType: DataType = child.dataType | ||
|
|
@@ -124,7 +128,7 @@ case class MinFunction(expr: Expression, base: AggregateExpression) extends Aggr | |
| override def eval(input: InternalRow): Any = currentMin.value | ||
| } | ||
|
|
||
| case class Max(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { | ||
| case class Max(child: Expression) extends UnaryExpression with PartialAggregate { | ||
|
|
||
| override def nullable: Boolean = true | ||
| override def dataType: DataType = child.dataType | ||
|
|
@@ -157,7 +161,7 @@ case class MaxFunction(expr: Expression, base: AggregateExpression) extends Aggr | |
| override def eval(input: InternalRow): Any = currentMax.value | ||
| } | ||
|
|
||
| case class Count(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { | ||
| case class Count(child: Expression) extends UnaryExpression with PartialAggregate { | ||
|
|
||
| override def nullable: Boolean = false | ||
| override def dataType: LongType.type = LongType | ||
|
|
@@ -310,7 +314,7 @@ private[sql] case object HyperLogLogUDT extends UserDefinedType[HyperLogLog] { | |
| } | ||
|
|
||
| case class ApproxCountDistinctPartition(child: Expression, relativeSD: Double) | ||
| extends AggregateExpression with trees.UnaryNode[Expression] { | ||
| extends UnaryExpression with AggregateExpression { | ||
|
|
||
| override def nullable: Boolean = false | ||
| override def dataType: DataType = HyperLogLogUDT | ||
|
|
@@ -340,7 +344,7 @@ case class ApproxCountDistinctPartitionFunction( | |
| } | ||
|
|
||
| case class ApproxCountDistinctMerge(child: Expression, relativeSD: Double) | ||
| extends AggregateExpression with trees.UnaryNode[Expression] { | ||
| extends UnaryExpression with AggregateExpression { | ||
|
|
||
| override def nullable: Boolean = false | ||
| override def dataType: LongType.type = LongType | ||
|
|
@@ -368,7 +372,7 @@ case class ApproxCountDistinctMergeFunction( | |
| } | ||
|
|
||
| case class ApproxCountDistinct(child: Expression, relativeSD: Double = 0.05) | ||
| extends PartialAggregate with trees.UnaryNode[Expression] { | ||
| extends UnaryExpression with PartialAggregate { | ||
|
|
||
| override def nullable: Boolean = false | ||
| override def dataType: LongType.type = LongType | ||
|
|
@@ -386,7 +390,7 @@ case class ApproxCountDistinct(child: Expression, relativeSD: Double = 0.05) | |
| override def newInstance(): CountDistinctFunction = new CountDistinctFunction(child :: Nil, this) | ||
| } | ||
|
|
||
| case class Average(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { | ||
| case class Average(child: Expression) extends UnaryExpression with PartialAggregate { | ||
|
|
||
| override def prettyName: String = "avg" | ||
|
|
||
|
|
@@ -479,7 +483,7 @@ case class AverageFunction(expr: Expression, base: AggregateExpression) | |
| } | ||
| } | ||
|
|
||
| case class Sum(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { | ||
| case class Sum(child: Expression) extends UnaryExpression with PartialAggregate { | ||
|
|
||
| override def nullable: Boolean = true | ||
|
|
||
|
|
@@ -606,8 +610,7 @@ case class CombineSumFunction(expr: Expression, base: AggregateExpression) | |
| } | ||
| } | ||
|
|
||
| case class SumDistinct(child: Expression) | ||
| extends PartialAggregate with trees.UnaryNode[Expression] { | ||
| case class SumDistinct(child: Expression) extends UnaryExpression with PartialAggregate { | ||
|
|
||
| def this() = this(null) | ||
| override def nullable: Boolean = true | ||
|
|
@@ -701,7 +704,7 @@ case class CombineSetsAndSumFunction( | |
| } | ||
| } | ||
|
|
||
| case class First(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { | ||
| case class First(child: Expression) extends UnaryExpression with PartialAggregate { | ||
| override def nullable: Boolean = true | ||
| override def dataType: DataType = child.dataType | ||
| override def toString: String = s"FIRST($child)" | ||
|
|
@@ -729,7 +732,7 @@ case class FirstFunction(expr: Expression, base: AggregateExpression) extends Ag | |
| override def eval(input: InternalRow): Any = result | ||
| } | ||
|
|
||
| case class Last(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { | ||
| case class Last(child: Expression) extends UnaryExpression with PartialAggregate { | ||
| override def references: AttributeSet = child.references | ||
| override def nullable: Boolean = true | ||
| override def dataType: DataType = child.dataType | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,13 +40,14 @@ import org.apache.spark.sql.types._ | |
| * requested. The attributes produced by this function will be automatically copied anytime rules | ||
| * result in changes to the Generator or its children. | ||
| */ | ||
| abstract class Generator extends Expression { | ||
| self: Product => | ||
| trait Generator extends Expression { self: Product => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how about: trait Generator {
self: Expression =>
}
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how about: trait Generator {
self: Expression =>
}
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we technically can for this one -- but what do we gain from it?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nothing. Unless you have circular dependencies between traits, which I doubt. I avoid self-types as much as I can. Inheritance is much easier to understand and covers 99% of cases. |
||
|
|
||
| // TODO ideally we should return the type of ArrayType(StructType), | ||
| // however, we don't keep the output field names in the Generator. | ||
| override def dataType: DataType = throw new UnsupportedOperationException | ||
|
|
||
| override def foldable: Boolean = false | ||
|
|
||
| override def nullable: Boolean = false | ||
|
|
||
| /** | ||
|
|
@@ -99,8 +100,9 @@ case class UserDefinedGenerator( | |
| /** | ||
| * Given an input array produces a sequence of rows for each value in the array. | ||
| */ | ||
| case class Explode(child: Expression) | ||
| extends Generator with trees.UnaryNode[Expression] { | ||
| case class Explode(child: Expression) extends UnaryExpression with Generator { | ||
|
|
||
| override def children: Seq[Expression] = child :: Nil | ||
|
|
||
| override def checkInputDataTypes(): TypeCheckResult = { | ||
| if (child.dataType.isInstanceOf[ArrayType] || child.dataType.isInstanceOf[MapType]) { | ||
|
|
@@ -127,6 +129,4 @@ case class Explode(child: Expression) | |
| else inputMap.map { case (k, v) => InternalRow(k, v) } | ||
| } | ||
| } | ||
|
|
||
| override def toString: String = s"explode($child)" | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -342,7 +342,7 @@ object ConstantFolding extends Rule[LogicalPlan] { | |
| case l: Literal => l | ||
|
|
||
| // Fold expressions that are foldable. | ||
| case e if e.foldable => Literal.create(e.eval(null), e.dataType) | ||
| case e if e.foldable => Literal.create(e.eval(EmptyRow), e.dataType) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I remember we set a default parameter value for |
||
|
|
||
| // Fold "literal in (item1, item2, ..., literal, ...)" into true directly. | ||
| case In(Literal(v, _), list) if list.exists { | ||
|
|
@@ -361,7 +361,7 @@ object OptimizeIn extends Rule[LogicalPlan] { | |
| def apply(plan: LogicalPlan): LogicalPlan = plan transform { | ||
| case q: LogicalPlan => q transformExpressionsDown { | ||
| case In(v, list) if !list.exists(!_.isInstanceOf[Literal]) => | ||
| val hSet = list.map(e => e.eval(null)) | ||
| val hSet = list.map(e => e.eval(EmptyRow)) | ||
| InSet(v, HashSet() ++ hSet) | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,6 @@ import org.apache.spark.sql.catalyst.analysis._ | |
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.plans.QueryPlan | ||
| import org.apache.spark.sql.catalyst.trees.TreeNode | ||
| import org.apache.spark.sql.catalyst.trees | ||
|
|
||
|
|
||
| abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { | ||
|
|
@@ -277,15 +276,21 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { | |
| /** | ||
| * A logical plan node with no children. | ||
| */ | ||
| abstract class LeafNode extends LogicalPlan with trees.LeafNode[LogicalPlan] { | ||
| abstract class LeafNode extends LogicalPlan { | ||
| self: Product => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's still required |
||
|
|
||
| override def children: Seq[LogicalPlan] = Nil | ||
| } | ||
|
|
||
| /** | ||
| * A logical plan node with single child. | ||
| */ | ||
| abstract class UnaryNode extends LogicalPlan with trees.UnaryNode[LogicalPlan] { | ||
| abstract class UnaryNode extends LogicalPlan { | ||
| self: Product => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's still required |
||
|
|
||
| def child: LogicalPlan | ||
|
|
||
| override def children: Seq[LogicalPlan] = child :: Nil | ||
| } | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
as it's abstract class now, can we remove
self: Product =>?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no it's still required
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you could probably replace it with
with Product.