Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ case class Union(children: Seq[SparkPlan]) extends SparkPlan {
}
}
}
override def outputsUnsafeRows: Boolean = children.forall(_.outputsUnsafeRows)
override def outputsUnsafeRows: Boolean = children.exists(_.outputsUnsafeRows)
override def canProcessUnsafeRows: Boolean = true
override def canProcessSafeRows: Boolean = true
protected override def doExecute(): RDD[InternalRow] =
Expand Down Expand Up @@ -250,7 +250,9 @@ case class Coalesce(numPartitions: Int, child: SparkPlan) extends UnaryNode {
child.execute().coalesce(numPartitions, shuffle = false)
}

override def outputsUnsafeRows: Boolean = child.outputsUnsafeRows
override def canProcessUnsafeRows: Boolean = true
override def canProcessSafeRows: Boolean = true
}

/**
Expand All @@ -263,6 +265,10 @@ case class Except(left: SparkPlan, right: SparkPlan) extends BinaryNode {
protected override def doExecute(): RDD[InternalRow] = {
left.execute().map(_.copy()).subtract(right.execute().map(_.copy()))
}

override def outputsUnsafeRows: Boolean = children.exists(_.outputsUnsafeRows)
override def canProcessUnsafeRows: Boolean = true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UnsafeRow can be compared with GenericInternalRow, so we should make sure that left and right have the same type of row.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you! @davies @cloud-fan I added two extra test cases for ensuring they have the same formats.

override def canProcessSafeRows: Boolean = true
}

/**
Expand All @@ -275,6 +281,10 @@ case class Intersect(left: SparkPlan, right: SparkPlan) extends BinaryNode {
protected override def doExecute(): RDD[InternalRow] = {
left.execute().map(_.copy()).intersection(right.execute().map(_.copy()))
}

override def outputsUnsafeRows: Boolean = children.exists(_.outputsUnsafeRows)
override def canProcessUnsafeRows: Boolean = true
override def canProcessSafeRows: Boolean = true
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,41 @@ class RowFormatConvertersSuite extends SparkPlanTest with SharedSQLContext {
assert(!preparedPlan.outputsUnsafeRows)
}

test("coalesce can process unsafe rows") {
val plan = Coalesce(1, outputsUnsafe)
val preparedPlan = sqlContext.prepareForExecution.execute(plan)
assert(getConverters(preparedPlan).size === 1)
assert(preparedPlan.outputsUnsafeRows)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we add mixed rows test for coalesce too?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I am not sure if I understand your question. Coalesce only has a single child. I am not sure what mixed rows mean. Could you give me one example? Thank you! @cloud-fan

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh sorry, I thought it was org.apache.spark.sql.catalyst.expressions.Coalesce, nvm

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you!


test("except can process unsafe rows") {
val plan = Except(outputsUnsafe, outputsUnsafe)
val preparedPlan = sqlContext.prepareForExecution.execute(plan)
assert(getConverters(preparedPlan).size === 2)
assert(preparedPlan.outputsUnsafeRows)
}

test("except requires all of its input rows' formats to agree") {
val plan = Except(outputsSafe, outputsUnsafe)
assert(plan.canProcessSafeRows && plan.canProcessUnsafeRows)
val preparedPlan = sqlContext.prepareForExecution.execute(plan)
assert(preparedPlan.outputsUnsafeRows)
}

test("intersect can process unsafe rows") {
val plan = Intersect(outputsUnsafe, outputsUnsafe)
val preparedPlan = sqlContext.prepareForExecution.execute(plan)
assert(getConverters(preparedPlan).size === 2)
assert(preparedPlan.outputsUnsafeRows)
}

test("intersect requires all of its input rows' formats to agree") {
val plan = Intersect(outputsSafe, outputsUnsafe)
assert(plan.canProcessSafeRows && plan.canProcessUnsafeRows)
val preparedPlan = sqlContext.prepareForExecution.execute(plan)
assert(preparedPlan.outputsUnsafeRows)
}

test("execute() fails an assertion if inputs rows are of different formats") {
val e = intercept[AssertionError] {
Union(Seq(outputsSafe, outputsUnsafe)).execute()
Expand Down