Skip to content

Commit 59d24c2

Browse files
cloud-fanrxin
authored andcommitted
[SPARK-9130][SQL] throw exception when check equality between external and internal row
instead of return false, throw exception when check equality between external and internal row is better. Author: Wenchen Fan <[email protected]> Closes apache#7460 from cloud-fan/row-compare and squashes the following commits: 8a20911 [Wenchen Fan] improve equals 402daa8 [Wenchen Fan] throw exception when check equality between external and internal row
1 parent 441e072 commit 59d24c2

File tree

3 files changed

+53
-7
lines changed

3 files changed

+53
-7
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -364,18 +364,33 @@ trait Row extends Serializable {
364364
false
365365
}
366366

367-
protected def canEqual(other: Any) = {
368-
// Note that InternalRow overrides canEqual. These two canEqual's together makes sure that
369-
// comparing the external Row and InternalRow will always yield false.
367+
/**
368+
* Returns true if we can check equality for these 2 rows.
369+
* Equality check between external row and internal row is not allowed.
370+
* Here we do this check to prevent call `equals` on external row with internal row.
371+
*/
372+
protected def canEqual(other: Row) = {
373+
// Note that `Row` is not only the interface of external row but also the parent
374+
// of `InternalRow`, so we have to ensure `other` is not a internal row here to prevent
375+
// call `equals` on external row with internal row.
376+
// `InternalRow` overrides canEqual, and these two canEquals together makes sure that
377+
// equality check between external Row and InternalRow will always fail.
370378
// In the future, InternalRow should not extend Row. In that case, we can remove these
371379
// canEqual methods.
372-
other.isInstanceOf[Row] && !other.isInstanceOf[InternalRow]
380+
!other.isInstanceOf[InternalRow]
373381
}
374382

375383
override def equals(o: Any): Boolean = {
376-
if (o == null || !canEqual(o)) return false
377-
384+
if (!o.isInstanceOf[Row]) return false
378385
val other = o.asInstanceOf[Row]
386+
387+
if (!canEqual(other)) {
388+
throw new UnsupportedOperationException(
389+
"cannot check equality between external and internal rows")
390+
}
391+
392+
if (other eq null) return false
393+
379394
if (length != other.length) {
380395
return false
381396
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/InternalRow.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,12 @@ abstract class InternalRow extends Row {
5454
// A default implementation to change the return type
5555
override def copy(): InternalRow = this
5656

57-
protected override def canEqual(other: Any) = other.isInstanceOf[InternalRow]
57+
/**
58+
* Returns true if we can check equality for these 2 rows.
59+
* Equality check between external row and internal row is not allowed.
60+
* Here we do this check to prevent call `equals` on internal row with external row.
61+
*/
62+
protected override def canEqual(other: Row) = other.isInstanceOf[InternalRow]
5863

5964
// Custom hashCode function that matches the efficient code generated version.
6065
override def hashCode: Int = {

sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
package org.apache.spark.sql
1919

20+
import org.apache.spark.sql.catalyst.InternalRow
2021
import org.apache.spark.sql.catalyst.expressions.{GenericRow, GenericRowWithSchema}
2122
import org.apache.spark.sql.types._
2223
import org.scalatest.{Matchers, FunSpec}
@@ -68,4 +69,29 @@ class RowTest extends FunSpec with Matchers {
6869
sampleRow.getValuesMap(List("col1", "col2")) shouldBe expected
6970
}
7071
}
72+
73+
describe("row equals") {
74+
val externalRow = Row(1, 2)
75+
val externalRow2 = Row(1, 2)
76+
val internalRow = InternalRow(1, 2)
77+
val internalRow2 = InternalRow(1, 2)
78+
79+
it("equality check for external rows") {
80+
externalRow shouldEqual externalRow2
81+
}
82+
83+
it("equality check for internal rows") {
84+
internalRow shouldEqual internalRow2
85+
}
86+
87+
it("throws an exception when check equality between external and internal rows") {
88+
def assertError(f: => Unit): Unit = {
89+
val e = intercept[UnsupportedOperationException](f)
90+
e.getMessage.contains("cannot check equality between external and internal rows")
91+
}
92+
93+
assertError(internalRow.equals(externalRow))
94+
assertError(externalRow.equals(internalRow))
95+
}
96+
}
7197
}

0 commit comments

Comments
 (0)