
Commit a30c5a6

HyukjinKwon authored and cmonkey committed
[SPARK-19544][SQL] Improve error message when some column types are compatible and others are not in set operations
## What changes were proposed in this pull request?

This PR proposes to fix the error message when some data types are compatible and others are not in set/union operations. Currently, the code below:

```scala
Seq((1,("a", 1))).toDF.union(Seq((1L,("a", "b"))).toDF)
```

throws an exception saying `LongType` and `IntegerType` are incompatible types. It should instead report the `StructType`s in a more readable format, as below:

**Before**

```
Union can only be performed on tables with the compatible column types.
LongType <> IntegerType at the first column of the second table;;
```

**After**

```
Union can only be performed on tables with the compatible column types.
struct<_1:string,_2:string> <> struct<_1:string,_2:int> at the second column of the second table;;
```

*I manually inserted a newline in the messages above for readability only in this PR description.

## How was this patch tested?

Unit tests in `AnalysisErrorSuite`, manual tests, and a build with Scala 2.10.

Author: hyukjinkwon <[email protected]>

Closes apache#16882 from HyukjinKwon/SPARK-19544.
1 parent bdbae06 commit a30c5a6
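
For context, the improved message relies on `DataType.catalogString` rather than the default `toString`, which is what makes the nested types readable. A minimal sketch of the difference, using the struct column from the example above (the exact `toString` output may vary slightly by Spark version, so treat the printed forms as approximations):

```scala
import org.apache.spark.sql.types._

object CatalogStringSketch {
  def main(args: Array[String]): Unit = {
    // Second column of Seq((1, ("a", 1))).toDF: a struct of a string and an int.
    val firstTable = StructType(Seq(
      StructField("_1", StringType),
      StructField("_2", IntegerType)))
    // Second column of Seq((1L, ("a", "b"))).toDF: a struct of two strings.
    val secondTable = StructType(Seq(
      StructField("_1", StringType),
      StructField("_2", StringType)))

    // Default toString prints something like StructType(StructField(_1,StringType,true), ...)
    println(firstTable)
    // catalogString prints the compact form used in the new message: struct<_1:string,_2:int>
    println(firstTable.catalogString)
    println(secondTable.catalogString)  // struct<_1:string,_2:string>
  }
}
```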

File tree

4 files changed (+38, -14 lines)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala

Lines changed: 3 additions & 3 deletions
```diff
@@ -321,12 +321,12 @@ trait CheckAnalysis extends PredicateHelper {
           // Check if the data types match.
           dataTypes(child).zip(ref).zipWithIndex.foreach { case ((dt1, dt2), ci) =>
             // SPARK-18058: we shall not care about the nullability of columns
-            if (!dt1.sameType(dt2)) {
+            if (TypeCoercion.findWiderTypeForTwo(dt1.asNullable, dt2.asNullable).isEmpty) {
               failAnalysis(
                 s"""
                    |${operator.nodeName} can only be performed on tables with the compatible
-                   |column types. $dt1 <> $dt2 at the ${ordinalNumber(ci)} column of
-                   |the ${ordinalNumber(ti + 1)} table
+                   |column types. ${dt1.catalogString} <> ${dt2.catalogString} at the
+                   |${ordinalNumber(ci)} column of the ${ordinalNumber(ti + 1)} table
                 """.stripMargin.replace("\n", " ").trim())
             }
           }
```
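
The behavioral point of this change: the analysis error now fires only when no wider common type exists for the pair of columns (the same criterion set-operation type coercion can actually widen with), rather than whenever the two types merely differ. A hedged sketch of the expected outcomes; it assumes the calling code is declared in the `org.apache.spark.sql.catalyst.analysis` package, since `findWiderTypeForTwo` becomes `private[analysis]` in this patch:

```scala
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.types._

object WiderTypeSketch {
  def main(args: Array[String]): Unit = {
    // IntegerType and LongType differ, but a wider common type (LongType) exists,
    // so the set-operation check no longer treats them as incompatible.
    println(TypeCoercion.findWiderTypeForTwo(IntegerType, LongType))  // expected: Some(LongType)

    // A decimal and a map have no wider common type, so this pair still fails analysis
    // and shows up in the improved error message.
    println(TypeCoercion.findWiderTypeForTwo(
      DecimalType(10, 2), MapType(IntegerType, IntegerType)))  // expected: None
  }
}
```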

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala

Lines changed: 13 additions & 11 deletions
```diff
@@ -116,17 +116,19 @@ object TypeCoercion {
    * i.e. the main difference with [[findTightestCommonType]] is that here we allow some
    * loss of precision when widening decimal and double, and promotion to string.
    */
-  private def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match {
-    case (t1: DecimalType, t2: DecimalType) =>
-      Some(DecimalPrecision.widerDecimalType(t1, t2))
-    case (t: IntegralType, d: DecimalType) =>
-      Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d))
-    case (d: DecimalType, t: IntegralType) =>
-      Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d))
-    case (_: FractionalType, _: DecimalType) | (_: DecimalType, _: FractionalType) =>
-      Some(DoubleType)
-    case _ =>
-      findTightestCommonTypeToString(t1, t2)
+  private[analysis] def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = {
+    (t1, t2) match {
+      case (t1: DecimalType, t2: DecimalType) =>
+        Some(DecimalPrecision.widerDecimalType(t1, t2))
+      case (t: IntegralType, d: DecimalType) =>
+        Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d))
+      case (d: DecimalType, t: IntegralType) =>
+        Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d))
+      case (_: FractionalType, _: DecimalType) | (_: DecimalType, _: FractionalType) =>
+        Some(DoubleType)
+      case _ =>
+        findTightestCommonTypeToString(t1, t2)
+    }
   }
 
   private def findWiderCommonType(types: Seq[DataType]) = {
```
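
For reference, `findWiderCommonType` (whose signature appears as trailing context above) folds the pairwise function across all column types and gives up as soon as one pair has no wider type. A standalone sketch of that folding pattern, with `widerForTwo` standing in for the non-public `findWiderTypeForTwo`; it is a sketch of the idea, not the Spark implementation itself:

```scala
import org.apache.spark.sql.types._

// Sketch only: reduce a sequence of column types with a pairwise "wider type" function.
def widerCommonType(
    types: Seq[DataType],
    widerForTwo: (DataType, DataType) => Option[DataType]): Option[DataType] =
  types.foldLeft[Option[DataType]](Some(NullType)) { (acc, next) =>
    acc match {
      case Some(wider) => widerForTwo(wider, next)  // keep widening while a common type exists
      case None => None                             // once a pair fails, the whole sequence fails
    }
  }
```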

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala

Lines changed: 15 additions & 0 deletions
```diff
@@ -282,16 +282,31 @@ class AnalysisErrorSuite extends AnalysisTest {
     testRelation.union(nestedRelation),
     "union" :: "the compatible column types" :: Nil)
 
+  errorTest(
+    "union with a incompatible column type and compatible column types",
+    testRelation3.union(testRelation4),
+    "union" :: "the compatible column types" :: "map" :: "decimal" :: Nil)
+
   errorTest(
     "intersect with incompatible column types",
     testRelation.intersect(nestedRelation),
     "intersect" :: "the compatible column types" :: Nil)
 
+  errorTest(
+    "intersect with a incompatible column type and compatible column types",
+    testRelation3.intersect(testRelation4),
+    "intersect" :: "the compatible column types" :: "map" :: "decimal" :: Nil)
+
   errorTest(
     "except with incompatible column types",
     testRelation.except(nestedRelation),
     "except" :: "the compatible column types" :: Nil)
 
+  errorTest(
+    "except with a incompatible column type and compatible column types",
+    testRelation3.except(testRelation4),
+    "except" :: "the compatible column types" :: "map" :: "decimal" :: Nil)
+
   errorTest(
     "SPARK-9955: correct error message for aggregate",
     // When parse SQL string, we will wrap aggregate expressions with UnresolvedAlias.
```
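
The new cases pair `testRelation3` (whose `h` column is `DecimalType(10, 2)`, per the `TestRelations` diff below) with `testRelation4` (whose `h` is a map), so the improved message should mention both `map` and `decimal`. A rough DataFrame-level equivalent, assuming a running `SparkSession` named `spark` (for example in `spark-shell`); the exact message wording is an expectation, not a verified log:

```scala
import org.apache.spark.sql.AnalysisException
import spark.implicits._

// Only the fourth columns differ irreconcilably: a decimal versus a map.
val left  = Seq(("e", "f", "g", BigDecimal(1))).toDF("e", "f", "g", "h")
val right = Seq(("e", "f", "g", Map(1 -> 1))).toDF("e", "f", "g", "h")

try {
  left.union(right)
} catch {
  case e: AnalysisException =>
    // Expected to read along the lines of:
    //   ... map<int,int> <> decimal(38,18) at the fourth column of the second table
    println(e.getMessage)
}
```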

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala

Lines changed: 7 additions & 0 deletions
```diff
@@ -37,6 +37,13 @@ object TestRelations {
     AttributeReference("g", DoubleType)(),
     AttributeReference("h", DecimalType(10, 2))())
 
+  // This is the same with `testRelation3` but only `h` is incompatible type.
+  val testRelation4 = LocalRelation(
+    AttributeReference("e", StringType)(),
+    AttributeReference("f", StringType)(),
+    AttributeReference("g", StringType)(),
+    AttributeReference("h", MapType(IntegerType, IntegerType))())
+
   val nestedRelation = LocalRelation(
     AttributeReference("top", StructType(
       StructField("duplicateField", StringType) ::
```
