Skip to content

Commit 86ea64d

Browse files
nonglimarmbrus
authored andcommitted
[SPARK-12271][SQL] Improve error message when Dataset.as[ ] has incompatible schemas.
Author: Nong Li <[email protected]> Closes #10260 from nongli/spark-11271.
1 parent b24c12d commit 86ea64d

File tree

4 files changed

+18
-7
lines changed

4 files changed

+18
-7
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ object ScalaReflection extends ScalaReflection {
184184
val TypeRef(_, _, Seq(optType)) = t
185185
val className = getClassNameFromType(optType)
186186
val newTypePath = s"""- option value class: "$className"""" +: walkedTypePath
187-
WrapOption(constructorFor(optType, path, newTypePath))
187+
WrapOption(constructorFor(optType, path, newTypePath), dataTypeFor(optType))
188188

189189
case t if t <:< localTypeOf[java.lang.Integer] =>
190190
val boxedType = classOf[java.lang.Integer]

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ case class ExpressionEncoder[T](
251251

252252
val plan = Project(Alias(unbound, "")() :: Nil, LocalRelation(schema))
253253
val analyzedPlan = SimpleAnalyzer.execute(plan)
254+
SimpleAnalyzer.checkAnalysis(analyzedPlan)
254255
val optimizedPlan = SimplifyCasts(analyzedPlan)
255256

256257
// In order to construct instances of inner classes (for example those declared in a REPL cell),

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,9 @@ import scala.reflect.ClassTag
2323
import org.apache.spark.SparkConf
2424
import org.apache.spark.serializer._
2525
import org.apache.spark.sql.Row
26-
import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer
27-
import org.apache.spark.sql.catalyst.plans.logical.{Project, LocalRelation}
28-
import org.apache.spark.sql.catalyst.util.GenericArrayData
2926
import org.apache.spark.sql.catalyst.InternalRow
30-
import org.apache.spark.sql.catalyst.expressions.codegen.{GeneratedExpressionCode, CodeGenContext}
27+
import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode}
28+
import org.apache.spark.sql.catalyst.util.GenericArrayData
3129
import org.apache.spark.sql.types._
3230

3331
/**
@@ -295,13 +293,17 @@ case class UnwrapOption(
295293
* Converts the result of evaluating `child` into an option, checking both the isNull bit and
296294
* (in the case of reference types) equality with null.
297295
* @param child The expression to evaluate and wrap.
296+
* @param optType The type of this option.
298297
*/
299-
case class WrapOption(child: Expression) extends UnaryExpression {
298+
case class WrapOption(child: Expression, optType: DataType)
299+
extends UnaryExpression with ExpectsInputTypes {
300300

301301
override def dataType: DataType = ObjectType(classOf[Option[_]])
302302

303303
override def nullable: Boolean = true
304304

305+
override def inputTypes: Seq[AbstractDataType] = optType :: Nil
306+
305307
override def eval(input: InternalRow): Any =
306308
throw new UnsupportedOperationException("Only code-generated evaluation is supported")
307309

sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,10 +481,18 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
481481
val ds = Seq(2 -> 2.toByte, 3 -> 3.toByte).toDF("a", "b").as[ClassData]
482482
assert(ds.collect().toSeq == Seq(ClassData("2", 2), ClassData("3", 3)))
483483
}
484-
}
485484

485+
test("verify mismatching field names fail with a good error") {
486+
val ds = Seq(ClassData("a", 1)).toDS()
487+
val e = intercept[AnalysisException] {
488+
ds.as[ClassData2].collect()
489+
}
490+
assert(e.getMessage.contains("cannot resolve 'c' given input columns a, b"), e.getMessage)
491+
}
492+
}
486493

487494
case class ClassData(a: String, b: Int)
495+
case class ClassData2(c: String, d: Int)
488496
case class ClassNullableData(a: String, b: Integer)
489497

490498
/**

0 commit comments

Comments
 (0)