From 6a599e762b34b9daa33cbbf1fd403ca85ad32f6d Mon Sep 17 00:00:00 2001 From: wangfei Date: Fri, 6 Feb 2015 17:12:28 +0800 Subject: [PATCH 1/5] throw exception when can not apply cast --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index b1bc858478ee1..19cc2a3ca9c7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -21,8 +21,8 @@ import java.sql.{Date, Timestamp} import java.text.{DateFormat, SimpleDateFormat} import org.apache.spark.Logging -import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.types._ +import org.apache.spark.sql.catalyst.errors.TreeNodeException /** Cast the child expression to the target data type. */ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression with Logging { @@ -99,7 +99,8 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w toField.nullable) } - case _ => false + case (from, to) => throw new TreeNodeException(this, s"can not cast from $from to $to!") + } } From 9fb60cef54d787ccd3fded7cc434f95dd0a0f1cf Mon Sep 17 00:00:00 2001 From: wangfei Date: Fri, 6 Feb 2015 17:30:52 +0800 Subject: [PATCH 2/5] unused changes --- .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 19cc2a3ca9c7f..e26d46bd7fedf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -21,8 +21,8 @@ import java.sql.{Date, Timestamp} import java.text.{DateFormat, SimpleDateFormat} import org.apache.spark.Logging -import org.apache.spark.sql.types._ import org.apache.spark.sql.catalyst.errors.TreeNodeException +import org.apache.spark.sql.types._ /** Cast the child expression to the target data type. */ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression with Logging { From 97bb90b889fdf91b123adbc72ed5fb260f37b9ae Mon Sep 17 00:00:00 2001 From: wangfei Date: Fri, 6 Feb 2015 18:04:03 +0800 Subject: [PATCH 3/5] test failure fix --- .../spark/sql/catalyst/analysis/Analyzer.scala | 16 ++++++++++++++++ .../spark/sql/catalyst/expressions/Cast.scala | 3 +-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ae1aee02c64a5..da20fc14480b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -69,6 +69,7 @@ class Analyzer(catalog: Catalog, typeCoercionRules ++ extendedRules : _*), Batch("Check Analysis", Once, + CheckCast :: CheckResolution :: CheckAggregation :: Nil: _*), @@ -76,6 +77,21 @@ class Analyzer(catalog: Catalog, EliminateAnalysisOperators) ) + /** + * Makes sure datatype cast is legitimate, if not throw an exception + */ + object CheckCast extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan.transform { + case q: LogicalPlan => + q transformExpressions { + case cast @ Cast(child, dataType) if cast.childrenResolved && !cast.resolved => + throw new TreeNodeException(q, + s"can not cast from ${child.dataType} to $dataType!") + case p => p + } + } + } + /** * Makes sure all attributes and logical plans have been resolved. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index e26d46bd7fedf..b1bc858478ee1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -99,8 +99,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w toField.nullable) } - case (from, to) => throw new TreeNodeException(this, s"can not cast from $from to $to!") - + case _ => false } } From bd8d9f88e069b0e260789e2900cd5393d63dd003 Mon Sep 17 00:00:00 2001 From: scwf Date: Fri, 6 Feb 2015 22:41:19 +0800 Subject: [PATCH 4/5] added test case --- .../spark/sql/hive/execution/HiveQuerySuite.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index bb73ff1ea7e43..4df70ccd07a44 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -27,6 +27,8 @@ import scala.util.Try import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.{SparkFiles, SparkException} +import org.apache.spark.sql.catalyst.errors.TreeNodeException +import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.Dsl._ @@ -59,13 +61,19 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { Locale.setDefault(originalLocale) } - test("SPARK-4908: concurent hive native commands") { + test("SPARK-4908: concurrent hive native commands") { (1 to 100).par.map { _ => sql("USE default") sql("SHOW TABLES") } } + test("SPARK-5649: added a rule to check datatypes cast") { + intercept[TreeNodeException[Cast]] { + sql("select cast(key as binary) from src").collect() + } + } + createQueryTest("! operator", """ |SELECT a FROM ( From db86dc47c27c31532889ed14611c4edfdc26fd3b Mon Sep 17 00:00:00 2001 From: wangfei Date: Wed, 11 Feb 2015 09:46:26 +0800 Subject: [PATCH 5/5] using AnalysisException --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 5 ++--- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 2 +- .../apache/spark/sql/hive/execution/HiveQuerySuite.scala | 6 ++---- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 73d227d9e9cb7..39e5ef0d0a2be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -84,9 +84,8 @@ class Analyzer(catalog: Catalog, def apply(plan: LogicalPlan): LogicalPlan = plan.transform { case q: LogicalPlan => q transformExpressions { - case cast @ Cast(child, dataType) if cast.childrenResolved && !cast.resolved => - throw new TreeNodeException(q, - s"can not cast from ${child.dataType} to $dataType!") + case cast @ Cast(child, dataType) if !cast.resolve(child.dataType, dataType) => + throw new AnalysisException(s"can not cast from ${child.dataType} to $dataType!") case p => p } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index b1bc858478ee1..5c0269aac831f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -51,7 +51,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w private[this] def resolvableNullability(from: Boolean, to: Boolean) = !from || to - private[this] def resolve(from: DataType, to: DataType): Boolean = { + private[sql] def resolve(from: DataType, to: DataType): Boolean = { (from, to) match { case (from, to) if from == to => true diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 956cdd1a8bb9f..8a670faae379b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -27,9 +27,7 @@ import scala.util.Try import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.{SparkFiles, SparkException} -import org.apache.spark.sql.catalyst.errors.TreeNodeException -import org.apache.spark.sql.catalyst.expressions.Cast -import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.{AnalysisException, DataFrame, Row} import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.Dsl._ import org.apache.spark.sql.hive._ @@ -69,7 +67,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { } test("SPARK-5649: added a rule to check datatypes cast") { - intercept[TreeNodeException[Cast]] { + intercept[AnalysisException] { sql("select cast(key as binary) from src").collect() } }