Commit e8a5d50

[SPARK-5638][SQL] Add a config flag to disable eager analysis of DataFrames
Author: Reynold Xin <[email protected]>

Closes apache#4408 from rxin/df-config-eager and squashes the following commits:

c0204cf [Reynold Xin] [SPARK-5638][SQL] Add a config flag to disable eager analysis of DataFrames.
1 parent 85ccee8 commit e8a5d50
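
As a quick reference, the flag introduced by this commit is an ordinary SQL configuration property, so it can be toggled at runtime. A minimal sketch, assuming a SQLContext named sqlContext is already in scope:

// Turn off eager analysis: DataFrame construction no longer runs the analyzer,
// so unresolved references surface only when a plan is actually executed.
sqlContext.setConf("spark.sql.dataframe.eagerAnalysis", "false")

// Eager analysis is the default; this restores the default behavior.
sqlContext.setConf("spark.sql.dataframe.eagerAnalysis", "true")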

File tree

3 files changed: +23 −4 lines

sql/core/src/main/scala/org/apache/spark/sql/DataFrameImpl.scala

Lines changed: 3 additions & 1 deletion

@@ -53,7 +53,9 @@ private[sql] class DataFrameImpl protected[sql](
   def this(sqlContext: SQLContext, logicalPlan: LogicalPlan) = {
     this(sqlContext, {
       val qe = sqlContext.executePlan(logicalPlan)
-      qe.analyzed // This should force analysis and throw errors if there are any
+      if (sqlContext.conf.dataFrameEagerAnalysis) {
+        qe.analyzed // This should force analysis and throw errors if there are any
+      }
       qe
     })
   }
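
With the flag turned off, the constructor above skips the qe.analyzed call, so building a DataFrame over an unresolvable plan no longer fails immediately. A rough sketch of the resulting behavior; the table name people and the bad column name are assumptions for illustration only:

sqlContext.setConf("spark.sql.dataframe.eagerAnalysis", "false")

// No error here: the logical plan is constructed but not analyzed yet.
val df = sqlContext.table("people").select("nonExistentColumn")

// The unresolved column is reported only once something forces analysis,
// for example when the query is executed.
df.collect()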

sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala

Lines changed: 6 additions & 0 deletions

@@ -52,6 +52,9 @@ private[spark] object SQLConf {
   // This is used to set the default data source
   val DEFAULT_DATA_SOURCE_NAME = "spark.sql.default.datasource"
 
+  // Whether to perform eager analysis on a DataFrame.
+  val DATAFRAME_EAGER_ANALYSIS = "spark.sql.dataframe.eagerAnalysis"
+
   object Deprecated {
     val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
   }

@@ -173,6 +176,9 @@ private[sql] class SQLConf extends Serializable {
   private[spark] def defaultDataSourceName: String =
     getConf(DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.parquet")
 
+  private[spark] def dataFrameEagerAnalysis: Boolean =
+    getConf(DATAFRAME_EAGER_ANALYSIS, "true").toBoolean
+
   /** ********************** SQLConf functionality methods ************ */
 
   /** Set Spark SQL configuration properties. */
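
The new accessor reads the flag as a string with a default of "true", so eager analysis stays on unless it is explicitly disabled. The equivalent read against a SQLContext looks like the sketch below (sqlContext is an assumed name):

// Mirrors SQLConf.dataFrameEagerAnalysis: a string-valued property parsed to
// Boolean, defaulting to true when the key has never been set.
val eager = sqlContext.getConf("spark.sql.dataframe.eagerAnalysis", "true").toBoolean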

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 14 additions & 3 deletions

@@ -17,19 +17,23 @@
 
 package org.apache.spark.sql
 
+import scala.language.postfixOps
+
 import org.apache.spark.sql.Dsl._
 import org.apache.spark.sql.types._
-
-/* Implicits */
+import org.apache.spark.sql.test.TestSQLContext
 import org.apache.spark.sql.test.TestSQLContext.logicalPlanToSparkQuery
 import org.apache.spark.sql.test.TestSQLContext.implicits._
 
-import scala.language.postfixOps
 
 class DataFrameSuite extends QueryTest {
   import org.apache.spark.sql.TestData._
 
   test("analysis error should be eagerly reported") {
+    val oldSetting = TestSQLContext.conf.dataFrameEagerAnalysis
+    // Eager analysis.
+    TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "true")
+
     intercept[Exception] { testData.select('nonExistentName) }
     intercept[Exception] {
       testData.groupBy('key).agg(Map("nonExistentName" -> "sum"))

@@ -40,6 +44,13 @@ class DataFrameSuite extends QueryTest {
     intercept[Exception] {
       testData.groupBy($"abcd").agg(Map("key" -> "sum"))
     }
+
+    // No more eager analysis once the flag is turned off
+    TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, "false")
+    testData.select('nonExistentName)
+
+    // Set the flag back to original value before this test.
+    TestSQLContext.setConf(SQLConf.DATAFRAME_EAGER_ANALYSIS, oldSetting.toString)
   }
 
   test("table scan") {
