Commit f63b773

SPARK-20213: Fix more tests with nested SQL executions.
1 parent 90045cf commit f63b773
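
The changes share one pattern: commands and sinks that internally build and run their own Dataset (the ANALYZE commands, insert/save into a data source, CreateTempViewUsing, the console sink, TestHive test-table loading) can be called while an outer execution already holds the execution-id local property, which trips the "$EXECUTION_ID_KEY is already set" check in SQLExecution. Each such call site now wraps its internal action in SQLExecution.nested(sparkSession) { ... } to mark the inner execution as intentionally nested, and KafkaWriter drops its own withNewExecutionId wrapper instead. Below is a minimal sketch of the call-site pattern applied throughout the diffs; the command and its query field are illustrative, not taken from the commit:

  // Illustrative RunnableCommand showing the wrapping pattern used in this commit.
  case class ExampleCommand(query: LogicalPlan) extends RunnableCommand {
    override def run(sparkSession: SparkSession): Seq[Row] = {
      // The count() action would normally claim a fresh execution id; nested(...)
      // tells SQLExecution that running inside an outer execution is expected here.
      SQLExecution.nested(sparkSession) {
        Dataset.ofRows(sparkSession, query).count()
      }
      Seq.empty[Row]
    }
  }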

File tree: 10 files changed (+50 lines, -31 lines)


external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriter.scala

Lines changed: 4 additions & 6 deletions

@@ -86,12 +86,10 @@ private[kafka010] object KafkaWriter extends Logging {
       topic: Option[String] = None): Unit = {
     val schema = queryExecution.analyzed.output
     validateQuery(queryExecution, kafkaParameters, topic)
-    SQLExecution.withNewExecutionId(sparkSession, queryExecution) {
-      queryExecution.toRdd.foreachPartition { iter =>
-        val writeTask = new KafkaWriteTask(kafkaParameters, schema, topic)
-        Utils.tryWithSafeFinally(block = writeTask.execute(iter))(
-          finallyBlock = writeTask.close())
-      }
+    queryExecution.toRdd.foreachPartition { iter =>
+      val writeTask = new KafkaWriteTask(kafkaParameters, schema, topic)
+      Utils.tryWithSafeFinally(block = writeTask.execute(iter))(
+        finallyBlock = writeTask.close())
     }
   }
 }

sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala

Lines changed: 1 addition & 1 deletion

@@ -122,7 +122,7 @@ object SQLExecution extends Logging {
     // To avoid this warning, use nested { ... }
     if (!Option(sc.getLocalProperty(ALLOW_NESTED_EXECUTION)).exists(_.toBoolean)) {
       if (testing) {
-        throw new IllegalArgumentException(s"$EXECUTION_ID_KEY is already set")
+        throw new IllegalArgumentException(s"$EXECUTION_ID_KEY is already set: $oldExecutionId")
       } else {
         logWarning(s"$EXECUTION_ID_KEY is already set")
       }
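
The nested helper itself is not shown in this commit, but the check above suggests its shape: it should flag the current thread as explicitly allowing a nested execution (the ALLOW_NESTED_EXECUTION local property) while its body runs. A rough sketch under that assumption; the save/restore of the property is guessed here, not the committed implementation:

  // Hypothetical sketch of SQLExecution.nested, inferred from the check above.
  def nested[T](sparkSession: SparkSession)(body: => T): T = {
    val sc = sparkSession.sparkContext
    val previous = sc.getLocalProperty(ALLOW_NESTED_EXECUTION)  // remember the caller's setting
    sc.setLocalProperty(ALLOW_NESTED_EXECUTION, "true")         // permit a nested execution id
    try {
      body
    } finally {
      sc.setLocalProperty(ALLOW_NESTED_EXECUTION, previous)     // restore the previous value (may be null)
    }
  }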

sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala

Lines changed: 4 additions & 1 deletion

@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTableTyp
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.execution.SQLExecution


 /**
@@ -96,7 +97,9 @@ case class AnalyzeColumnCommand(
       attributesToAnalyze.map(ColumnStat.statExprs(_, ndvMaxErr))

     val namedExpressions = expressions.map(e => Alias(e, e.toString)())
-    val statsRow = Dataset.ofRows(sparkSession, Aggregate(Nil, namedExpressions, relation)).head()
+    val statsRow = SQLExecution.nested(sparkSession) {
+      Dataset.ofRows(sparkSession, Aggregate(Nil, namedExpressions, relation)).head()
+    }

     val rowCount = statsRow.getLong(0)
     val columnStats = attributesToAnalyze.zipWithIndex.map { case (attr, i) =>

sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala

Lines changed: 4 additions & 1 deletion

@@ -25,6 +25,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTable, CatalogTableType}
+import org.apache.spark.sql.execution.SQLExecution
 import org.apache.spark.sql.internal.SessionState


@@ -56,7 +57,9 @@ case class AnalyzeTableCommand(
     // 2. when total size is changed, `oldRowCount` becomes invalid.
     // This is to make sure that we only record the right statistics.
     if (!noscan) {
-      val newRowCount = sparkSession.table(tableIdentWithDB).count()
+      val newRowCount = SQLExecution.nested(sparkSession) {
+        sparkSession.table(tableIdentWithDB).count()
+      }
       if (newRowCount >= 0 && newRowCount != oldRowCount) {
         newStats = if (newStats.isDefined) {
           newStats.map(_.copy(rowCount = Some(BigInt(newRowCount))))

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoDataSourceCommand.scala

Lines changed: 13 additions & 8 deletions

@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.execution.SQLExecution
 import org.apache.spark.sql.execution.command.RunnableCommand
 import org.apache.spark.sql.sources.InsertableRelation

@@ -37,14 +38,18 @@ case class InsertIntoDataSourceCommand(

   override def run(sparkSession: SparkSession): Seq[Row] = {
     val relation = logicalRelation.relation.asInstanceOf[InsertableRelation]
-    val data = Dataset.ofRows(sparkSession, query)
-    // Apply the schema of the existing table to the new data.
-    val df = sparkSession.internalCreateDataFrame(data.queryExecution.toRdd, logicalRelation.schema)
-    relation.insert(df, overwrite)
-
-    // Re-cache all cached plans(including this relation itself, if it's cached) that refer to this
-    // data source relation.
-    sparkSession.sharedState.cacheManager.recacheByPlan(sparkSession, logicalRelation)
+    SQLExecution.nested(sparkSession) {
+      val data = Dataset.ofRows(sparkSession, query)
+
+      // Apply the schema of the existing table to the new data.
+      val df = sparkSession.internalCreateDataFrame(
+        data.queryExecution.toRdd, logicalRelation.schema)
+      relation.insert(df, overwrite)
+
+      // Re-cache all cached plans(including this relation itself, if it's cached) that refer to
+      // this data source relation.
+      sparkSession.sharedState.cacheManager.recacheByPlan(sparkSession, logicalRelation)
+    }

     Seq.empty[Row]
   }

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala

Lines changed: 8 additions & 6 deletions

@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources
 import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession}
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.execution.SQLExecution
 import org.apache.spark.sql.execution.command.RunnableCommand

 /**
@@ -41,12 +42,13 @@ case class SaveIntoDataSourceCommand(
   override protected def innerChildren: Seq[QueryPlan[_]] = Seq(query)

   override def run(sparkSession: SparkSession): Seq[Row] = {
-    DataSource(
-      sparkSession,
-      className = provider,
-      partitionColumns = partitionColumns,
-      options = options).write(mode, Dataset.ofRows(sparkSession, query))
-
+    SQLExecution.nested(sparkSession) {
+      DataSource(
+        sparkSession,
+        className = provider,
+        partitionColumns = partitionColumns,
+        options = options).write(mode, Dataset.ofRows(sparkSession, query))
+    }
     Seq.empty[Row]
   }
 }

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala

Lines changed: 4 additions & 2 deletions

@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.execution.SQLExecution
 import org.apache.spark.sql.execution.command.{DDLUtils, RunnableCommand}
 import org.apache.spark.sql.types._

@@ -89,8 +90,9 @@ case class CreateTempViewUsing(
       options = options)

     val catalog = sparkSession.sessionState.catalog
-    val viewDefinition = Dataset.ofRows(
-      sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan
+    val viewDefinition = SQLExecution.nested(sparkSession) {
+      Dataset.ofRows(sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan
+    }

     if (global) {
       catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace)

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/console.scala

Lines changed: 6 additions & 3 deletions

@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.streaming

 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{DataFrame, SQLContext}
+import org.apache.spark.sql.execution.SQLExecution
 import org.apache.spark.sql.sources.{DataSourceRegister, StreamSinkProvider}
 import org.apache.spark.sql.streaming.OutputMode

@@ -45,9 +46,11 @@ class ConsoleSink(options: Map[String, String]) extends Sink with Logging {
     println(batchIdStr)
     println("-------------------------------------------")
     // scalastyle:off println
-    data.sparkSession.createDataFrame(
-      data.sparkSession.sparkContext.parallelize(data.collect()), data.schema)
-      .show(numRowsToShow, isTruncated)
+    SQLExecution.nested(data.sparkSession) {
+      data.sparkSession.createDataFrame(
+        data.sparkSession.sparkContext.parallelize(data.collect()), data.schema)
+        .show(numRowsToShow, isTruncated)
+    }
   }
 }

sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala

Lines changed: 5 additions & 2 deletions

@@ -35,7 +35,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{SparkSession, SQLContext}
 import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.execution.QueryExecution
+import org.apache.spark.sql.execution.{QueryExecution, SQLExecution}
 import org.apache.spark.sql.execution.command.CacheTableCommand
 import org.apache.spark.sql.hive._
 import org.apache.spark.sql.hive.client.HiveClient
@@ -552,7 +552,10 @@ private[hive] class TestHiveQueryExecution(
       logical.collect { case UnresolvedRelation(tableIdent) => tableIdent.table }
     val referencedTestTables = referencedTables.filter(sparkSession.testTables.contains)
     logDebug(s"Query references test tables: ${referencedTestTables.mkString(", ")}")
-    referencedTestTables.foreach(sparkSession.loadTestTable)
+    // this lazy value may be computed inside another SQLExecution.withNewExecutionId block
+    SQLExecution.nested(sparkSession) {
+      referencedTestTables.foreach(sparkSession.loadTestTable)
+    }
     // Proceed with analysis.
     sparkSession.sessionState.analyzer.execute(logical)
   }

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala

Lines changed: 1 addition & 1 deletion

@@ -965,7 +965,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
   }

   test("sanity test for SPARK-6618") {
-    (1 to 100).par.map { i =>
+    (1 to 100).map { i =>
       val tableName = s"SPARK_6618_table_$i"
       sql(s"CREATE TABLE $tableName (col1 string)")
       sessionState.catalog.lookupRelation(TableIdentifier(tableName))
