Inject SQLContext into SparkLogicalPlan, removing SQLConf mixin from it.

concretevitamin · concretevitamin · commit ca5b8258ff99 · 2014-07-29T11:48:26.000-07:00
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -86,7 +86,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
    * @group userf
    */
   implicit def createSchemaRDD[A <: Product: TypeTag](rdd: RDD[A]) =
-    new SchemaRDD(this, SparkLogicalPlan(ExistingRdd.fromProductRdd(rdd)))
+    new SchemaRDD(this, SparkLogicalPlan(ExistingRdd.fromProductRdd(rdd))(self))
 
   /**
    * Loads a Parquet file, returning the result as a [[SchemaRDD]].
@@ -127,7 +127,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
    */
   @Experimental
   def jsonRDD(json: RDD[String], samplingRatio: Double): SchemaRDD =
-    new SchemaRDD(this, JsonRDD.inferSchema(json, samplingRatio))
+    new SchemaRDD(this, JsonRDD.inferSchema(self, json, samplingRatio))
 
   /**
    * :: Experimental ::
@@ -208,7 +208,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
       case inMem @ InMemoryRelation(_, _, e: ExistingRdd) =>
         inMem.cachedColumnBuffers.unpersist()
         catalog.unregisterTable(None, tableName)
-        catalog.registerTable(None, tableName, SparkLogicalPlan(e))
+        catalog.registerTable(None, tableName, SparkLogicalPlan(e)(self))
       case inMem: InMemoryRelation =>
         inMem.cachedColumnBuffers.unpersist()
         catalog.unregisterTable(None, tableName)
@@ -367,7 +367,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
         new GenericRow(map.values.toArray.asInstanceOf[Array[Any]]): Row
       }
     }
-    new SchemaRDD(this, SparkLogicalPlan(ExistingRdd(schema, rowRdd)))
+    new SchemaRDD(this, SparkLogicalPlan(ExistingRdd(schema, rowRdd))(self))
   }
 
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
@@ -430,7 +430,8 @@ class SchemaRDD(
    * @group schema
    */
   private def applySchema(rdd: RDD[Row]): SchemaRDD = {
-    new SchemaRDD(sqlContext, SparkLogicalPlan(ExistingRdd(queryExecution.analyzed.output, rdd)))
+    new SchemaRDD(sqlContext,
+      SparkLogicalPlan(ExistingRdd(queryExecution.analyzed.output, rdd))(sqlContext))
   }
 
   // =======================================================================
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
@@ -56,7 +56,7 @@ private[sql] trait SchemaRDDLike {
     // happen right away to let these side effects take place eagerly.
     case _: Command | _: InsertIntoTable | _: InsertIntoCreatedTable | _: WriteToFile =>
       queryExecution.toRdd
-      SparkLogicalPlan(queryExecution.executedPlan)
+      SparkLogicalPlan(queryExecution.executedPlan)(sqlContext)
     case _ =>
       baseLogicalPlan
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSQLContext.scala
@@ -92,7 +92,7 @@ class JavaSQLContext(val sqlContext: SQLContext) {
         new GenericRow(extractors.map(e => e.invoke(row)).toArray[Any]): ScalaRow
       }
     }
-    new JavaSchemaRDD(sqlContext, SparkLogicalPlan(ExistingRdd(schema, rowRdd)))
+    new JavaSchemaRDD(sqlContext, SparkLogicalPlan(ExistingRdd(schema, rowRdd))(sqlContext))
   }
 
   /**
@@ -120,7 +120,7 @@ class JavaSQLContext(val sqlContext: SQLContext) {
    * @group userf
    */
   def jsonRDD(json: JavaRDD[String]): JavaSchemaRDD =
-    new JavaSchemaRDD(sqlContext, JsonRDD.inferSchema(json, 1.0))
+    new JavaSchemaRDD(sqlContext, JsonRDD.inferSchema(sqlContext, json, 1.0))
 
   /**
    * Registers the given RDD as a temporary table in the catalog.  Temporary tables exist only
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.execution
 
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{Logging, Row, SQLConf}
+import org.apache.spark.sql.{Logging, Row, SQLContext}
 import org.apache.spark.sql.catalyst.trees
 import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
 import org.apache.spark.sql.catalyst.expressions.GenericRow
@@ -66,8 +66,8 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging {
  * linking.
  */
 @DeveloperApi
-case class SparkLogicalPlan(alreadyPlanned: SparkPlan)
-  extends LogicalPlan with MultiInstanceRelation with SQLConf {
+case class SparkLogicalPlan(alreadyPlanned: SparkPlan)(@transient sqlContext: SQLContext)
+  extends LogicalPlan with MultiInstanceRelation {
 
   def output = alreadyPlanned.output
   override def references = Set.empty
@@ -78,7 +78,7 @@ case class SparkLogicalPlan(alreadyPlanned: SparkPlan)
       alreadyPlanned match {
         case ExistingRdd(output, rdd) => ExistingRdd(output.map(_.newInstance), rdd)
         case _ => sys.error("Multiple instance of the same relation detected.")
-      }).asInstanceOf[this.type]
+      })(sqlContext).asInstanceOf[this.type]
   }
 
   @transient override lazy val statistics = Statistics(
@@ -89,7 +89,7 @@ case class SparkLogicalPlan(alreadyPlanned: SparkPlan)
       alreadyPlanned match {
         // TODO: Instead of returning a default value here, find a way to return a meaningful
         // size estimate for RDDs. See PR 1238 for more discussions.
-        case e: ExistingRdd if naiveVal == 1L => statsDefaultSizeInBytes
+        case e: ExistingRdd if naiveVal == 1L => sqlContext.statsDefaultSizeInBytes
         case _ => naiveVal
       }
     }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
@@ -28,27 +28,30 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.execution.{ExistingRdd, SparkLogicalPlan}
-import org.apache.spark.sql.Logging
+import org.apache.spark.sql.{SQLContext, Logging}
 
 private[sql] object JsonRDD extends Logging {
 
   private[sql] def inferSchema(
+      sqlContext: SQLContext,
       json: RDD[String],
       samplingRatio: Double = 1.0): LogicalPlan = {
     require(samplingRatio > 0, s"samplingRatio ($samplingRatio) should be greater than 0")
     val schemaData = if (samplingRatio > 0.99) json else json.sample(false, samplingRatio, 1)
     val allKeys = parseJson(schemaData).map(allKeysWithValueTypes).reduce(_ ++ _)
     val baseSchema = createSchema(allKeys)
 
-    createLogicalPlan(json, baseSchema)
+    createLogicalPlan(json, baseSchema, sqlContext)
   }
 
   private def createLogicalPlan(
       json: RDD[String],
-      baseSchema: StructType): LogicalPlan = {
+      baseSchema: StructType,
+      sqlContext: SQLContext): LogicalPlan = {
     val schema = nullTypeToStringType(baseSchema)
 
-    SparkLogicalPlan(ExistingRdd(asAttributes(schema), parseJson(json).map(asRow(_, schema))))
+    SparkLogicalPlan(
+      ExistingRdd(asAttributes(schema), parseJson(json).map(asRow(_, schema))))(sqlContext)
   }
 
   private def createSchema(allKeys: Set[(String, DataType)]): StructType = {

Original file line number	Diff line number	Diff line change
`@@ -430,7 +430,8 @@ class SchemaRDD(`
`430`	`430`	`* @group schema`
`431`	`431`	`*/`
`432`	`432`	`private def applySchema(rdd: RDD[Row]): SchemaRDD = {`
`433`		`- new SchemaRDD(sqlContext, SparkLogicalPlan(ExistingRdd(queryExecution.analyzed.output, rdd)))`
	`433`	`+ new SchemaRDD(sqlContext,`
	`434`	`+ SparkLogicalPlan(ExistingRdd(queryExecution.analyzed.output, rdd))(sqlContext))`
`434`	`435`	`}`
`435`	`436`
`436`	`437`	`// =======================================================================`
Original file line number	Diff line number	Diff line change
`@@ -56,7 +56,7 @@ private[sql] trait SchemaRDDLike {`
`56`	`56`	`// happen right away to let these side effects take place eagerly.`
`57`	`57`	`case _: Command \| _: InsertIntoTable \| _: InsertIntoCreatedTable \| _: WriteToFile =>`
`58`	`58`	`queryExecution.toRdd`
`59`		`- SparkLogicalPlan(queryExecution.executedPlan)`
	`59`	`+ SparkLogicalPlan(queryExecution.executedPlan)(sqlContext)`
`60`	`60`	`case _ =>`
`61`	`61`	`baseLogicalPlan`
`62`	`62`	`}`
Original file line number	Diff line number	Diff line change
`@@ -92,7 +92,7 @@ class JavaSQLContext(val sqlContext: SQLContext) {`
`92`	`92`	`new GenericRow(extractors.map(e => e.invoke(row)).toArray[Any]): ScalaRow`
`93`	`93`	`}`
`94`	`94`	`}`
`95`		`- new JavaSchemaRDD(sqlContext, SparkLogicalPlan(ExistingRdd(schema, rowRdd)))`
	`95`	`+ new JavaSchemaRDD(sqlContext, SparkLogicalPlan(ExistingRdd(schema, rowRdd))(sqlContext))`
`96`	`96`	`}`
`97`	`97`
`98`	`98`	`/**`
`@@ -120,7 +120,7 @@ class JavaSQLContext(val sqlContext: SQLContext) {`
`120`	`120`	`* @group userf`
`121`	`121`	`*/`
`122`	`122`	`def jsonRDD(json: JavaRDD[String]): JavaSchemaRDD =`
`123`		`- new JavaSchemaRDD(sqlContext, JsonRDD.inferSchema(json, 1.0))`
	`123`	`+ new JavaSchemaRDD(sqlContext, JsonRDD.inferSchema(sqlContext, json, 1.0))`
`124`	`124`
`125`	`125`	`/**`
`126`	`126`	`* Registers the given RDD as a temporary table in the catalog. Temporary tables exist only`