Commit 13279e6
Refactor the logic of eagerly processing SET commands.
Parent: b14b83e

2 files changed, 23 additions and 24 deletions
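In Spark SQL, a SET command (for example `SET key=value`) records a configuration pair rather than producing rows, so it must take effect eagerly, before any results are requested; ordinary queries stay lazy until their results are extracted. This commit consolidates that eager handling into one helper shared by SQLContext and HiveContext. As an illustration only of the code path being refactored (the property name and table name here are hypothetical, not part of this commit):

    // Illustrative usage sketch; the key and table names are made up.
    val sqlContext = new org.apache.spark.sql.SQLContext(sparkContext)
    sqlContext.sql("SET spark.sql.shuffle.partitions=10") // side effect applied eagerly
    val rows = sqlContext.sql("SELECT * FROM records").collect() // evaluated lazily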

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 17 additions & 8 deletions
@@ -272,6 +272,22 @@ class SQLContext(@transient val sparkContext: SparkContext)
   protected abstract class QueryExecution {
     def logical: LogicalPlan
 
+    def eagerlyProcess(plan: LogicalPlan): RDD[Row] = plan match {
+      case SetCommand(key, value) =>
+        // Only this case needs to be executed eagerly. The other cases will
+        // be taken care of when the actual results are being extracted.
+        // In the case of HiveContext, sqlConf is overridden to also pass the
+        // pair into its HiveConf.
+        if (key.isDefined && value.isDefined) {
+          sqlConf.set(key.get, value.get)
+        }
+        // It doesn't matter what we return here, since this is only used
+        // to force the evaluation to happen eagerly. To query the results,
+        // one must use SchemaRDD operations to extract them.
+        emptyResult
+      case _ => executedPlan.execute()
+    }
+
     lazy val analyzed = analyzer(logical)
     lazy val optimizedPlan = optimizer(analyzed)
     // TODO: Don't just pick the first one...
@@ -281,14 +297,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
     /** Internal version of the RDD. Avoids copies and has no schema */
     lazy val toRdd: RDD[Row] = {
       logical match {
-        case SetCommand(key, value) =>
-          if (key.isDefined && value.isDefined) {
-            sqlConf.set(key.get, value.get)
-          }
-          // It doesn't matter what we return here, since toRdd is used
-          // to force the evaluation happen eagerly. To query the results,
-          // one must use SchemaRDD operations to extract them.
-          emptyResult
+        case s: SetCommand => eagerlyProcess(s)
         case _ => executedPlan.execute()
       }
     }
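The helper's write goes through `sqlConf`, which the comment notes HiveContext overrides so the same pair also lands in its HiveConf. Below is a minimal, self-contained sketch of that hook pattern, using simplified stand-in types rather than Spark's real classes (and a `setConf` method where Spark overrides the `sqlConf` member itself):

    import scala.collection.mutable

    // Stand-in for a SET command plan node; not Spark's SetCommand.
    case class SetCmd(key: Option[String], value: Option[String])

    class BaseContext {
      protected val conf = mutable.Map.empty[String, String]

      // Overridable hook, playing the role of sqlConf in the diff above.
      protected def setConf(key: String, value: String): Unit = conf(key) = value

      // Eagerly record the pair; the empty return value exists only to
      // force evaluation, mirroring emptyResult above.
      def eagerlyProcess(cmd: SetCmd): Seq[String] = {
        for (k <- cmd.key; v <- cmd.value) setConf(k, v)
        Seq.empty
      }
    }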

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala

Lines changed: 6 additions & 16 deletions
@@ -257,7 +257,6 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     optimizer(catalog.PreInsertionCasts(catalog.CreateTables(analyzed)))
 
   override lazy val toRdd: RDD[Row] = {
-
     def processCmd(cmd: String): RDD[Row] = {
       val output = runSqlHive(cmd)
       if (output.size == 0) {
@@ -268,21 +267,12 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
       }
     }
 
-    analyzed match {
-      case SetCommand(key, value) =>
-        // Record the set command inside SQLConf, as well as have Hive execute it.
-        if (key.isDefined && value.isDefined) {
-          sqlConf.set(key.get, value.get)
-          processCmd(s"SET $key=$value")
-        }
-        // Only the above case needs to be executed in Hive eagerly (i.e. now).
-        // The other cases will be taken care of when the actual results are
-        // being extracted.
-        emptyResult
-      case NativeCommand(cmd) =>
-        processCmd(cmd)
-      case _ =>
-        executedPlan.execute().map(_.copy())
+    logical match {
+      case s: SetCommand => eagerlyProcess(s)
+      case _ => analyzed match {
+        case NativeCommand(cmd) => processCmd(cmd)
+        case _ => executedPlan.execute().map(_.copy())
+      }
     }
   }
 
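With the dispatch shared in the base class, HiveContext's only remaining job for SET commands is to forward each pair to Hive as well. Continuing the hypothetical sketch from the SQLContext section above (it reuses `BaseContext` and the `mutable` import), a subclass overrides the hook instead of re-matching on the plan:

    // Hypothetical subclass mirroring HiveContext: record the pair locally
    // and also pass it into a Hive-side configuration.
    class HiveLikeContext extends BaseContext {
      private val hiveConf = mutable.Map.empty[String, String]

      override protected def setConf(key: String, value: String): Unit = {
        super.setConf(key, value) // the sqlConf analogue
        hiveConf(key) = value     // the HiveConf analogue
      }
    }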
