From 0ad343a7164a8af74c0f2db14a3df2351335a7bd Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Wed, 11 Jun 2014 15:14:24 -0700
Subject: [PATCH 1/8] Added physical plan for DDL and commands to ensure the
 "exactly once" semantics

---
 .../sql/catalyst/plans/logical/commands.scala | 18 +++--
 .../optimizer/FilterPushdownSuite.scala       |  2 +-
 .../org/apache/spark/sql/SQLContext.scala     | 36 +--------
 .../org/apache/spark/sql/SchemaRDDLike.scala  |  8 ++
 .../apache/spark/sql/execution/commands.scala | 79 ++++++++++++-------
 .../org/apache/spark/sql/SQLQuerySuite.scala  | 16 ++--
 .../apache/spark/sql/hive/HiveContext.scala   | 48 +++--------
 .../spark/sql/hive/HiveStrategies.scala       |  8 ++
 .../sql/hive/execution/hiveOperators.scala    | 32 +++++++-
 .../sql/hive/execution/HiveQuerySuite.scala   | 57 +++++++------
 10 files changed, 166 insertions(+), 138 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala
index d05c9652753e0..3299e86b85941 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.catalyst.plans.logical
 
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Attribute}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, BoundReference}
 import org.apache.spark.sql.catalyst.types.StringType
 
 /**
@@ -26,23 +26,25 @@ import org.apache.spark.sql.catalyst.types.StringType
  */
 abstract class Command extends LeafNode {
   self: Product =>
-  def output: Seq[Attribute] = Seq.empty  // TODO: SPARK-2081 should fix this
+  def output: Seq[Attribute] = Seq.empty
 }
 
 /**
  * Returned for commands supported by a given parser, but not catalyst.  In general these are DDL
  * commands that are passed directly to another system.
  */
-case class NativeCommand(cmd: String) extends Command
+case class NativeCommand(cmd: String) extends Command {
+  override def output =
+    Seq(BoundReference(0, AttributeReference("result", StringType, nullable = false)()))
+}
 
 /**
  * Commands of the form "SET (key) (= value)".
  */
 case class SetCommand(key: Option[String], value: Option[String]) extends Command {
   override def output = Seq(
-    AttributeReference("key", StringType, nullable = false)(),
-    AttributeReference("value", StringType, nullable = false)()
-  )
+    BoundReference(0, AttributeReference("key", StringType, nullable = false)()),
+    BoundReference(1, AttributeReference("value", StringType, nullable = false)()))
 }
 
 /**
@@ -50,11 +52,11 @@ case class SetCommand(key: Option[String], value: Option[String]) extends Comman
  * actually performing the execution.
  */
 case class ExplainCommand(plan: LogicalPlan) extends Command {
-  override def output = Seq(AttributeReference("plan", StringType, nullable = false)())
+  override def output =
+    Seq(BoundReference(0, AttributeReference("plan", StringType, nullable = false)()))
 }
 
 /**
  * Returned for the "CACHE TABLE tableName" and "UNCACHE TABLE tableName" command.
  */
 case class CacheCommand(tableName: String, doCache: Boolean) extends Command
-
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
index 0cada785b6630..1f67c80e54906 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
@@ -161,7 +161,7 @@ class FilterPushdownSuite extends OptimizerTest {
 
     comparePlans(optimized, correctAnswer)
   }
-  
+
   test("joins: push down left outer join #1") {
     val x = testRelation.subquery('x)
     val y = testRelation.subquery('y)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 264192ed1aa26..585bfcbd75ecb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.{ScalaReflection, dsl}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.catalyst.optimizer.Optimizer
-import org.apache.spark.sql.catalyst.plans.logical.{SetCommand, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 
 import org.apache.spark.sql.columnar.InMemoryColumnarTableScan
@@ -147,14 +147,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
    *
    * @group userf
    */
-  def sql(sqlText: String): SchemaRDD = {
-    val result = new SchemaRDD(this, parseSql(sqlText))
-    // We force query optimization to happen right away instead of letting it happen lazily like
-    // when using the query DSL.  This is so DDL commands behave as expected.  This is only
-    // generates the RDD lineage for DML queries, but do not perform any execution.
-    result.queryExecution.toRdd
-    result
-  }
+  def sql(sqlText: String): SchemaRDD = new SchemaRDD(this, parseSql(sqlText))
 
   /** Returns the specified table as a SchemaRDD */
   def table(tableName: String): SchemaRDD =
@@ -280,22 +273,6 @@ class SQLContext(@transient val sparkContext: SparkContext)
   protected abstract class QueryExecution {
     def logical: LogicalPlan
 
-    def eagerlyProcess(plan: LogicalPlan): RDD[Row] = plan match {
-      case SetCommand(key, value) =>
-        // Only this case needs to be executed eagerly. The other cases will
-        // be taken care of when the actual results are being extracted.
-        // In the case of HiveContext, sqlConf is overridden to also pass the
-        // pair into its HiveConf.
-        if (key.isDefined && value.isDefined) {
-          set(key.get, value.get)
-        }
-        // It doesn't matter what we return here, since this is only used
-        // to force the evaluation to happen eagerly.  To query the results,
-        // one must use SchemaRDD operations to extract them.
-        emptyResult
-      case _ => executedPlan.execute()
-    }
-
     lazy val analyzed = analyzer(logical)
     lazy val optimizedPlan = optimizer(analyzed)
     // TODO: Don't just pick the first one...
@@ -303,12 +280,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
     lazy val executedPlan: SparkPlan = prepareForExecution(sparkPlan)
 
     /** Internal version of the RDD. Avoids copies and has no schema */
-    lazy val toRdd: RDD[Row] = {
-      logical match {
-        case s: SetCommand => eagerlyProcess(s)
-        case _ => executedPlan.execute()
-      }
-    }
+    lazy val toRdd: RDD[Row] = executedPlan.execute()
 
     protected def stringOrError[A](f: => A): String =
       try f.toString catch { case e: Throwable => e.toString }
@@ -330,7 +302,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
    * TODO: We only support primitive types, add support for nested types.
    */
   private[sql] def inferSchema(rdd: RDD[Map[String, _]]): SchemaRDD = {
-    val schema = rdd.first.map { case (fieldName, obj) =>
+    val schema = rdd.first().map { case (fieldName, obj) =>
       val dataType = obj.getClass match {
         case c: Class[_] if c == classOf[java.lang.String] => StringType
         case c: Class[_] if c == classOf[java.lang.Integer] => IntegerType
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
index 3a895e15a4508..e4cbb037709ef 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
@@ -50,6 +50,14 @@ private[sql] trait SchemaRDDLike {
   @DeveloperApi
   lazy val queryExecution = sqlContext.executePlan(logicalPlan)
 
+  logicalPlan match {
+    // For various commands (like DDL) and queries with side effects, we force query optimization to
+    // happen right away to let these side effects take place eagerly.
+    case _: Command | _: InsertIntoTable | _: InsertIntoCreatedTable | _: WriteToFile =>
+      queryExecution.toRdd
+    case _ =>
+  }
+
   override def toString =
     s"""${super.toString}
        |== Query Plan ==
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
index be26d19e66862..67830d422a52f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -22,45 +22,69 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{SQLContext, Row}
 import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
 
+trait PhysicalCommand {
+  /**
+   * A concrete command should override this lazy field to wrap up any side effects caused by the
+   * command or any other computation that should be evaluated exactly once. The value of this field
+   * can be used as the contents of the corresponding RDD generated from the physical plan of this
+   * command.
+   *
+   * The `execute()` method of all the physical command classes should reference `sideEffect` so
+   * that the command can be executed eagerly right after the command query is created.
+   */
+  protected[sql] lazy val sideEffectResult: Seq[Any] = Seq.empty[Any]
+}
+
 /**
  * :: DeveloperApi ::
  */
 @DeveloperApi
-case class SetCommandPhysical(key: Option[String], value: Option[String], output: Seq[Attribute])
-                             (@transient context: SQLContext) extends LeafNode {
-  def execute(): RDD[Row] = (key, value) match {
-    // Set value for key k; the action itself would
-    // have been performed in QueryExecution eagerly.
-    case (Some(k), Some(v)) => context.emptyResult
+case class SetCommandPhysical(
+    key: Option[String], value: Option[String], output: Seq[Attribute])(
+    @transient context: SQLContext)
+  extends LeafNode with PhysicalCommand {
+
+  override protected[sql] lazy val sideEffectResult: Seq[(String, String)] = (key, value) match {
+    // Set value for key k.
+    case (Some(k), Some(v)) =>
+      context.set(k, v)
+      Array.empty[(String, String)]
+
     // Query the value bound to key k.
-    case (Some(k), None) =>
-      val resultString = context.getOption(k) match {
-        case Some(v) => s"$k=$v"
-        case None => s"$k is undefined"
-      }
-      context.sparkContext.parallelize(Seq(new GenericRow(Array[Any](resultString))), 1)
+    case (Some(k), _) =>
+      Array(k -> context.getOption(k).getOrElse("<undefined>"))
+
     // Query all key-value pairs that are set in the SQLConf of the context.
     case (None, None) =>
-      val pairs = context.getAll
-      val rows = pairs.map { case (k, v) =>
-        new GenericRow(Array[Any](s"$k=$v"))
-      }.toSeq
-      // Assume config parameters can fit into one split (machine) ;)
-      context.sparkContext.parallelize(rows, 1)
-    // The only other case is invalid semantics and is impossible.
-    case _ => context.emptyResult
+      context.getAll
+
+    case _ =>
+      throw new IllegalArgumentException()
   }
+
+  def execute(): RDD[Row] = {
+    val rows = sideEffectResult.map { case (k, v) => new GenericRow(Array[Any](k, v)) }
+    context.sparkContext.parallelize(rows, 1)
+  }
+
+  override def otherCopyArgs = context :: Nil
 }
 
 /**
  * :: DeveloperApi ::
  */
 @DeveloperApi
-case class ExplainCommandPhysical(child: SparkPlan, output: Seq[Attribute])
-                                 (@transient context: SQLContext) extends UnaryNode {
+case class ExplainCommandPhysical(
+    child: SparkPlan, output: Seq[Attribute])(
+    @transient context: SQLContext)
+  extends UnaryNode with PhysicalCommand {
+
+  // Actually "EXPLAIN" command doesn't cause any side effect.
+  override protected[sql] lazy val sideEffectResult: Seq[String] = child.toString.split("\n")
+
   def execute(): RDD[Row] = {
-    val planString = new GenericRow(Array[Any](child.toString))
-    context.sparkContext.parallelize(Seq(planString))
+    val explanation = sideEffectResult.mkString("\n")
+    context.sparkContext.parallelize(Seq(new GenericRow(Array[Any](explanation))), 1)
   }
 
   override def otherCopyArgs = context :: Nil
@@ -71,18 +95,19 @@ case class ExplainCommandPhysical(child: SparkPlan, output: Seq[Attribute])
  */
 @DeveloperApi
 case class CacheCommandPhysical(tableName: String, doCache: Boolean)(@transient context: SQLContext)
-  extends LeafNode {
+  extends LeafNode with PhysicalCommand {
 
-  lazy val commandSideEffect = {
+  override protected[sql] lazy val sideEffectResult = {
     if (doCache) {
       context.cacheTable(tableName)
     } else {
       context.uncacheTable(tableName)
     }
+    Seq.empty[Any]
   }
 
   override def execute(): RDD[Row] = {
-    commandSideEffect
+    sideEffectResult
     context.emptyResult
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index c1fc99f077431..e9360b0fc7910 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -141,7 +141,7 @@ class SQLQuerySuite extends QueryTest {
       sql("SELECT AVG(a),b FROM largeAndSmallInts group by b"),
       Seq((2147483645.0,1),(2.0,2)))
   }
-  
+
   test("count") {
     checkAnswer(
       sql("SELECT COUNT(*) FROM testData2"),
@@ -332,7 +332,7 @@ class SQLQuerySuite extends QueryTest {
         (3, "C"),
         (4, "D")))
   }
-  
+
   test("system function upper()") {
     checkAnswer(
       sql("SELECT n,UPPER(l) FROM lowerCaseData"),
@@ -349,7 +349,7 @@ class SQLQuerySuite extends QueryTest {
         (2, "ABC"),
         (3, null)))
   }
-    
+
   test("system function lower()") {
     checkAnswer(
       sql("SELECT N,LOWER(L) FROM upperCaseData"),
@@ -382,25 +382,25 @@ class SQLQuerySuite extends QueryTest {
     sql(s"SET $testKey=$testVal")
     checkAnswer(
       sql("SET"),
-      Seq(Seq(s"$testKey=$testVal"))
+      Seq(Seq(testKey, testVal))
     )
 
     sql(s"SET ${testKey + testKey}=${testVal + testVal}")
     checkAnswer(
       sql("set"),
       Seq(
-        Seq(s"$testKey=$testVal"),
-        Seq(s"${testKey + testKey}=${testVal + testVal}"))
+        Seq(testKey, testVal),
+        Seq(testKey + testKey, testVal + testVal))
     )
 
     // "set key"
     checkAnswer(
       sql(s"SET $testKey"),
-      Seq(Seq(s"$testKey=$testVal"))
+      Seq(Seq(testKey, testVal))
     )
     checkAnswer(
       sql(s"SET $nonexistentKey"),
-      Seq(Seq(s"$nonexistentKey is undefined"))
+      Seq(Seq(nonexistentKey, "<undefined>"))
     )
     clear()
   }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 64978215542ec..b3c87513a59c0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -34,7 +34,6 @@ import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.catalyst.analysis.{Analyzer, OverrideCatalog}
-import org.apache.spark.sql.catalyst.expressions.GenericRow
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.execution._
@@ -71,14 +70,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   /**
    * Executes a query expressed in HiveQL using Spark, returning the result as a SchemaRDD.
    */
-  def hiveql(hqlQuery: String): SchemaRDD = {
-    val result = new SchemaRDD(this, HiveQl.parseSql(hqlQuery))
-    // We force query optimization to happen right away instead of letting it happen lazily like
-    // when using the query DSL.  This is so DDL commands behave as expected.  This is only
-    // generates the RDD lineage for DML queries, but does not perform any execution.
-    result.queryExecution.toRdd
-    result
-  }
+  def hiveql(hqlQuery: String): SchemaRDD = new SchemaRDD(this, HiveQl.parseSql(hqlQuery))
 
   /** An alias for `hiveql`. */
   def hql(hqlQuery: String): SchemaRDD = hiveql(hqlQuery)
@@ -164,7 +156,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   /**
    * Runs the specified SQL query using Hive.
    */
-  protected def runSqlHive(sql: String): Seq[String] = {
+  protected[sql] def runSqlHive(sql: String): Seq[String] = {
     val maxResults = 100000
     val results = runHive(sql, 100000)
     // It is very confusing when you only get back some of the results...
@@ -228,6 +220,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
 
     override val strategies: Seq[Strategy] = Seq(
       CommandStrategy(self),
+      HiveCommandStrategy(self),
       TakeOrdered,
       ParquetOperations,
       HiveTableScans,
@@ -251,25 +244,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
     override lazy val optimizedPlan =
       optimizer(catalog.PreInsertionCasts(catalog.CreateTables(analyzed)))
 
-    override lazy val toRdd: RDD[Row] = {
-      def processCmd(cmd: String): RDD[Row] = {
-        val output = runSqlHive(cmd)
-        if (output.size == 0) {
-          emptyResult
-        } else {
-          val asRows = output.map(r => new GenericRow(r.split("\t").asInstanceOf[Array[Any]]))
-          sparkContext.parallelize(asRows, 1)
-        }
-      }
-
-      logical match {
-        case s: SetCommand => eagerlyProcess(s)
-        case _ => analyzed match {
-          case NativeCommand(cmd) => processCmd(cmd)
-          case _ => executedPlan.execute().map(_.copy())
-        }
-      }
-    }
+    override lazy val toRdd: RDD[Row] = executedPlan.execute().map(_.copy())
 
     protected val primitiveTypes =
       Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType,
@@ -297,7 +272,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
         struct.zip(fields).map {
           case (v, t) => s""""${t.name}":${toHiveStructString(v, t.dataType)}"""
         }.mkString("{", ",", "}")
-      case (seq: Seq[_], ArrayType(typ))=>
+      case (seq: Seq[_], ArrayType(typ)) =>
         seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]")
       case (map: Map[_,_], MapType(kType, vType)) =>
         map.map {
@@ -313,10 +288,11 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
      * Returns the result as a hive compatible sequence of strings.  For native commands, the
      * execution is simply passed back to Hive.
      */
-    def stringResult(): Seq[String] = analyzed match {
-      case NativeCommand(cmd) => runSqlHive(cmd)
-      case ExplainCommand(plan) => executePlan(plan).toString.split("\n")
-      case query =>
+    def stringResult(): Seq[String] = executedPlan match {
+      case command: PhysicalCommand =>
+        command.sideEffectResult.map(_.toString)
+
+      case other =>
         val result: Seq[Seq[Any]] = toRdd.collect().toSeq
         // We need the types so we can output struct field names
         val types = analyzed.output.map(_.dataType)
@@ -327,8 +303,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
 
     override def simpleString: String =
       logical match {
-        case _: NativeCommand => "<Executed by Hive>"
-        case _: SetCommand => "<Set Command: Executed by Hive, and noted by SQLContext>"
+        case _: NativeCommand => "<Native command: executed by Hive>"
+        case _: SetCommand => "<SET command: executed by Hive, and noted by SQLContext>"
         case _ => executedPlan.toString
       }
   }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
index 8b51957162e04..e0ea7826fa70d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -75,4 +75,12 @@ private[hive] trait HiveStrategies {
         Nil
     }
   }
+
+  case class HiveCommandStrategy(context: HiveContext) extends Strategy {
+    def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
+      case logical.NativeCommand(sql) =>
+        NativeCommandPhysical(sql, plan.output)(context) :: Nil
+      case _ => Nil
+    }
+  }
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
index 29b4b9b006e45..e5fbd1c6f15f5 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
@@ -32,14 +32,15 @@ import org.apache.hadoop.hive.serde2.{ColumnProjectionUtils, Serializer}
 import org.apache.hadoop.io.Writable
 import org.apache.hadoop.mapred._
 
+import org.apache.spark
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.types.{BooleanType, DataType}
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.hive._
-import org.apache.spark.{TaskContext, SparkException}
 import org.apache.spark.util.MutablePair
+import org.apache.spark.{TaskContext, SparkException}
 
 /* Implicits */
 import scala.collection.JavaConversions._
@@ -57,7 +58,7 @@ case class HiveTableScan(
     attributes: Seq[Attribute],
     relation: MetastoreRelation,
     partitionPruningPred: Option[Expression])(
-    @transient val sc: HiveContext)
+    @transient val context: HiveContext)
   extends LeafNode
   with HiveInspectors {
 
@@ -75,7 +76,7 @@ case class HiveTableScan(
   }
 
   @transient
-  val hadoopReader = new HadoopTableReader(relation.tableDesc, sc)
+  val hadoopReader = new HadoopTableReader(relation.tableDesc, context)
 
   /**
    * The hive object inspector for this table, which can be used to extract values from the
@@ -156,7 +157,7 @@ case class HiveTableScan(
     hiveConf.set(serdeConstants.LIST_COLUMNS, columnInternalNames)
   }
 
-  addColumnMetadataToConf(sc.hiveconf)
+  addColumnMetadataToConf(context.hiveconf)
 
   @transient
   def inputRdd = if (!relation.hiveQlTable.isPartitioned) {
@@ -428,3 +429,26 @@ case class InsertIntoHiveTable(
     sc.sparkContext.makeRDD(Nil, 1)
   }
 }
+
+/**
+ * :: DeveloperApi ::
+ */
+@DeveloperApi
+case class NativeCommandPhysical(
+    sql: String, output: Seq[Attribute])(
+    @transient context: HiveContext)
+  extends LeafNode with PhysicalCommand {
+
+  override protected[sql] lazy val sideEffectResult: Seq[String] = context.runSqlHive(sql)
+
+  override def execute(): RDD[spark.sql.Row] = {
+    if (sideEffectResult.size == 0) {
+      context.emptyResult
+    } else {
+      val rows = sideEffectResult.map(r => new GenericRow(Array[Any](r)))
+      context.sparkContext.parallelize(rows, 1)
+    }
+  }
+
+  override def otherCopyArgs = context :: Nil
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 6c239b02ed09a..1c0b86e9a90a3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -18,8 +18,9 @@
 package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.Row
-import org.apache.spark.sql.hive.test.TestHive._
+import org.apache.spark.sql.catalyst.plans.logical.ExplainCommand
 import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.hive.test.TestHive._
 
 /**
  * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
@@ -166,7 +167,7 @@ class HiveQuerySuite extends HiveComparisonTest {
     hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
     val rdd = hql("explain select key, count(value) from src group by key")
     assert(rdd.collect().size == 1)
-    assert(rdd.toString.contains("ExplainCommand"))
+    assert(rdd.toString.contains(ExplainCommand.getClass.getSimpleName))
     assert(rdd.filter(row => row.toString.contains("ExplainCommand")).collect().size == 0,
       "actual contents of the result should be the plans of the query to be explained")
     TestHive.reset()
@@ -195,9 +196,11 @@ class HiveQuerySuite extends HiveComparisonTest {
   test("SET commands semantics for a HiveContext") {
     // Adapted from its SQL counterpart.
     val testKey = "spark.sql.key.usedfortestonly"
-    var testVal = "test.val.0"
+    val testVal = "test.val.0"
     val nonexistentKey = "nonexistent"
-    def fromRows(row: Array[Row]): Array[String] = row.map(_.getString(0))
+    def rowsToPairs(rows: Array[Row]) = rows.map { case Row(key: String, value: String) =>
+      key -> value
+    }
 
     clear()
 
@@ -206,41 +209,51 @@ class HiveQuerySuite extends HiveComparisonTest {
 
     // "set key=val"
     hql(s"SET $testKey=$testVal")
-    assert(fromRows(hql("SET").collect()) sameElements Array(s"$testKey=$testVal"))
     assert(hiveconf.get(testKey, "") == testVal)
+    assertResult(Array(testKey -> testVal)) {
+      rowsToPairs(hql("SET").collect())
+    }
 
     hql(s"SET ${testKey + testKey}=${testVal + testVal}")
-    assert(fromRows(hql("SET").collect()) sameElements
-      Array(
-        s"$testKey=$testVal",
-        s"${testKey + testKey}=${testVal + testVal}"))
     assert(hiveconf.get(testKey + testKey, "") == testVal + testVal)
+    assertResult(Array(testKey -> testVal, (testKey + testKey) -> (testVal + testVal))) {
+      rowsToPairs(hql("SET").collect())
+    }
 
     // "set key"
-    assert(fromRows(hql(s"SET $testKey").collect()) sameElements
-      Array(s"$testKey=$testVal"))
-    assert(fromRows(hql(s"SET $nonexistentKey").collect()) sameElements
-      Array(s"$nonexistentKey is undefined"))
+    assertResult(Array(testKey -> testVal)) {
+      rowsToPairs(hql(s"SET $testKey").collect())
+    }
+
+    assertResult(Array(testKey -> "<undefined>")) {
+      rowsToPairs(hql(s"SET $nonexistentKey").collect())
+    }
 
     // Assert that sql() should have the same effects as hql() by repeating the above using sql().
     clear()
     assert(sql("set").collect().size == 0)
 
     sql(s"SET $testKey=$testVal")
-    assert(fromRows(sql("SET").collect()) sameElements Array(s"$testKey=$testVal"))
     assert(hiveconf.get(testKey, "") == testVal)
+    assertResult(Array(testKey -> testVal)) {
+      rowsToPairs(sql("SET").collect())
+    }
 
     sql(s"SET ${testKey + testKey}=${testVal + testVal}")
-    assert(fromRows(sql("SET").collect()) sameElements
-      Array(
-        s"$testKey=$testVal",
-        s"${testKey + testKey}=${testVal + testVal}"))
     assert(hiveconf.get(testKey + testKey, "") == testVal + testVal)
+    assertResult(Array(testKey -> testVal, (testKey + testKey) -> (testVal + testVal))) {
+      rowsToPairs(sql("SET").collect())
+    }
 
-    assert(fromRows(sql(s"SET $testKey").collect()) sameElements
-      Array(s"$testKey=$testVal"))
-    assert(fromRows(sql(s"SET $nonexistentKey").collect()) sameElements
-      Array(s"$nonexistentKey is undefined"))
+    assertResult(Array(testKey -> testVal)) {
+      rowsToPairs(sql(s"SET $testKey").collect())
+    }
+
+    assertResult(Array(testKey -> "<undefined>")) {
+      rowsToPairs(sql(s"SET $nonexistentKey").collect())
+    }
+
+    clear()
   }
 
   // Put tests that depend on specific Hive settings before these last two test,

From 74789c1197e7ea936b25714f2b8ce131b96ef62d Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Thu, 12 Jun 2014 12:33:14 -0700
Subject: [PATCH 2/8] Fixed failing test cases

---
 .../apache/spark/sql/execution/commands.scala |  2 +-
 .../execution/HiveCompatibilitySuite.scala    |  9 ++++++---
 .../sql/hive/execution/HiveQuerySuite.scala   | 20 ++++++++++++-------
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
index 67830d422a52f..da28b4a879056 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -80,7 +80,7 @@ case class ExplainCommandPhysical(
   extends UnaryNode with PhysicalCommand {
 
   // Actually "EXPLAIN" command doesn't cause any side effect.
-  override protected[sql] lazy val sideEffectResult: Seq[String] = child.toString.split("\n")
+  override protected[sql] lazy val sideEffectResult: Seq[String] = this.toString.split("\n")
 
   def execute(): RDD[Row] = {
     val explanation = sideEffectResult.mkString("\n")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 3581617c269a6..ee194dbcb77b2 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -172,7 +172,12 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "case_sensitivity",
 
     // Flaky test, Hive sometimes returns different set of 10 rows.
-    "lateral_view_outer"
+    "lateral_view_outer",
+
+    // After stop taking the `stringOrError` route, exceptions are thrown from these cases.
+    // See SPARK-2129 for details.
+    "join_view",
+    "mergejoins_mixed"
   )
 
   /**
@@ -476,7 +481,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "join_reorder3",
     "join_reorder4",
     "join_star",
-    "join_view",
     "lateral_view",
     "lateral_view_cp",
     "lateral_view_ppd",
@@ -507,7 +511,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "merge1",
     "merge2",
     "mergejoins",
-    "mergejoins_mixed",
     "multigroupby_singlemr",
     "multi_insert_gby",
     "multi_insert_gby3",
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 1c0b86e9a90a3..ca06c5c25800b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -21,6 +21,7 @@ import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.plans.logical.ExplainCommand
 import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.hive.test.TestHive._
+import org.apache.spark.sql.execution.ExplainCommandPhysical
 
 /**
  * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
@@ -165,11 +166,16 @@ class HiveQuerySuite extends HiveComparisonTest {
 
   test("SPARK-1704: Explain commands as a SchemaRDD") {
     hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
+
     val rdd = hql("explain select key, count(value) from src group by key")
-    assert(rdd.collect().size == 1)
-    assert(rdd.toString.contains(ExplainCommand.getClass.getSimpleName))
-    assert(rdd.filter(row => row.toString.contains("ExplainCommand")).collect().size == 0,
-      "actual contents of the result should be the plans of the query to be explained")
+    val explanation = rdd.select('plan).collect().map {
+      case Row(plan: String) => plan
+    }
+    assert(explanation.size == 1)
+
+    val explainCommandClassName = classOf[ExplainCommandPhysical].getSimpleName.stripSuffix("$")
+    assert(explanation.head.contains(explainCommandClassName))
+
     TestHive.reset()
   }
 
@@ -225,13 +231,13 @@ class HiveQuerySuite extends HiveComparisonTest {
       rowsToPairs(hql(s"SET $testKey").collect())
     }
 
-    assertResult(Array(testKey -> "<undefined>")) {
+    assertResult(Array(nonexistentKey -> "<undefined>")) {
       rowsToPairs(hql(s"SET $nonexistentKey").collect())
     }
 
     // Assert that sql() should have the same effects as hql() by repeating the above using sql().
     clear()
-    assert(sql("set").collect().size == 0)
+    assert(sql("SET").collect().size == 0)
 
     sql(s"SET $testKey=$testVal")
     assert(hiveconf.get(testKey, "") == testVal)
@@ -249,7 +255,7 @@ class HiveQuerySuite extends HiveComparisonTest {
       rowsToPairs(sql(s"SET $testKey").collect())
     }
 
-    assertResult(Array(testKey -> "<undefined>")) {
+    assertResult(Array(nonexistentKey -> "<undefined>")) {
       rowsToPairs(sql(s"SET $nonexistentKey").collect())
     }
 

From cc64f32384340a8adfcf713e4fd2c91ae2e600e6 Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Thu, 12 Jun 2014 16:57:09 -0700
Subject: [PATCH 3/8] Renamed physical plan classes for DDL/commands

---
 .../spark/sql/execution/SparkStrategies.scala  | 11 +++++------
 .../apache/spark/sql/execution/commands.scala  | 18 +++++++++---------
 .../apache/spark/sql/hive/HiveContext.scala    |  7 ++++---
 .../apache/spark/sql/hive/HiveStrategies.scala |  2 +-
 .../sql/hive/execution/hiveOperators.scala     |  4 ++--
 .../sql/hive/execution/HiveQuerySuite.scala    |  6 ++----
 6 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index f2f95dfe27e69..f92dd18320ded 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.execution
 
-import org.apache.spark.sql.{SQLConf, SQLContext, execution}
+import org.apache.spark.sql.{SQLContext, execution}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.planning._
 import org.apache.spark.sql.catalyst.plans._
@@ -156,7 +156,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
         InsertIntoParquetTable(relation, planLater(child), overwrite=true)(sparkContext) :: Nil
       case logical.InsertIntoTable(table: ParquetRelation, partition, child, overwrite) =>
         InsertIntoParquetTable(table, planLater(child), overwrite)(sparkContext) :: Nil
-      case PhysicalOperation(projectList, filters: Seq[Expression], relation: ParquetRelation) => {
+      case PhysicalOperation(projectList, filters: Seq[Expression], relation: ParquetRelation) =>
         val prunePushedDownFilters =
           if (sparkContext.conf.getBoolean(ParquetFilters.PARQUET_FILTER_PUSHDOWN_ENABLED, true)) {
             (filters: Seq[Expression]) => {
@@ -185,7 +185,6 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
           filters,
           prunePushedDownFilters,
           ParquetTableScan(_, relation, filters)(sparkContext)) :: Nil
-      }
 
       case _ => Nil
     }
@@ -237,12 +236,12 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
   case class CommandStrategy(context: SQLContext) extends Strategy {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
       case logical.SetCommand(key, value) =>
-        Seq(execution.SetCommandPhysical(key, value, plan.output)(context))
+        Seq(execution.SetCommand(key, value, plan.output)(context))
       case logical.ExplainCommand(child) =>
         val executedPlan = context.executePlan(child).executedPlan
-        Seq(execution.ExplainCommandPhysical(executedPlan, plan.output)(context))
+        Seq(execution.ExplainCommand(executedPlan, plan.output)(context))
       case logical.CacheCommand(tableName, cache) =>
-        Seq(execution.CacheCommandPhysical(tableName, cache)(context))
+        Seq(execution.CacheCommand(tableName, cache)(context))
       case _ => Nil
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
index da28b4a879056..e9efe28723745 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -22,15 +22,15 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{SQLContext, Row}
 import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
 
-trait PhysicalCommand {
+trait Command {
   /**
    * A concrete command should override this lazy field to wrap up any side effects caused by the
    * command or any other computation that should be evaluated exactly once. The value of this field
    * can be used as the contents of the corresponding RDD generated from the physical plan of this
    * command.
    *
-   * The `execute()` method of all the physical command classes should reference `sideEffect` so
-   * that the command can be executed eagerly right after the command query is created.
+   * The `execute()` method of all the physical command classes should reference `sideEffectResult`
+   * so that the command can be executed eagerly right after the command query is created.
    */
   protected[sql] lazy val sideEffectResult: Seq[Any] = Seq.empty[Any]
 }
@@ -39,10 +39,10 @@ trait PhysicalCommand {
  * :: DeveloperApi ::
  */
 @DeveloperApi
-case class SetCommandPhysical(
+case class SetCommand(
     key: Option[String], value: Option[String], output: Seq[Attribute])(
     @transient context: SQLContext)
-  extends LeafNode with PhysicalCommand {
+  extends LeafNode with Command {
 
   override protected[sql] lazy val sideEffectResult: Seq[(String, String)] = (key, value) match {
     // Set value for key k.
@@ -74,10 +74,10 @@ case class SetCommandPhysical(
  * :: DeveloperApi ::
  */
 @DeveloperApi
-case class ExplainCommandPhysical(
+case class ExplainCommand(
     child: SparkPlan, output: Seq[Attribute])(
     @transient context: SQLContext)
-  extends UnaryNode with PhysicalCommand {
+  extends UnaryNode with Command {
 
   // Actually "EXPLAIN" command doesn't cause any side effect.
   override protected[sql] lazy val sideEffectResult: Seq[String] = this.toString.split("\n")
@@ -94,8 +94,8 @@ case class ExplainCommandPhysical(
  * :: DeveloperApi ::
  */
 @DeveloperApi
-case class CacheCommandPhysical(tableName: String, doCache: Boolean)(@transient context: SQLContext)
-  extends LeafNode with PhysicalCommand {
+case class CacheCommand(tableName: String, doCache: Boolean)(@transient context: SQLContext)
+  extends LeafNode with Command {
 
   override protected[sql] lazy val sideEffectResult = {
     if (doCache) {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index b3c87513a59c0..d6ceaaf0fe080 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -15,8 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql
-package hive
+package org.apache.spark.sql.hive
 
 import java.io.{BufferedReader, File, InputStreamReader, PrintStream}
 import java.util.{ArrayList => JArrayList}
@@ -32,11 +31,13 @@ import org.apache.hadoop.hive.ql.session.SessionState
 
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
+import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.catalyst.analysis.{Analyzer, OverrideCatalog}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.types._
-import org.apache.spark.sql.execution._
+import org.apache.spark.sql.execution.QueryExecutionException
+import org.apache.spark.sql.execution.{Command => PhysicalCommand}
 
 /**
  * Starts up an instance of hive where metadata is stored locally. An in-process metadata data is
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
index e0ea7826fa70d..3ca8ac708bf8f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -79,7 +79,7 @@ private[hive] trait HiveStrategies {
   case class HiveCommandStrategy(context: HiveContext) extends Strategy {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
       case logical.NativeCommand(sql) =>
-        NativeCommandPhysical(sql, plan.output)(context) :: Nil
+        NativeCommand(sql, plan.output)(context) :: Nil
       case _ => Nil
     }
   }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
index e5fbd1c6f15f5..a839231449161 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
@@ -434,10 +434,10 @@ case class InsertIntoHiveTable(
  * :: DeveloperApi ::
  */
 @DeveloperApi
-case class NativeCommandPhysical(
+case class NativeCommand(
     sql: String, output: Seq[Attribute])(
     @transient context: HiveContext)
-  extends LeafNode with PhysicalCommand {
+  extends LeafNode with Command {
 
   override protected[sql] lazy val sideEffectResult: Seq[String] = context.runSqlHive(sql)
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index ca06c5c25800b..9233b2e9b6e88 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -17,11 +17,9 @@
 
 package org.apache.spark.sql.hive.execution
 
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.catalyst.plans.logical.ExplainCommand
 import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.hive.test.TestHive._
-import org.apache.spark.sql.execution.ExplainCommandPhysical
+import org.apache.spark.sql.{execution, Row}
 
 /**
  * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
@@ -173,7 +171,7 @@ class HiveQuerySuite extends HiveComparisonTest {
     }
     assert(explanation.size == 1)
 
-    val explainCommandClassName = classOf[ExplainCommandPhysical].getSimpleName.stripSuffix("$")
+    val explainCommandClassName = classOf[execution.ExplainCommand].getSimpleName.stripSuffix("$")
     assert(explanation.head.contains(explainCommandClassName))
 
     TestHive.reset()

From 48aa2e59e3d58cc24a4878951a389680f085acb5 Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Thu, 12 Jun 2014 17:15:42 -0700
Subject: [PATCH 4/8] Refined SQLContext.emptyResult as an empty RDD[Row]

---
 sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 585bfcbd75ecb..3938a68edc293 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -252,8 +252,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   protected[sql] val planner = new SparkPlanner
 
   @transient
-  protected[sql] lazy val emptyResult =
-    sparkContext.parallelize(Seq(new GenericRow(Array[Any]()): Row), 1)
+  protected[sql] lazy val emptyResult = sparkContext.parallelize(Seq.empty[Row], 1)
 
   /**
    * Prepares a planned SparkPlan for execution by binding references to specific ordinals, and

From 5c7e680ef4a66ac2ba0b05a1525b7f4509ac4519 Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Fri, 13 Jun 2014 00:01:22 -0700
Subject: [PATCH 5/8] Bug fix: wrong type used in pattern matching

---
 .../apache/spark/sql/hive/execution/HiveComparisonTest.scala   | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 357c7e654bd20..24c929ff7430d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -24,6 +24,7 @@ import org.scalatest.{BeforeAndAfterAll, FunSuite, GivenWhenThen}
 import org.apache.spark.sql.Logging
 import org.apache.spark.sql.catalyst.planning.PhysicalOperation
 import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.plans.logical.{NativeCommand => LogicalNativeCommand}
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.hive.test.TestHive
 
@@ -141,7 +142,7 @@ abstract class HiveComparisonTest
       // Hack: Hive simply prints the result of a SET command to screen,
       // and does not return it as a query answer.
       case _: SetCommand => Seq("0")
-      case _: NativeCommand => answer.filterNot(nonDeterministicLine).filterNot(_ == "")
+      case _: LogicalNativeCommand => answer.filterNot(nonDeterministicLine).filterNot(_ == "")
       case _: ExplainCommand => answer
       case plan => if (isSorted(plan)) answer else answer.sorted
     }

From 1d0093733c169fd7a5c67b2827121a1ab4a4de05 Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Fri, 13 Jun 2014 00:02:35 -0700
Subject: [PATCH 6/8] Makes SchemaRDD DSLs work for DDL/command statement RDDs

---
 .../src/main/scala/org/apache/spark/sql/SchemaRDD.scala  | 2 +-
 .../main/scala/org/apache/spark/sql/SchemaRDDLike.scala  | 9 ++++++---
 .../org/apache/spark/sql/api/java/JavaSchemaRDD.scala    | 2 +-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
index 7ad8edf5a5a6e..821ac850ac3f5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
@@ -97,7 +97,7 @@ import java.util.{Map => JMap}
 @AlphaComponent
 class SchemaRDD(
     @transient val sqlContext: SQLContext,
-    @transient protected[spark] val logicalPlan: LogicalPlan)
+    @transient val baseLogicalPlan: LogicalPlan)
   extends RDD[Row](sqlContext.sparkContext, Nil) with SchemaRDDLike {
 
   def baseSchemaRDD = this
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
index e4cbb037709ef..656be965a8fd9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala
@@ -20,13 +20,14 @@ package org.apache.spark.sql
 import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
 import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.execution.SparkLogicalPlan
 
 /**
  * Contains functions that are shared between all SchemaRDD types (i.e., Scala, Java)
  */
 private[sql] trait SchemaRDDLike {
   @transient val sqlContext: SQLContext
-  @transient protected[spark] val logicalPlan: LogicalPlan
+  @transient val baseLogicalPlan: LogicalPlan
 
   private[sql] def baseSchemaRDD: SchemaRDD
 
@@ -48,14 +49,16 @@ private[sql] trait SchemaRDDLike {
    */
   @transient
   @DeveloperApi
-  lazy val queryExecution = sqlContext.executePlan(logicalPlan)
+  lazy val queryExecution = sqlContext.executePlan(baseLogicalPlan)
 
-  logicalPlan match {
+  @transient protected[spark] val logicalPlan: LogicalPlan = baseLogicalPlan match {
     // For various commands (like DDL) and queries with side effects, we force query optimization to
     // happen right away to let these side effects take place eagerly.
     case _: Command | _: InsertIntoTable | _: InsertIntoCreatedTable | _: WriteToFile =>
       queryExecution.toRdd
+      SparkLogicalPlan(queryExecution.executedPlan)
     case _ =>
+      baseLogicalPlan
   }
 
   override def toString =
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSchemaRDD.scala
index 22f57b758dd02..aff6ffe9f3478 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSchemaRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSchemaRDD.scala
@@ -37,7 +37,7 @@ import org.apache.spark.storage.StorageLevel
  */
 class JavaSchemaRDD(
      @transient val sqlContext: SQLContext,
-     @transient protected[spark] val logicalPlan: LogicalPlan)
+     @transient val baseLogicalPlan: LogicalPlan)
   extends JavaRDDLike[Row, JavaRDD[Row]]
   with SchemaRDDLike {
 

From f6c77155d7fd209dd3492afef32d3e98a97fdc65 Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Fri, 13 Jun 2014 00:03:38 -0700
Subject: [PATCH 7/8] Added test cases for DDL/command statement RDDs

---
 .../sql/hive/execution/HiveQuerySuite.scala   | 53 ++++++++++++++++---
 1 file changed, 47 insertions(+), 6 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 9233b2e9b6e88..0b64c0a970348 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -17,9 +17,11 @@
 
 package org.apache.spark.sql.hive.execution
 
+import scala.util.Try
+
 import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.hive.test.TestHive._
-import org.apache.spark.sql.{execution, Row}
+import org.apache.spark.sql.{SchemaRDD, execution, Row}
 
 /**
  * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
@@ -162,21 +164,60 @@ class HiveQuerySuite extends HiveComparisonTest {
     hql("SELECT * FROM src").toString
   }
 
+  private val explainCommandClassName =
+    classOf[execution.ExplainCommand].getSimpleName.stripSuffix("$")
+
+  def isExplanation(result: SchemaRDD) = {
+    val explanation = result.select('plan).collect().map { case Row(plan: String) => plan }
+    explanation.size == 1 && explanation.head.startsWith(explainCommandClassName)
+  }
+
   test("SPARK-1704: Explain commands as a SchemaRDD") {
     hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
 
     val rdd = hql("explain select key, count(value) from src group by key")
-    val explanation = rdd.select('plan).collect().map {
-      case Row(plan: String) => plan
+    assert(isExplanation(rdd))
+
+    TestHive.reset()
+  }
+
+  test("Query Hive native command execution result") {
+    val tableName = "test_native_commands"
+
+    val q0 = hql(s"DROP TABLE IF EXISTS $tableName")
+    assert(q0.count() == 0)
+
+    val q1 = hql(s"CREATE TABLE $tableName(key INT, value STRING)")
+    assert(q1.count() == 0)
+
+    val q2 = hql("SHOW TABLES")
+    val tables = q2.select('result).collect().map { case Row(table: String) => table }
+    assert(tables.contains(tableName))
+
+    val q3 = hql(s"DESCRIBE $tableName")
+    assertResult(Array(Array("key", "int", "None"), Array("value", "string", "None"))) {
+      q3.select('result).collect().map { case Row(fieldDesc: String) =>
+        fieldDesc.split("\t").map(_.trim)
+      }
     }
-    assert(explanation.size == 1)
 
-    val explainCommandClassName = classOf[execution.ExplainCommand].getSimpleName.stripSuffix("$")
-    assert(explanation.head.contains(explainCommandClassName))
+    val q4 = hql(s"EXPLAIN SELECT key, COUNT(*) FROM $tableName GROUP BY key")
+    assert(isExplanation(q4))
 
     TestHive.reset()
   }
 
+  test("Exactly once semantics for DDL and command statements") {
+    val tableName = "test_exactly_once"
+    val q0 = hql(s"CREATE TABLE $tableName(key INT, value STRING)")
+
+    // If the table was not created, the following assertion would fail
+    assert(Try(table(tableName)).isSuccess)
+
+    // If the CREATE TABLE command got executed again, the following assertion would fail
+    assert(Try(q0.count()).isSuccess)
+  }
+
   test("parse HQL set commands") {
     // Adapted from its SQL counterpart.
     val testKey = "spark.sql.key.usedfortestonly"

From d005b03b7a587135b0adf969babf91a7fc70b43f Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Fri, 13 Jun 2014 00:35:34 -0700
Subject: [PATCH 8/8] Made "SET key=value" returns the newly set key value pair

---
 .../org/apache/spark/sql/execution/commands.scala   |  2 +-
 .../spark/sql/hive/execution/HiveQuerySuite.scala   | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
index e9efe28723745..0377290af5926 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala
@@ -48,7 +48,7 @@ case class SetCommand(
     // Set value for key k.
     case (Some(k), Some(v)) =>
       context.set(k, v)
-      Array.empty[(String, String)]
+      Array(k -> v)
 
     // Query the value bound to key k.
     case (Some(k), _) =>
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 0b64c0a970348..0d656c556965d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -250,10 +250,12 @@ class HiveQuerySuite extends HiveComparisonTest {
     clear()
 
     // "set" itself returns all config variables currently specified in SQLConf.
-    assert(hql("set").collect().size == 0)
+    assert(hql("SET").collect().size == 0)
+
+    assertResult(Array(testKey -> testVal)) {
+      rowsToPairs(hql(s"SET $testKey=$testVal").collect())
+    }
 
-    // "set key=val"
-    hql(s"SET $testKey=$testVal")
     assert(hiveconf.get(testKey, "") == testVal)
     assertResult(Array(testKey -> testVal)) {
       rowsToPairs(hql("SET").collect())
@@ -278,7 +280,10 @@ class HiveQuerySuite extends HiveComparisonTest {
     clear()
     assert(sql("SET").collect().size == 0)
 
-    sql(s"SET $testKey=$testVal")
+    assertResult(Array(testKey -> testVal)) {
+      rowsToPairs(sql(s"SET $testKey=$testVal").collect())
+    }
+
     assert(hiveconf.get(testKey, "") == testVal)
     assertResult(Array(testKey -> testVal)) {
       rowsToPairs(sql("SET").collect())