From 5128fe27aa265b7359a914bd508ff366b9762544 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 19 Jun 2014 08:00:31 +0100
Subject: [PATCH 1/2] Make sure InsertIntoHiveTable doesn't execute each time
 you ask for its result.

---
 .../org/apache/spark/sql/hive/execution/hiveOperators.scala | 6 +++++-
 .../apache/spark/sql/hive/execution/HiveQuerySuite.scala    | 6 ++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
index a839231449161..240aa0df4935a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
@@ -344,12 +344,16 @@ case class InsertIntoHiveTable(
     writer.commitJob()
   }
 
+  override def execute() = result
+
   /**
    * Inserts all the rows in the table into Hive.  Row objects are properly serialized with the
    * `org.apache.hadoop.hive.serde2.SerDe` and the
    * `org.apache.hadoop.mapred.OutputFormat` provided by the table definition.
+   *
+   * Note: evaluated once and memoized, so repeated execute() calls do not re-insert the rows.
    */
-  def execute() = {
+  private lazy val result: RDD[Row] = {
     val childRdd = child.execute()
     assert(childRdd != null)
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index fe698f0fc57b8..ae9f6acbcec0e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -28,6 +28,12 @@ import org.apache.spark.sql.{SchemaRDD, execution, Row}
  */
 class HiveQuerySuite extends HiveComparisonTest {
 
+  test("create as table as runs once") {
+    hql("CREATE TABLE foo AS SELECT 1 FROM src LIMIT 1").collect()
+    assert(hql("SELECT COUNT(*) FROM foo").collect().head.getLong(0) === 1,
+      "Incorrect number of rows in created table")
+  }
+
   createQueryTest("between",
     "SELECT * FROM src WHERE key Between 1 and 2")
 

From 9c6d9e4218dff661b4536b09b84dc314d903983e Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 19 Jun 2014 08:03:29 +0100
Subject: [PATCH 2/2] Fix typo in HiveQuerySuite test name.

---
 .../org/apache/spark/sql/hive/execution/HiveQuerySuite.scala    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index ae9f6acbcec0e..c5dc45cdbca4d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.{SchemaRDD, execution, Row}
  */
 class HiveQuerySuite extends HiveComparisonTest {
 
-  test("create as table as runs once") {
+  test("CREATE TABLE AS runs once") {
     hql("CREATE TABLE foo AS SELECT 1 FROM src LIMIT 1").collect()
     assert(hql("SELECT COUNT(*) FROM foo").collect().head.getLong(0) === 1,
       "Incorrect number of rows in created table")