Skip to content

Commit 5128fe2

Browse files
committed
Make sure InsertIntoHiveTable doesn't execute each time you ask for its result.
1 parent 45a95f8 commit 5128fe2

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,12 +344,16 @@ case class InsertIntoHiveTable(
344344
writer.commitJob()
345345
}
346346

347+
override def execute() = result
348+
347349
/**
348350
* Inserts all the rows in the table into Hive. Row objects are properly serialized with the
349351
* `org.apache.hadoop.hive.serde2.SerDe` and the
350352
* `org.apache.hadoop.mapred.OutputFormat` provided by the table definition.
353+
*
354+
* Note: this is run once and then kept to avoid double insertions.
351355
*/
352-
def execute() = {
356+
private lazy val result: RDD[Row] = {
353357
val childRdd = child.execute()
354358
assert(childRdd != null)
355359

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ import org.apache.spark.sql.{SchemaRDD, execution, Row}
2828
*/
2929
class HiveQuerySuite extends HiveComparisonTest {
3030

31+
test("create table as runs once") {
32+
hql("CREATE TABLE foo AS SELECT 1 FROM src LIMIT 1").collect()
33+
assert(hql("SELECT COUNT(*) FROM foo").collect().head.getLong(0) === 1,
34+
"Incorrect number of rows in created table")
35+
}
36+
3137
createQueryTest("between",
3238
"SELECT * FROM src WHERE key Between 1 and 2")
3339

0 commit comments

Comments (0)