From d717d18960cf860bc80c6d6c803f01db8865495c Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Sun, 23 Aug 2015 21:50:51 -0700 Subject: [PATCH 1/5] Print dependent test tables --- .../hive/execution/HiveComparisonTest.scala | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 2bdb0e11878e..8ab15b89b8a5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.hive.execution import java.io._ +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.hive.MetastoreRelation import org.scalatest.{BeforeAndAfterAll, GivenWhenThen} import org.apache.spark.{Logging, SparkFunSuite} @@ -29,6 +31,8 @@ import org.apache.spark.sql.execution.{SetCommand, ExplainCommand} import org.apache.spark.sql.execution.datasources.DescribeCommand import org.apache.spark.sql.hive.test.TestHive +import scala.util.control.NonFatal + /** * Allows the creations of tests that execute the same query against both hive * and catalyst, comparing the results. @@ -386,11 +390,48 @@ abstract class HiveComparisonTest hiveCacheFiles.foreach(_.delete()) } + + val computedTablesMessages = try { + TestHive.reset() + val executions = queryList.map(new TestHive.QueryExecution(_)) + executions.foreach(_.toRdd) + val tablesRead = executions.flatMap(_.executedPlan.collect { + case ts: HiveTableScan => ts.relation.tableName + }).toSet + + val tablesGenerated = queryList.zip(executions).flatMap{ + case (q, e) => e.executedPlan.collect { + case i: InsertIntoHiveTable if tablesRead contains i.table.tableName => + (q, e, i) + } + } + + println(tablesRead) + println(tablesGenerated.map(_._3.table)) + + tablesGenerated.map { case (hiveql, execution, insert) => + s""" + | + |=== Generated Table === + |$hiveql + |$execution + |== Results == + |${insert.child.execute().collect().mkString("\n")} + """.stripMargin + } + + } catch { + case NonFatal(e) => + e.printStackTrace() + s"Couldn't compute tables: $e" + } + val errorMessage = s""" |Results do not match for $testCaseName: |$hiveQuery\n${hiveQuery.analyzed.output.map(_.name).mkString("\t")} |$resultComparison + |$computedTablesMessages """.stripMargin stringToFile(new File(wrongDirectory, testCaseName), errorMessage + consoleTestCase) From 4894383bc1e6f861793f17a9c5198923db562f47 Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Sun, 23 Aug 2015 22:16:48 -0700 Subject: [PATCH 2/5] fix wording --- .../apache/spark/sql/hive/execution/HiveComparisonTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 8ab15b89b8a5..b97eaa065925 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -423,7 +423,7 @@ abstract class HiveComparisonTest } catch { case NonFatal(e) => e.printStackTrace() - s"Couldn't compute tables: $e" + s"Couldn't compute dependent tables: $e" } val errorMessage = From 125707bc7931972269ff4fa65139868287365592 Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Sun, 23 Aug 2015 22:23:00 -0700 Subject: [PATCH 3/5] better filter --- .../sql/hive/execution/HiveComparisonTest.scala | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index b97eaa065925..6bfa72f2cc6c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -390,15 +390,16 @@ abstract class HiveComparisonTest hiveCacheFiles.foreach(_.delete()) } - + // If this query is reading other tables that were created during this test run + // also print out the query plans and results for those. val computedTablesMessages = try { + val tablesRead = new TestHive.QueryExecution(query).executedPlan.collect { + case ts: HiveTableScan => ts.relation.tableName + }.toSet + TestHive.reset() val executions = queryList.map(new TestHive.QueryExecution(_)) executions.foreach(_.toRdd) - val tablesRead = executions.flatMap(_.executedPlan.collect { - case ts: HiveTableScan => ts.relation.tableName - }).toSet - val tablesGenerated = queryList.zip(executions).flatMap{ case (q, e) => e.executedPlan.collect { case i: InsertIntoHiveTable if tablesRead contains i.table.tableName => @@ -406,9 +407,6 @@ abstract class HiveComparisonTest } } - println(tablesRead) - println(tablesGenerated.map(_._3.table)) - tablesGenerated.map { case (hiveql, execution, insert) => s""" | From b3b84b47eba9b3be4f2263b8d282fd41c277c833 Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Sun, 23 Aug 2015 22:33:42 -0700 Subject: [PATCH 4/5] import --- .../spark/sql/hive/execution/HiveComparisonTest.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 6bfa72f2cc6c..1c142e9d35ee 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -19,8 +19,8 @@ package org.apache.spark.sql.hive.execution import java.io._ -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.hive.MetastoreRelation +import scala.util.control.NonFatal + import org.scalatest.{BeforeAndAfterAll, GivenWhenThen} import org.apache.spark.{Logging, SparkFunSuite} @@ -31,8 +31,6 @@ import org.apache.spark.sql.execution.{SetCommand, ExplainCommand} import org.apache.spark.sql.execution.datasources.DescribeCommand import org.apache.spark.sql.hive.test.TestHive -import scala.util.control.NonFatal - /** * Allows the creations of tests that execute the same query against both hive * and catalyst, comparing the results. From a723dda36176ed8fc595df2701709020e27b5b3d Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Mon, 24 Aug 2015 18:35:54 -0700 Subject: [PATCH 5/5] address comments --- .../spark/sql/hive/execution/HiveComparisonTest.scala | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 1c142e9d35ee..4d45249d9c6b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -390,7 +390,7 @@ abstract class HiveComparisonTest // If this query is reading other tables that were created during this test run // also print out the query plans and results for those. - val computedTablesMessages = try { + val computedTablesMessages: String = try { val tablesRead = new TestHive.QueryExecution(query).executedPlan.collect { case ts: HiveTableScan => ts.relation.tableName }.toSet @@ -398,7 +398,7 @@ abstract class HiveComparisonTest TestHive.reset() val executions = queryList.map(new TestHive.QueryExecution(_)) executions.foreach(_.toRdd) - val tablesGenerated = queryList.zip(executions).flatMap{ + val tablesGenerated = queryList.zip(executions).flatMap { case (q, e) => e.executedPlan.collect { case i: InsertIntoHiveTable if tablesRead contains i.table.tableName => (q, e, i) @@ -407,18 +407,17 @@ abstract class HiveComparisonTest tablesGenerated.map { case (hiveql, execution, insert) => s""" - | |=== Generated Table === |$hiveql |$execution |== Results == |${insert.child.execute().collect().mkString("\n")} """.stripMargin - } + }.mkString("\n") } catch { case NonFatal(e) => - e.printStackTrace() + logError("Failed to compute generated tables", e) s"Couldn't compute dependent tables: $e" }