From ed395be288b800b2d1fd6b669a13570381ba8b54 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Thu, 29 Oct 2015 21:57:52 -0700 Subject: [PATCH 1/2] [SPARK-11188][SQL] Elide stacktraces in bin/spark-sql for AnalysisExceptions --- .../thriftserver/AbstractSparkSQLDriver.scala | 23 ++++++++++--- .../hive/thriftserver/SparkSQLCLIDriver.scala | 16 +++++++--- .../sql/hive/thriftserver/CliSuite.scala | 32 ++++++++++++++++++- 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/AbstractSparkSQLDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/AbstractSparkSQLDriver.scala index 48ac9062af96a..d96449a5688f9 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/AbstractSparkSQLDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/AbstractSparkSQLDriver.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.thriftserver +import org.apache.spark.sql.AnalysisException + import scala.collection.JavaConversions._ import org.apache.commons.lang3.exception.ExceptionUtils @@ -58,13 +60,23 @@ private[hive] abstract class AbstractSparkSQLDriver( hiveResponse = execution.stringResult() tableSchema = getResultSetSchema(execution) new CommandProcessorResponse(0) - } catch { - case cause: Throwable => - logError(s"Failed in [$command]", cause) - new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(cause), null) } } + def runWrapper(command: String): CommandProcessorResponseWrapper = try { + val result = run(command) + new CommandProcessorResponseWrapper(result, null) + } catch { + case ae: AnalysisException => + logDebug(s"Failed in [$command]", ae) + new CommandProcessorResponseWrapper(new CommandProcessorResponse(1, + ExceptionUtils.getStackTrace(ae), null), ae) + case cause: Throwable => + logError(s"Failed in [$command]", cause) + new CommandProcessorResponseWrapper(new CommandProcessorResponse(1, + ExceptionUtils.getStackTrace(cause), null), cause) + } + override def close(): Int = { hiveResponse = null tableSchema = null @@ -79,3 +91,6 @@ private[hive] abstract class AbstractSparkSQLDriver( tableSchema = null } } + +private[hive] case class CommandProcessorResponseWrapper (rc : CommandProcessorResponse, + cause : Throwable) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index c56d807c46758..1216b09696dbc 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -21,6 +21,8 @@ import scala.collection.JavaConversions._ import java.io._ import java.util.{ArrayList => JArrayList} +import org.apache.spark.sql.AnalysisException + import jline.{ConsoleReader, History} @@ -276,19 +278,23 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { driver.init() val out = sessionState.out + val err = sessionState.err val start: Long = System.currentTimeMillis() if (sessionState.getIsVerbose) { out.println(cmd) } - val rc = driver.run(cmd) + val rcWrapper = driver.runWrapper(cmd) val end = System.currentTimeMillis() val timeTaken: Double = (end - start) / 1000.0 - ret = rc.getResponseCode + ret = rcWrapper.rc.getResponseCode if (ret != 0) { - console.printError(rc.getErrorMessage()) - driver.close() - return ret + // For analysis exception, only the error is printed out to the console. + rcWrapper.cause match { + case e : AnalysisException => + err.println(s"""Error in query: ${e.getMessage}""") + case _ => err.println(rcWrapper.rc.getErrorMessage()) + } } val res = new JArrayList[String]() diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 13b0c5951dddc..989aceca25340 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -48,9 +48,22 @@ class CliSuite extends SparkFunSuite with BeforeAndAfter with Logging { metastorePath.delete() } + /** + * Run a CLI operation and expect all the queries and expected answers to be returned. + * @param timeout maximum time for the commands to complete + * @param extraArgs any extra arguments + * @param errorResponses a sequence of strings whose presence in the stdout of the forked process + * is taken as an immediate error condition. That is: if a line containing + * with one of these strings is found, fail the test immediately. + * The default value is `Seq("Error:")` + * + * @param queriesAndExpectedAnswers one or more tupes of query + answer + */ + def runCliWithin( timeout: FiniteDuration, - extraArgs: Seq[String] = Seq.empty)( + extraArgs: Seq[String] = Seq.empty, + errorResponses: Seq[String] = Seq("Error:"))( queriesAndExpectedAnswers: (String, String)*): Unit = { val (queries, expectedAnswers) = queriesAndExpectedAnswers.unzip @@ -82,6 +95,15 @@ class CliSuite extends SparkFunSuite with BeforeAndAfter with Logging { foundAllExpectedAnswers.trySuccess(()) } } + else + { + errorResponses.foreach { r => + if (line.contains(r)) { + foundAllExpectedAnswers.tryFailure( + new RuntimeException(s"Failed with error line '$line'")) + } + } + } } // Searching expected output line from both stdout and stderr of the CLI process @@ -184,4 +206,12 @@ class CliSuite extends SparkFunSuite with BeforeAndAfter with Logging { -> "OK" ) } + + test("SPARK-11188 Analysis error reporting") { + runCliWithin(timeout = 2.minute, + errorResponses = Seq("AnalysisException"))( + "select * from nonexistent_table;" + -> "Error in query: no such table nonexistent_table;" + ) + } } From f244688f7b85eae9f5cf04c324840df6ac04e29c Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Tue, 3 Nov 2015 23:55:34 -0800 Subject: [PATCH 2/2] Review comments from michael --- .../hive/thriftserver/AbstractSparkSQLDriver.scala | 8 ++++---- .../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 12 +++++------- .../spark/sql/hive/thriftserver/CliSuite.scala | 14 ++++++-------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/AbstractSparkSQLDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/AbstractSparkSQLDriver.scala index d96449a5688f9..df025d0a187f1 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/AbstractSparkSQLDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/AbstractSparkSQLDriver.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.hive.thriftserver -import org.apache.spark.sql.AnalysisException - import scala.collection.JavaConversions._ import org.apache.commons.lang3.exception.ExceptionUtils @@ -27,6 +25,7 @@ import org.apache.hadoop.hive.ql.Driver import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse import org.apache.spark.Logging +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.hive.{HiveContext, HiveMetastoreTypes} private[hive] abstract class AbstractSparkSQLDriver( @@ -92,5 +91,6 @@ private[hive] abstract class AbstractSparkSQLDriver( } } -private[hive] case class CommandProcessorResponseWrapper (rc : CommandProcessorResponse, - cause : Throwable) +private[hive] case class CommandProcessorResponseWrapper( + rc : CommandProcessorResponse, + cause : Throwable) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 1216b09696dbc..3928f7b789345 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -17,15 +17,12 @@ package org.apache.spark.sql.hive.thriftserver -import scala.collection.JavaConversions._ - import java.io._ import java.util.{ArrayList => JArrayList} -import org.apache.spark.sql.AnalysisException +import scala.collection.JavaConversions._ import jline.{ConsoleReader, History} - import org.apache.commons.lang3.StringUtils import org.apache.commons.logging.LogFactory import org.apache.hadoop.conf.Configuration @@ -34,13 +31,14 @@ import org.apache.hadoop.hive.common.{HiveInterruptCallback, HiveInterruptUtils} import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.ql.Driver import org.apache.hadoop.hive.ql.exec.Utilities -import org.apache.hadoop.hive.ql.processors.{AddResourceProcessor, SetProcessor, CommandProcessor} +import org.apache.hadoop.hive.ql.processors.{AddResourceProcessor, CommandProcessor, SetProcessor} import org.apache.hadoop.hive.ql.session.SessionState import org.apache.thrift.transport.TSocket import org.apache.spark.Logging +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.hive.{HiveContext, HiveShim} -import org.apache.spark.util.{ShutdownHookManager, Utils} +import org.apache.spark.util.ShutdownHookManager private[hive] object SparkSQLCLIDriver { private var prompt = "spark-sql" @@ -291,7 +289,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { if (ret != 0) { // For analysis exception, only the error is printed out to the console. rcWrapper.cause match { - case e : AnalysisException => + case e: AnalysisException => err.println(s"""Error in query: ${e.getMessage}""") case _ => err.println(rcWrapper.rc.getErrorMessage()) } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 989aceca25340..1e28de4a49ad3 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -59,7 +59,6 @@ class CliSuite extends SparkFunSuite with BeforeAndAfter with Logging { * * @param queriesAndExpectedAnswers one or more tupes of query + answer */ - def runCliWithin( timeout: FiniteDuration, extraArgs: Seq[String] = Seq.empty, @@ -95,15 +94,14 @@ class CliSuite extends SparkFunSuite with BeforeAndAfter with Logging { foundAllExpectedAnswers.trySuccess(()) } } - else - { - errorResponses.foreach { r => - if (line.contains(r)) { - foundAllExpectedAnswers.tryFailure( - new RuntimeException(s"Failed with error line '$line'")) - } + else { + errorResponses.foreach { r => + if (line.contains(r)) { + foundAllExpectedAnswers.tryFailure( + new RuntimeException(s"Failed with error line '$line'")) } } + } } // Searching expected output line from both stdout and stderr of the CLI process