
Commit 59a7087

yaooqinn authored and cloud-fan committed
[SPARK-32145][SQL][TEST-HIVE1.2][TEST-HADOOP2.7] ThriftCLIService.GetOperationStatus should include exception's stack trace to the error message

### What changes were proposed in this pull request?

In https://issues.apache.org/jira/browse/SPARK-29283, we only show the error message of the root cause to end-users through the JDBC client. In some cases, this erases the messages that we intentionally craft to help users understand a failure. The root cause alone can be obscure for JDBC end-users who only write SQL queries, e.g.

```
Error running query: org.apache.spark.sql.AnalysisException: The second argument of 'date_sub' function needs to be an integer.;
```

is better than just

```
Caused by: java.lang.NumberFormatException: invalid input syntax for type numeric: 1.2
```

We should do as Hive does in https://issues.apache.org/jira/browse/HIVE-14368.

In general, this PR partially reverts SPARK-29283, ports HIVE-14368, and improves test coverage.

### Why are the changes needed?

1. Do the same as Hive 2.3 and later for getting an error message in ThriftCLIService.GetOperationStatus.
2. The root cause alone is obscure for JDBC end-users who only write SQL queries.
3. Consistency with the `spark-sql` script.

### Does this PR introduce _any_ user-facing change?

Yes. When running queries through the Thrift server and an error occurs, you will get the full stack trace instead of only the message of the root cause.

### How was this patch tested?

Added a unit test.

Closes apache#28963 from yaooqinn/SPARK-32145.

Authored-by: Kent Yao <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
1 parent dea7bc4 · commit 59a7087
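For context, a minimal JDBC sketch of where the restored error message surfaces on the client side. The connection URL, credentials, and query are illustrative assumptions, not part of this commit; it assumes a locally running Spark Thrift server and the Hive JDBC driver on the classpath:

```scala
import java.sql.{DriverManager, SQLException}

// Hedged sketch: run a query that fails analysis. With this change,
// e.getMessage on the client carries the intentional AnalysisException text
// (plus the server-side stack trace) rather than only the root cause.
object JdbcErrorDemo {
  def main(args: Array[String]): Unit = {
    val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/", "user", "")
    try {
      val stmt = conn.createStatement()
      try stmt.executeQuery("SELECT date_sub(date'2011-11-11', '1.2')")
      catch {
        case e: SQLException =>
          // Expected to include: "The second argument of 'date_sub' function
          // needs to be an integer."
          println(e.getMessage)
      }
    } finally conn.close()
  }
}
```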

File tree

13 files changed: +106 −147 lines

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala

Lines changed: 5 additions & 14 deletions
```diff
@@ -18,26 +18,22 @@
 package org.apache.spark.sql.hive.thriftserver

 import java.security.PrivilegedExceptionAction
-import java.sql.{Date, Timestamp}
-import java.util.{Arrays, Map => JMap, UUID}
+import java.util.{Arrays, Map => JMap}
 import java.util.concurrent.RejectedExecutionException

 import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer
 import scala.util.control.NonFatal

-import org.apache.commons.lang3.exception.ExceptionUtils
 import org.apache.hadoop.hive.metastore.api.FieldSchema
 import org.apache.hadoop.hive.shims.Utils
 import org.apache.hive.service.cli._
 import org.apache.hive.service.cli.operation.ExecuteStatementOperation
 import org.apache.hive.service.cli.session.HiveSession

-import org.apache.spark.SparkContext
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{DataFrame, Row => SparkRow, SQLContext}
 import org.apache.spark.sql.execution.HiveResult.{getTimeFormatters, toHiveString, TimeFormatters}
-import org.apache.spark.sql.execution.command.SetCommand
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
@@ -315,16 +311,11 @@ private[hive] class SparkExecuteStatementOperation(
       } else {
         logError(s"Error executing query with $statementId, currentState $currentState, ", e)
         setState(OperationState.ERROR)
+        HiveThriftServer2.eventManager.onStatementError(
+          statementId, e.getMessage, SparkUtils.exceptionString(e))
         e match {
-          case hiveException: HiveSQLException =>
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException))
-            throw hiveException
-          case _ =>
-            val root = ExceptionUtils.getRootCause(e)
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, root.getMessage, SparkUtils.exceptionString(root))
-            throw new HiveSQLException("Error running query: " + root.toString, root)
+          case _: HiveSQLException => throw e
+          case _ => throw new HiveSQLException("Error running query: " + e.toString, e)
         }
       }
     } finally {
```
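The behavioral difference is easiest to see outside Spark. A self-contained sketch (exception messages are illustrative) of what the old root-cause extraction discards compared to stringifying the full chain, using the same commons-lang3 helper the old code relied on:

```scala
import org.apache.commons.lang3.exception.ExceptionUtils

// Illustration of why SPARK-29283's root-cause extraction can hide the
// helpful message: the wrapper's message is lost once only getRootCause(e)
// is reported.
object RootCauseVsFullChain {
  def main(args: Array[String]): Unit = {
    val root = new NumberFormatException("invalid input syntax for type numeric: 1.2")
    val wrapper = new IllegalArgumentException(
      "The second argument of 'date_sub' function needs to be an integer.", root)

    // Old behavior: only the root cause's message survives.
    println(ExceptionUtils.getRootCause(wrapper).getMessage)

    // New behavior: the whole chain, including the intentional message and
    // the stack trace, is preserved.
    println(ExceptionUtils.getStackTrace(wrapper))
  }
}
```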

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetCatalogsOperation.scala

Lines changed: 3 additions & 21 deletions
```diff
@@ -17,17 +17,13 @@

 package org.apache.spark.sql.hive.thriftserver

-import java.util.UUID
-
-import org.apache.commons.lang3.exception.ExceptionUtils
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType
-import org.apache.hive.service.cli.{HiveSQLException, OperationState}
+import org.apache.hive.service.cli.OperationState
 import org.apache.hive.service.cli.operation.GetCatalogsOperation
 import org.apache.hive.service.cli.session.HiveSession

 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SQLContext
-import org.apache.spark.util.{Utils => SparkUtils}

 /**
  * Spark's own GetCatalogsOperation
@@ -62,22 +58,8 @@ private[hive] class SparkGetCatalogsOperation(
         authorizeMetaGets(HiveOperationType.GET_CATALOGS, null)
       }
       setState(OperationState.FINISHED)
-    } catch {
-      case e: Throwable =>
-        logError(s"Error executing get catalogs operation with $statementId", e)
-        setState(OperationState.ERROR)
-        e match {
-          case hiveException: HiveSQLException =>
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException))
-            throw hiveException
-          case _ =>
-            val root = ExceptionUtils.getRootCause(e)
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, root.getMessage, SparkUtils.exceptionString(root))
-            throw new HiveSQLException("Error getting catalogs: " + root.toString, root)
-        }
-    }
+    } catch onError()
+
     HiveThriftServer2.eventManager.onStatementFinish(statementId)
   }
 }
```

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala

Lines changed: 2 additions & 17 deletions
```diff
@@ -35,7 +35,6 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.hive.thriftserver.ThriftserverShimUtils.toJavaSQLType
 import org.apache.spark.sql.types.StructType
-import org.apache.spark.util.{Utils => SparkUtils}

 /**
  * Spark's own SparkGetColumnsOperation
@@ -122,22 +121,8 @@ private[hive] class SparkGetColumnsOperation(
         }
       }
       setState(OperationState.FINISHED)
-    } catch {
-      case e: Throwable =>
-        logError(s"Error executing get columns operation with $statementId", e)
-        setState(OperationState.ERROR)
-        e match {
-          case hiveException: HiveSQLException =>
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException))
-            throw hiveException
-          case _ =>
-            val root = ExceptionUtils.getRootCause(e)
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, root.getMessage, SparkUtils.exceptionString(root))
-            throw new HiveSQLException("Error getting columns: " + root.toString, root)
-        }
-    }
+    } catch onError()
+
     HiveThriftServer2.eventManager.onStatementFinish(statementId)
   }
```

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala

Lines changed: 2 additions & 16 deletions
```diff
@@ -98,22 +98,8 @@ private[hive] class SparkGetFunctionsOperation(
         }
       }
       setState(OperationState.FINISHED)
-    } catch {
-      case e: Throwable =>
-        logError(s"Error executing get functions operation with $statementId", e)
-        setState(OperationState.ERROR)
-        e match {
-          case hiveException: HiveSQLException =>
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException))
-            throw hiveException
-          case _ =>
-            val root = ExceptionUtils.getRootCause(e)
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, root.getMessage, SparkUtils.exceptionString(root))
-            throw new HiveSQLException("Error getting functions: " + root.toString, root)
-        }
-    }
+    } catch onError()
+
     HiveThriftServer2.eventManager.onStatementFinish(statementId)
  }
 }
```

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala

Lines changed: 2 additions & 16 deletions
```diff
@@ -81,22 +81,8 @@ private[hive] class SparkGetSchemasOperation(
         rowSet.addRow(Array[AnyRef](globalTempViewDb, DEFAULT_HIVE_CATALOG))
       }
       setState(OperationState.FINISHED)
-    } catch {
-      case e: Throwable =>
-        logError(s"Error executing get schemas operation with $statementId", e)
-        setState(OperationState.ERROR)
-        e match {
-          case hiveException: HiveSQLException =>
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException))
-            throw hiveException
-          case _ =>
-            val root = ExceptionUtils.getRootCause(e)
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, root.getMessage, SparkUtils.exceptionString(root))
-            throw new HiveSQLException("Error getting schemas: " + root.toString, root)
-        }
-    }
+    } catch onError()
+
     HiveThriftServer2.eventManager.onStatementFinish(statementId)
  }
 }
```

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTableTypesOperation.scala

Lines changed: 2 additions & 18 deletions
```diff
@@ -19,7 +19,6 @@ package org.apache.spark.sql.hive.thriftserver

 import java.util.UUID

-import org.apache.commons.lang3.exception.ExceptionUtils
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType
 import org.apache.hive.service.cli._
 import org.apache.hive.service.cli.operation.GetTableTypesOperation
@@ -28,7 +27,6 @@ import org.apache.hive.service.cli.session.HiveSession

 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.catalog.CatalogTableType
-import org.apache.spark.util.{Utils => SparkUtils}

 /**
  * Spark's own GetTableTypesOperation
@@ -69,22 +67,8 @@ private[hive] class SparkGetTableTypesOperation(
         rowSet.addRow(Array[AnyRef](tableType))
       }
       setState(OperationState.FINISHED)
-    } catch {
-      case e: Throwable =>
-        logError(s"Error executing get table types operation with $statementId", e)
-        setState(OperationState.ERROR)
-        e match {
-          case hiveException: HiveSQLException =>
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException))
-            throw hiveException
-          case _ =>
-            val root = ExceptionUtils.getRootCause(e)
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, root.getMessage, SparkUtils.exceptionString(root))
-            throw new HiveSQLException("Error getting table types: " + root.toString, root)
-        }
-    }
+    } catch onError()
+
     HiveThriftServer2.eventManager.onStatementFinish(statementId)
  }
 }
```

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala

Lines changed: 4 additions & 21 deletions
```diff
@@ -17,14 +17,12 @@

 package org.apache.spark.sql.hive.thriftserver

-import java.util.{List => JList, UUID}
+import java.util.{List => JList}
 import java.util.regex.Pattern

 import scala.collection.JavaConverters._

-import org.apache.commons.lang3.exception.ExceptionUtils
-import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType
-import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils
+import org.apache.hadoop.hive.ql.security.authorization.plugin.{HiveOperationType, HivePrivilegeObjectUtils}
 import org.apache.hive.service.cli._
 import org.apache.hive.service.cli.operation.GetTablesOperation
 import org.apache.hive.service.cli.session.HiveSession
@@ -33,7 +31,6 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.catalyst.catalog.CatalogTableType._
 import org.apache.spark.sql.hive.HiveUtils
-import org.apache.spark.util.{Utils => SparkUtils}

 /**
  * Spark's own GetTablesOperation
@@ -111,22 +108,8 @@ private[hive] class SparkGetTablesOperation(
         }
       }
       setState(OperationState.FINISHED)
-    } catch {
-      case e: Throwable =>
-        logError(s"Error executing get tables operation with $statementId", e)
-        setState(OperationState.ERROR)
-        e match {
-          case hiveException: HiveSQLException =>
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException))
-            throw hiveException
-          case _ =>
-            val root = ExceptionUtils.getRootCause(e)
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, root.getMessage, SparkUtils.exceptionString(root))
-            throw new HiveSQLException("Error getting tables: " + root.toString, root)
-        }
-    }
+    } catch onError()
+
     HiveThriftServer2.eventManager.onStatementFinish(statementId)
  }
```

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala

Lines changed: 3 additions & 19 deletions
```diff
@@ -19,15 +19,13 @@ package org.apache.spark.sql.hive.thriftserver

 import java.util.UUID

-import org.apache.commons.lang3.exception.ExceptionUtils
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType
-import org.apache.hive.service.cli.{HiveSQLException, OperationState}
+import org.apache.hive.service.cli.OperationState
 import org.apache.hive.service.cli.operation.GetTypeInfoOperation
 import org.apache.hive.service.cli.session.HiveSession

 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SQLContext
-import org.apache.spark.util.{Utils => SparkUtils}

 /**
  * Spark's own GetTypeInfoOperation
@@ -87,22 +85,8 @@ private[hive] class SparkGetTypeInfoOperation(
       rowSet.addRow(rowData)
     })
     setState(OperationState.FINISHED)
-    } catch {
-      case e: Throwable =>
-        logError(s"Error executing get type info with $statementId", e)
-        setState(OperationState.ERROR)
-        e match {
-          case hiveException: HiveSQLException =>
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException))
-            throw hiveException
-          case _ =>
-            val root = ExceptionUtils.getRootCause(e)
-            HiveThriftServer2.eventManager.onStatementError(
-              statementId, root.getMessage, SparkUtils.exceptionString(root))
-            throw new HiveSQLException("Error getting type info: " + root.toString, root)
-        }
-    }
+    } catch onError()
+
     HiveThriftServer2.eventManager.onStatementFinish(statementId)
  }
 }
```

sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala

Lines changed: 13 additions & 1 deletion
```diff
@@ -17,7 +17,7 @@

 package org.apache.spark.sql.hive.thriftserver

-import org.apache.hive.service.cli.OperationState
+import org.apache.hive.service.cli.{HiveSQLException, OperationState}
 import org.apache.hive.service.cli.operation.Operation

 import org.apache.spark.SparkContext
@@ -93,4 +93,16 @@ private[hive] trait SparkOperation extends Operation with Logging {
     case t =>
       throw new IllegalArgumentException(s"Unknown table type is found: $t")
   }
+
+  protected def onError(): PartialFunction[Throwable, Unit] = {
+    case e: Throwable =>
+      logError(s"Error executing get catalogs operation with $statementId", e)
+      super.setState(OperationState.ERROR)
+      HiveThriftServer2.eventManager.onStatementError(
+        statementId, e.getMessage, Utils.exceptionString(e))
+      e match {
+        case _: HiveSQLException => throw e
+        case _ => throw new HiveSQLException(s"Error operating $getType ${e.getMessage}", e)
+      }
+  }
 }
```
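The `} catch onError()` form used throughout the operations above works because Scala's `catch` accepts any `PartialFunction[Throwable, Unit]`, not just an inline `case` block. A minimal standalone sketch of the pattern, with plain JDK exceptions standing in for `HiveSQLException` so it compiles without Hive on the classpath:

```scala
// Sketch of sharing one error handler across many try blocks.
object CatchHandlerSketch {
  def onError(opType: String): PartialFunction[Throwable, Unit] = {
    // Already the "public" exception type: rethrow untouched.
    case e: IllegalStateException => throw e
    case e: Throwable =>
      // Wrap the original exception so the full cause chain survives.
      throw new IllegalStateException(s"Error operating $opType: ${e.getMessage}", e)
  }

  def main(args: Array[String]): Unit = {
    try {
      throw new RuntimeException("boom")
    } catch onError("GET_CATALOGS")
  }
}
```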

sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala

Lines changed: 32 additions & 2 deletions
```diff
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.thriftserver

 import java.io.File
 import java.sql.{DriverManager, Statement}
+import java.util

 import scala.collection.JavaConverters._
 import scala.concurrent.duration._
@@ -27,7 +28,12 @@ import scala.util.Try
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars
 import org.apache.hadoop.hive.ql.metadata.Hive
 import org.apache.hadoop.hive.ql.session.SessionState
-import org.apache.hive.service.cli.thrift.ThriftCLIService
+import org.apache.hive.jdbc.HttpBasicAuthInterceptor
+import org.apache.hive.service.auth.PlainSaslHelper
+import org.apache.hive.service.cli.thrift.{ThriftCLIService, ThriftCLIServiceClient}
+import org.apache.http.impl.client.HttpClientBuilder
+import org.apache.thrift.protocol.TBinaryProtocol
+import org.apache.thrift.transport.{THttpClient, TSocket}

 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.util.Utils
@@ -76,8 +82,9 @@ trait SharedThriftServer extends SharedSparkSession {
     s"jdbc:hive2://localhost:$serverPort/"
   }

+  protected def user: String = System.getProperty("user.name")
+
   protected def withJdbcStatement(fs: (Statement => Unit)*): Unit = {
-    val user = System.getProperty("user.name")
     require(serverPort != 0, "Failed to bind an actual port for HiveThriftServer2")
     val connections =
       fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") }
@@ -91,6 +98,29 @@ trait SharedThriftServer extends SharedSparkSession {
     }
   }

+  protected def withCLIServiceClient(f: ThriftCLIServiceClient => Unit): Unit = {
+    require(serverPort != 0, "Failed to bind an actual port for HiveThriftServer2")
+    val transport = mode match {
+      case ServerMode.binary =>
+        val rawTransport = new TSocket("localhost", serverPort)
+        PlainSaslHelper.getPlainTransport(user, "anonymous", rawTransport)
+      case ServerMode.http =>
+        val interceptor = new HttpBasicAuthInterceptor(
+          user,
+          "anonymous",
+          null, null, true, new util.HashMap[String, String]())
+        new THttpClient(
+          s"http://localhost:$serverPort/cliservice",
+          HttpClientBuilder.create.addInterceptorFirst(interceptor).build())
+    }
+
+    val protocol = new TBinaryProtocol(transport)
+    val client = new ThriftCLIServiceClient(new ThriftserverShimUtils.Client(protocol))
+
+    transport.open()
+    try f(client) finally transport.close()
+  }
+
   private def startThriftServer(attempt: Int): Unit = {
     logInfo(s"Trying to start HiveThriftServer2: mode=$mode, attempt=$attempt")
     val sqlContext = spark.newSession().sqlContext
```
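A hedged sketch of how a test might use the new `withCLIServiceClient` helper. The test name, query, and expected message are illustrative; it assumes a ScalaTest suite that mixes in `SharedThriftServer`, and uses the standard `CLIServiceClient` calls (`openSession`, `executeStatement`):

```scala
import java.util.{HashMap => JHashMap}

import org.apache.hive.service.cli.HiveSQLException

// Inside a ScalaTest suite extending SharedThriftServer:
test("GetOperationStatus error includes the intended message") {
  withCLIServiceClient { client =>
    val sessionHandle = client.openSession(user, "")
    val confOverlay = new JHashMap[String, String]()
    val e = intercept[HiveSQLException] {
      client.executeStatement(
        sessionHandle, "SELECT date_sub(date'2011-11-11', '1.2')", confOverlay)
    }
    // With this change, the message surfaced through GetOperationStatus
    // retains the intentional analysis error, not just the root cause.
    assert(e.getMessage.contains(
      "The second argument of 'date_sub' function needs to be an integer."))
  }
}
```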
