Commit c35e50e

wakun authored, GitHub Enterprise committed

[CARMEL-6434] Expose active sql related metrics from driver (#1171)

* [CARMEL-6434] Expose active sql related metrics from driver
* Use atomic variable instead of int
* Count download data and upload data queries

1 parent a99b6e3 · commit c35e50e

File tree

6 files changed, +40 −3 lines changed

core/src/main/scala/org/apache/spark/WorkQueueSource.scala

Lines changed: 20 additions & 0 deletions

@@ -184,6 +184,12 @@ class WorkQueueSource(
       }
     })
 
+    metricRegistry.register(MetricRegistry.name("task-scheduler.runningTasks"), new Gauge[Int] {
+      override def getValue: Int = {
+        taskScheduler.get.taskSummary().runningTasks
+      }
+    })
+
     dagScheduler.map { scheduler =>
       scheduler.stageRetryCount.indices.foreach { idx =>
         metricRegistry.register(MetricRegistry.name(s"task-scheduler.stage-retry-count-${idx + 1}"),
@@ -195,6 +201,20 @@ class WorkQueueSource(
       }
     }
 
+    metricRegistry.register(MetricRegistry.name(s"task-scheduler.activeSessions"),
+      new Gauge[Int] {
+        override def getValue: Int = {
+          TaskSchedulerImpl.activeSessions.get()
+        }
+      })
+
+    metricRegistry.register(MetricRegistry.name(s"task-scheduler.activeSqls"),
+      new Gauge[Int] {
+        override def getValue: Int = {
+          TaskSchedulerImpl.activeSqls.get()
+        }
+      })
+
  }
 
  if (schedulerBackend.isDefined) {

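All three gauges follow the standard Dropwizard (Codahale) metrics pattern that Spark Source classes use: register once, and let getValue re-read the backing state on every poll, so whatever sink scrapes the registry always sees a live value. A minimal self-contained sketch of that pattern; only MetricRegistry and Gauge come from the metrics library, while GaugeSketch and its local counter are illustrative stand-ins for the TaskSchedulerImpl fields:

import java.util.concurrent.atomic.AtomicInteger

import com.codahale.metrics.{Gauge, MetricRegistry}

object GaugeSketch {
  // Stand-in for TaskSchedulerImpl.activeSessions.
  private val activeSessions = new AtomicInteger(0)

  def main(args: Array[String]): Unit = {
    val registry = new MetricRegistry

    // Same shape as the patch: the gauge re-reads the counter on every
    // poll, so a reporter always observes the current value.
    registry.register(MetricRegistry.name("task-scheduler.activeSessions"),
      new Gauge[Int] {
        override def getValue: Int = activeSessions.get()
      })

    activeSessions.incrementAndGet()
    val gauge = registry.getGauges.get("task-scheduler.activeSessions")
    println(gauge.getValue) // prints 1
  }
}

Because the gauge holds a reference to the AtomicInteger rather than a snapshot, nothing needs to be re-registered as queries and sessions come and go.
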
core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala

Lines changed: 2 additions & 0 deletions

@@ -1552,4 +1552,6 @@ private[spark] object TaskSchedulerImpl {
    }
  }
 
+  var activeSqls: AtomicInteger = new AtomicInteger(0)
+  var activeSessions: AtomicInteger = new AtomicInteger(0)
 }

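The commit message calls out "use atomic variable instead of int": these counters are mutated concurrently by many Thrift server handler threads, where a plain var would silently lose updates because read-modify-write on an Int is not atomic. A small sketch of the difference (AtomicCounterSketch and its names are illustrative):

import java.util.concurrent.atomic.AtomicInteger

object AtomicCounterSketch {
  def main(args: Array[String]): Unit = {
    var plain = 0                      // racy read-modify-write
    val atomic = new AtomicInteger(0)  // lock-free CAS, always exact

    val threads = (1 to 8).map { _ =>
      new Thread(() => {
        for (_ <- 1 to 100000) {
          plain += 1
          atomic.incrementAndGet()
        }
      })
    }
    threads.foreach(_.start())
    threads.foreach(_.join())

    println(s"plain:  $plain")          // typically well below 800000
    println(s"atomic: ${atomic.get()}") // always 800000
  }
}

A side note on the declaration: the fields are var, but since the AtomicInteger itself is mutable and the reference never changes, val would arguably express the intent more precisely.
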
sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkDownloadDataOperation.scala

Lines changed: 4 additions & 0 deletions

@@ -39,6 +39,7 @@ import org.apache.hive.service.cli.session.HiveSession
 
 import org.apache.spark.SparkContext
 import org.apache.spark.internal.{Logging, QueryLogging}
+import org.apache.spark.scheduler.TaskSchedulerImpl
 import org.apache.spark.sql.{Column, DataFrame, SaveMode, SQLContext}
 import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
 import org.apache.spark.sql.execution._
@@ -237,6 +238,7 @@ private[hive] class SparkDownloadDataOperation(
 
     logInfo(s"Running query [$statementId] in session " +
       s"[${parentSession.getSessionHandle.getSessionId.toString}] DOWNLOAD '$query'")
+    TaskSchedulerImpl.activeSqls.incrementAndGet()
     val resultPath = if (keepDataType) {
       writeDataKeepDataType(new Path(pathPrefix, statementId))
     } else {
@@ -301,6 +303,8 @@ private[hive] class SparkDownloadDataOperation(
         statementId, Utils.findFirstCause(e).toString, Utils.exceptionString(e))
       val exception = new HiveSQLException(e)
       setOperationException(exception)
+    } finally {
+      TaskSchedulerImpl.activeSqls.decrementAndGet()
     }
   }

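Here the increment lands right after the query is logged, and the matching decrement rides on a finally clause added to the method's existing try/catch, so a failed download cannot leave the gauge stuck high. The same discipline could be packaged once rather than repeated per call site; a hypothetical helper sketch (ActiveCounter and tracked are illustrative names, not part of this patch):

import java.util.concurrent.atomic.AtomicInteger

object ActiveCounter {
  // Bump the counter, run the work, and always decrement, even on
  // failure. This mirrors the increment/finally pairing in the patch.
  def tracked[T](counter: AtomicInteger)(body: => T): T = {
    counter.incrementAndGet()
    try {
      body
    } finally {
      counter.decrementAndGet()
    }
  }
}

// Hypothetical usage at the download call site:
//   ActiveCounter.tracked(TaskSchedulerImpl.activeSqls) {
//     writeDataKeepDataType(new Path(pathPrefix, statementId))
//   }
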
sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala

Lines changed: 3 additions & 1 deletion

@@ -36,7 +36,7 @@ import org.apache.hive.service.cli.session.HiveSession
 import org.apache.spark.SparkContext
 import org.apache.spark.broadcast.CorruptBroadcastException
 import org.apache.spark.internal.{config, Logging, QueryLogging}
-import org.apache.spark.scheduler.{RepeatableIterator, SpilledResultIterator, UserInfo}
+import org.apache.spark.scheduler.{RepeatableIterator, SpilledResultIterator, TaskSchedulerImpl, UserInfo}
 import org.apache.spark.sql.{DataFrame, Row => SparkRow, SQLContext}
 import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
 import org.apache.spark.sql.catalyst.plans.logical.{Command, ParsedStatement}
@@ -335,6 +335,7 @@ private[hive] class SparkExecuteStatementOperation(
     sqlContext.sparkContext.userResourceManager.
       foreach(_.requestQuery(UserInfo(userName, profile, None)))
 
+    TaskSchedulerImpl.activeSqls.incrementAndGet()
     withRetry {
       result = sqlContext.sql(statement)
       logDebug(result.queryExecution.toString())
@@ -385,6 +386,7 @@ private[hive] class SparkExecuteStatementOperation(
         }
       }
     } finally {
+      TaskSchedulerImpl.activeSqls.decrementAndGet()
       synchronized {
         if (!getStatus.getState.isTerminal) {
           setState(OperationState.FINISHED)

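Note that the increment sits just before withRetry, so a retried statement counts once for its whole lifetime rather than once per attempt. Once registered, the gauges are visible to whatever sink Spark's metrics system is configured with; purely as an illustration, any Dropwizard reporter can poll the same registry. A sketch with ConsoleReporter (not how Spark wires its sinks in production, which goes through metrics.properties):

import java.util.concurrent.TimeUnit

import com.codahale.metrics.{ConsoleReporter, MetricRegistry}

object ReporterSketch {
  def main(args: Array[String]): Unit = {
    val registry = new MetricRegistry
    // ... gauges registered as in WorkQueueSource ...

    // Poll and print every metric in the registry every 10 seconds.
    val reporter = ConsoleReporter.forRegistry(registry)
      .convertRatesTo(TimeUnit.SECONDS)
      .convertDurationsTo(TimeUnit.MILLISECONDS)
      .build()
    reporter.start(10, TimeUnit.SECONDS)
  }
}
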
sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala

Lines changed: 3 additions & 1 deletion

@@ -30,7 +30,7 @@ import org.apache.hive.service.server.HiveServer2
 
 import org.apache.spark.SparkContext
 import org.apache.spark.internal.Logging
-import org.apache.spark.scheduler.UserInfo
+import org.apache.spark.scheduler.{TaskSchedulerImpl, UserInfo}
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
 import org.apache.spark.sql.hive.HiveUtils
@@ -107,6 +107,7 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext:
       }
     }
     sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx)
+    TaskSchedulerImpl.activeSessions.incrementAndGet()
     logInfo(s"Successfully open session ${sessionHandle.getSessionId.toString}")
     sessionHandle
   } catch {
@@ -135,6 +136,7 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext:
     ctx.clearTempTables()
     super.closeSession(sessionHandle)
     sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
+    TaskSchedulerImpl.activeSessions.decrementAndGet()
 
     // Remove temporary folder generated by data upload operation
     val dataUploadBaseDir = Some(Utils.resolveURI(

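The session counter is the mirror image of the SQL counter: incremented once a session is fully opened and registered, decremented when it is closed, so the gauge equals the number of currently open Thrift sessions. A compact sketch of the pairing (SessionCounterSketch is a simplified illustration, not the actual class):

import java.util.concurrent.atomic.AtomicInteger

// Simplified sketch of the open/close pairing; the real class also
// manages SQLContexts, temp folders, and the underlying Hive session.
class SessionCounterSketch {
  private val activeSessions = new AtomicInteger(0)

  def openSession(): Unit = {
    // ... create and register the session's SQLContext ...
    activeSessions.incrementAndGet() // counted only once fully opened
  }

  def closeSession(): Unit = {
    // ... clear temp tables, close the underlying session ...
    activeSessions.decrementAndGet()
  }

  def current: Int = activeSessions.get()
}

One caveat visible in the hunks: the decrement runs only after super.closeSession(sessionHandle) returns, so an exception there would leave the gauge one too high; whether that matters depends on how close failures are handled upstream.
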
sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkTransferDataOperation.scala

Lines changed: 8 additions & 1 deletion

@@ -30,6 +30,7 @@ import org.apache.hive.service.cli.operation.TransferDataOperation
 import org.apache.hive.service.cli.session.HiveSession
 
 import org.apache.spark.internal.{Logging, QueryLogging}
+import org.apache.spark.scheduler.TaskSchedulerImpl
 import org.apache.spark.sql.{DataFrame, SQLContext}
 import org.apache.spark.sql.hive.thriftserver.errors.QueryLevelRestrictionErrors
 import org.apache.spark.util.Utils
@@ -83,7 +84,13 @@ private[hive] class SparkTransferDataOperation(
     if(!fileSystem.exists(new Path(sessionPath))) {
       fileSystem.mkdirs(new Path(sessionPath))
     }
-    persistData(fileSystem, hadoopConf, new Path(getDataPath(dataUploadBaseDir, sessionId, path)))
+    try {
+      TaskSchedulerImpl.activeSqls.incrementAndGet()
+      val dataPath = new Path(getDataPath(dataUploadBaseDir, sessionId, path))
+      persistData(fileSystem, hadoopConf, dataPath)
+    } finally {
+      TaskSchedulerImpl.activeSqls.decrementAndGet()
+    }
   }
 
   private def persistData(fileSystem: FileSystem, hadoopConf: Configuration,

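Unlike the other call sites, the upload path wraps the increment and the work in a fresh try block, with the increment as the first statement inside it. That is still balanced, because AtomicInteger.incrementAndGet cannot throw, so every entry into the try is matched by the finally. A quick check of that property with a failing stand-in for persistData (BalanceCheck is illustrative):

import java.util.concurrent.atomic.AtomicInteger

object BalanceCheck {
  def main(args: Array[String]): Unit = {
    val activeSqls = new AtomicInteger(0)
    try {
      try {
        activeSqls.incrementAndGet()
        throw new RuntimeException("persistData failed") // simulated failure
      } finally {
        activeSqls.decrementAndGet()
      }
    } catch {
      case _: RuntimeException => // swallowed for this check
    }
    assert(activeSqls.get() == 0) // balanced: no leaked "active" query
    println(s"activeSqls after failure: ${activeSqls.get()}")
  }
}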