@@ -28,18 +28,20 @@ import org.apache.spark.sql.catalyst.{InternalRow, QueryPlanningTracker}
 import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker
 import org.apache.spark.sql.catalyst.expressions.codegen.ByteCodeStats
 import org.apache.spark.sql.catalyst.plans.QueryPlan
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer}
+import org.apache.spark.sql.catalyst.plans.logical.{CreateTableAsSelect, LogicalPlan, ReturnAnswer}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.catalyst.trees.TreeNodeTag
 import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat
 import org.apache.spark.sql.catalyst.util.truncatedString
 import org.apache.spark.sql.execution.QueryExecution.skipAuthTag
 import org.apache.spark.sql.execution.adaptive.{AdaptiveExecutionContext, EnsureRepartitionForWriting, InsertAdaptiveSparkPlan}
 import org.apache.spark.sql.execution.bucketing.{AdjustScanPartitionSizeDynamically, DisableUnnecessaryBucketedScan}
+import org.apache.spark.sql.execution.command.DataWritingCommand
 import org.apache.spark.sql.execution.dynamicpruning.PlanDynamicPruningFilters
 import org.apache.spark.sql.execution.exchange.{EliminateShuffleExec, EnsureRequirements, ExchangePushDownThroughAggregate}
 import org.apache.spark.sql.execution.reuse.ReuseExchangeAndSubquery
 import org.apache.spark.sql.execution.streaming.{IncrementalExecution, OffsetSeqMetadata}
+import org.apache.spark.sql.expressions.lineage.AttributeLineageUtils
 import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 import org.apache.spark.sql.streaming.OutputMode
 import org.apache.spark.util.Utils
@@ -237,6 +239,7 @@ class QueryExecution(
     if (SQLConf.get.uiPlanWithMetrics) {
       if (tracker.alreadyExecuted) {
         append(tracker.getRealExecutionInfo)
+        return
       }
       append(stringWithStats)
       if (tracker.hasView) {
@@ -245,6 +248,36 @@ class QueryExecution(
       if (tracker.hasTempView) {
         append(tracker.formattedTempViewUsage())
       }
+      if (sparkSession.sessionState.conf.getConfString(
+          "spark.sql.logColumnLineage.enable", "false").equalsIgnoreCase("true")) {
+        if (optimizedPlan.isInstanceOf[DataWritingCommand] ||
+            optimizedPlan.isInstanceOf[CreateTableAsSelect]) {
+          append("\n=== Column Lineage Start ===\n")
+          val startTime = System.currentTimeMillis()
+          var columns = 0
+          try {
+            val (outputColumnNames, query) = optimizedPlan match {
+              case dw: DataWritingCommand => (dw.outputColumnNames, dw.query)
+              case ctas: CreateTableAsSelect => (ctas.query.output.map(_.name), ctas.query)
+            }
+            columns = outputColumnNames.length
+            outputColumnNames.zipWithIndex.foreach { cidx =>
+              val lineages = AttributeLineageUtils.getAttributeOrigins(
+                query, cidx._2, keepOperations = false)
+              if (lineages.nonEmpty) {
+                append(cidx._1 + ": " + lineages.map(_.getColumn).mkString("|") + "\n")
+              }
+            }
+          } catch {
+            case _: Exception =>
+              append("Failed to get the column lineage information\n")
+          }
+          val cost = System.currentTimeMillis() - startTime
+          append(s"Total table columns: $columns columns\n")
+          append(s"Lineage time cost: $cost ms\n")
+          append("=== Column Lineage End ===\n")
+        }
+      }
       append(tracker.formattedRulesByTime())
     } else {
       val (verbose, addSuffix) = (true, false)
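
For context, a minimal sketch of how the new flag could be exercised end to end, assuming this fork's build. The demo object and table names below are hypothetical; spark.sql.logColumnLineage.enable is the flag introduced by this diff, and the lineage block is only appended on the uiPlanWithMetrics branch shown above.

// Minimal sketch, assuming this fork's build. ColumnLineageDemo and the
// table names are hypothetical; spark.sql.logColumnLineage.enable is the
// flag added by this patch (default "false").
import org.apache.spark.sql.SparkSession

object ColumnLineageDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("column-lineage-demo")
      .config("spark.sql.logColumnLineage.enable", "true")
      .getOrCreate()

    spark.range(10).createOrReplaceTempView("src")

    // A CTAS is planned as CreateTableAsSelect (DSv2) or as a
    // DataWritingCommand (DSv1), so either branch of the new code applies.
    spark.sql("CREATE TABLE t USING parquet AS SELECT id AS a, id * 2 AS b FROM src")

    spark.stop()
  }
}

Note that the new block degrades gracefully: any exception thrown while resolving origins through AttributeLineageUtils is caught and replaced by a single failure line, so rendering the plan string never fails because of lineage extraction.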