@@ -19,35 +19,46 @@ package org.apache.spark.sql.execution.datasources.v2
 
 import org.apache.spark.{SparkException, TaskContext}
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.{Dataset, Row, SparkSession}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder}
-import org.apache.spark.sql.catalyst.plans.QueryPlan
+import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.execution.command.RunnableCommand
+import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.sources.v2.writer._
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.Utils
 
-case class WriteToDataSourceV2Command(writer: DataSourceV2Writer, query: LogicalPlan)
-  extends RunnableCommand {
+/**
+ * The logical plan for writing data into data source v2.
+ */
+case class WriteToDataSourceV2(writer: DataSourceV2Writer, query: LogicalPlan) extends LogicalPlan {
+  override def children: Seq[LogicalPlan] = Seq(query)
+  override def output: Seq[Attribute] = Nil
+}
 
-  override protected def innerChildren: Seq[QueryPlan[_]] = Seq(query)
+/**
+ * The physical plan for writing data into data source v2.
+ */
+case class WriteToDataSourceV2Exec(writer: DataSourceV2Writer, query: SparkPlan) extends SparkPlan {
+  override def children: Seq[SparkPlan] = Seq(query)
+  override def output: Seq[Attribute] = Nil
 
-  override def run(sparkSession: SparkSession): Seq[Row] = {
+  override protected def doExecute(): RDD[InternalRow] = {
     val writeTask = writer match {
       case w: SupportsWriteInternalRow => w.createInternalRowWriterFactory()
       case _ => new RowToInternalRowDataWriterFactory(writer.createWriterFactory(), query.schema)
     }
 
-    val rdd = Dataset.ofRows(sparkSession, query).queryExecution.toRdd
+    val rdd = query.execute()
     val messages = new Array[WriterCommitMessage](rdd.partitions.length)
 
     logInfo(s"Start processing data source writer: $writer. " +
       s"The input RDD has ${messages.length} partitions.")
 
     try {
-      sparkSession.sparkContext.runJob(
+      sparkContext.runJob(
         rdd,
         (context: TaskContext, iter: Iterator[InternalRow]) =>
           DataWritingSparkTask.run(writeTask, context, iter),
@@ -73,7 +84,7 @@ case class WriteToDataSourceV2Command(writer: DataSourceV2Writer, query: Logical
         throw new SparkException("Writing job aborted.", cause)
     }
 
-    Nil
+    sparkContext.emptyRDD
   }
 }
 
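Note, not part of the diff above: splitting the old `RunnableCommand` into a logical node plus a physical node means a planner rule must map `WriteToDataSourceV2` to `WriteToDataSourceV2Exec`. Below is a minimal sketch of such a mapping using the standard `Strategy` / `planLater` API; the object name and placement are hypothetical, and the actual rule in this change (likely part of the data source v2 strategy) may differ.

```scala
import org.apache.spark.sql.Strategy
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.datasources.v2.{WriteToDataSourceV2, WriteToDataSourceV2Exec}

// Sketch only: a planner Strategy that turns the logical write node into its
// physical counterpart. The name WriteToDataSourceV2Strategy is illustrative.
object WriteToDataSourceV2Strategy extends Strategy {
  override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
    case WriteToDataSourceV2(writer, query) =>
      // Defer planning of the child query with planLater, then wrap it in the exec node.
      WriteToDataSourceV2Exec(writer, planLater(query)) :: Nil
    case _ => Nil
  }
}
```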