Skip to content

Commit 88478b7

Browse files
committed
WriteTaskStatsTracker should know which file the row is written to
1 parent 482b43d commit 88478b7

File tree

3 files changed

+5
-4
lines changed

3 files changed

+5
-4
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ class BasicWriteTaskStatsTracker(hadoopConf: Configuration)
151151
}
152152
}
153153

154-
override def newRow(row: InternalRow): Unit = {
154+
override def newRow(filePath: String, row: InternalRow): Unit = {
155155
numRows += 1
156156
}
157157

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ class SingleDirectoryDataWriter(
157157
}
158158

159159
currentWriter.write(record)
160-
statsTrackers.foreach(_.newRow(record))
160+
statsTrackers.foreach(_.newRow(currentWriter.path, record))
161161
recordsInFile += 1
162162
}
163163
}
@@ -301,7 +301,7 @@ abstract class BaseDynamicPartitionDataWriter(
301301
protected def writeRecord(record: InternalRow): Unit = {
302302
val outputRow = getOutputRow(record)
303303
currentWriter.write(outputRow)
304-
statsTrackers.foreach(_.newRow(outputRow))
304+
statsTrackers.foreach(_.newRow(currentWriter.path, outputRow))
305305
recordsInFile += 1
306306
}
307307
}

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriteStatsTracker.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,10 @@ trait WriteTaskStatsTracker {
5959
* Process the fact that a new row to update the tracked statistics accordingly.
6060
* @note Keep in mind that any overhead here is per-row, obviously,
6161
* so implementations should be as lightweight as possible.
62+
* @param filePath Path of the file which the row is written to.
6263
* @param row Current data row to be processed.
6364
*/
64-
def newRow(row: InternalRow): Unit
65+
def newRow(filePath: String, row: InternalRow): Unit
6566

6667
/**
6768
* Returns the final statistics computed so far.

0 commit comments

Comments
 (0)