Skip to content

Commit b638592

Browse files
committed
checking the OperatorStateMetadata log for the state schema file
1 parent 6c90c9f commit b638592

File tree

3 files changed

+23
-2
lines changed

3 files changed

+23
-2
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -214,6 +214,7 @@ class IncrementalExecution(
214214
val metadata = stateStoreWriter.operatorStateMetadata()
215215
stateStoreWriter match {
216216
case tws: TransformWithStateExec =>
217+
logError(s"### checkpointLocation: $checkpointLocation")
217218
val metadataPath = OperatorStateMetadataV2.metadataFilePath(new Path(
218219
checkpointLocation, tws.getStateInfo.operatorId.toString))
219220
val operatorStateMetadataLog = new OperatorStateMetadataLog(sparkSession,

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OperatorStateMetadataLog.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,8 @@ class OperatorStateMetadataLog(
5151
case 1 =>
5252
OperatorStateMetadataV1.serialize(fsDataOutputStream, metadata)
5353
case 2 =>
54+
logError(s"### stateSchemaPath: ${metadata.asInstanceOf[OperatorStateMetadataV2].
55+
stateStoreInfo.head.stateSchemaFilePath}")
5456
OperatorStateMetadataV2.serialize(fsDataOutputStream, metadata)
5557
}
5658
}

sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateExec.scala

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -380,19 +380,37 @@ case class TransformWithStateExec(
380380
)
381381
}
382382

383+
private def fetchOperatorStateMetadataLog(
384+
hadoopConf: Configuration,
385+
checkpointDir: String,
386+
operatorId: Long): OperatorStateMetadataLog = {
387+
val checkpointPath = new Path(checkpointDir, operatorId.toString)
388+
val operatorStateMetadataPath = OperatorStateMetadataV2.metadataFilePath(checkpointPath)
389+
new OperatorStateMetadataLog(hadoopConf, operatorStateMetadataPath.toString)
390+
}
391+
383392
override def validateAndMaybeEvolveStateSchema(
384393
hadoopConf: Configuration,
385394
batchId: Long,
386395
stateSchemaVersion: Int): Array[String] = {
387396
assert(stateSchemaVersion >= 3)
388-
val newColumnFamilySchemas = getColFamilySchemas()
397+
val newSchemas = getColFamilySchemas()
389398
val schemaFile = new StateSchemaV3File(
390399
hadoopConf, stateSchemaDirPath(StateStoreId.DEFAULT_STORE_NAME).toString)
391400
// TODO: Read the schema path from the OperatorStateMetadata file
392401
// and validate it with the new schema
393402

403+
val operatorStateMetadataLog = fetchOperatorStateMetadataLog(
404+
hadoopConf, getStateInfo.checkpointLocation, getStateInfo.operatorId)
405+
val mostRecentLog = operatorStateMetadataLog.getLatest()
406+
val oldSchemas = mostRecentLog.map(_._2.asInstanceOf[OperatorStateMetadataV2])
407+
.map(_.stateStoreInfo.map(_.stateSchemaFilePath)).getOrElse(Array.empty)
408+
.flatMap { schemaPath =>
409+
schemaFile.getWithPath(new Path(schemaPath))
410+
}.toList
411+
validateSchemas(oldSchemas, newSchemas)
394412
// Write the new schema to the schema file
395-
val schemaPath = schemaFile.addWithUUID(batchId, newColumnFamilySchemas.values.toList)
413+
val schemaPath = schemaFile.addWithUUID(batchId, newSchemas.values.toList)
396414
Array(schemaPath.toString)
397415
}
398416

0 commit comments

Comments (0)