@@ -31,6 +31,21 @@ import org.apache.spark.scheduler._
3131import org .apache .spark .storage ._
3232import org .apache .spark ._
3333
34+ /**
35+ * Serializes SparkListener events to/from JSON. This protocol provides strong backwards-
36+ * and forwards-compatibility guarantees: any version of Spark should be able to read JSON output
37+ * written by any other version, including newer versions.
38+ *
39+ * JsonProtocolSuite contains backwards-compatibility tests which check that the current version of
40+ * JsonProtocol is able to read output written by earlier versions. We do not currently have tests
41+ * for reading newer JSON output with older Spark versions.
42+ *
43+ * To ensure that we provide these guarantees, follow these rules when modifying these methods:
44+ *
45+ * - Never delete any JSON fields.
46+ * - Any new JSON fields should be optional; use `Utils.jsonOption` when reading these fields
47+ * in `*FromJson` methods.
48+ */
3449private [spark] object JsonProtocol {
3550 // TODO: Remove this file and put JSON serialization into each individual class.
3651
@@ -121,8 +136,8 @@ private[spark] object JsonProtocol {
121136 val properties = propertiesToJson(jobStart.properties)
122137 (" Event" -> Utils .getFormattedClassName(jobStart)) ~
123138 (" Job ID" -> jobStart.jobId) ~
124- // ("Stage IDs " -> jobStart.stageIds) ~ // Removed in 1.2.0
125- (" Stage Infos " -> jobStart.stageInfos.map(stageInfoToJson)) ~ // Added in 1.2.0
139+ (" Stage Infos " -> jobStart.stageInfos.map(stageInfoToJson)) ~ // Added in Spark 1.2.0
140+ (" Stage IDs " -> jobStart.stageIds) ~
126141 (" Properties" -> properties)
127142 }
128143
@@ -454,20 +469,13 @@ private[spark] object JsonProtocol {
454469
455470 def jobStartFromJson (json : JValue ): SparkListenerJobStart = {
456471 val jobId = (json \ " Job ID" ).extract[Int ]
472+ val stageIds = (json \ " Stage IDs" ).extract[List [JValue ]].map(_.extract[Int ])
457473 val properties = propertiesFromJson(json \ " Properties" )
458- val stageInfos = {
459- // Prior to 1.2.0, we serialized stageIds but not stageInfos; in 1.2.0, we do the opposite.
460- // This block of code handles backwards compatibility:
461- val stageIds : Option [Seq [Int ]] =
462- Utils .jsonOption(json \ " Stage IDs" ).map(_.extract[List [JValue ]].map(_.extract[Int ]))
463- if (stageIds.isDefined) { // Reading JSON written prior to 1.2.0
464- stageIds.get.map(id => new StageInfo (id, 0 , " unknown" , 0 , Seq .empty, " unknown" ))
465- } else { // Reading JSON written after 1.2.0
466- Utils .jsonOption(json \ " Stage Infos" )
467- .map(_.extract[Seq [JValue ]].map(stageInfoFromJson)).getOrElse(Seq .empty)
474+ // The "Stage Infos" field was added in Spark 1.2.0; when it is missing, build placeholder StageInfos from "Stage IDs"
475+ val stageInfos = Utils .jsonOption(json \ " Stage Infos" )
476+ .map(_.extract[Seq [JValue ]].map(stageInfoFromJson)).getOrElse {
477+ stageIds.map(id => new StageInfo (id, 0 , " unknown" , 0 , Seq .empty, " unknown" ))
468478 }
469- }
470-
471479 SparkListenerJobStart (jobId, stageInfos, properties)
472480 }
473481
0 commit comments