@@ -32,8 +32,8 @@ import org.apache.spark.serializer.SerializerInstance
3232 */
3333class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateMethodTester {
3434
35- // Start a SparkContext so that SparkEnv.get.closureSerializer is accessible
36- // We do not actually use this explicitly except to stop it later
35+ // Start a SparkContext so that the closure serializer is accessible
36+ // We do not actually use this explicitly otherwise
3737 private var sc : SparkContext = null
3838 private var closureSerializer : SerializerInstance = null
3939
@@ -48,7 +48,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
4848 closureSerializer = null
4949 }
5050
51- // Some fields and methods that belong to this class, which is itself not serializable
51+ // Some fields and methods to reference in inner closures later
5252 private val someSerializableValue = 1
5353 private val someNonSerializableValue = new NonSerializable
5454 private def someSerializableMethod () = 1
@@ -86,19 +86,19 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
8686 assertSerializable(closure, serializableBefore)
8787 // If the resulting closure is not serializable even after
8888 // cleaning, we expect ClosureCleaner to throw a SparkException
89- intercept[ SparkException ] {
89+ if (serializableAfter) {
9090 ClosureCleaner .clean(closure, checkSerializable = true , transitive)
91- // Otherwise, if we do expect the closure to be serializable after the
92- // clean, throw the SparkException ourselves so scalatest is happy
93- if (serializableAfter) { throw new SparkException (" no-op" ) }
91+ } else {
92+ intercept[SparkException ] {
93+ ClosureCleaner .clean(closure, checkSerializable = true , transitive)
94+ }
9495 }
9596 assertSerializable(closure, serializableAfter)
9697 }
9798
9899 /**
99100 * Return the fields accessed by the given closure by class.
100- * This also optionally finds the fields transitively referenced through methods
101- * that belong to other classes.
101+ * This also optionally finds the fields transitively referenced through methods invocations.
102102 */
103103 private def findAccessedFields (
104104 closure : AnyRef ,
@@ -211,7 +211,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
211211 val outerObjects2 = getOuterObjects(closure2)
212212 assert(outerClasses1.size === outerObjects1.size)
213213 assert(outerClasses2.size === outerObjects2.size)
214- // These inner closures only reference local variables, and so do not have $outer pointer
214+ // These inner closures only reference local variables, and so do not have $outer pointers
215215 assert(outerClasses1.isEmpty)
216216 assert(outerClasses2.isEmpty)
217217 }
@@ -235,8 +235,8 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
235235 // This closure references the "test2" scope because it needs to find the method `y`
236236 // Scope hierarchy: "test2" < "FunSuite#test" < ClosureCleanerSuite2
237237 assert(outerClasses2.size === 3 )
238- // This closure references the "test2" scope because it needs to find the
239- // `localValue` defined outside of this scope
238+ // This closure references the "test2" scope because it needs to find the `localValue`
239+ // defined outside of this scope
240240 assert(outerClasses3.size === 3 )
241241 assert(isClosure(outerClasses2(0 )))
242242 assert(isClosure(outerClasses3(0 )))
@@ -270,10 +270,12 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
270270 assert(fields2.isEmpty)
271271 assert(fields3.size === 2 )
272272 // This corresponds to the "FunSuite#test" closure. This is empty because the
273- // field `closure3` references belongs to its parent (i.e. ClosureCleanerSuite2)
273+ // `someSerializableValue` belongs to its parent (i.e. ClosureCleanerSuite2).
274274 assert(fields3(outerClasses3(0 )).isEmpty)
275275 // This corresponds to the ClosureCleanerSuite2. This is also empty, however,
276- // because we did not find fields transitively (i.e. beyond 1 enclosing scope)
276+ // because accessing a `ClosureCleanerSuite2#someSerializableValue` actually involves a
277+ // method call. Since we do not find fields transitively, we will not recursively trace
278+ // through the fields referenced by this method.
277279 assert(fields3(outerClasses3(1 )).isEmpty)
278280
279281 val fields1t = findAccessedFields(closure1, outerClasses1, findTransitively = true )
@@ -283,7 +285,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
283285 assert(fields2t.isEmpty)
284286 assert(fields3t.size === 2 )
285287 // Because we find fields transitively now, we are able to detect that we need the
286- // $outer pointer to get the field from the ClosureCleanerSuite2.
288+ // $outer pointer to get the field from the ClosureCleanerSuite2
287289 assert(fields3t(outerClasses3(0 )).size === 1 )
288290 assert(fields3t(outerClasses3(0 )).head === " $outer" )
289291 assert(fields3t(outerClasses3(1 )).size === 1 )
@@ -304,31 +306,32 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
304306 val outerClasses3 = getOuterClasses(closure3)
305307 val outerClasses4 = getOuterClasses(closure4)
306308
307- // First, find only fields the closures directly access
309+ // First, find only fields accessed directly, not transitively, by these closures
308310 val fields1 = findAccessedFields(closure1, outerClasses1, findTransitively = false )
309311 val fields2 = findAccessedFields(closure2, outerClasses2, findTransitively = false )
310312 val fields3 = findAccessedFields(closure3, outerClasses3, findTransitively = false )
311313 val fields4 = findAccessedFields(closure4, outerClasses4, findTransitively = false )
312314 assert(fields1.isEmpty)
313315 // "test1" < "FunSuite#test" < ClosureCleanerSuite2
314316 assert(fields2.size === 3 )
315- assert(fields2(outerClasses2(0 )).isEmpty) // `def a` is not a field
316- assert(fields2(outerClasses2(1 )).isEmpty)
317- assert(fields2(outerClasses2(2 )).isEmpty)
317+ // Since we do not find fields transitively here, we do not look into what `def a` references
318+ assert(fields2(outerClasses2(0 )).isEmpty) // This corresponds to the "test1" scope
319+ assert(fields2(outerClasses2(1 )).isEmpty) // This corresponds to the "FunSuite#test" scope
320+ assert(fields2(outerClasses2(2 )).isEmpty) // This corresponds to the ClosureCleanerSuite2
318321 assert(fields3.size === 3 )
319- // Note that `localValue` is a field of the "test1" closure because `def a` needs it
320- // Further note that it is NOT a field of the "FunSuite#test" closure but a local variable
322+ // Note that `localValue` is a field of the "test1" scope because `def a` references it,
323+ // but NOT a field of the "FunSuite#test" scope because it is only a local variable there
321324 assert(fields3(outerClasses3(0 )).size === 1 )
322325 assert(fields3(outerClasses3(0 )).head.contains(" localValue" ))
323326 assert(fields3(outerClasses3(1 )).isEmpty)
324327 assert(fields3(outerClasses3(2 )).isEmpty)
325328 assert(fields4.size === 3 )
329+ // Because `val someSerializableValue` is an instance variable, even an explicit reference
330+ // here actually involves a method call to access the underlying value of the variable.
331+ // Because we are not finding fields transitively here, we do not consider the fields
332+ // accessed by this "method" (i.e. the val's accessor).
326333 assert(fields4(outerClasses4(0 )).isEmpty)
327334 assert(fields4(outerClasses4(1 )).isEmpty)
328- // Because `someSerializableValue` is a val, even an explicit reference here actually
329- // involves a method call to access the underlying value of the variable. Because we are
330- // not finding fields transitively here, we do not consider the fields accessed by this
331- // "method" (i.e. the val's accessor).
332335 assert(fields4(outerClasses4(2 )).isEmpty)
333336
334337 // Now do the same, but find fields that the closures transitively reference
@@ -338,8 +341,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
338341 val fields4t = findAccessedFields(closure4, outerClasses4, findTransitively = true )
339342 assert(fields1t.isEmpty)
340343 assert(fields2t.size === 3 )
341- // This closure transitively references `localValue` because `def a` uses it
342- assert(fields2t(outerClasses2(0 )).size === 1 )
344+ assert(fields2t(outerClasses2(0 )).size === 1 ) // `def a` references `localValue`
343345 assert(fields2t(outerClasses2(0 )).head.contains(" localValue" ))
344346 assert(fields2t(outerClasses2(1 )).isEmpty)
345347 assert(fields2t(outerClasses2(2 )).isEmpty)
@@ -362,11 +364,11 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
362364 }
363365
364366 test(" clean basic serializable closures" ) {
365- val localSerializableVal = someSerializableValue
367+ val localValue = someSerializableValue
366368 val closure1 = () => 1
367369 val closure2 = () => Array [String ](" a" , " b" , " c" )
368370 val closure3 = (s : String , arr : Array [Long ]) => s + arr.mkString(" , " )
369- val closure4 = () => localSerializableVal
371+ val closure4 = () => localValue
370372 val closure5 = () => new NonSerializable (5 ) // we're just serializing the class information
371373 val closure1r = closure1()
372374 val closure2r = closure2()
@@ -395,7 +397,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
395397 val closure4 = () => someNonSerializableValue
396398 val closure2 = () => someNonSerializableMethod()
397399
398- // These are not cleanable because they ultimately reference the `this` pointer
400+ // These are not cleanable because they ultimately reference the ClosureCleanerSuite2
399401 testClean(closure1, serializableBefore = false , serializableAfter = false )
400402 testClean(closure2, serializableBefore = false , serializableAfter = false )
401403 testClean(closure3, serializableBefore = false , serializableAfter = false )
@@ -404,13 +406,13 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
404406 }
405407
406408 test(" clean basic nested serializable closures" ) {
407- val localSerializableValue = someSerializableValue
409+ val localValue = someSerializableValue
408410 val closure1 = (i : Int ) => {
409- (1 to i).map { x => x + localSerializableValue } // 1 level of nesting
411+ (1 to i).map { x => x + localValue } // 1 level of nesting
410412 }
411413 val closure2 = (j : Int ) => {
412414 (1 to j).flatMap { x =>
413- (1 to x).map { y => y + localSerializableValue } // 2 levels
415+ (1 to x).map { y => y + localValue } // 2 levels
414416 }
415417 }
416418 val closure3 = (k : Int , l : Int , m : Int ) => {
@@ -426,6 +428,7 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
426428 testClean(closure2, serializableBefore = true , serializableAfter = true )
427429 testClean(closure3, serializableBefore = true , serializableAfter = true )
428430
431+ // Verify that closures can still be invoked and the result still the same
429432 assert(closure1(1 ) === closure1r)
430433 assert(closure2(2 ) === closure2r)
431434 assert(closure3(3 , 4 , 5 ) === closure3r)
@@ -434,9 +437,12 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
434437 test(" clean basic nested non-serializable closures" ) {
435438 def localSerializableMethod () = someSerializableValue
436439 val localNonSerializableValue = someNonSerializableValue
440+ // These closures ultimately reference the ClosureCleanerSuite2
441+ // Note that even accessing `val` that is an instance variable involves a method call
437442 val closure1 = (i : Int ) => { (1 to i).map { x => x + someSerializableValue } }
438443 val closure2 = (j : Int ) => { (1 to j).map { x => x + someSerializableMethod() } }
439444 val closure4 = (k : Int ) => { (1 to k).map { x => x + localSerializableMethod() } }
445+ // This closure references a local non-serializable value
440446 val closure3 = (l : Int ) => { (1 to l).map { x => localNonSerializableValue } }
441447 // This is non-serializable no matter how many levels we nest it
442448 val closure5 = (m : Int ) => {
@@ -457,15 +463,18 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
457463 }
458464
459465 test(" clean complicated nested serializable closures" ) {
460- val localSerializableValue = someSerializableValue
466+ val localValue = someSerializableValue
467+
468+ // Here we assume that if the outer closure is serializable,
469+ // then all inner closures must also be serializable
461470
462471 // Reference local fields from all levels
463472 val closure1 = (i : Int ) => {
464473 val a = 1
465474 (1 to i).flatMap { x =>
466475 val b = a + 1
467476 (1 to x).map { y =>
468- y + a + b + localSerializableValue
477+ y + a + b + localValue
469478 }
470479 }
471480 }
@@ -479,8 +488,8 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
479488 def b2 = a2 + 1
480489 (1 to x).map { y =>
481490 // If this references a method outside the outermost closure, then it will try to pull
482- // in the ClosureCleanerSuite2. This is why `localSerializableValue ` here must be a val.
483- y + a1 + a2 + b1 + b2 + localSerializableValue
491+ // in the ClosureCleanerSuite2. This is why `localValue ` here must be a local ` val` .
492+ y + a1 + a2 + b1 + b2 + localValue
484493 }
485494 }
486495 }
@@ -494,13 +503,13 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
494503 }
495504
496505 test(" clean complicated nested non-serializable closures" ) {
497- val localSerializableValue = someSerializableValue
506+ val localValue = someSerializableValue
498507
499- // Note that we are not interested in cleaning the outer closures here
508+ // Note that we are not interested in cleaning the outer closures here (they are not cleanable)
500509 // The only reason why they exist is to nest the inner closures
501510
502511 val test1 = () => {
503- val a = localSerializableValue
512+ val a = localValue
504513 val b = sc
505514 val inner1 = (x : Int ) => x + a + b.hashCode()
506515 val inner2 = (x : Int ) => x + a
@@ -509,15 +518,15 @@ class ClosureCleanerSuite2 extends FunSuite with BeforeAndAfterAll with PrivateM
509518 // There is no way to clean it
510519 testClean(inner1, serializableBefore = false , serializableAfter = false )
511520
512- // This closure is serializable to begin with since
513- // it does not have a pointer to the outer closure
521+ // This closure is serializable to begin with since it does not need a pointer to
522+ // the outer closure (it only references local variables)
514523 testClean(inner2, serializableBefore = true , serializableAfter = true )
515524 }
516525
517526 // Same as above, but the `val a` becomes `def a`
518527 // The difference here is that all inner closures now have pointers to the outer closure
519528 val test2 = () => {
520- def a = localSerializableValue
529+ def a = localValue
521530 val b = sc
522531 val inner1 = (x : Int ) => x + a + b.hashCode()
523532 val inner2 = (x : Int ) => x + a
0 commit comments