From e66e6108b2fea2d724d4302d5d3fab8cc75e05f7 Mon Sep 17 00:00:00 2001 From: Nihar Sheth Date: Fri, 13 Jul 2018 13:37:59 -0700 Subject: [PATCH 1/2] [MINOR][CORE] Add test cases for RDD.cartesian The scala code for RDD.cartesian does not have any tests for correctness. This adds a couple of basic tests to verify cartesian yields correct values. Passes the added test cases, and passes the scala style tests. Author: Nihar Sheth --- .../scala/org/apache/spark/rdd/RDDSuite.scala | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index 5148ce05bd918..6f76f013003b3 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -443,7 +443,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext { map{x => List(x)}.toList, "Tried coalescing 9 partitions to 20 but didn't get 9 back") } - test("coalesced RDDs with partial locality") { + test("coalesced RDDs with partial locality") { // Make an RDD that has some locality preferences and some without. 
This can happen // with UnionRDD val data = sc.makeRDD((1 to 9).map(i => { @@ -846,6 +846,28 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext { assert(partitions(1) === Seq((1, 3), (3, 8), (3, 8))) } + test("cartesian on empty RDD") { + val a = sc.makeRDD(Array[Int]()) + val b = sc.parallelize(1 to 3) + val cartesian_result = Array[(Int, Int)]() + assert(a.cartesian(a).collect().toList === cartesian_result) + assert(a.cartesian(b).collect().toList === cartesian_result) + assert(b.cartesian(a).collect().toList === cartesian_result) + } + + test("cartesian on non-empty RDDs") { + val a = sc.parallelize(1 to 3) + val b = sc.parallelize(2 to 4) + val c = sc.parallelize(1 to 1) + val a_cartesian_b = + Array((1, 2), (1, 3), (1, 4), (2, 2), (2, 3), (2, 4), (3, 2), (3, 3), (3, 4)) + val a_cartesian_c = Array((1, 1), (2, 1), (3, 1)) + val c_cartesian_a = Array((1, 1), (1, 2), (1, 3)) + assert(a.cartesian[Int](b).collect().toList.sorted === a_cartesian_b) + assert(a.cartesian[Int](c).collect().toList.sorted === a_cartesian_c) + assert(c.cartesian[Int](a).collect().toList.sorted === c_cartesian_a) + } + test("intersection") { val all = sc.parallelize(1 to 10) val evens = sc.parallelize(2 to 10 by 2) From e5f469a0b83d35b8735eeba30dfca4fe0320810b Mon Sep 17 00:00:00 2001 From: Nihar Sheth Date: Tue, 17 Jul 2018 13:30:27 -0700 Subject: [PATCH 2/2] Changed to sc.emptyRDD and Array.empty for consistency with rest of file --- core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index 6f76f013003b3..b143a468a1baf 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -847,9 +847,9 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext { } test("cartesian on empty RDD") { - val a = 
sc.makeRDD(Array[Int]()) + val a = sc.emptyRDD[Int] val b = sc.parallelize(1 to 3) - val cartesian_result = Array[(Int, Int)]() + val cartesian_result = Array.empty[(Int, Int)] assert(a.cartesian(a).collect().toList === cartesian_result) assert(a.cartesian(b).collect().toList === cartesian_result) assert(b.cartesian(a).collect().toList === cartesian_result)