From 641bb10120784002fbb0f32d8e973f5185012e9f Mon Sep 17 00:00:00 2001
From: erenavsarogullari
Date: Sun, 19 Feb 2017 14:11:34 +0000
Subject: [PATCH 1/3] Add Fair Scheduler Unit Test coverage for different build cases

---
 .../fairscheduler-with-valid-data.xml              | 35 ++++++++++++++
 .../apache/spark/scheduler/PoolSuite.scala         | 46 ++++++++++++++++++-
 docs/job-scheduling.md                             |  2 +-
 3 files changed, 81 insertions(+), 2 deletions(-)
 create mode 100644 core/src/test/resources/fairscheduler-with-valid-data.xml

diff --git a/core/src/test/resources/fairscheduler-with-valid-data.xml b/core/src/test/resources/fairscheduler-with-valid-data.xml
new file mode 100644
index 0000000000000..3d882331835ca
--- /dev/null
+++ b/core/src/test/resources/fairscheduler-with-valid-data.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<allocations>
+    <pool name="pool1">
+        <minShare>3</minShare>
+        <weight>1</weight>
+        <schedulingMode>FIFO</schedulingMode>
+    </pool>
+    <pool name="pool2">
+        <minShare>4</minShare>
+        <weight>2</weight>
+        <schedulingMode>FAIR</schedulingMode>
+    </pool>
+    <pool name="pool3">
+        <minShare>2</minShare>
+        <weight>3</weight>
+        <schedulingMode>FAIR</schedulingMode>
+    </pool>
+</allocations>
\ No newline at end of file
diff --git a/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
index 4901062a78553..e01ef6bdede51 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
@@ -17,6 +17,7 @@
 package org.apache.spark.scheduler
 
+import java.io.FileNotFoundException
 import java.util.Properties
 
 import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite}
@@ -33,7 +34,7 @@ class PoolSuite extends SparkFunSuite with LocalSparkContext {
   val SCHEDULER_ALLOCATION_FILE_PROPERTY = "spark.scheduler.allocation.file"
   val TEST_POOL = "testPool"
 
-  def createTaskSetManager(stageId: Int, numTasks: Int, taskScheduler: TaskSchedulerImpl)
+  private def createTaskSetManager(stageId: Int, numTasks: Int, taskScheduler: TaskSchedulerImpl)
     : TaskSetManager = {
     val tasks = Array.tabulate[Task[_]](numTasks) { i =>
       new FakeTask(stageId, i, Nil)
@@ -292,6 +293,49 @@ class PoolSuite extends SparkFunSuite with LocalSparkContext {
     }
   }
 
+  test("Fair Scheduler should build fair scheduler when " +
+    "valid spark.scheduler.allocation.file property is set") {
+    val xmlPath = getClass.getClassLoader.getResource("fairscheduler-with-valid-data.xml").getFile()
+    val conf = new SparkConf().set(SCHEDULER_ALLOCATION_FILE_PROPERTY, xmlPath)
+    sc = new SparkContext(LOCAL, APP_NAME, conf)
+
+    val rootPool = new Pool("", SchedulingMode.FAIR, 0, 0)
+    val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
+    schedulableBuilder.buildPools()
+
+    verifyPool(rootPool, schedulableBuilder.DEFAULT_POOL_NAME, 0, 1, FIFO)
+    verifyPool(rootPool, "pool1", 3, 1, FIFO)
+    verifyPool(rootPool, "pool2", 4, 2, FAIR)
+    verifyPool(rootPool, "pool3", 2, 3, FAIR)
+  }
+
+  test("Fair Scheduler should use default file(fairscheduler.xml) if it exists in classpath " +
+    "and spark.scheduler.allocation.file property is not set") {
+    val conf = new SparkConf()
+    sc = new SparkContext(LOCAL, APP_NAME, conf)
+
+    val rootPool = new Pool("", SchedulingMode.FAIR, 0, 0)
+    val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
+    schedulableBuilder.buildPools()
+
+    verifyPool(rootPool, schedulableBuilder.DEFAULT_POOL_NAME, 0, 1, FIFO)
+    verifyPool(rootPool, "1", 2, 1, FIFO)
+    verifyPool(rootPool, "2", 3, 1, FIFO)
+    verifyPool(rootPool, "3", 0, 1, FIFO)
+  }
+
+  test("Fair Scheduler should throw FileNotFoundException " +
+    "when invalid spark.scheduler.allocation.file property is set") {
+    val conf = new SparkConf().set(SCHEDULER_ALLOCATION_FILE_PROPERTY, "INVALID_FILE_PATH")
+    sc = new SparkContext(LOCAL, APP_NAME, conf)
+
+    val rootPool = new Pool("", SchedulingMode.FAIR, 0, 0)
+    val schedulableBuilder = new FairSchedulableBuilder(rootPool, sc.conf)
+    intercept[FileNotFoundException] {
+      schedulableBuilder.buildPools()
+    }
+  }
+
   private def verifyPool(rootPool: Pool, poolName: String, expectedInitMinShare: Int,
       expectedInitWeight: Int, expectedSchedulingMode: SchedulingMode): Unit = {
     val selectedPool = rootPool.getSchedulableByName(poolName)
diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md
index 807944f20a78a..c1661132f9b4d 100644
--- a/docs/job-scheduling.md
+++ b/docs/job-scheduling.md
@@ -235,7 +235,7 @@ properties:
   of the cluster. By default, each pool's `minShare` is 0.
 
 The pool properties can be set by creating an XML file, similar to `conf/fairscheduler.xml.template`,
-and setting a `spark.scheduler.allocation.file` property in your
+and either setting `fairscheduler.xml` into classpath or a `spark.scheduler.allocation.file` property in your
 [SparkConf](configuration.html#spark-properties).
 
 {% highlight scala %}

From 77cfb03a82966412e6468edbff358415197c8aaa Mon Sep 17 00:00:00 2001
From: erenavsarogullari
Date: Sat, 22 Jul 2017 00:16:44 +0100
Subject: [PATCH 2/3] Private access modifier is removed from test function.

---
 core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
index e01ef6bdede51..5bd3955f5adbb 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala
@@ -34,7 +34,7 @@ class PoolSuite extends SparkFunSuite with LocalSparkContext {
   val SCHEDULER_ALLOCATION_FILE_PROPERTY = "spark.scheduler.allocation.file"
   val TEST_POOL = "testPool"
 
-  private def createTaskSetManager(stageId: Int, numTasks: Int, taskScheduler: TaskSchedulerImpl)
+  def createTaskSetManager(stageId: Int, numTasks: Int, taskScheduler: TaskSchedulerImpl)
     : TaskSetManager = {
     val tasks = Array.tabulate[Task[_]](numTasks) { i =>
       new FakeTask(stageId, i, Nil)

From 3d6c80b4857b2b776b55516ea5e699e0e470b4a9 Mon Sep 17 00:00:00 2001
From: erenavsarogullari
Date: Sun, 27 Aug 2017 17:36:40 +0100
Subject: [PATCH 3/3] Review comment is addressed.

---
 docs/job-scheduling.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md
index c1661132f9b4d..e6d881639a13b 100644
--- a/docs/job-scheduling.md
+++ b/docs/job-scheduling.md
@@ -235,7 +235,7 @@ properties:
   of the cluster. By default, each pool's `minShare` is 0.
 
 The pool properties can be set by creating an XML file, similar to `conf/fairscheduler.xml.template`,
-and either setting `fairscheduler.xml` into classpath or a `spark.scheduler.allocation.file` property in your
+and either putting a file named `fairscheduler.xml` on the classpath, or setting `spark.scheduler.allocation.file` property in your
 [SparkConf](configuration.html#spark-properties).
 
 {% highlight scala %}
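Note on what these patches exercise: `FairSchedulableBuilder.buildPools()` reads the pool definitions from the file named by `spark.scheduler.allocation.file` when that property is set, otherwise from a `fairscheduler.xml` found on the classpath, and the new test expects a `FileNotFoundException` when the configured path does not exist. A minimal application-side sketch of the configuration the docs change describes is below; the master URL, app name, and file path are illustrative placeholders, not values taken from these patches.

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Enable fair scheduling and point Spark at an explicit allocation file
// (hypothetical path). Leaving spark.scheduler.allocation.file unset instead
// makes Spark fall back to a fairscheduler.xml resource on the classpath.
val conf = new SparkConf()
  .setMaster("local")
  .setAppName("fair-scheduler-example")
  .set("spark.scheduler.mode", "FAIR")
  .set("spark.scheduler.allocation.file", "/path/to/fairscheduler.xml")
val sc = new SparkContext(conf)

// Jobs submitted from this thread then run in "pool1", one of the pools
// declared in the allocation XML (minShare=3, weight=1, FIFO in the fixture above).
sc.setLocalProperty("spark.scheduler.pool", "pool1")
```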