From 0fb4499f00ae4e65a181f8cc9c2128640227ba66 Mon Sep 17 00:00:00 2001
From: Chris Cope <ccope@resilientscience.com>
Date: Fri, 8 Aug 2014 16:43:23 -0400
Subject: [PATCH 1/2] [SPARK-1766] sorted functions to meet pedantic
 requirements

---
 .../apache/spark/rdd/PairRDDFunctions.scala   | 38 +++++++++----------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index 93af50c0a9cd1..5dd6472b0776c 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -237,6 +237,25 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
     combineByKey[V]((v: V) => v, func, func, partitioner)
   }
 
+  /**
+   * Merge the values for each key using an associative reduce function. This will also perform
+   * the merging locally on each mapper before sending results to a reducer, similarly to a
+   * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
+   */
+  def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = {
+    reduceByKey(new HashPartitioner(numPartitions), func)
+  }
+
+  /**
+   * Merge the values for each key using an associative reduce function. This will also perform
+   * the merging locally on each mapper before sending results to a reducer, similarly to a
+   * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
+   * parallelism level.
+   */
+  def reduceByKey(func: (V, V) => V): RDD[(K, V)] = {
+    reduceByKey(defaultPartitioner(self), func)
+  }
+
   /**
    * Merge the values for each key using an associative reduce function, but return the results
    * immediately to the master as a Map. This will also perform the merging locally on each mapper
@@ -374,15 +393,6 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
     countApproxDistinctByKey(relativeSD, defaultPartitioner(self))
   }
 
-  /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
-   */
-  def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = {
-    reduceByKey(new HashPartitioner(numPartitions), func)
-  }
-
   /**
    * Group the values for each key in the RDD into a single sequence. Allows controlling the
    * partitioning of the resulting key-value pair RDD by passing a Partitioner.
@@ -482,16 +492,6 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
     combineByKey(createCombiner, mergeValue, mergeCombiners, defaultPartitioner(self))
   }
 
-  /**
-   * Merge the values for each key using an associative reduce function. This will also perform
-   * the merging locally on each mapper before sending results to a reducer, similarly to a
-   * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
-   * parallelism level.
-   */
-  def reduceByKey(func: (V, V) => V): RDD[(K, V)] = {
-    reduceByKey(defaultPartitioner(self), func)
-  }
-
   /**
    * Group the values for each key in the RDD into a single sequence. Hash-partitions the
    * resulting RDD with the existing partitioner/parallelism level.

From 073ed8295dec538fc0505f036b1b519891d8354f Mon Sep 17 00:00:00 2001
From: Chris Cope <ccope@resilientscience.com>
Date: Tue, 12 Aug 2014 11:33:16 -0400
Subject: [PATCH 2/2] [SPARK-2140] Enabling YARN memory overhead calculation
 from heap of the JVM

---
 .../main/scala/org/apache/spark/deploy/yarn/Client.scala | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 15f3c4f180ea3..9504b99de3246 100644
--- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -113,11 +113,12 @@ class Client(clientArgs: ClientArguments, hadoopConf: Configuration, spConf: Spa
   }
 
   def calculateAMMemory(newApp: GetNewApplicationResponse) :Int = {
+    // It looks like -Xmx is handled in ClientBase. ?
     // TODO: Need a replacement for the following code to fix -Xmx?
-    // val minResMemory: Int = newApp.getMinimumResourceCapability().getMemory()
-    // var amMemory = ((args.amMemory / minResMemory) * minResMemory) +
-    //  ((if ((args.amMemory % minResMemory) == 0) 0 else minResMemory) -
-    //    memoryOverhead )
+    val minResMemory: Int = newApp.getMinimumResourceCapability().getMemory()
+    var amMemory = ((args.amMemory / minResMemory) * minResMemory) +
+     ((if ((args.amMemory % minResMemory) == 0) 0 else minResMemory) -
+       memoryOverhead )
     args.amMemory
   }