CodingCat
diff --git a/‎README.md‎
Lines changed: 15 additions & 9 deletions b/‎README.md‎
Lines changed: 15 additions & 9 deletions
diff --git a/‎assembly/pom.xml‎
Lines changed: 1 addition & 0 deletions b/‎assembly/pom.xml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎bagel/pom.xml‎
Lines changed: 3 additions & 0 deletions b/‎bagel/pom.xml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎bin/spark-class‎
Lines changed: 2 additions & 2 deletions b/‎bin/spark-class‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎core/pom.xml‎
Lines changed: 7 additions & 0 deletions b/‎core/pom.xml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎core/src/main/resources/org/apache/spark/ui/static/webui.css‎
Lines changed: 2 additions & 0 deletions b/‎core/src/main/resources/org/apache/spark/ui/static/webui.css‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/Aggregator.scala‎
Lines changed: 4 additions & 4 deletions b/‎core/src/main/scala/org/apache/spark/Aggregator.scala‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/SparkContext.scala‎
Lines changed: 10 additions & 1 deletion b/‎core/src/main/scala/org/apache/spark/SparkContext.scala‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎core/src/main/scala/org/apache/spark/TaskEndReason.scala‎
Lines changed: 2 additions & 4 deletions b/‎core/src/main/scala/org/apache/spark/TaskEndReason.scala‎
Lines changed: 2 additions & 4 deletions
diff --git a/‎core/src/main/scala/org/apache/spark/TestUtils.scala‎
Lines changed: 2 additions & 2 deletions b/‎core/src/main/scala/org/apache/spark/TestUtils.scala‎
Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,13 @@
 # Apache Spark
 
-Lightning-Fast Cluster Computing - <http://spark.apache.org/>
+Spark is a fast and general cluster computing system for Big Data. It provides
+high-level APIs in Scala, Java, and Python, and an optimized engine that
+supports general computation graphs for data analysis. It also supports a
+rich set of higher-level tools including Spark SQL for SQL and structured
+data processing, MLlib for machine learning, GraphX for graph processing,
+and Spark Streaming.
+
+<http://spark.apache.org/>
 
 
 ## Online Documentation
@@ -69,29 +76,28 @@ can be run using:
 Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
 storage systems. Because the protocols have changed in different versions of
 Hadoop, you must build Spark against the same version that your cluster runs.
-You can change the version by setting the `SPARK_HADOOP_VERSION` environment
-when building Spark.
+You can change the version by setting `-Dhadoop.version` when building Spark.
 
 For Apache Hadoop versions 1.x, Cloudera CDH MRv1, and other Hadoop
 versions without YARN, use:
 
     # Apache Hadoop 1.2.1
-    $ SPARK_HADOOP_VERSION=1.2.1 sbt/sbt assembly
+    $ sbt/sbt -Dhadoop.version=1.2.1 assembly
 
     # Cloudera CDH 4.2.0 with MapReduce v1
-    $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt assembly
+    $ sbt/sbt -Dhadoop.version=2.0.0-mr1-cdh4.2.0 assembly
 
 For Apache Hadoop 2.2.X, 2.1.X, 2.0.X, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions
-with YARN, also set `SPARK_YARN=true`:
+with YARN, also enable the YARN profile by passing `-Pyarn`:
 
     # Apache Hadoop 2.0.5-alpha
-    $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly
+    $ sbt/sbt -Dhadoop.version=2.0.5-alpha -Pyarn assembly
 
     # Cloudera CDH 4.2.0 with MapReduce v2
-    $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt/sbt assembly
+    $ sbt/sbt -Dhadoop.version=2.0.0-cdh4.2.0 -Pyarn assembly
 
     # Apache Hadoop 2.2.X and newer
-    $ SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt/sbt assembly
+    $ sbt/sbt -Dhadoop.version=2.2.0 -Pyarn assembly
 
 When developing a Spark application, specify the Hadoop version by adding the
 "hadoop-client" artifact to your project's dependencies. For example, if you're
 
@@ -32,6 +32,7 @@
   <packaging>pom</packaging>
 
   <properties>
+    <sbt.project.name>assembly</sbt.project.name>
     <spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
     <spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
     <spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
 
@@ -27,6 +27,9 @@
 
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-bagel_2.10</artifactId>
+  <properties>
+     <sbt.project.name>bagel</sbt.project.name>
+  </properties>
   <packaging>jar</packaging>
   <name>Spark Project Bagel</name>
   <url>http://spark.apache.org/</url>
 
@@ -110,9 +110,9 @@ export JAVA_OPTS
 
 TOOLS_DIR="$FWDIR"/tools
 SPARK_TOOLS_JAR=""
-if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then
+if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then
   # Use the JAR from the SBT build
-  export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar`
+  export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`
 fi
 if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then
   # Use the JAR from the Maven build
 
@@ -27,6 +27,9 @@
 
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-core_2.10</artifactId>
+  <properties>
+     <sbt.project.name>core</sbt.project.name>
+  </properties>
   <packaging>jar</packaging>
   <name>Spark Project Core</name>
   <url>http://spark.apache.org/</url>
@@ -111,6 +114,10 @@
       <groupId>org.xerial.snappy</groupId>
       <artifactId>snappy-java</artifactId>
     </dependency>
+    <dependency>
+      <groupId>net.jpountz.lz4</groupId>
+      <artifactId>lz4</artifactId>
+    </dependency>
     <dependency>
       <groupId>com.twitter</groupId>
       <artifactId>chill_${scala.binary.version}</artifactId>
 
@@ -81,7 +81,9 @@ table.sortable thead {
 
 span.kill-link {
   margin-right: 2px;
+  margin-left: 20px;
   color: gray;
+  float: right;
 }
 
 span.kill-link a {
 
@@ -56,8 +56,8 @@ case class Aggregator[K, V, C] (
     } else {
       val combiners = new ExternalAppendOnlyMap[K, V, C](createCombiner, mergeValue, mergeCombiners)
       while (iter.hasNext) {
-        val (k, v) = iter.next()
-        combiners.insert(k, v)
+        val pair = iter.next()
+        combiners.insert(pair._1, pair._2)
       }
       // TODO: Make this non optional in a future release
       Option(context).foreach(c => c.taskMetrics.memoryBytesSpilled = combiners.memoryBytesSpilled)
@@ -85,8 +85,8 @@ case class Aggregator[K, V, C] (
     } else {
       val combiners = new ExternalAppendOnlyMap[K, C, C](identity, mergeCombiners, mergeCombiners)
       while (iter.hasNext) {
-        val (k, c) = iter.next()
-        combiners.insert(k, c)
+        val pair = iter.next()
+        combiners.insert(pair._1, pair._2)
       }
       // TODO: Make this non optional in a future release
       Option(context).foreach(c => c.taskMetrics.memoryBytesSpilled = combiners.memoryBytesSpilled)
 
@@ -1531,7 +1531,16 @@ object SparkContext extends Logging {
             throw new SparkException("YARN mode not available ?", e)
           }
         }
-        val backend = new CoarseGrainedSchedulerBackend(scheduler, sc.env.actorSystem)
+        val backend = try {
+          val clazz =
+            Class.forName("org.apache.spark.scheduler.cluster.YarnClusterSchedulerBackend")
+          val cons = clazz.getConstructor(classOf[TaskSchedulerImpl], classOf[SparkContext])
+          cons.newInstance(scheduler, sc).asInstanceOf[CoarseGrainedSchedulerBackend]
+        } catch {
+          case e: Exception => {
+            throw new SparkException("YARN mode not available ?", e)
+          }
+        }
         scheduler.initialize(backend)
         scheduler
 
 
@@ -20,6 +20,7 @@ package org.apache.spark
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.executor.TaskMetrics
 import org.apache.spark.storage.BlockManagerId
+import org.apache.spark.util.Utils
 
 /**
  * :: DeveloperApi ::
@@ -88,10 +89,7 @@ case class ExceptionFailure(
     stackTrace: Array[StackTraceElement],
     metrics: Option[TaskMetrics])
   extends TaskFailedReason {
-  override def toErrorString: String = {
-    val stackTraceString = if (stackTrace == null) "null" else stackTrace.mkString("\n")
-    s"$className ($description}\n$stackTraceString"
-  }
+  override def toErrorString: String = Utils.exceptionString(className, description, stackTrace)
 }
 
 /**
 
@@ -92,8 +92,8 @@ private[spark] object TestUtils {
   def createCompiledClass(className: String, destDir: File, value: String = ""): File = {
     val compiler = ToolProvider.getSystemJavaCompiler
     val sourceFile = new JavaSourceFromString(className,
-      "public class " + className + " { @Override public String toString() { " +
-       "return \"" + value + "\";}}")
+      "public class " + className + " implements java.io.Serializable {" +
+      "  @Override public String toString() { return \"" + value + "\"; }}")
 
     // Calling this outputs a class file in pwd. It's easier to just rename the file than
     // build a custom FileManager that controls the output location.
Original file line number	Diff line number	Diff line change
`@@ -81,7 +81,9 @@ table.sortable thead {`
`81`	`81`
`82`	`82`	`span.kill-link {`
`83`	`83`	`margin-right: 2px;`
	`84`	`+ margin-left: 20px;`
`84`	`85`	`color: gray;`
	`86`	`+ float: right;`
`85`	`87`	`}`
`86`	`88`
`87`	`89`	`span.kill-link a {`