From 651844c06d1890b3c5466ad200efaa55ca70c7cf Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 12 Feb 2014 14:27:22 +0530 Subject: [PATCH 1/8] Support MiMa for reporting binary compatibility accross versions. --- dev/run-tests | 6 ++++ project/MimaBuild.scala | 60 ++++++++++++++++++++++++++++++++++++++++ project/SparkBuild.scala | 25 ++++++++++++++--- project/plugins.sbt | 1 + 4 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 project/MimaBuild.scala diff --git a/dev/run-tests b/dev/run-tests index d65a397b4c8c7..432563e1ef845 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -32,6 +32,12 @@ echo "Running Scala style checks" echo "=========================================================================" sbt/sbt clean scalastyle +echo "=========================================================================" +echo "Running MiMa for detecting binary incompatibilites." +echo "Please see MimaBuild.scala for details." +echo "=========================================================================" +sbt/sbt mima-report-binary-issues + echo "=========================================================================" echo "Running Spark unit tests" echo "=========================================================================" diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala new file mode 100644 index 0000000000000..f416629f5d03d --- /dev/null +++ b/project/MimaBuild.scala @@ -0,0 +1,60 @@ +import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact} +import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings + +object MimaBuild { + + val ignoredABIProblems = { + import com.typesafe.tools.mima.core._ + import com.typesafe.tools.mima.core.ProblemFilters._ + /** + * A: Detections are semi private or likely to become semi private at some point. + */ + Seq(exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom"), + exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom$"), + exclude[MissingMethodProblem]("org.apache.spark.util.Utils.cloneWritables"), + // Scheduler is not considered a public API. + excludePackage("org.apache.spark.deploy"), + // Was made private in 1.0 + excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#DiskMapIterator"), + excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#ExternalIterator"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.cogroupResultToJava"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.groupByResultToJava"), + exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.handleFailedTask"), + exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.taskSetTaskIds"), + exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSetManager.handleFailedTask"), + exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.removeAllRunningTasks"), + exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.runningTasks_="), + exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime"), + exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime_="), + exclude[MissingMethodProblem]("org.apache.spark.storage.BlockObjectWriter.bytesWritten")) ++ + /** + * B: Detections are mostly false +ve. 
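+     *    (These appear to be per-method filters rather than package-wide excludes, so
+     *    other binary issues reported against the same classes should still surface;
+     *    the repeated mapPartitions entries presumably cover its separate overloads.)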
+ */ + Seq(exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.setGenerator"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.foreachPartition"), + exclude[MissingMethodProblem]("org.apache.spark.api.python.PythonRDD.writeToStream")) ++ + /** + * Detections I am unsure about. Should be either moved to B (false +ve) or A. + */ + Seq(exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator$"), + exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags"), + exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags$"), + exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$wrapForCompression$1"), + exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$sparkConf"), + exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator"), + exclude[MissingClassProblem]("org.apache.spark.mllib.optimization.SquaredGradient"), + exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.gradient"), + exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.RidgeRegressionWithSGD.gradient"), + exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient") + ) + } + + lazy val mimaSettings = mimaDefaultSettings ++ Seq( + previousArtifact := None, + binaryIssueFilters ++= ignoredABIProblems + ) + +} diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 74bad66cfd018..6410704c2c7a2 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -22,6 +22,7 @@ import sbtassembly.Plugin._ import AssemblyKeys._ import scala.util.Properties import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings} +import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact // For Sonatype publishing //import com.jsuereth.pgp.sbtplugin.PgpKeys._ @@ -121,7 +122,7 @@ object SparkBuild extends Build { lazy val allProjects = packageProjects ++ allExternalRefs ++ Seq[ProjectReference](examples, tools, assemblyProj) - def sharedSettings = Defaults.defaultSettings ++ Seq( + def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings ++ Seq( organization := "org.apache.spark", version := "1.0.0-incubating-SNAPSHOT", scalaVersion := "2.10.3", @@ -244,13 +245,19 @@ object SparkBuild extends Build { val excludeAsm = ExclusionRule(organization = "asm") val excludeSnappy = ExclusionRule(organization = "org.xerial.snappy") + def sparkPreviousArtifact(id: String, organization: String = "org.apache.spark", + version: String = "0.9.0-incubating", crossVersion: String = "2.10"): Option[sbt.ModuleID] = { + val fullId = if (crossVersion.isEmpty) id else id + "_" + crossVersion + Some(organization % fullId % version) // the artifact to compare binary compatibility with + } + def coreSettings = sharedSettings ++ Seq( name := "spark-core", resolvers ++= Seq( "JBoss Repository" at "http://repository.jboss.org/nexus/content/repositories/releases/", "Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos/" ), - + previousArtifact := 
sparkPreviousArtifact("spark-core"), libraryDependencies ++= Seq( "com.google.guava" % "guava" % "14.0.1", "com.google.code.findbugs" % "jsr305" % "1.3.9", @@ -289,7 +296,7 @@ object SparkBuild extends Build { publish := {} ) - def replSettings = sharedSettings ++ Seq( + def replSettings = sharedSettings ++ Seq( name := "spark-repl", libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-compiler" % v ), libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "jline" % v ), @@ -298,6 +305,7 @@ object SparkBuild extends Build { def examplesSettings = sharedSettings ++ Seq( name := "spark-examples", + previousArtifact := sparkPreviousArtifact("spark-examples"), libraryDependencies ++= Seq( "com.twitter" %% "algebird-core" % "0.1.11", "org.apache.hbase" % "hbase" % "0.94.6" excludeAll(excludeNetty, excludeAsm), @@ -321,17 +329,20 @@ object SparkBuild extends Build { def graphxSettings = sharedSettings ++ Seq( name := "spark-graphx", + previousArtifact := sparkPreviousArtifact("spark-graphx"), libraryDependencies ++= Seq( "org.jblas" % "jblas" % "1.2.3" ) ) def bagelSettings = sharedSettings ++ Seq( - name := "spark-bagel" + name := "spark-bagel", + previousArtifact := sparkPreviousArtifact("spark-bagel") ) def mllibSettings = sharedSettings ++ Seq( name := "spark-mllib", + previousArtifact := sparkPreviousArtifact("spark-mllib"), libraryDependencies ++= Seq( "org.jblas" % "jblas" % "1.2.3" ) @@ -339,6 +350,7 @@ object SparkBuild extends Build { def streamingSettings = sharedSettings ++ Seq( name := "spark-streaming", + previousArtifact := sparkPreviousArtifact("spark-streaming"), libraryDependencies ++= Seq( "commons-io" % "commons-io" % "2.4" ) @@ -403,6 +415,7 @@ object SparkBuild extends Build { def twitterSettings() = sharedSettings ++ Seq( name := "spark-streaming-twitter", + previousArtifact := sparkPreviousArtifact("spark-streaming-twitter"), libraryDependencies ++= Seq( "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty) ) @@ -410,6 +423,7 @@ object SparkBuild extends Build { def kafkaSettings() = sharedSettings ++ Seq( name := "spark-streaming-kafka", + previousArtifact := sparkPreviousArtifact("spark-streaming-kafka"), libraryDependencies ++= Seq( "com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty), "org.apache.kafka" %% "kafka" % "0.8.0" @@ -422,6 +436,7 @@ object SparkBuild extends Build { def flumeSettings() = sharedSettings ++ Seq( name := "spark-streaming-flume", + previousArtifact := sparkPreviousArtifact("spark-streaming-flume"), libraryDependencies ++= Seq( "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty, excludeSnappy) ) @@ -429,6 +444,7 @@ object SparkBuild extends Build { def zeromqSettings() = sharedSettings ++ Seq( name := "spark-streaming-zeromq", + previousArtifact := sparkPreviousArtifact("spark-streaming-zeromq"), libraryDependencies ++= Seq( "org.spark-project.akka" %% "akka-zeromq" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty) ) @@ -436,6 +452,7 @@ object SparkBuild extends Build { def mqttSettings() = streamingSettings ++ Seq( name := "spark-streaming-mqtt", + previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"), resolvers ++= Seq("Eclipse Repo" at "https://repo.eclipse.org/content/repositories/paho-releases/"), libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0") ) diff --git a/project/plugins.sbt b/project/plugins.sbt index 914f2e05a402a..aa2baecd2fe1f 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -19,3 +19,4 @@ 
addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4") addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.4.0") +addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6") From b551519d2cea42cf75fdebf493fb50b927e78581 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 27 Feb 2014 09:53:25 +0530 Subject: [PATCH 2/8] adding a new exclude after rebasing with master --- project/MimaBuild.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index f416629f5d03d..dd68ad5a24f25 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -48,7 +48,8 @@ object MimaBuild { exclude[MissingClassProblem]("org.apache.spark.mllib.optimization.SquaredGradient"), exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.gradient"), exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.RidgeRegressionWithSGD.gradient"), - exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient") + exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient"), + exclude[FinalClassProblem]("org.apache.spark.SparkFiles") ) } From 4c771e00cb45f5fbcf8b1efdac22ef43c1740593 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 20 Mar 2014 21:54:50 +0530 Subject: [PATCH 3/8] Added a tool to generate mima excludes and also adapted build to pick automatically. --- .mima-exclude | 360 ++++++++++++++++++ bin/spark-class | 2 +- project/MimaBuild.scala | 93 ++--- project/SparkBuild.scala | 2 +- .../spark/tools/GenerateMIMAIgnore.scala | 114 ++++++ 5 files changed, 525 insertions(+), 46 deletions(-) create mode 100644 .mima-exclude create mode 100644 tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala diff --git a/.mima-exclude b/.mima-exclude new file mode 100644 index 0000000000000..54a46b4f0ee9e --- /dev/null +++ b/.mima-exclude @@ -0,0 +1,360 @@ +org.apache.spark.scheduler.cluster.SparkDeploySchedulerBackend +org.apache.spark.rdd.FlatMappedRDD +org.apache.spark.storage.PutResult +org.apache.spark.mllib.recommendation.OutLinkBlock +org.apache.spark.network.netty.ShuffleSender +org.apache.spark.scheduler.TaskResult +org.apache.spark.scheduler.JobWaiter +org.apache.spark.deploy.worker.ExecutorRunner +org.apache.spark.streaming.dstream.FilteredDStream +org.apache.spark.util.SerializableBuffer +org.apache.spark.scheduler.TaskSetManager +org.apache.spark.scheduler.Stage +org.apache.spark.rdd.ZippedWithIndexRDD +org.apache.spark.scheduler.SchedulerBackend +org.apache.spark.streaming.dstream.GlommedDStream +org.apache.spark.rdd.FlatMappedValuesRDD +org.apache.spark.deploy.master.MonarchyLeaderAgent +org.apache.spark.rdd.CoGroupPartition +org.apache.spark.scheduler.SlaveLost +org.apache.spark.api.python.PythonWorkerFactory +org.apache.spark.streaming.ContextWaiter +org.apache.spark.serializer.JavaSerializationStream +org.apache.spark.rdd.CheckpointRDDPartition +org.apache.spark.scheduler.WorkerOffer +org.apache.spark.streaming.scheduler.JobCompleted +org.apache.spark.rdd.ParallelCollectionRDD +org.apache.spark.streaming.dstream.ForEachDStream +org.apache.spark.scheduler.JobFailed +org.apache.spark.graphx.impl.ReplicatedVertexView +org.apache.spark.deploy.master.ui.ApplicationPage +org.apache.spark.rdd.ShuffledRDDPartition +org.apache.spark.ui.jobs.JobProgressUI +org.apache.spark.deploy.Command +org.apache.spark.ui.jobs.StagePage 
+org.apache.spark.serializer.KryoDeserializationStream +org.apache.spark.scheduler.ExecutorLost +org.apache.spark.deploy.master.BlackHolePersistenceEngine +org.apache.spark.scheduler.cluster.SimrSchedulerBackend +org.apache.spark.streaming.dstream.MapPartitionedDStream +org.apache.spark.scheduler.TaskInfo +org.apache.spark.storage.BlockManagerSlaveActor +org.apache.spark.ui.env.EnvironmentUI +org.apache.spark.executor.CoarseGrainedExecutorBackend +org.apache.spark.storage.ShuffleBlockManager +org.apache.spark.rdd.NarrowCoGroupSplitDep +org.apache.spark.scheduler.JobGroupCancelled +org.apache.spark.rdd.MapPartitionsRDD +org.apache.spark.graphx.impl.EdgeTripletIterator +org.apache.spark.storage.BlockManagerMaster +org.apache.spark.partial.SumEvaluator +org.apache.spark.network.netty.FileServerHandler +org.apache.spark.GetMapOutputStatuses +org.apache.spark.tools.SparkMethod +org.apache.spark.storage.ShuffleWriterGroup +org.apache.spark.ui.jobs.StageTable +org.apache.spark.scheduler.TaskDescription +org.apache.spark.deploy.master.Master +org.apache.spark.scheduler.DAGSchedulerSource +org.apache.spark.streaming.util.Clock +org.apache.spark.deploy.master.ui.MasterWebUI +org.apache.spark.CacheManager +org.apache.spark.streaming.util.SystemClock +org.apache.spark.storage.BroadcastHelperBlockId +org.apache.spark.deploy.master.ZooKeeperLeaderElectionAgent +org.apache.spark.storage.FileSegment +org.apache.spark.api.python.PythonPartitioner +org.apache.spark.scheduler.ExecutorLossReason +org.apache.spark.network.ReceivingConnection +org.apache.spark.scheduler.Schedulable +org.apache.spark.scheduler.TaskSet +org.apache.spark.storage.BlockStore +org.apache.spark.streaming.scheduler.StreamingListenerBus +org.apache.spark.deploy.ClientArguments +org.apache.spark.metrics.MetricsConfig +org.apache.spark.serializer.SerializerManager +org.apache.spark.streaming.scheduler.NetworkInputTracker +org.apache.spark.broadcast.HttpBroadcast +org.apache.spark.executor.ExecutorURLClassLoader +org.apache.spark.scheduler.TaskSchedulerImpl +org.apache.spark.streaming.dstream.RawNetworkReceiver +org.apache.spark.util.collection.SizeTrackingAppendOnlyMap +org.apache.spark.rdd.SampledRDDPartition +org.apache.spark.storage.BlockManagerId +org.apache.spark.deploy.master.LeaderElectionAgent +org.apache.spark.streaming.dstream.StopReceiver +org.apache.spark.storage.GotBlock +org.apache.spark.scheduler.Task +org.apache.spark.rdd.CartesianPartition +org.apache.spark.storage.StorageStatus +org.apache.spark.rdd.PartitionwiseSampledRDDPartition +org.apache.spark.graphx.impl.ShuffleSerializationStream +org.apache.spark.scheduler.cluster.mesos.MesosSchedulerBackend +org.apache.spark.deploy.worker.ui.IndexPage +org.apache.spark.partial.ApproximateEvaluator +org.apache.spark.graphx.impl.ShuffleSerializerInstance +org.apache.spark.ui.jobs.ExecutorSummary +org.apache.spark.scheduler.GettingResultEvent +org.apache.spark.graphx.impl.RoutingTable +org.apache.spark.streaming.util.KillingThread +org.apache.spark.GrowableAccumulableParam +org.apache.spark.deploy.master.ApplicationInfo +org.apache.spark.deploy.worker.Worker +org.apache.spark.streaming.dstream.ReportBlock +org.apache.spark.partial.StudentTCacher +org.apache.spark.scheduler.ActiveJob +org.apache.spark.network.netty.FileServer +org.apache.spark.network.netty.ShuffleCopier +org.apache.spark.util.collection.PrimitiveVector +org.apache.spark.serializer.KryoSerializerInstance +org.apache.spark.streaming.util.RateLimitedOutputStream +org.apache.spark.network.Message 
+org.apache.spark.util.InnerClosureFinder +org.apache.spark.util.SerializableHyperLogLog +org.apache.spark.deploy.worker.ProcessBuilderLike +org.apache.spark.streaming.Interval +org.apache.spark.rdd.CheckpointRDD +org.apache.spark.deploy.master.SparkZooKeeperSession +org.apache.spark.graphx.impl.VertexAttributeBlock +org.apache.spark.streaming.dstream.MapValuedDStream +org.apache.spark.util.collection.ExternalAppendOnlyMap +org.apache.spark.deploy.client.AppClient +org.apache.spark.deploy.DriverDescription +org.apache.spark.scheduler.FairSchedulableBuilder +org.apache.spark.executor.MesosExecutorBackend +org.apache.spark.streaming.scheduler.ClearMetadata +org.apache.spark.storage.BroadcastBlockId +org.apache.spark.streaming.scheduler.Job +org.apache.spark.streaming.scheduler.DeregisterReceiver +org.apache.spark.serializer.KryoSerializationStream +org.apache.spark.scheduler.FIFOSchedulableBuilder +org.apache.spark.ui.jobs.JobProgressListener +org.apache.spark.TaskEndReason +org.apache.spark.storage.BlockId +org.apache.spark.streaming.scheduler.JobGenerator +org.apache.spark.deploy.master.WorkerInfo +org.apache.spark.storage.ShuffleBlockId +org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessage +org.apache.spark.storage.ThreadingTest.ConsumerThread +org.apache.spark.rdd.PartitionGroup +org.apache.spark.util.collection.AppendOnlyMap +org.apache.spark.scheduler.ExecutorExited +org.apache.spark.streaming.Checkpoint +org.apache.spark.streaming.scheduler.AddBlocks +org.apache.spark.network.netty.FileClient +org.apache.spark.scheduler.JobListener +org.apache.spark.streaming.scheduler.JobSet +org.apache.spark.scheduler.ExecutorGained +org.apache.spark.partial.CountEvaluator +org.apache.spark.scheduler.Pool +org.apache.spark.deploy.master.FileSystemPersistenceEngine +org.apache.spark.rdd.BlockRDD +org.apache.spark.network.netty.FileHeader +org.apache.spark.rdd.PartitionerAwareUnionRDDPartition +org.apache.spark.graphx.impl.VertexIdMsgSerializer +org.apache.spark.streaming.dstream.FileInputDStream +org.apache.spark.deploy.worker.ui.WorkerWebUI +org.apache.spark.metrics.MetricsSystem +org.apache.spark.scheduler.JobSubmitted +org.apache.spark.graphx.impl.MsgRDDFunctions +org.apache.spark.api.java.function.WrappedFunction3 +org.apache.spark.streaming.CheckpointWriter +org.apache.spark.storage.BlockManager +org.apache.spark.util.CompletionIterator +org.apache.spark.network.MessageChunk +org.apache.spark.rdd.MappedValuesRDD +org.apache.spark.streaming.dstream.PluggableInputDStream +org.apache.spark.rdd.ZippedWithIndexRDDPartition +org.apache.spark.streaming.receivers.ActorReceiver +org.apache.spark.serializer.JavaDeserializationStream +org.apache.spark.rdd.SubtractedRDD +org.apache.spark.streaming.dstream.SocketInputDStream +org.apache.spark.partial.GroupedSumEvaluator +org.apache.spark.broadcast.TorrentInfo +org.apache.spark.storage.BlockException +org.apache.spark.streaming.dstream.FlatMapValuedDStream +org.apache.spark.deploy.worker.DriverRunner +org.apache.spark.deploy.master.ExecutorInfo +org.apache.spark.deploy.master.ui.IndexPage +org.apache.spark.rdd.ParallelCollectionPartition +org.apache.spark.scheduler.DirectTaskResult +org.apache.spark.partial.ApproximateActionListener +org.apache.spark.streaming.DStreamGraph +org.apache.spark.partial.GroupedCountEvaluator +org.apache.spark.api.java.JavaSparkContextVarargsWorkaround +org.apache.spark.graphx.util.collection.PrimitiveKeyOpenHashMap +org.apache.spark.streaming.dstream.StateDStream +org.apache.spark.FetchFailedException 
+org.apache.spark.BlockStoreShuffleFetcher +org.apache.spark.util.Clock +org.apache.spark.storage.DiskBlockManager +org.apache.spark.scheduler.SparkListenerBus +org.apache.spark.rdd.GlommedRDD +org.apache.spark.streaming.dstream.UnionDStream +org.apache.spark.scheduler.JobResult +org.apache.spark.deploy.ApplicationDescription +org.apache.spark.rdd.CoGroupSplitDep +org.apache.spark.api.python.PythonRDD +org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend +org.apache.spark.partial.GroupedMeanEvaluator +org.apache.spark.graphx.impl.VertexBroadcastMsg +org.apache.spark.ShuffleFetcher +org.apache.spark.streaming.dstream.ShuffledDStream +org.apache.spark.scheduler.TaskScheduler +org.apache.spark.streaming.dstream.MappedDStream +org.apache.spark.storage.BlockFetchTracker +org.apache.spark.rdd.ZippedPartitionsPartition +org.apache.spark.rdd.ShuffleCoGroupSplitDep +org.apache.spark.graphx.impl.LongVertexBroadcastMsgSerializer +org.apache.spark.deploy.ExecutorDescription +org.apache.spark.MapOutputTrackerMessage +org.apache.spark.util.MetadataCleaner +org.apache.spark.HttpFileServer +org.apache.spark.streaming.util.ManualClock +org.apache.spark.streaming.scheduler.JobGeneratorEvent +org.apache.spark.scheduler.JobCancelled +org.apache.spark.streaming.scheduler.DoCheckpoint +org.apache.spark.broadcast.TorrentBroadcast +org.apache.spark.scheduler.FIFOSchedulingAlgorithm +org.apache.spark.network.ConnectionManagerId +org.apache.spark.deploy.master.PersistenceEngine +org.apache.spark.mllib.recommendation.InLinkBlock +org.apache.spark.partial.MeanEvaluator +org.apache.spark.streaming.dstream.ReportError +org.apache.spark.storage.RDDBlockId +org.apache.spark.api.java.function.WrappedFunction2 +org.apache.spark.ui.exec.ExecutorsUI +org.apache.spark.network.netty.FileServerChannelInitializer +org.apache.spark.streaming.scheduler.JobStarted +org.apache.spark.streaming.dstream.RawInputDStream +org.apache.spark.storage.GetBlock +org.apache.spark.ui.jobs.IndexPage +org.apache.spark.storage.BlockManagerSource +org.apache.spark.rdd.BlockRDDPartition +org.apache.spark.rdd.PartitionCoalescer +org.apache.spark.network.ConnectionManager +org.apache.spark.deploy.master.MasterArguments +org.apache.spark.graphx.impl.MessageToPartition +org.apache.spark.executor.ExecutorBackend +org.apache.spark.util.NextIterator +org.apache.spark.storage.BlockManagerWorker +org.apache.spark.streaming.dstream.QueueInputDStream +org.apache.spark.streaming.scheduler.JobScheduler +org.apache.spark.streaming.dstream.FlatMappedDStream +org.apache.spark.scheduler.TaskResultGetter +org.apache.spark.network.netty.FileClientChannelInitializer +org.apache.spark.rdd.MappedRDD +org.apache.spark.rdd.PartitionerAwareUnionRDD +org.apache.spark.network.BufferMessage +org.apache.spark.streaming.dstream.DStreamCheckpointData +org.apache.spark.executor.Executor +org.apache.spark.MapOutputTrackerMaster +org.apache.spark.deploy.client.AppClientListener +org.apache.spark.storage.BlockInfo +org.apache.spark.streaming.dstream.ReducedWindowedDStream +org.apache.spark.rdd.JdbcPartition +org.apache.spark.deploy.TestWorkerInfo +org.apache.spark.scheduler.BeginEvent +org.apache.spark.storage.BlockMessage +org.apache.spark.tools.SparkType +org.apache.spark.rdd.NewHadoopPartition +org.apache.spark.streaming.scheduler.JobSchedulerEvent +org.apache.spark.streaming.util.TestOutputStream +org.apache.spark.scheduler.local.LocalActor +org.apache.spark.graphx.impl.EdgePartition +org.apache.spark.scheduler.TaskSetFailed +org.apache.spark.ServerStateException 
+org.apache.spark.network.MessageChunkHeader +org.apache.spark.storage.DiskBlockObjectWriter +org.apache.spark.graphx.impl.VertexPartition +org.apache.spark.MapOutputTracker +org.apache.spark.rdd.CartesianRDD +org.apache.spark.storage.ThreadingTest.ProducerThread +org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend +org.apache.spark.graphx.impl.IntVertexBroadcastMsgSerializer +org.apache.spark.streaming.scheduler.NetworkInputTrackerMessage +org.apache.spark.deploy.worker.WorkerSource +org.apache.spark.graphx.impl.DoubleVertexBroadcastMsgSerializer +org.apache.spark.WritableConverter +org.apache.spark.streaming.util.RecurringTimer +org.apache.spark.util.collection.PrimitiveKeyOpenHashMap +org.apache.spark.scheduler.CompletionEvent +org.apache.spark.streaming.receivers.Data +org.apache.spark.rdd.UnionPartition +org.apache.spark.broadcast.TorrentBlock +org.apache.spark.storage.TestBlockId +org.apache.spark.storage.BlockManagerMasterActor +org.apache.spark.storage.TaskResultBlockId +org.apache.spark.streaming.scheduler.ErrorReported +org.apache.spark.streaming.scheduler.RegisterReceiver +org.apache.spark.scheduler.DAGScheduler +org.apache.spark.deploy.TestMasterInfo +org.apache.spark.deploy.master.DriverInfo +org.apache.spark.ui.storage.BlockManagerUI +org.apache.spark.streaming.scheduler.GenerateJobs +org.apache.spark.storage.StreamBlockId +org.apache.spark.util.FieldAccessFinder +org.apache.spark.scheduler.local.LocalBackend +org.apache.spark.network.netty.FileClientHandler +org.apache.spark.tools.ParameterizedType +org.apache.spark.network.Connection +org.apache.spark.ui.storage.RDDPage +org.apache.spark.HttpServer +org.apache.spark.deploy.DockerId +org.apache.spark.streaming.scheduler.ClearCheckpointData +org.apache.spark.rdd.ZippedPartition +org.apache.spark.FetchFailed +org.apache.spark.serializer.JavaSerializerInstance +org.apache.spark.scheduler.SchedulableBuilder +org.apache.spark.streaming.dstream.TransformedDStream +org.apache.spark.streaming.util.FileGeneratingThread +org.apache.spark.api.java.function.WrappedFunction1 +org.apache.spark.broadcast.BroadcastManager +org.apache.spark.rdd.FilteredRDD +org.apache.spark.graphx.impl.IntAggMsgSerializer +org.apache.spark.scheduler.MapStatus +org.apache.spark.tools.BaseType +org.apache.spark.ui.jobs.PoolTable +org.apache.spark.graphx.impl.ShuffleDeserializationStream +org.apache.spark.scheduler.DAGSchedulerEvent +org.apache.spark.ui.jobs.ExecutorTable +org.apache.spark.deploy.master.MasterSource +org.apache.spark.graphx.impl.VertexBroadcastMsgRDDFunctions +org.apache.spark.deploy.worker.WorkerArguments +org.apache.spark.deploy.worker.WorkerWatcher +org.apache.spark.graphx.impl.EdgePartitionBuilder +org.apache.spark.deploy.DeployMessage +org.apache.spark.streaming.dstream.SocketReceiver +org.apache.spark.scheduler.ResultTask +org.apache.spark.rdd.RDDCheckpointData +org.apache.spark.ui.SparkUI +org.apache.spark.util.collection.OpenHashSet +org.apache.spark.streaming.dstream.NetworkReceiverMessage +org.apache.spark.ui.jobs.PoolPage +org.apache.spark.deploy.worker.Clock +org.apache.spark.scheduler.TaskLocation +org.apache.spark.deploy.worker.Sleeper +org.apache.spark.storage.TempBlockId +org.apache.spark.storage.BlockObjectWriter +org.apache.spark.streaming.dstream.WindowedDStream +org.apache.spark.util.random.XORShiftRandom +org.apache.spark.network.SendingConnection +org.apache.spark.ui.storage.IndexPage +org.apache.spark.rdd.HadoopPartition +org.apache.spark.util.Utils.CallSiteInfo +org.apache.spark.deploy.LocalSparkCluster 
+org.apache.spark.streaming.ObjectInputStreamWithLoader +org.apache.spark.util.IdGenerator +org.apache.spark.graphx.impl.DoubleAggMsgSerializer +org.apache.spark.ExceptionFailure +org.apache.spark.storage.BlockFetcherIterator +org.apache.spark.MapOutputTrackerMasterActor +org.apache.spark.storage.BlockMessageArray +org.apache.spark.graphx.impl.LongAggMsgSerializer +org.apache.spark.scheduler.FairSchedulingAlgorithm +org.apache.spark.scheduler.IndirectTaskResult +org.apache.spark.storage.PutBlock +org.apache.spark.util.collection.OpenHashMap +org.apache.spark.scheduler.ShuffleMapTask +org.apache.spark.util.ByteBufferInputStream +org.apache.spark.scheduler.SchedulingAlgorithm \ No newline at end of file diff --git a/bin/spark-class b/bin/spark-class index c4225a392d6da..dfa11e854dc89 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -129,7 +129,7 @@ fi # Compute classpath using external script CLASSPATH=`$FWDIR/bin/compute-classpath.sh` - +CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR" if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR" fi diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index dd68ad5a24f25..d83c7bf092833 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -1,61 +1,66 @@ import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact} import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings +import sbt._ object MimaBuild { - val ignoredABIProblems = { + def ignoredABIProblems(base: File) = { import com.typesafe.tools.mima.core._ import com.typesafe.tools.mima.core.ProblemFilters._ - /** - * A: Detections are semi private or likely to become semi private at some point. - */ - Seq(exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom"), - exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom$"), - exclude[MissingMethodProblem]("org.apache.spark.util.Utils.cloneWritables"), - // Scheduler is not considered a public API. - excludePackage("org.apache.spark.deploy"), - // Was made private in 1.0 - excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#DiskMapIterator"), - excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#ExternalIterator"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.cogroupResultToJava"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.groupByResultToJava"), - exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.handleFailedTask"), - exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.taskSetTaskIds"), - exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSetManager.handleFailedTask"), - exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.removeAllRunningTasks"), - exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.runningTasks_="), - exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime"), - exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime_="), - exclude[MissingMethodProblem]("org.apache.spark.storage.BlockObjectWriter.bytesWritten")) ++ - /** - * B: Detections are mostly false +ve. 
- */ - Seq(exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.setGenerator"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.foreachPartition"), - exclude[MissingMethodProblem]("org.apache.spark.api.python.PythonRDD.writeToStream")) ++ + + IO.read(file(base.getAbsolutePath + "/.mima-exclude")).split("\n").map(excludePackage).toSeq ++ +// /** +// * A: Detections are semi private or likely to become semi private at some point. +// */ +// Seq(exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom"), +// exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom$"), +// exclude[MissingMethodProblem]("org.apache.spark.util.Utils.cloneWritables"), +// // Scheduler is not considered a public API. +// excludePackage("org.apache.spark.deploy"), +// // Was made private in 1.0 +// excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#DiskMapIterator"), +// excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#ExternalIterator"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.cogroupResultToJava"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.groupByResultToJava"), +// exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.handleFailedTask"), +// exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.taskSetTaskIds"), +// exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSetManager.handleFailedTask"), +// exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.removeAllRunningTasks"), +// exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.runningTasks_="), +// exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime"), +// exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime_="), +// exclude[MissingMethodProblem]("org.apache.spark.storage.BlockObjectWriter.bytesWritten")) ++ +// /** +// * B: Detections are mostly false +ve. +// */ +// Seq(exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.setGenerator"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.foreachPartition"), +// exclude[MissingMethodProblem]("org.apache.spark.api.python.PythonRDD.writeToStream")) ++ /** * Detections I am unsure about. Should be either moved to B (false +ve) or A. 
*/ - Seq(exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator$"), - exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags"), - exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags$"), - exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$wrapForCompression$1"), - exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$sparkConf"), - exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator"), - exclude[MissingClassProblem]("org.apache.spark.mllib.optimization.SquaredGradient"), - exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.gradient"), - exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.RidgeRegressionWithSGD.gradient"), - exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient"), - exclude[FinalClassProblem]("org.apache.spark.SparkFiles") + Seq( +// exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator$"), +// exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags"), +// exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags$"), +// exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$wrapForCompression$1"), +// exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$sparkConf"), +// exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator"), +// exclude[MissingClassProblem]("org.apache.spark.mllib.optimization.SquaredGradient"), +// exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.gradient"), +// exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.RidgeRegressionWithSGD.gradient"), +// exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient"), +// exclude[FinalClassProblem]("org.apache.spark.SparkFiles") + ) } - lazy val mimaSettings = mimaDefaultSettings ++ Seq( + def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq( previousArtifact := None, - binaryIssueFilters ++= ignoredABIProblems + binaryIssueFilters ++= ignoredABIProblems(sparkHome) ) } diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 6410704c2c7a2..ed5e598d798c2 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -122,7 +122,7 @@ object SparkBuild extends Build { lazy val allProjects = packageProjects ++ allExternalRefs ++ Seq[ProjectReference](examples, tools, assemblyProj) - def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings ++ Seq( + def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq( organization := "org.apache.spark", version := "1.0.0-incubating-SNAPSHOT", scalaVersion := "2.10.3", diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala new file mode 100644 index 0000000000000..3246db98f5df5 --- /dev/null +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.tools + +import java.io.File +import java.util.jar.JarFile + +import scala.collection.mutable +import scala.collection.JavaConversions.enumerationAsScalaIterator + +/** + * Mima(TODO: Paste URL here) generates a lot of false positives as it does not detect + * private[x] as internal APIs. + */ +object GenerateMIMAIgnore { + + def classesWithPrivateWithin(packageName: String, excludePackages: Seq[String]): Set[String] = { + import scala.reflect.runtime.universe.runtimeMirror + val classLoader: ClassLoader = Thread.currentThread().getContextClassLoader + val mirror = runtimeMirror(classLoader) + val classes = Utils.getClasses(packageName, classLoader) + val privateClasses = mutable.HashSet[String]() + for (x <- classes) { + try { + // some of the classnames throw malformed class name exceptions and weird Match errors. + if (excludePackages.forall(!x.startsWith(_)) && + mirror.staticClass(x).privateWithin.toString.trim != "") { + privateClasses += x + } + } catch { + case e: Throwable => // println(e) + } + } + privateClasses.toSet + } + + def main(args: Array[String]) { + scala.tools.nsc.io.File(".mima-exclude"). + writeAll(classesWithPrivateWithin("org.apache.spark", args).mkString("\n")) + println("Created : .mima-exclude in current directory.") + } + +} + +object Utils { + + /** + * Get all classes in a package from a jar file. + */ + def getAllClasses(jarPath: String, packageName: String) = { + val jar = new JarFile(new File(jarPath)) + val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName)) + val classes = mutable.HashSet[Class[_]]() + for (entry <- enums) { + if (!entry.endsWith("/") && !entry.endsWith("MANIFEST.MF") && !entry.endsWith("properties")) { + try { + classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.')) + } catch { + case e: Throwable => // println(e) // It may throw a few ClassNotFoundExceptions + } + } + } + classes + } + + /** + * Scans all classes accessible from the context class loader which belong to the given package + * and subpackages both from directories and jars present on the classpath. 
+ */ + def getClasses(packageName: String, + classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Set[String] = { + val path = packageName.replace('.', '/') + val resources = classLoader.getResources(path).toArray + val jars = resources.filter(x => x.getProtocol == "jar") + .map(_.getFile.split(":")(1).split("!")(0)) + val classesFromJars = jars.map(getAllClasses(_, path)).flatten + val dirs = resources.filter(x => x.getProtocol == "file") + .map(x => new File(x.getFile.split(":")(1))) + val classFromDirs = dirs.map(findClasses(_, packageName)).flatten + (classFromDirs ++ classesFromJars).map(_.getCanonicalName).filter(_ != null).toSet + } + + private def findClasses(directory: File, packageName: String): Seq[Class[_]] = { + val classes = mutable.ArrayBuffer[Class[_]]() + if (!directory.exists()) { + return classes + } + val files = directory.listFiles() + for (file <- files) { + if (file.isDirectory) { + classes ++= findClasses(file, packageName + "." + file.getName) + } else if (file.getName.endsWith(".class")) { + classes += Class.forName(packageName + '.' + file.getName.substring(0, + file.getName.length() - 6)) + } + } + classes + } +} From c39f3b576f234a59264b21c630a95d520da7bd58 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Sun, 23 Mar 2014 13:11:53 -0700 Subject: [PATCH 4/8] Some enhancements to binary checking. 1. Special case for inner classes of packge-private objects. 2. Made tools classes accessible when running `spark-class`. 3. Various other improvements to exclude-generation script. --- .gitignore | 1 + .mima-exclude | 360 ------------------ bin/compute-classpath.sh | 1 + dev/run-tests | 13 +- project/MimaBuild.scala | 90 +++-- project/SparkBuild.scala | 4 +- .../spark/tools/GenerateMIMAIgnore.scala | 117 ++++-- 7 files changed, 135 insertions(+), 451 deletions(-) delete mode 100644 .mima-exclude diff --git a/.gitignore b/.gitignore index 3d178992123da..e5da68d9658ac 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ sbt/*.jar .settings .cache +.mima-excludes /build/ work/ out/ diff --git a/.mima-exclude b/.mima-exclude deleted file mode 100644 index 54a46b4f0ee9e..0000000000000 --- a/.mima-exclude +++ /dev/null @@ -1,360 +0,0 @@ -org.apache.spark.scheduler.cluster.SparkDeploySchedulerBackend -org.apache.spark.rdd.FlatMappedRDD -org.apache.spark.storage.PutResult -org.apache.spark.mllib.recommendation.OutLinkBlock -org.apache.spark.network.netty.ShuffleSender -org.apache.spark.scheduler.TaskResult -org.apache.spark.scheduler.JobWaiter -org.apache.spark.deploy.worker.ExecutorRunner -org.apache.spark.streaming.dstream.FilteredDStream -org.apache.spark.util.SerializableBuffer -org.apache.spark.scheduler.TaskSetManager -org.apache.spark.scheduler.Stage -org.apache.spark.rdd.ZippedWithIndexRDD -org.apache.spark.scheduler.SchedulerBackend -org.apache.spark.streaming.dstream.GlommedDStream -org.apache.spark.rdd.FlatMappedValuesRDD -org.apache.spark.deploy.master.MonarchyLeaderAgent -org.apache.spark.rdd.CoGroupPartition -org.apache.spark.scheduler.SlaveLost -org.apache.spark.api.python.PythonWorkerFactory -org.apache.spark.streaming.ContextWaiter -org.apache.spark.serializer.JavaSerializationStream -org.apache.spark.rdd.CheckpointRDDPartition -org.apache.spark.scheduler.WorkerOffer -org.apache.spark.streaming.scheduler.JobCompleted -org.apache.spark.rdd.ParallelCollectionRDD -org.apache.spark.streaming.dstream.ForEachDStream -org.apache.spark.scheduler.JobFailed -org.apache.spark.graphx.impl.ReplicatedVertexView 
-org.apache.spark.deploy.master.ui.ApplicationPage -org.apache.spark.rdd.ShuffledRDDPartition -org.apache.spark.ui.jobs.JobProgressUI -org.apache.spark.deploy.Command -org.apache.spark.ui.jobs.StagePage -org.apache.spark.serializer.KryoDeserializationStream -org.apache.spark.scheduler.ExecutorLost -org.apache.spark.deploy.master.BlackHolePersistenceEngine -org.apache.spark.scheduler.cluster.SimrSchedulerBackend -org.apache.spark.streaming.dstream.MapPartitionedDStream -org.apache.spark.scheduler.TaskInfo -org.apache.spark.storage.BlockManagerSlaveActor -org.apache.spark.ui.env.EnvironmentUI -org.apache.spark.executor.CoarseGrainedExecutorBackend -org.apache.spark.storage.ShuffleBlockManager -org.apache.spark.rdd.NarrowCoGroupSplitDep -org.apache.spark.scheduler.JobGroupCancelled -org.apache.spark.rdd.MapPartitionsRDD -org.apache.spark.graphx.impl.EdgeTripletIterator -org.apache.spark.storage.BlockManagerMaster -org.apache.spark.partial.SumEvaluator -org.apache.spark.network.netty.FileServerHandler -org.apache.spark.GetMapOutputStatuses -org.apache.spark.tools.SparkMethod -org.apache.spark.storage.ShuffleWriterGroup -org.apache.spark.ui.jobs.StageTable -org.apache.spark.scheduler.TaskDescription -org.apache.spark.deploy.master.Master -org.apache.spark.scheduler.DAGSchedulerSource -org.apache.spark.streaming.util.Clock -org.apache.spark.deploy.master.ui.MasterWebUI -org.apache.spark.CacheManager -org.apache.spark.streaming.util.SystemClock -org.apache.spark.storage.BroadcastHelperBlockId -org.apache.spark.deploy.master.ZooKeeperLeaderElectionAgent -org.apache.spark.storage.FileSegment -org.apache.spark.api.python.PythonPartitioner -org.apache.spark.scheduler.ExecutorLossReason -org.apache.spark.network.ReceivingConnection -org.apache.spark.scheduler.Schedulable -org.apache.spark.scheduler.TaskSet -org.apache.spark.storage.BlockStore -org.apache.spark.streaming.scheduler.StreamingListenerBus -org.apache.spark.deploy.ClientArguments -org.apache.spark.metrics.MetricsConfig -org.apache.spark.serializer.SerializerManager -org.apache.spark.streaming.scheduler.NetworkInputTracker -org.apache.spark.broadcast.HttpBroadcast -org.apache.spark.executor.ExecutorURLClassLoader -org.apache.spark.scheduler.TaskSchedulerImpl -org.apache.spark.streaming.dstream.RawNetworkReceiver -org.apache.spark.util.collection.SizeTrackingAppendOnlyMap -org.apache.spark.rdd.SampledRDDPartition -org.apache.spark.storage.BlockManagerId -org.apache.spark.deploy.master.LeaderElectionAgent -org.apache.spark.streaming.dstream.StopReceiver -org.apache.spark.storage.GotBlock -org.apache.spark.scheduler.Task -org.apache.spark.rdd.CartesianPartition -org.apache.spark.storage.StorageStatus -org.apache.spark.rdd.PartitionwiseSampledRDDPartition -org.apache.spark.graphx.impl.ShuffleSerializationStream -org.apache.spark.scheduler.cluster.mesos.MesosSchedulerBackend -org.apache.spark.deploy.worker.ui.IndexPage -org.apache.spark.partial.ApproximateEvaluator -org.apache.spark.graphx.impl.ShuffleSerializerInstance -org.apache.spark.ui.jobs.ExecutorSummary -org.apache.spark.scheduler.GettingResultEvent -org.apache.spark.graphx.impl.RoutingTable -org.apache.spark.streaming.util.KillingThread -org.apache.spark.GrowableAccumulableParam -org.apache.spark.deploy.master.ApplicationInfo -org.apache.spark.deploy.worker.Worker -org.apache.spark.streaming.dstream.ReportBlock -org.apache.spark.partial.StudentTCacher -org.apache.spark.scheduler.ActiveJob -org.apache.spark.network.netty.FileServer -org.apache.spark.network.netty.ShuffleCopier 
-org.apache.spark.util.collection.PrimitiveVector -org.apache.spark.serializer.KryoSerializerInstance -org.apache.spark.streaming.util.RateLimitedOutputStream -org.apache.spark.network.Message -org.apache.spark.util.InnerClosureFinder -org.apache.spark.util.SerializableHyperLogLog -org.apache.spark.deploy.worker.ProcessBuilderLike -org.apache.spark.streaming.Interval -org.apache.spark.rdd.CheckpointRDD -org.apache.spark.deploy.master.SparkZooKeeperSession -org.apache.spark.graphx.impl.VertexAttributeBlock -org.apache.spark.streaming.dstream.MapValuedDStream -org.apache.spark.util.collection.ExternalAppendOnlyMap -org.apache.spark.deploy.client.AppClient -org.apache.spark.deploy.DriverDescription -org.apache.spark.scheduler.FairSchedulableBuilder -org.apache.spark.executor.MesosExecutorBackend -org.apache.spark.streaming.scheduler.ClearMetadata -org.apache.spark.storage.BroadcastBlockId -org.apache.spark.streaming.scheduler.Job -org.apache.spark.streaming.scheduler.DeregisterReceiver -org.apache.spark.serializer.KryoSerializationStream -org.apache.spark.scheduler.FIFOSchedulableBuilder -org.apache.spark.ui.jobs.JobProgressListener -org.apache.spark.TaskEndReason -org.apache.spark.storage.BlockId -org.apache.spark.streaming.scheduler.JobGenerator -org.apache.spark.deploy.master.WorkerInfo -org.apache.spark.storage.ShuffleBlockId -org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessage -org.apache.spark.storage.ThreadingTest.ConsumerThread -org.apache.spark.rdd.PartitionGroup -org.apache.spark.util.collection.AppendOnlyMap -org.apache.spark.scheduler.ExecutorExited -org.apache.spark.streaming.Checkpoint -org.apache.spark.streaming.scheduler.AddBlocks -org.apache.spark.network.netty.FileClient -org.apache.spark.scheduler.JobListener -org.apache.spark.streaming.scheduler.JobSet -org.apache.spark.scheduler.ExecutorGained -org.apache.spark.partial.CountEvaluator -org.apache.spark.scheduler.Pool -org.apache.spark.deploy.master.FileSystemPersistenceEngine -org.apache.spark.rdd.BlockRDD -org.apache.spark.network.netty.FileHeader -org.apache.spark.rdd.PartitionerAwareUnionRDDPartition -org.apache.spark.graphx.impl.VertexIdMsgSerializer -org.apache.spark.streaming.dstream.FileInputDStream -org.apache.spark.deploy.worker.ui.WorkerWebUI -org.apache.spark.metrics.MetricsSystem -org.apache.spark.scheduler.JobSubmitted -org.apache.spark.graphx.impl.MsgRDDFunctions -org.apache.spark.api.java.function.WrappedFunction3 -org.apache.spark.streaming.CheckpointWriter -org.apache.spark.storage.BlockManager -org.apache.spark.util.CompletionIterator -org.apache.spark.network.MessageChunk -org.apache.spark.rdd.MappedValuesRDD -org.apache.spark.streaming.dstream.PluggableInputDStream -org.apache.spark.rdd.ZippedWithIndexRDDPartition -org.apache.spark.streaming.receivers.ActorReceiver -org.apache.spark.serializer.JavaDeserializationStream -org.apache.spark.rdd.SubtractedRDD -org.apache.spark.streaming.dstream.SocketInputDStream -org.apache.spark.partial.GroupedSumEvaluator -org.apache.spark.broadcast.TorrentInfo -org.apache.spark.storage.BlockException -org.apache.spark.streaming.dstream.FlatMapValuedDStream -org.apache.spark.deploy.worker.DriverRunner -org.apache.spark.deploy.master.ExecutorInfo -org.apache.spark.deploy.master.ui.IndexPage -org.apache.spark.rdd.ParallelCollectionPartition -org.apache.spark.scheduler.DirectTaskResult -org.apache.spark.partial.ApproximateActionListener -org.apache.spark.streaming.DStreamGraph -org.apache.spark.partial.GroupedCountEvaluator 
-org.apache.spark.api.java.JavaSparkContextVarargsWorkaround -org.apache.spark.graphx.util.collection.PrimitiveKeyOpenHashMap -org.apache.spark.streaming.dstream.StateDStream -org.apache.spark.FetchFailedException -org.apache.spark.BlockStoreShuffleFetcher -org.apache.spark.util.Clock -org.apache.spark.storage.DiskBlockManager -org.apache.spark.scheduler.SparkListenerBus -org.apache.spark.rdd.GlommedRDD -org.apache.spark.streaming.dstream.UnionDStream -org.apache.spark.scheduler.JobResult -org.apache.spark.deploy.ApplicationDescription -org.apache.spark.rdd.CoGroupSplitDep -org.apache.spark.api.python.PythonRDD -org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend -org.apache.spark.partial.GroupedMeanEvaluator -org.apache.spark.graphx.impl.VertexBroadcastMsg -org.apache.spark.ShuffleFetcher -org.apache.spark.streaming.dstream.ShuffledDStream -org.apache.spark.scheduler.TaskScheduler -org.apache.spark.streaming.dstream.MappedDStream -org.apache.spark.storage.BlockFetchTracker -org.apache.spark.rdd.ZippedPartitionsPartition -org.apache.spark.rdd.ShuffleCoGroupSplitDep -org.apache.spark.graphx.impl.LongVertexBroadcastMsgSerializer -org.apache.spark.deploy.ExecutorDescription -org.apache.spark.MapOutputTrackerMessage -org.apache.spark.util.MetadataCleaner -org.apache.spark.HttpFileServer -org.apache.spark.streaming.util.ManualClock -org.apache.spark.streaming.scheduler.JobGeneratorEvent -org.apache.spark.scheduler.JobCancelled -org.apache.spark.streaming.scheduler.DoCheckpoint -org.apache.spark.broadcast.TorrentBroadcast -org.apache.spark.scheduler.FIFOSchedulingAlgorithm -org.apache.spark.network.ConnectionManagerId -org.apache.spark.deploy.master.PersistenceEngine -org.apache.spark.mllib.recommendation.InLinkBlock -org.apache.spark.partial.MeanEvaluator -org.apache.spark.streaming.dstream.ReportError -org.apache.spark.storage.RDDBlockId -org.apache.spark.api.java.function.WrappedFunction2 -org.apache.spark.ui.exec.ExecutorsUI -org.apache.spark.network.netty.FileServerChannelInitializer -org.apache.spark.streaming.scheduler.JobStarted -org.apache.spark.streaming.dstream.RawInputDStream -org.apache.spark.storage.GetBlock -org.apache.spark.ui.jobs.IndexPage -org.apache.spark.storage.BlockManagerSource -org.apache.spark.rdd.BlockRDDPartition -org.apache.spark.rdd.PartitionCoalescer -org.apache.spark.network.ConnectionManager -org.apache.spark.deploy.master.MasterArguments -org.apache.spark.graphx.impl.MessageToPartition -org.apache.spark.executor.ExecutorBackend -org.apache.spark.util.NextIterator -org.apache.spark.storage.BlockManagerWorker -org.apache.spark.streaming.dstream.QueueInputDStream -org.apache.spark.streaming.scheduler.JobScheduler -org.apache.spark.streaming.dstream.FlatMappedDStream -org.apache.spark.scheduler.TaskResultGetter -org.apache.spark.network.netty.FileClientChannelInitializer -org.apache.spark.rdd.MappedRDD -org.apache.spark.rdd.PartitionerAwareUnionRDD -org.apache.spark.network.BufferMessage -org.apache.spark.streaming.dstream.DStreamCheckpointData -org.apache.spark.executor.Executor -org.apache.spark.MapOutputTrackerMaster -org.apache.spark.deploy.client.AppClientListener -org.apache.spark.storage.BlockInfo -org.apache.spark.streaming.dstream.ReducedWindowedDStream -org.apache.spark.rdd.JdbcPartition -org.apache.spark.deploy.TestWorkerInfo -org.apache.spark.scheduler.BeginEvent -org.apache.spark.storage.BlockMessage -org.apache.spark.tools.SparkType -org.apache.spark.rdd.NewHadoopPartition -org.apache.spark.streaming.scheduler.JobSchedulerEvent 
-org.apache.spark.streaming.util.TestOutputStream -org.apache.spark.scheduler.local.LocalActor -org.apache.spark.graphx.impl.EdgePartition -org.apache.spark.scheduler.TaskSetFailed -org.apache.spark.ServerStateException -org.apache.spark.network.MessageChunkHeader -org.apache.spark.storage.DiskBlockObjectWriter -org.apache.spark.graphx.impl.VertexPartition -org.apache.spark.MapOutputTracker -org.apache.spark.rdd.CartesianRDD -org.apache.spark.storage.ThreadingTest.ProducerThread -org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend -org.apache.spark.graphx.impl.IntVertexBroadcastMsgSerializer -org.apache.spark.streaming.scheduler.NetworkInputTrackerMessage -org.apache.spark.deploy.worker.WorkerSource -org.apache.spark.graphx.impl.DoubleVertexBroadcastMsgSerializer -org.apache.spark.WritableConverter -org.apache.spark.streaming.util.RecurringTimer -org.apache.spark.util.collection.PrimitiveKeyOpenHashMap -org.apache.spark.scheduler.CompletionEvent -org.apache.spark.streaming.receivers.Data -org.apache.spark.rdd.UnionPartition -org.apache.spark.broadcast.TorrentBlock -org.apache.spark.storage.TestBlockId -org.apache.spark.storage.BlockManagerMasterActor -org.apache.spark.storage.TaskResultBlockId -org.apache.spark.streaming.scheduler.ErrorReported -org.apache.spark.streaming.scheduler.RegisterReceiver -org.apache.spark.scheduler.DAGScheduler -org.apache.spark.deploy.TestMasterInfo -org.apache.spark.deploy.master.DriverInfo -org.apache.spark.ui.storage.BlockManagerUI -org.apache.spark.streaming.scheduler.GenerateJobs -org.apache.spark.storage.StreamBlockId -org.apache.spark.util.FieldAccessFinder -org.apache.spark.scheduler.local.LocalBackend -org.apache.spark.network.netty.FileClientHandler -org.apache.spark.tools.ParameterizedType -org.apache.spark.network.Connection -org.apache.spark.ui.storage.RDDPage -org.apache.spark.HttpServer -org.apache.spark.deploy.DockerId -org.apache.spark.streaming.scheduler.ClearCheckpointData -org.apache.spark.rdd.ZippedPartition -org.apache.spark.FetchFailed -org.apache.spark.serializer.JavaSerializerInstance -org.apache.spark.scheduler.SchedulableBuilder -org.apache.spark.streaming.dstream.TransformedDStream -org.apache.spark.streaming.util.FileGeneratingThread -org.apache.spark.api.java.function.WrappedFunction1 -org.apache.spark.broadcast.BroadcastManager -org.apache.spark.rdd.FilteredRDD -org.apache.spark.graphx.impl.IntAggMsgSerializer -org.apache.spark.scheduler.MapStatus -org.apache.spark.tools.BaseType -org.apache.spark.ui.jobs.PoolTable -org.apache.spark.graphx.impl.ShuffleDeserializationStream -org.apache.spark.scheduler.DAGSchedulerEvent -org.apache.spark.ui.jobs.ExecutorTable -org.apache.spark.deploy.master.MasterSource -org.apache.spark.graphx.impl.VertexBroadcastMsgRDDFunctions -org.apache.spark.deploy.worker.WorkerArguments -org.apache.spark.deploy.worker.WorkerWatcher -org.apache.spark.graphx.impl.EdgePartitionBuilder -org.apache.spark.deploy.DeployMessage -org.apache.spark.streaming.dstream.SocketReceiver -org.apache.spark.scheduler.ResultTask -org.apache.spark.rdd.RDDCheckpointData -org.apache.spark.ui.SparkUI -org.apache.spark.util.collection.OpenHashSet -org.apache.spark.streaming.dstream.NetworkReceiverMessage -org.apache.spark.ui.jobs.PoolPage -org.apache.spark.deploy.worker.Clock -org.apache.spark.scheduler.TaskLocation -org.apache.spark.deploy.worker.Sleeper -org.apache.spark.storage.TempBlockId -org.apache.spark.storage.BlockObjectWriter -org.apache.spark.streaming.dstream.WindowedDStream 
-org.apache.spark.util.random.XORShiftRandom
-org.apache.spark.network.SendingConnection
-org.apache.spark.ui.storage.IndexPage
-org.apache.spark.rdd.HadoopPartition
-org.apache.spark.util.Utils.CallSiteInfo
-org.apache.spark.deploy.LocalSparkCluster
-org.apache.spark.streaming.ObjectInputStreamWithLoader
-org.apache.spark.util.IdGenerator
-org.apache.spark.graphx.impl.DoubleAggMsgSerializer
-org.apache.spark.ExceptionFailure
-org.apache.spark.storage.BlockFetcherIterator
-org.apache.spark.MapOutputTrackerMasterActor
-org.apache.spark.storage.BlockMessageArray
-org.apache.spark.graphx.impl.LongAggMsgSerializer
-org.apache.spark.scheduler.FairSchedulingAlgorithm
-org.apache.spark.scheduler.IndirectTaskResult
-org.apache.spark.storage.PutBlock
-org.apache.spark.util.collection.OpenHashMap
-org.apache.spark.scheduler.ShuffleMapTask
-org.apache.spark.util.ByteBufferInputStream
-org.apache.spark.scheduler.SchedulingAlgorithm
\ No newline at end of file
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 278969655de48..eb1b99933cbd5 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -41,6 +41,7 @@ if [ -f "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-dep
   CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes"

   DEPS_ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-deps.jar`
   CLASSPATH="$CLASSPATH:$DEPS_ASSEMBLY_JAR"
diff --git a/dev/run-tests b/dev/run-tests
index 432563e1ef845..e557057a907c5 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -32,12 +32,6 @@ echo "Running Scala style checks"
 echo "========================================================================="
 sbt/sbt clean scalastyle

-echo "========================================================================="
-echo "Running MiMa for detecting binary incompatibilites."
-echo "Please see MimaBuild.scala for details."
-echo "========================================================================="
-sbt/sbt mima-report-binary-issues
-
 echo "========================================================================="
 echo "Running Spark unit tests"
 echo "========================================================================="
@@ -50,3 +44,10 @@ if [ -z "$PYSPARK_PYTHON" ]; then
     export PYSPARK_PYTHON=/usr/local/bin/python2.7
 fi
 ./python/run-tests
+
+echo "========================================================================="
+echo "Detecting binary incompatibilities with MiMa"
+echo "========================================================================="
+./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
+sbt/sbt mima-report-binary-issues
+
diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala
index d83c7bf092833..d83844d68a5fb 100644
--- a/project/MimaBuild.scala
+++ b/project/MimaBuild.scala
@@ -8,54 +8,50 @@ object MimaBuild {
     import com.typesafe.tools.mima.core._
     import com.typesafe.tools.mima.core.ProblemFilters._

-    IO.read(file(base.getAbsolutePath + "/.mima-exclude")).split("\n").map(excludePackage).toSeq ++
-//    /**
-//     * A: Detections are semi private or likely to become semi private at some point.
-//     */
-//    Seq(exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom"),
-//      exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom$"),
-//      exclude[MissingMethodProblem]("org.apache.spark.util.Utils.cloneWritables"),
-//      // Scheduler is not considered a public API.
-//      excludePackage("org.apache.spark.deploy"),
-//      // Was made private in 1.0
-//      excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#DiskMapIterator"),
-//      excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#ExternalIterator"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.cogroupResultToJava"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.groupByResultToJava"),
-//      exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.handleFailedTask"),
-//      exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.taskSetTaskIds"),
-//      exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSetManager.handleFailedTask"),
-//      exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.removeAllRunningTasks"),
-//      exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.runningTasks_="),
-//      exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime"),
-//      exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime_="),
-//      exclude[MissingMethodProblem]("org.apache.spark.storage.BlockObjectWriter.bytesWritten")) ++
-//    /**
-//     * B: Detections are mostly false +ve.
-//     */
-//    Seq(exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.setGenerator"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.foreachPartition"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.python.PythonRDD.writeToStream")) ++
-    /**
-     * Detections I am unsure about. Should be either moved to B (false +ve) or A.
-     */
-    Seq(
-//      exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator$"),
-//      exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags"),
-//      exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags$"),
-//      exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$wrapForCompression$1"),
-//      exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$sparkConf"),
-//      exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator"),
-//      exclude[MissingClassProblem]("org.apache.spark.mllib.optimization.SquaredGradient"),
-//      exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.gradient"),
-//      exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.RidgeRegressionWithSGD.gradient"),
-//      exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient"),
-//      exclude[FinalClassProblem]("org.apache.spark.SparkFiles")
+    // Excludes relevant to all Spark versions
+    val defaultExcludes = Seq(excludePackage("org.apache.spark.repl"))
-    )
+    // Read package-private excludes from file
+    val excludeFilePath = (base.getAbsolutePath + "/.mima-excludes")
+    val excludeFile = file(excludeFilePath)
+    val packagePrivateList: Seq[String] =
+      if (!excludeFile.exists()) {
+        Seq()
+      } else {
+        IO.read(excludeFile).split("\n")
+      }
+
+    def excludeClass(className: String) = {
+      Seq(
+        excludePackage(className),
+        ProblemFilters.exclude[MissingClassProblem](className),
+        ProblemFilters.exclude[MissingTypesProblem](className),
+        excludePackage(className + "$"),
+        ProblemFilters.exclude[MissingClassProblem](className + "$"),
+        ProblemFilters.exclude[MissingTypesProblem](className + "$")
+      )
+    }
+    def excludeSparkClass(className: String) = excludeClass("org.apache.spark." + className)
+
+    val packagePrivateExcludes = packagePrivateList.flatMap(excludeClass)
+
+    /* Excludes specific to a given version of Spark. When comparing the given version against
+       its immediate predecessor, the excludes listed here will be applied. */
+    val versionExcludes =
+      SparkBuild.SPARK_VERSION match {
+        case v if v.startsWith("1.0") =>
+          Seq(excludePackage("org.apache.spark.api.java")) ++
+          excludeSparkClass("rdd.ClassTags") ++
+          excludeSparkClass("util.XORShiftRandom") ++
+          excludeSparkClass("mllib.recommendation.MFDataGenerator") ++
+          excludeSparkClass("mllib.optimization.SquaredGradient") ++
+          excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++
+          excludeSparkClass("mllib.regression.LassoWithSGD") ++
+          excludeSparkClass("mllib.regression.LinearRegressionWithSGD")
+        case _ => Seq()
+      }
+
+    packagePrivateExcludes ++ versionExcludes
   }

   def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq(
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index ed5e598d798c2..ca050f9049aa6 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -28,6 +28,8 @@ import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact
 //import com.jsuereth.pgp.sbtplugin.PgpKeys._

 object SparkBuild extends Build {
+  val SPARK_VERSION = "1.0.0-incubating-SNAPSHOT"
+
   // Hadoop version to build against. For example, "1.0.4" for Apache releases, or
   // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set
   // through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN.
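For illustration only (this snippet is not part of any diff above, and the class name is just an example taken from the version excludes): the excludeClass helper added to MimaBuild.scala expands each name read from the generated .mima-excludes file into MiMa filters for both the class and its companion object. Assuming the mima-core classes imported above are on the build classpath, as they are for code under project/, a single entry expands roughly to:

import com.typesafe.tools.mima.core._
import com.typesafe.tools.mima.core.ProblemFilters._

// Approximately what excludeClass("org.apache.spark.rdd.ClassTags") returns:
// filters for the class itself plus its companion object (the "$" suffix).
val sampleExcludes = Seq(
  excludePackage("org.apache.spark.rdd.ClassTags"),
  ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags"),
  ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.rdd.ClassTags"),
  excludePackage("org.apache.spark.rdd.ClassTags$"),
  ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags$"),
  ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.rdd.ClassTags$")
)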
@@ -124,7 +126,7 @@ object SparkBuild extends Build {

   def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq(
     organization := "org.apache.spark",
-    version := "1.0.0-incubating-SNAPSHOT",
+    version := SPARK_VERSION,
     scalaVersion := "2.10.3",
     scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation",
       "-target:" + SCALAC_JVM_VERSION),
diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala
index 3246db98f5df5..44984f8a83d8f 100644
--- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala
+++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala
@@ -21,78 +21,121 @@ import java.io.File
 import java.util.jar.JarFile

 import scala.collection.mutable
-import scala.collection.JavaConversions.enumerationAsScalaIterator
+import scala.collection.JavaConversions._
+import scala.reflect.runtime.universe.runtimeMirror
+import scala.util.Try

 /**
- * Mima(TODO: Paste URL here) generates a lot of false positives as it does not detect
- * private[x] as internal APIs.
+ * A tool for generating classes to be excluded during binary checking with MIMA. It is expected
+ * that this tool is run with ./spark-class.
+ *
+ * MIMA itself only supports JVM-level visibility and doesn't account for package-private classes.
+ * This tool looks at all currently package-private classes and generates exclusions for them. Note
+ * that this approach is not sound. It can lead to false positives if we move or rename a previously
+ * package-private class. It can lead to false negatives if someone explicitly makes a class
+ * package-private that wasn't before. This exists only to help catch certain classes of changes
+ * which might be difficult to catch during review.
  */
 object GenerateMIMAIgnore {
+  private val classLoader = Thread.currentThread().getContextClassLoader
+  private val mirror = runtimeMirror(classLoader)
+
+  private def classesPrivateWithin(packageName: String): Set[String] = {
-  def classesWithPrivateWithin(packageName: String, excludePackages: Seq[String]): Set[String] = {
-    import scala.reflect.runtime.universe.runtimeMirror
-    val classLoader: ClassLoader = Thread.currentThread().getContextClassLoader
-    val mirror = runtimeMirror(classLoader)
-    val classes = Utils.getClasses(packageName, classLoader)
+    val classes = getClasses(packageName, classLoader)
     val privateClasses = mutable.HashSet[String]()
-    for (x <- classes) {
-      try {
-        // some of the classnames throw malformed class name exceptions and weird Match errors.
-        if (excludePackages.forall(!x.startsWith(_)) &&
-          mirror.staticClass(x).privateWithin.toString.trim != "") {
-          privateClasses += x
+
+    def isPackagePrivate(className: String) = {
+      try {
+        /* Couldn't figure out if it's possible to determine a-priori whether a given symbol
+           is a module or class. */
+
+        val privateAsClass = mirror
+          .staticClass(className)
+          .privateWithin
+          .fullName
+          .startsWith(packageName)
+
+        val privateAsModule = mirror
+          .staticModule(className)
+          .privateWithin
+          .fullName
+          .startsWith(packageName)
+
+        privateAsClass || privateAsModule
+      } catch {
+        case _: Throwable => {
+          println("Error determining visibility: " + className)
+          false
+        }
+      }
+    }
+
+    for (className <- classes) {
+      val directlyPrivateSpark = isPackagePrivate(className)
+
+      /* Inner classes defined within a private[spark] class or object are effectively
+         invisible, so we account for them as package private. */
+      val indirectlyPrivateSpark = {
+        val maybeOuter = className.toString.takeWhile(_ != '$')
+        if (maybeOuter != className) {
+          isPackagePrivate(maybeOuter)
+        } else {
+          false
         }
-      } catch {
-        case e: Throwable => // println(e)
       }
+      if (directlyPrivateSpark || indirectlyPrivateSpark) privateClasses += className
     }
-    privateClasses.toSet
+    privateClasses.flatMap(c => Seq(c, c.replace("$", "#"))).toSet
   }

   def main(args: Array[String]) {
-    scala.tools.nsc.io.File(".mima-exclude").
-      writeAll(classesWithPrivateWithin("org.apache.spark", args).mkString("\n"))
-    println("Created : .mima-exclude in current directory.")
+    scala.tools.nsc.io.File(".mima-excludes").
+      writeAll(classesPrivateWithin("org.apache.spark").mkString("\n"))
+    println("Created : .mima-excludes in current directory.")
   }
-}
-
-object Utils {
-
   /**
    * Get all classes in a package from a jar file.
    */
-  def getAllClasses(jarPath: String, packageName: String) = {
+  private def getAllClasses(jarPath: String, packageName: String) = {
     val jar = new JarFile(new File(jarPath))
     val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName))
     val classes = mutable.HashSet[Class[_]]()
     for (entry <- enums) {
       if (!entry.endsWith("/") && !entry.endsWith("MANIFEST.MF") && !entry.endsWith("properties")) {
-        try {
-          classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.'))
-        } catch {
-          case e: Throwable => // println(e) // It may throw a few ClassNotFoundExceptions
-        }
+        classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.'))
       }
     }
     classes
   }

+  private def shouldExclude(name: String) = {
+    // Heuristic to remove JVM classes that do not correspond to user-facing classes in Scala
+    Try(mirror.staticClass(name)).isFailure ||
+      name.contains("anon") ||
+      name.endsWith("class") ||
+      name.contains("$sp")
+  }
+
   /**
    * Scans all classes accessible from the context class loader which belong to the given package
    * and subpackages both from directories and jars present on the classpath.
    */
-  def getClasses(packageName: String,
-      classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Set[String] = {
+  private def getClasses(packageName: String,
+      classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Seq[String] = {
     val path = packageName.replace('.', '/')
-    val resources = classLoader.getResources(path).toArray
+    val resources = classLoader.getResources(path)
+
     val jars = resources.filter(x => x.getProtocol == "jar")
       .map(_.getFile.split(":")(1).split("!")(0))
     val classesFromJars = jars.map(getAllClasses(_, path)).flatten
+
     val dirs = resources.filter(x => x.getProtocol == "file")
-      .map(x => new File(x.getFile.split(":")(1)))
+      .map(x => new File(x.getFile.split(":").last))
     val classFromDirs = dirs.map(findClasses(_, packageName)).flatten
-    (classFromDirs ++ classesFromJars).map(_.getCanonicalName).filter(_ != null).toSet
+
+    (classFromDirs ++ classesFromJars).map(_.getName).filter(!shouldExclude(_)).toSeq
   }

   private def findClasses(directory: File, packageName: String): Seq[Class[_]] = {
@@ -105,8 +148,8 @@ object Utils {
       if (file.isDirectory) {
         classes ++= findClasses(file, packageName + "." + file.getName)
       } else if (file.getName.endsWith(".class")) {
-        classes += Class.forName(packageName + '.' + file.getName.substring(0,
-          file.getName.length() - 6))
+        val className = file.getName.substring(0, file.getName.length() - 6)
+        classes += Class.forName(packageName + '.'
+ className) } } classes From 647c547b59508e17c195a9eb17032774b68ddf8e Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Sun, 23 Mar 2014 13:30:25 -0700 Subject: [PATCH 5/8] Reveiw feedback. --- bin/spark-class | 3 +- project/MimaBuild.scala | 6 +- .../spark/tools/GenerateMIMAIgnore.scala | 80 +++++++++---------- 3 files changed, 43 insertions(+), 46 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index dfa11e854dc89..535a6652baa6d 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -129,8 +129,7 @@ fi # Compute classpath using external script CLASSPATH=`$FWDIR/bin/compute-classpath.sh` -CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR" -if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then +if [[ "$1" =~ org.apache.spark.tools.* ]]; then CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR" fi diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index d83844d68a5fb..9dc9e479f9808 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -8,8 +8,8 @@ object MimaBuild { import com.typesafe.tools.mima.core._ import com.typesafe.tools.mima.core.ProblemFilters._ - // Excludes relevant to all Spark versions - val defaultExcludes = Seq(excludePackage("org.apache.spark.repl")) + // Excludes placed here will be used for all Spark versions + val defaultExcludes = Seq() // Read package-private excludes from file val excludeFilePath = (base.getAbsolutePath + "/.mima-excludes") @@ -51,7 +51,7 @@ object MimaBuild { case _ => Seq() } - packagePrivateExcludes ++ versionExcludes + defaultExcludes ++ packagePrivateExcludes ++ versionExcludes } def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq( diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index 44984f8a83d8f..8390b3301df8e 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -46,24 +46,24 @@ object GenerateMIMAIgnore { val privateClasses = mutable.HashSet[String]() def isPackagePrivate(className: String) = { - try { - /* Couldn't figure out if it's possible to determine a-priori whether a given symbol - is a module or class. */ - - val privateAsClass = mirror - .staticClass(className) - .privateWithin - .fullName - .startsWith(packageName) - - val privateAsModule = mirror - .staticModule(className) - .privateWithin - .fullName - .startsWith(packageName) - - privateAsClass || privateAsModule - } catch { + try { + /* Couldn't figure out if it's possible to determine a-priori whether a given symbol + is a module or class. */ + + val privateAsClass = mirror + .staticClass(className) + .privateWithin + .fullName + .startsWith(packageName) + + val privateAsModule = mirror + .staticModule(className) + .privateWithin + .fullName + .startsWith(packageName) + + privateAsClass || privateAsModule + } catch { case _: Throwable => { println("Error determining visibility: " + className) false @@ -95,26 +95,11 @@ object GenerateMIMAIgnore { println("Created : .mima-excludes in current directory.") } - /** - * Get all classes in a package from a jar file. 
- */ - private def getAllClasses(jarPath: String, packageName: String) = { - val jar = new JarFile(new File(jarPath)) - val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName)) - val classes = mutable.HashSet[Class[_]]() - for (entry <- enums) { - if (!entry.endsWith("/") && !entry.endsWith("MANIFEST.MF") && !entry.endsWith("properties")) { - classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.')) - } - } - classes - } private def shouldExclude(name: String) = { // Heuristic to remove JVM classes that do not correspond to user-facing classes in Scala - Try(mirror.staticClass(name)).isFailure || name.contains("anon") || - name.endsWith("class") || + name.endsWith("$class") || name.contains("$sp") } @@ -123,22 +108,22 @@ object GenerateMIMAIgnore { * and subpackages both from directories and jars present on the classpath. */ private def getClasses(packageName: String, - classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Seq[String] = { + classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Set[String] = { val path = packageName.replace('.', '/') val resources = classLoader.getResources(path) val jars = resources.filter(x => x.getProtocol == "jar") .map(_.getFile.split(":")(1).split("!")(0)) - val classesFromJars = jars.map(getAllClasses(_, path)).flatten + val classesFromJars = jars.map(getClassesFromJar(_, path)).flatten val dirs = resources.filter(x => x.getProtocol == "file") .map(x => new File(x.getFile.split(":").last)) - val classFromDirs = dirs.map(findClasses(_, packageName)).flatten + val classFromDirs = dirs.map(getClassesFromDir(_, packageName)).flatten - (classFromDirs ++ classesFromJars).map(_.getName).filter(!shouldExclude(_)).toSeq + (classFromDirs ++ classesFromJars).map(_.getName).filterNot(shouldExclude).toSet } - private def findClasses(directory: File, packageName: String): Seq[Class[_]] = { + private def getClassesFromDir(directory: File, packageName: String): Seq[Class[_]] = { val classes = mutable.ArrayBuffer[Class[_]]() if (!directory.exists()) { return classes @@ -146,12 +131,25 @@ object GenerateMIMAIgnore { val files = directory.listFiles() for (file <- files) { if (file.isDirectory) { - classes ++= findClasses(file, packageName + "." + file.getName) + classes ++= getClassesFromDir(file, packageName + "." + file.getName) } else if (file.getName.endsWith(".class")) { - val className = file.getName.substring(0, file.getName.length() - 6) + val className = file.getName.stripSuffix(".class") classes += Class.forName(packageName + '.' + className) } } classes } + + /** + * Get all classes in a package from a jar file. 
+ */ + private def getClassesFromJar(jarPath: String, packageName: String) = { + val jar = new JarFile(new File(jarPath)) + val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName)) + val classes = mutable.HashSet[Class[_]]() + for (entry <- enums if entry.endsWith(".class")) { + classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.')) + } + classes + } } From 0e0f5703090313f458072ac73105eec7a05b1027 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 24 Mar 2014 00:23:36 -0700 Subject: [PATCH 6/8] Small fix and removing directory listings --- .../spark/tools/GenerateMIMAIgnore.scala | 32 +++---------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index 8390b3301df8e..8eb8504ca9120 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -23,7 +23,6 @@ import java.util.jar.JarFile import scala.collection.mutable import scala.collection.JavaConversions._ import scala.reflect.runtime.universe.runtimeMirror -import scala.util.Try /** * A tool for generating classes to be excluded during binary checking with MIMA. It is expected @@ -113,31 +112,10 @@ object GenerateMIMAIgnore { val resources = classLoader.getResources(path) val jars = resources.filter(x => x.getProtocol == "jar") - .map(_.getFile.split(":")(1).split("!")(0)) + .map(_.getFile.split(":")(1).split("!")(0)).toSeq val classesFromJars = jars.map(getClassesFromJar(_, path)).flatten - val dirs = resources.filter(x => x.getProtocol == "file") - .map(x => new File(x.getFile.split(":").last)) - val classFromDirs = dirs.map(getClassesFromDir(_, packageName)).flatten - - (classFromDirs ++ classesFromJars).map(_.getName).filterNot(shouldExclude).toSet - } - - private def getClassesFromDir(directory: File, packageName: String): Seq[Class[_]] = { - val classes = mutable.ArrayBuffer[Class[_]]() - if (!directory.exists()) { - return classes - } - val files = directory.listFiles() - for (file <- files) { - if (file.isDirectory) { - classes ++= getClassesFromDir(file, packageName + "." + file.getName) - } else if (file.getName.endsWith(".class")) { - val className = file.getName.stripSuffix(".class") - classes += Class.forName(packageName + '.' 
+ className) - } - } - classes + classesFromJars.map(_.getName).filterNot(shouldExclude).toSet } /** @@ -146,10 +124,8 @@ object GenerateMIMAIgnore { private def getClassesFromJar(jarPath: String, packageName: String) = { val jar = new JarFile(new File(jarPath)) val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName)) - val classes = mutable.HashSet[Class[_]]() - for (entry <- enums if entry.endsWith(".class")) { - classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.')) - } + val classes = for (entry <- enums if entry.endsWith(".class")) + yield Class.forName(entry.replace('/', '.').stripSuffix(".class")) classes } } From 3666cf14a4ac042c263d03a0ce88758b6664aff7 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 24 Mar 2014 11:00:47 -0700 Subject: [PATCH 7/8] Minor style change --- project/MimaBuild.scala | 17 +++++++++++++++++ .../apache/spark/tools/GenerateMIMAIgnore.scala | 5 +++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index 9dc9e479f9808..041919deaa9a6 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact} import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings import sbt._ diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index 8eb8504ca9120..5547e9fe58fc7 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -113,9 +113,10 @@ object GenerateMIMAIgnore { val jars = resources.filter(x => x.getProtocol == "jar") .map(_.getFile.split(":")(1).split("!")(0)).toSeq - val classesFromJars = jars.map(getClassesFromJar(_, path)).flatten - classesFromJars.map(_.getName).filterNot(shouldExclude).toSet + jars.flatMap(getClassesFromJar(_, path)) + .map(_.getName) + .filterNot(shouldExclude).toSet } /** From 22ae267b368669455666a3f9b18294662b4e3276 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 24 Mar 2014 19:39:46 -0700 Subject: [PATCH 8/8] New binary changes after upmerge --- project/MimaBuild.scala | 6 +++++- project/SparkBuild.scala | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index 041919deaa9a6..e7c9c47c960fa 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -57,7 +57,11 @@ object MimaBuild { val versionExcludes = SparkBuild.SPARK_VERSION match { case v if v.startsWith("1.0") => - Seq(excludePackage("org.apache.spark.api.java")) ++ + Seq( + excludePackage("org.apache.spark.api.java"), + excludePackage("org.apache.spark.streaming.api.java"), + excludePackage("org.apache.spark.mllib") + ) ++ excludeSparkClass("rdd.ClassTags") ++ excludeSparkClass("util.XORShiftRandom") ++ excludeSparkClass("mllib.recommendation.MFDataGenerator") ++ diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index e4883c9c74c9f..21d2779d85b74 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -342,7 +342,6 @@ object SparkBuild extends Build { def examplesSettings = sharedSettings ++ Seq( name := "spark-examples", - previousArtifact := sparkPreviousArtifact("spark-examples"), libraryDependencies ++= Seq( "com.twitter" %% "algebird-core" % "0.1.11", "org.apache.hbase" % "hbase" % HBASE_VERSION excludeAll(excludeNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging), @@ -553,6 +552,7 @@ object SparkBuild extends Build { def mqttSettings() = streamingSettings ++ Seq( name := "spark-streaming-mqtt", + previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"), libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0") ) }
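For illustration only (not part of the patch series): after the follow-up commits above, the shouldExclude heuristic in GenerateMIMAIgnore keeps a class name only if it carries none of the compiler-generated markers, so synthetic classes never reach the generated .mima-excludes file. A self-contained sketch of that final string check, using hypothetical class names:

// Hypothetical class names; only the plain user-facing class survives the filter.
val candidates = Seq(
  "org.apache.spark.rdd.RDD",                 // kept
  "org.apache.spark.rdd.RDD$$anonfun$map$1",  // dropped: anonymous function class
  "org.apache.spark.Logging$class",           // dropped: trait implementation class
  "org.apache.spark.util.Vector$mcD$sp"       // dropped: specialized variant
)
val visible = candidates.filterNot { name =>
  name.contains("anon") || name.endsWith("$class") || name.contains("$sp")
}
// visible == Seq("org.apache.spark.rdd.RDD")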