From 651844c06d1890b3c5466ad200efaa55ca70c7cf Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 12 Feb 2014 14:27:22 +0530 Subject: [PATCH 1/8] Support MiMa for reporting binary compatibility accross versions. --- dev/run-tests | 6 ++++ project/MimaBuild.scala | 60 ++++++++++++++++++++++++++++++++++++++++ project/SparkBuild.scala | 25 ++++++++++++++--- project/plugins.sbt | 1 + 4 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 project/MimaBuild.scala diff --git a/dev/run-tests b/dev/run-tests index d65a397b4c8c7..432563e1ef845 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -32,6 +32,12 @@ echo "Running Scala style checks" echo "=========================================================================" sbt/sbt clean scalastyle +echo "=========================================================================" +echo "Running MiMa for detecting binary incompatibilites." +echo "Please see MimaBuild.scala for details." +echo "=========================================================================" +sbt/sbt mima-report-binary-issues + echo "=========================================================================" echo "Running Spark unit tests" echo "=========================================================================" diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala new file mode 100644 index 0000000000000..f416629f5d03d --- /dev/null +++ b/project/MimaBuild.scala @@ -0,0 +1,60 @@ +import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact} +import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings + +object MimaBuild { + + val ignoredABIProblems = { + import com.typesafe.tools.mima.core._ + import com.typesafe.tools.mima.core.ProblemFilters._ + /** + * A: Detections are semi private or likely to become semi private at some point. + */ + Seq(exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom"), + exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom$"), + exclude[MissingMethodProblem]("org.apache.spark.util.Utils.cloneWritables"), + // Scheduler is not considered a public API. + excludePackage("org.apache.spark.deploy"), + // Was made private in 1.0 + excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#DiskMapIterator"), + excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#ExternalIterator"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.cogroupResultToJava"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.groupByResultToJava"), + exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.handleFailedTask"), + exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.taskSetTaskIds"), + exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSetManager.handleFailedTask"), + exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.removeAllRunningTasks"), + exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.runningTasks_="), + exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime"), + exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime_="), + exclude[MissingMethodProblem]("org.apache.spark.storage.BlockObjectWriter.bytesWritten")) ++ + /** + * B: Detections are mostly false +ve. 
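+     *    (These appear to be per-method filters rather than package-wide excludes, so
+     *    other binary issues reported against the same classes should still surface;
+     *    the repeated mapPartitions entries presumably cover its separate overloads.)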
+ */ + Seq(exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.setGenerator"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), + exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.foreachPartition"), + exclude[MissingMethodProblem]("org.apache.spark.api.python.PythonRDD.writeToStream")) ++ + /** + * Detections I am unsure about. Should be either moved to B (false +ve) or A. + */ + Seq(exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator$"), + exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags"), + exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags$"), + exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$wrapForCompression$1"), + exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$sparkConf"), + exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator"), + exclude[MissingClassProblem]("org.apache.spark.mllib.optimization.SquaredGradient"), + exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.gradient"), + exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.RidgeRegressionWithSGD.gradient"), + exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient") + ) + } + + lazy val mimaSettings = mimaDefaultSettings ++ Seq( + previousArtifact := None, + binaryIssueFilters ++= ignoredABIProblems + ) + +} diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 74bad66cfd018..6410704c2c7a2 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -22,6 +22,7 @@ import sbtassembly.Plugin._ import AssemblyKeys._ import scala.util.Properties import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings} +import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact // For Sonatype publishing //import com.jsuereth.pgp.sbtplugin.PgpKeys._ @@ -121,7 +122,7 @@ object SparkBuild extends Build { lazy val allProjects = packageProjects ++ allExternalRefs ++ Seq[ProjectReference](examples, tools, assemblyProj) - def sharedSettings = Defaults.defaultSettings ++ Seq( + def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings ++ Seq( organization := "org.apache.spark", version := "1.0.0-incubating-SNAPSHOT", scalaVersion := "2.10.3", @@ -244,13 +245,19 @@ object SparkBuild extends Build { val excludeAsm = ExclusionRule(organization = "asm") val excludeSnappy = ExclusionRule(organization = "org.xerial.snappy") + def sparkPreviousArtifact(id: String, organization: String = "org.apache.spark", + version: String = "0.9.0-incubating", crossVersion: String = "2.10"): Option[sbt.ModuleID] = { + val fullId = if (crossVersion.isEmpty) id else id + "_" + crossVersion + Some(organization % fullId % version) // the artifact to compare binary compatibility with + } + def coreSettings = sharedSettings ++ Seq( name := "spark-core", resolvers ++= Seq( "JBoss Repository" at "http://repository.jboss.org/nexus/content/repositories/releases/", "Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos/" ), - + previousArtifact := 
sparkPreviousArtifact("spark-core"), libraryDependencies ++= Seq( "com.google.guava" % "guava" % "14.0.1", "com.google.code.findbugs" % "jsr305" % "1.3.9", @@ -289,7 +296,7 @@ object SparkBuild extends Build { publish := {} ) - def replSettings = sharedSettings ++ Seq( + def replSettings = sharedSettings ++ Seq( name := "spark-repl", libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-compiler" % v ), libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "jline" % v ), @@ -298,6 +305,7 @@ object SparkBuild extends Build { def examplesSettings = sharedSettings ++ Seq( name := "spark-examples", + previousArtifact := sparkPreviousArtifact("spark-examples"), libraryDependencies ++= Seq( "com.twitter" %% "algebird-core" % "0.1.11", "org.apache.hbase" % "hbase" % "0.94.6" excludeAll(excludeNetty, excludeAsm), @@ -321,17 +329,20 @@ object SparkBuild extends Build { def graphxSettings = sharedSettings ++ Seq( name := "spark-graphx", + previousArtifact := sparkPreviousArtifact("spark-graphx"), libraryDependencies ++= Seq( "org.jblas" % "jblas" % "1.2.3" ) ) def bagelSettings = sharedSettings ++ Seq( - name := "spark-bagel" + name := "spark-bagel", + previousArtifact := sparkPreviousArtifact("spark-bagel") ) def mllibSettings = sharedSettings ++ Seq( name := "spark-mllib", + previousArtifact := sparkPreviousArtifact("spark-mllib"), libraryDependencies ++= Seq( "org.jblas" % "jblas" % "1.2.3" ) @@ -339,6 +350,7 @@ object SparkBuild extends Build { def streamingSettings = sharedSettings ++ Seq( name := "spark-streaming", + previousArtifact := sparkPreviousArtifact("spark-streaming"), libraryDependencies ++= Seq( "commons-io" % "commons-io" % "2.4" ) @@ -403,6 +415,7 @@ object SparkBuild extends Build { def twitterSettings() = sharedSettings ++ Seq( name := "spark-streaming-twitter", + previousArtifact := sparkPreviousArtifact("spark-streaming-twitter"), libraryDependencies ++= Seq( "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty) ) @@ -410,6 +423,7 @@ object SparkBuild extends Build { def kafkaSettings() = sharedSettings ++ Seq( name := "spark-streaming-kafka", + previousArtifact := sparkPreviousArtifact("spark-streaming-kafka"), libraryDependencies ++= Seq( "com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty), "org.apache.kafka" %% "kafka" % "0.8.0" @@ -422,6 +436,7 @@ object SparkBuild extends Build { def flumeSettings() = sharedSettings ++ Seq( name := "spark-streaming-flume", + previousArtifact := sparkPreviousArtifact("spark-streaming-flume"), libraryDependencies ++= Seq( "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty, excludeSnappy) ) @@ -429,6 +444,7 @@ object SparkBuild extends Build { def zeromqSettings() = sharedSettings ++ Seq( name := "spark-streaming-zeromq", + previousArtifact := sparkPreviousArtifact("spark-streaming-zeromq"), libraryDependencies ++= Seq( "org.spark-project.akka" %% "akka-zeromq" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty) ) @@ -436,6 +452,7 @@ object SparkBuild extends Build { def mqttSettings() = streamingSettings ++ Seq( name := "spark-streaming-mqtt", + previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"), resolvers ++= Seq("Eclipse Repo" at "https://repo.eclipse.org/content/repositories/paho-releases/"), libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0") ) diff --git a/project/plugins.sbt b/project/plugins.sbt index 914f2e05a402a..aa2baecd2fe1f 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -19,3 +19,4 @@ 
addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.4") addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.4.0") +addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6") From b551519d2cea42cf75fdebf493fb50b927e78581 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 27 Feb 2014 09:53:25 +0530 Subject: [PATCH 2/8] adding a new exclude after rebasing with master --- project/MimaBuild.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index f416629f5d03d..dd68ad5a24f25 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -48,7 +48,8 @@ object MimaBuild { exclude[MissingClassProblem]("org.apache.spark.mllib.optimization.SquaredGradient"), exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.gradient"), exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.RidgeRegressionWithSGD.gradient"), - exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient") + exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient"), + exclude[FinalClassProblem]("org.apache.spark.SparkFiles") ) } From 4c771e00cb45f5fbcf8b1efdac22ef43c1740593 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 20 Mar 2014 21:54:50 +0530 Subject: [PATCH 3/8] Added a tool to generate mima excludes and also adapted build to pick automatically. --- .mima-exclude | 360 ++++++++++++++++++ bin/spark-class | 2 +- project/MimaBuild.scala | 93 ++--- project/SparkBuild.scala | 2 +- .../spark/tools/GenerateMIMAIgnore.scala | 114 ++++++ 5 files changed, 525 insertions(+), 46 deletions(-) create mode 100644 .mima-exclude create mode 100644 tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala diff --git a/.mima-exclude b/.mima-exclude new file mode 100644 index 0000000000000..54a46b4f0ee9e --- /dev/null +++ b/.mima-exclude @@ -0,0 +1,360 @@ +org.apache.spark.scheduler.cluster.SparkDeploySchedulerBackend +org.apache.spark.rdd.FlatMappedRDD +org.apache.spark.storage.PutResult +org.apache.spark.mllib.recommendation.OutLinkBlock +org.apache.spark.network.netty.ShuffleSender +org.apache.spark.scheduler.TaskResult +org.apache.spark.scheduler.JobWaiter +org.apache.spark.deploy.worker.ExecutorRunner +org.apache.spark.streaming.dstream.FilteredDStream +org.apache.spark.util.SerializableBuffer +org.apache.spark.scheduler.TaskSetManager +org.apache.spark.scheduler.Stage +org.apache.spark.rdd.ZippedWithIndexRDD +org.apache.spark.scheduler.SchedulerBackend +org.apache.spark.streaming.dstream.GlommedDStream +org.apache.spark.rdd.FlatMappedValuesRDD +org.apache.spark.deploy.master.MonarchyLeaderAgent +org.apache.spark.rdd.CoGroupPartition +org.apache.spark.scheduler.SlaveLost +org.apache.spark.api.python.PythonWorkerFactory +org.apache.spark.streaming.ContextWaiter +org.apache.spark.serializer.JavaSerializationStream +org.apache.spark.rdd.CheckpointRDDPartition +org.apache.spark.scheduler.WorkerOffer +org.apache.spark.streaming.scheduler.JobCompleted +org.apache.spark.rdd.ParallelCollectionRDD +org.apache.spark.streaming.dstream.ForEachDStream +org.apache.spark.scheduler.JobFailed +org.apache.spark.graphx.impl.ReplicatedVertexView +org.apache.spark.deploy.master.ui.ApplicationPage +org.apache.spark.rdd.ShuffledRDDPartition +org.apache.spark.ui.jobs.JobProgressUI +org.apache.spark.deploy.Command +org.apache.spark.ui.jobs.StagePage 
+org.apache.spark.serializer.KryoDeserializationStream +org.apache.spark.scheduler.ExecutorLost +org.apache.spark.deploy.master.BlackHolePersistenceEngine +org.apache.spark.scheduler.cluster.SimrSchedulerBackend +org.apache.spark.streaming.dstream.MapPartitionedDStream +org.apache.spark.scheduler.TaskInfo +org.apache.spark.storage.BlockManagerSlaveActor +org.apache.spark.ui.env.EnvironmentUI +org.apache.spark.executor.CoarseGrainedExecutorBackend +org.apache.spark.storage.ShuffleBlockManager +org.apache.spark.rdd.NarrowCoGroupSplitDep +org.apache.spark.scheduler.JobGroupCancelled +org.apache.spark.rdd.MapPartitionsRDD +org.apache.spark.graphx.impl.EdgeTripletIterator +org.apache.spark.storage.BlockManagerMaster +org.apache.spark.partial.SumEvaluator +org.apache.spark.network.netty.FileServerHandler +org.apache.spark.GetMapOutputStatuses +org.apache.spark.tools.SparkMethod +org.apache.spark.storage.ShuffleWriterGroup +org.apache.spark.ui.jobs.StageTable +org.apache.spark.scheduler.TaskDescription +org.apache.spark.deploy.master.Master +org.apache.spark.scheduler.DAGSchedulerSource +org.apache.spark.streaming.util.Clock +org.apache.spark.deploy.master.ui.MasterWebUI +org.apache.spark.CacheManager +org.apache.spark.streaming.util.SystemClock +org.apache.spark.storage.BroadcastHelperBlockId +org.apache.spark.deploy.master.ZooKeeperLeaderElectionAgent +org.apache.spark.storage.FileSegment +org.apache.spark.api.python.PythonPartitioner +org.apache.spark.scheduler.ExecutorLossReason +org.apache.spark.network.ReceivingConnection +org.apache.spark.scheduler.Schedulable +org.apache.spark.scheduler.TaskSet +org.apache.spark.storage.BlockStore +org.apache.spark.streaming.scheduler.StreamingListenerBus +org.apache.spark.deploy.ClientArguments +org.apache.spark.metrics.MetricsConfig +org.apache.spark.serializer.SerializerManager +org.apache.spark.streaming.scheduler.NetworkInputTracker +org.apache.spark.broadcast.HttpBroadcast +org.apache.spark.executor.ExecutorURLClassLoader +org.apache.spark.scheduler.TaskSchedulerImpl +org.apache.spark.streaming.dstream.RawNetworkReceiver +org.apache.spark.util.collection.SizeTrackingAppendOnlyMap +org.apache.spark.rdd.SampledRDDPartition +org.apache.spark.storage.BlockManagerId +org.apache.spark.deploy.master.LeaderElectionAgent +org.apache.spark.streaming.dstream.StopReceiver +org.apache.spark.storage.GotBlock +org.apache.spark.scheduler.Task +org.apache.spark.rdd.CartesianPartition +org.apache.spark.storage.StorageStatus +org.apache.spark.rdd.PartitionwiseSampledRDDPartition +org.apache.spark.graphx.impl.ShuffleSerializationStream +org.apache.spark.scheduler.cluster.mesos.MesosSchedulerBackend +org.apache.spark.deploy.worker.ui.IndexPage +org.apache.spark.partial.ApproximateEvaluator +org.apache.spark.graphx.impl.ShuffleSerializerInstance +org.apache.spark.ui.jobs.ExecutorSummary +org.apache.spark.scheduler.GettingResultEvent +org.apache.spark.graphx.impl.RoutingTable +org.apache.spark.streaming.util.KillingThread +org.apache.spark.GrowableAccumulableParam +org.apache.spark.deploy.master.ApplicationInfo +org.apache.spark.deploy.worker.Worker +org.apache.spark.streaming.dstream.ReportBlock +org.apache.spark.partial.StudentTCacher +org.apache.spark.scheduler.ActiveJob +org.apache.spark.network.netty.FileServer +org.apache.spark.network.netty.ShuffleCopier +org.apache.spark.util.collection.PrimitiveVector +org.apache.spark.serializer.KryoSerializerInstance +org.apache.spark.streaming.util.RateLimitedOutputStream +org.apache.spark.network.Message 
+org.apache.spark.util.InnerClosureFinder +org.apache.spark.util.SerializableHyperLogLog +org.apache.spark.deploy.worker.ProcessBuilderLike +org.apache.spark.streaming.Interval +org.apache.spark.rdd.CheckpointRDD +org.apache.spark.deploy.master.SparkZooKeeperSession +org.apache.spark.graphx.impl.VertexAttributeBlock +org.apache.spark.streaming.dstream.MapValuedDStream +org.apache.spark.util.collection.ExternalAppendOnlyMap +org.apache.spark.deploy.client.AppClient +org.apache.spark.deploy.DriverDescription +org.apache.spark.scheduler.FairSchedulableBuilder +org.apache.spark.executor.MesosExecutorBackend +org.apache.spark.streaming.scheduler.ClearMetadata +org.apache.spark.storage.BroadcastBlockId +org.apache.spark.streaming.scheduler.Job +org.apache.spark.streaming.scheduler.DeregisterReceiver +org.apache.spark.serializer.KryoSerializationStream +org.apache.spark.scheduler.FIFOSchedulableBuilder +org.apache.spark.ui.jobs.JobProgressListener +org.apache.spark.TaskEndReason +org.apache.spark.storage.BlockId +org.apache.spark.streaming.scheduler.JobGenerator +org.apache.spark.deploy.master.WorkerInfo +org.apache.spark.storage.ShuffleBlockId +org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessage +org.apache.spark.storage.ThreadingTest.ConsumerThread +org.apache.spark.rdd.PartitionGroup +org.apache.spark.util.collection.AppendOnlyMap +org.apache.spark.scheduler.ExecutorExited +org.apache.spark.streaming.Checkpoint +org.apache.spark.streaming.scheduler.AddBlocks +org.apache.spark.network.netty.FileClient +org.apache.spark.scheduler.JobListener +org.apache.spark.streaming.scheduler.JobSet +org.apache.spark.scheduler.ExecutorGained +org.apache.spark.partial.CountEvaluator +org.apache.spark.scheduler.Pool +org.apache.spark.deploy.master.FileSystemPersistenceEngine +org.apache.spark.rdd.BlockRDD +org.apache.spark.network.netty.FileHeader +org.apache.spark.rdd.PartitionerAwareUnionRDDPartition +org.apache.spark.graphx.impl.VertexIdMsgSerializer +org.apache.spark.streaming.dstream.FileInputDStream +org.apache.spark.deploy.worker.ui.WorkerWebUI +org.apache.spark.metrics.MetricsSystem +org.apache.spark.scheduler.JobSubmitted +org.apache.spark.graphx.impl.MsgRDDFunctions +org.apache.spark.api.java.function.WrappedFunction3 +org.apache.spark.streaming.CheckpointWriter +org.apache.spark.storage.BlockManager +org.apache.spark.util.CompletionIterator +org.apache.spark.network.MessageChunk +org.apache.spark.rdd.MappedValuesRDD +org.apache.spark.streaming.dstream.PluggableInputDStream +org.apache.spark.rdd.ZippedWithIndexRDDPartition +org.apache.spark.streaming.receivers.ActorReceiver +org.apache.spark.serializer.JavaDeserializationStream +org.apache.spark.rdd.SubtractedRDD +org.apache.spark.streaming.dstream.SocketInputDStream +org.apache.spark.partial.GroupedSumEvaluator +org.apache.spark.broadcast.TorrentInfo +org.apache.spark.storage.BlockException +org.apache.spark.streaming.dstream.FlatMapValuedDStream +org.apache.spark.deploy.worker.DriverRunner +org.apache.spark.deploy.master.ExecutorInfo +org.apache.spark.deploy.master.ui.IndexPage +org.apache.spark.rdd.ParallelCollectionPartition +org.apache.spark.scheduler.DirectTaskResult +org.apache.spark.partial.ApproximateActionListener +org.apache.spark.streaming.DStreamGraph +org.apache.spark.partial.GroupedCountEvaluator +org.apache.spark.api.java.JavaSparkContextVarargsWorkaround +org.apache.spark.graphx.util.collection.PrimitiveKeyOpenHashMap +org.apache.spark.streaming.dstream.StateDStream +org.apache.spark.FetchFailedException 
+org.apache.spark.BlockStoreShuffleFetcher +org.apache.spark.util.Clock +org.apache.spark.storage.DiskBlockManager +org.apache.spark.scheduler.SparkListenerBus +org.apache.spark.rdd.GlommedRDD +org.apache.spark.streaming.dstream.UnionDStream +org.apache.spark.scheduler.JobResult +org.apache.spark.deploy.ApplicationDescription +org.apache.spark.rdd.CoGroupSplitDep +org.apache.spark.api.python.PythonRDD +org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend +org.apache.spark.partial.GroupedMeanEvaluator +org.apache.spark.graphx.impl.VertexBroadcastMsg +org.apache.spark.ShuffleFetcher +org.apache.spark.streaming.dstream.ShuffledDStream +org.apache.spark.scheduler.TaskScheduler +org.apache.spark.streaming.dstream.MappedDStream +org.apache.spark.storage.BlockFetchTracker +org.apache.spark.rdd.ZippedPartitionsPartition +org.apache.spark.rdd.ShuffleCoGroupSplitDep +org.apache.spark.graphx.impl.LongVertexBroadcastMsgSerializer +org.apache.spark.deploy.ExecutorDescription +org.apache.spark.MapOutputTrackerMessage +org.apache.spark.util.MetadataCleaner +org.apache.spark.HttpFileServer +org.apache.spark.streaming.util.ManualClock +org.apache.spark.streaming.scheduler.JobGeneratorEvent +org.apache.spark.scheduler.JobCancelled +org.apache.spark.streaming.scheduler.DoCheckpoint +org.apache.spark.broadcast.TorrentBroadcast +org.apache.spark.scheduler.FIFOSchedulingAlgorithm +org.apache.spark.network.ConnectionManagerId +org.apache.spark.deploy.master.PersistenceEngine +org.apache.spark.mllib.recommendation.InLinkBlock +org.apache.spark.partial.MeanEvaluator +org.apache.spark.streaming.dstream.ReportError +org.apache.spark.storage.RDDBlockId +org.apache.spark.api.java.function.WrappedFunction2 +org.apache.spark.ui.exec.ExecutorsUI +org.apache.spark.network.netty.FileServerChannelInitializer +org.apache.spark.streaming.scheduler.JobStarted +org.apache.spark.streaming.dstream.RawInputDStream +org.apache.spark.storage.GetBlock +org.apache.spark.ui.jobs.IndexPage +org.apache.spark.storage.BlockManagerSource +org.apache.spark.rdd.BlockRDDPartition +org.apache.spark.rdd.PartitionCoalescer +org.apache.spark.network.ConnectionManager +org.apache.spark.deploy.master.MasterArguments +org.apache.spark.graphx.impl.MessageToPartition +org.apache.spark.executor.ExecutorBackend +org.apache.spark.util.NextIterator +org.apache.spark.storage.BlockManagerWorker +org.apache.spark.streaming.dstream.QueueInputDStream +org.apache.spark.streaming.scheduler.JobScheduler +org.apache.spark.streaming.dstream.FlatMappedDStream +org.apache.spark.scheduler.TaskResultGetter +org.apache.spark.network.netty.FileClientChannelInitializer +org.apache.spark.rdd.MappedRDD +org.apache.spark.rdd.PartitionerAwareUnionRDD +org.apache.spark.network.BufferMessage +org.apache.spark.streaming.dstream.DStreamCheckpointData +org.apache.spark.executor.Executor +org.apache.spark.MapOutputTrackerMaster +org.apache.spark.deploy.client.AppClientListener +org.apache.spark.storage.BlockInfo +org.apache.spark.streaming.dstream.ReducedWindowedDStream +org.apache.spark.rdd.JdbcPartition +org.apache.spark.deploy.TestWorkerInfo +org.apache.spark.scheduler.BeginEvent +org.apache.spark.storage.BlockMessage +org.apache.spark.tools.SparkType +org.apache.spark.rdd.NewHadoopPartition +org.apache.spark.streaming.scheduler.JobSchedulerEvent +org.apache.spark.streaming.util.TestOutputStream +org.apache.spark.scheduler.local.LocalActor +org.apache.spark.graphx.impl.EdgePartition +org.apache.spark.scheduler.TaskSetFailed +org.apache.spark.ServerStateException 
+org.apache.spark.network.MessageChunkHeader +org.apache.spark.storage.DiskBlockObjectWriter +org.apache.spark.graphx.impl.VertexPartition +org.apache.spark.MapOutputTracker +org.apache.spark.rdd.CartesianRDD +org.apache.spark.storage.ThreadingTest.ProducerThread +org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend +org.apache.spark.graphx.impl.IntVertexBroadcastMsgSerializer +org.apache.spark.streaming.scheduler.NetworkInputTrackerMessage +org.apache.spark.deploy.worker.WorkerSource +org.apache.spark.graphx.impl.DoubleVertexBroadcastMsgSerializer +org.apache.spark.WritableConverter +org.apache.spark.streaming.util.RecurringTimer +org.apache.spark.util.collection.PrimitiveKeyOpenHashMap +org.apache.spark.scheduler.CompletionEvent +org.apache.spark.streaming.receivers.Data +org.apache.spark.rdd.UnionPartition +org.apache.spark.broadcast.TorrentBlock +org.apache.spark.storage.TestBlockId +org.apache.spark.storage.BlockManagerMasterActor +org.apache.spark.storage.TaskResultBlockId +org.apache.spark.streaming.scheduler.ErrorReported +org.apache.spark.streaming.scheduler.RegisterReceiver +org.apache.spark.scheduler.DAGScheduler +org.apache.spark.deploy.TestMasterInfo +org.apache.spark.deploy.master.DriverInfo +org.apache.spark.ui.storage.BlockManagerUI +org.apache.spark.streaming.scheduler.GenerateJobs +org.apache.spark.storage.StreamBlockId +org.apache.spark.util.FieldAccessFinder +org.apache.spark.scheduler.local.LocalBackend +org.apache.spark.network.netty.FileClientHandler +org.apache.spark.tools.ParameterizedType +org.apache.spark.network.Connection +org.apache.spark.ui.storage.RDDPage +org.apache.spark.HttpServer +org.apache.spark.deploy.DockerId +org.apache.spark.streaming.scheduler.ClearCheckpointData +org.apache.spark.rdd.ZippedPartition +org.apache.spark.FetchFailed +org.apache.spark.serializer.JavaSerializerInstance +org.apache.spark.scheduler.SchedulableBuilder +org.apache.spark.streaming.dstream.TransformedDStream +org.apache.spark.streaming.util.FileGeneratingThread +org.apache.spark.api.java.function.WrappedFunction1 +org.apache.spark.broadcast.BroadcastManager +org.apache.spark.rdd.FilteredRDD +org.apache.spark.graphx.impl.IntAggMsgSerializer +org.apache.spark.scheduler.MapStatus +org.apache.spark.tools.BaseType +org.apache.spark.ui.jobs.PoolTable +org.apache.spark.graphx.impl.ShuffleDeserializationStream +org.apache.spark.scheduler.DAGSchedulerEvent +org.apache.spark.ui.jobs.ExecutorTable +org.apache.spark.deploy.master.MasterSource +org.apache.spark.graphx.impl.VertexBroadcastMsgRDDFunctions +org.apache.spark.deploy.worker.WorkerArguments +org.apache.spark.deploy.worker.WorkerWatcher +org.apache.spark.graphx.impl.EdgePartitionBuilder +org.apache.spark.deploy.DeployMessage +org.apache.spark.streaming.dstream.SocketReceiver +org.apache.spark.scheduler.ResultTask +org.apache.spark.rdd.RDDCheckpointData +org.apache.spark.ui.SparkUI +org.apache.spark.util.collection.OpenHashSet +org.apache.spark.streaming.dstream.NetworkReceiverMessage +org.apache.spark.ui.jobs.PoolPage +org.apache.spark.deploy.worker.Clock +org.apache.spark.scheduler.TaskLocation +org.apache.spark.deploy.worker.Sleeper +org.apache.spark.storage.TempBlockId +org.apache.spark.storage.BlockObjectWriter +org.apache.spark.streaming.dstream.WindowedDStream +org.apache.spark.util.random.XORShiftRandom +org.apache.spark.network.SendingConnection +org.apache.spark.ui.storage.IndexPage +org.apache.spark.rdd.HadoopPartition +org.apache.spark.util.Utils.CallSiteInfo +org.apache.spark.deploy.LocalSparkCluster 
+org.apache.spark.streaming.ObjectInputStreamWithLoader +org.apache.spark.util.IdGenerator +org.apache.spark.graphx.impl.DoubleAggMsgSerializer +org.apache.spark.ExceptionFailure +org.apache.spark.storage.BlockFetcherIterator +org.apache.spark.MapOutputTrackerMasterActor +org.apache.spark.storage.BlockMessageArray +org.apache.spark.graphx.impl.LongAggMsgSerializer +org.apache.spark.scheduler.FairSchedulingAlgorithm +org.apache.spark.scheduler.IndirectTaskResult +org.apache.spark.storage.PutBlock +org.apache.spark.util.collection.OpenHashMap +org.apache.spark.scheduler.ShuffleMapTask +org.apache.spark.util.ByteBufferInputStream +org.apache.spark.scheduler.SchedulingAlgorithm \ No newline at end of file diff --git a/bin/spark-class b/bin/spark-class index c4225a392d6da..dfa11e854dc89 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -129,7 +129,7 @@ fi # Compute classpath using external script CLASSPATH=`$FWDIR/bin/compute-classpath.sh` - +CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR" if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR" fi diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index dd68ad5a24f25..d83c7bf092833 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -1,61 +1,66 @@ import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact} import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings +import sbt._ object MimaBuild { - val ignoredABIProblems = { + def ignoredABIProblems(base: File) = { import com.typesafe.tools.mima.core._ import com.typesafe.tools.mima.core.ProblemFilters._ - /** - * A: Detections are semi private or likely to become semi private at some point. - */ - Seq(exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom"), - exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom$"), - exclude[MissingMethodProblem]("org.apache.spark.util.Utils.cloneWritables"), - // Scheduler is not considered a public API. - excludePackage("org.apache.spark.deploy"), - // Was made private in 1.0 - excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#DiskMapIterator"), - excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#ExternalIterator"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.cogroupResultToJava"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.groupByResultToJava"), - exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.handleFailedTask"), - exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.taskSetTaskIds"), - exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSetManager.handleFailedTask"), - exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.removeAllRunningTasks"), - exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.runningTasks_="), - exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime"), - exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime_="), - exclude[MissingMethodProblem]("org.apache.spark.storage.BlockObjectWriter.bytesWritten")) ++ - /** - * B: Detections are mostly false +ve. 
- */ - Seq(exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.setGenerator"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), - exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.foreachPartition"), - exclude[MissingMethodProblem]("org.apache.spark.api.python.PythonRDD.writeToStream")) ++ + + IO.read(file(base.getAbsolutePath + "/.mima-exclude")).split("\n").map(excludePackage).toSeq ++ +// /** +// * A: Detections are semi private or likely to become semi private at some point. +// */ +// Seq(exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom"), +// exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom$"), +// exclude[MissingMethodProblem]("org.apache.spark.util.Utils.cloneWritables"), +// // Scheduler is not considered a public API. +// excludePackage("org.apache.spark.deploy"), +// // Was made private in 1.0 +// excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#DiskMapIterator"), +// excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#ExternalIterator"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.cogroupResultToJava"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.groupByResultToJava"), +// exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.handleFailedTask"), +// exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.taskSetTaskIds"), +// exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSetManager.handleFailedTask"), +// exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.removeAllRunningTasks"), +// exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.runningTasks_="), +// exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime"), +// exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime_="), +// exclude[MissingMethodProblem]("org.apache.spark.storage.BlockObjectWriter.bytesWritten")) ++ +// /** +// * B: Detections are mostly false +ve. +// */ +// Seq(exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.setGenerator"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"), +// exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.foreachPartition"), +// exclude[MissingMethodProblem]("org.apache.spark.api.python.PythonRDD.writeToStream")) ++ /** * Detections I am unsure about. Should be either moved to B (false +ve) or A. 
*/ - Seq(exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator$"), - exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags"), - exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags$"), - exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$wrapForCompression$1"), - exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$sparkConf"), - exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator"), - exclude[MissingClassProblem]("org.apache.spark.mllib.optimization.SquaredGradient"), - exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.gradient"), - exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.RidgeRegressionWithSGD.gradient"), - exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient"), - exclude[FinalClassProblem]("org.apache.spark.SparkFiles") + Seq( +// exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator$"), +// exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags"), +// exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags$"), +// exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$wrapForCompression$1"), +// exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$sparkConf"), +// exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator"), +// exclude[MissingClassProblem]("org.apache.spark.mllib.optimization.SquaredGradient"), +// exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.gradient"), +// exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.RidgeRegressionWithSGD.gradient"), +// exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient"), +// exclude[FinalClassProblem]("org.apache.spark.SparkFiles") + ) } - lazy val mimaSettings = mimaDefaultSettings ++ Seq( + def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq( previousArtifact := None, - binaryIssueFilters ++= ignoredABIProblems + binaryIssueFilters ++= ignoredABIProblems(sparkHome) ) } diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 6410704c2c7a2..ed5e598d798c2 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -122,7 +122,7 @@ object SparkBuild extends Build { lazy val allProjects = packageProjects ++ allExternalRefs ++ Seq[ProjectReference](examples, tools, assemblyProj) - def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings ++ Seq( + def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq( organization := "org.apache.spark", version := "1.0.0-incubating-SNAPSHOT", scalaVersion := "2.10.3", diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala new file mode 100644 index 0000000000000..3246db98f5df5 --- /dev/null +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.tools + +import java.io.File +import java.util.jar.JarFile + +import scala.collection.mutable +import scala.collection.JavaConversions.enumerationAsScalaIterator + +/** + * Mima(TODO: Paste URL here) generates a lot of false positives as it does not detect + * private[x] as internal APIs. + */ +object GenerateMIMAIgnore { + + def classesWithPrivateWithin(packageName: String, excludePackages: Seq[String]): Set[String] = { + import scala.reflect.runtime.universe.runtimeMirror + val classLoader: ClassLoader = Thread.currentThread().getContextClassLoader + val mirror = runtimeMirror(classLoader) + val classes = Utils.getClasses(packageName, classLoader) + val privateClasses = mutable.HashSet[String]() + for (x <- classes) { + try { + // some of the classnames throw malformed class name exceptions and weird Match errors. + if (excludePackages.forall(!x.startsWith(_)) && + mirror.staticClass(x).privateWithin.toString.trim != "") { + privateClasses += x + } + } catch { + case e: Throwable => // println(e) + } + } + privateClasses.toSet + } + + def main(args: Array[String]) { + scala.tools.nsc.io.File(".mima-exclude"). + writeAll(classesWithPrivateWithin("org.apache.spark", args).mkString("\n")) + println("Created : .mima-exclude in current directory.") + } + +} + +object Utils { + + /** + * Get all classes in a package from a jar file. + */ + def getAllClasses(jarPath: String, packageName: String) = { + val jar = new JarFile(new File(jarPath)) + val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName)) + val classes = mutable.HashSet[Class[_]]() + for (entry <- enums) { + if (!entry.endsWith("/") && !entry.endsWith("MANIFEST.MF") && !entry.endsWith("properties")) { + try { + classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.')) + } catch { + case e: Throwable => // println(e) // It may throw a few ClassNotFoundExceptions + } + } + } + classes + } + + /** + * Scans all classes accessible from the context class loader which belong to the given package + * and subpackages both from directories and jars present on the classpath. 
+ */ + def getClasses(packageName: String, + classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Set[String] = { + val path = packageName.replace('.', '/') + val resources = classLoader.getResources(path).toArray + val jars = resources.filter(x => x.getProtocol == "jar") + .map(_.getFile.split(":")(1).split("!")(0)) + val classesFromJars = jars.map(getAllClasses(_, path)).flatten + val dirs = resources.filter(x => x.getProtocol == "file") + .map(x => new File(x.getFile.split(":")(1))) + val classFromDirs = dirs.map(findClasses(_, packageName)).flatten + (classFromDirs ++ classesFromJars).map(_.getCanonicalName).filter(_ != null).toSet + } + + private def findClasses(directory: File, packageName: String): Seq[Class[_]] = { + val classes = mutable.ArrayBuffer[Class[_]]() + if (!directory.exists()) { + return classes + } + val files = directory.listFiles() + for (file <- files) { + if (file.isDirectory) { + classes ++= findClasses(file, packageName + "." + file.getName) + } else if (file.getName.endsWith(".class")) { + classes += Class.forName(packageName + '.' + file.getName.substring(0, + file.getName.length() - 6)) + } + } + classes + } +} From c39f3b576f234a59264b21c630a95d520da7bd58 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Sun, 23 Mar 2014 13:11:53 -0700 Subject: [PATCH 4/8] Some enhancements to binary checking. 1. Special case for inner classes of packge-private objects. 2. Made tools classes accessible when running `spark-class`. 3. Various other improvements to exclude-generation script. --- .gitignore | 1 + .mima-exclude | 360 ------------------ bin/compute-classpath.sh | 1 + dev/run-tests | 13 +- project/MimaBuild.scala | 90 +++-- project/SparkBuild.scala | 4 +- .../spark/tools/GenerateMIMAIgnore.scala | 117 ++++-- 7 files changed, 135 insertions(+), 451 deletions(-) delete mode 100644 .mima-exclude diff --git a/.gitignore b/.gitignore index 3d178992123da..e5da68d9658ac 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ sbt/*.jar .settings .cache +.mima-excludes /build/ work/ out/ diff --git a/.mima-exclude b/.mima-exclude deleted file mode 100644 index 54a46b4f0ee9e..0000000000000 --- a/.mima-exclude +++ /dev/null @@ -1,360 +0,0 @@ -org.apache.spark.scheduler.cluster.SparkDeploySchedulerBackend -org.apache.spark.rdd.FlatMappedRDD -org.apache.spark.storage.PutResult -org.apache.spark.mllib.recommendation.OutLinkBlock -org.apache.spark.network.netty.ShuffleSender -org.apache.spark.scheduler.TaskResult -org.apache.spark.scheduler.JobWaiter -org.apache.spark.deploy.worker.ExecutorRunner -org.apache.spark.streaming.dstream.FilteredDStream -org.apache.spark.util.SerializableBuffer -org.apache.spark.scheduler.TaskSetManager -org.apache.spark.scheduler.Stage -org.apache.spark.rdd.ZippedWithIndexRDD -org.apache.spark.scheduler.SchedulerBackend -org.apache.spark.streaming.dstream.GlommedDStream -org.apache.spark.rdd.FlatMappedValuesRDD -org.apache.spark.deploy.master.MonarchyLeaderAgent -org.apache.spark.rdd.CoGroupPartition -org.apache.spark.scheduler.SlaveLost -org.apache.spark.api.python.PythonWorkerFactory -org.apache.spark.streaming.ContextWaiter -org.apache.spark.serializer.JavaSerializationStream -org.apache.spark.rdd.CheckpointRDDPartition -org.apache.spark.scheduler.WorkerOffer -org.apache.spark.streaming.scheduler.JobCompleted -org.apache.spark.rdd.ParallelCollectionRDD -org.apache.spark.streaming.dstream.ForEachDStream -org.apache.spark.scheduler.JobFailed -org.apache.spark.graphx.impl.ReplicatedVertexView 
-org.apache.spark.deploy.master.ui.ApplicationPage -org.apache.spark.rdd.ShuffledRDDPartition -org.apache.spark.ui.jobs.JobProgressUI -org.apache.spark.deploy.Command -org.apache.spark.ui.jobs.StagePage -org.apache.spark.serializer.KryoDeserializationStream -org.apache.spark.scheduler.ExecutorLost -org.apache.spark.deploy.master.BlackHolePersistenceEngine -org.apache.spark.scheduler.cluster.SimrSchedulerBackend -org.apache.spark.streaming.dstream.MapPartitionedDStream -org.apache.spark.scheduler.TaskInfo -org.apache.spark.storage.BlockManagerSlaveActor -org.apache.spark.ui.env.EnvironmentUI -org.apache.spark.executor.CoarseGrainedExecutorBackend -org.apache.spark.storage.ShuffleBlockManager -org.apache.spark.rdd.NarrowCoGroupSplitDep -org.apache.spark.scheduler.JobGroupCancelled -org.apache.spark.rdd.MapPartitionsRDD -org.apache.spark.graphx.impl.EdgeTripletIterator -org.apache.spark.storage.BlockManagerMaster -org.apache.spark.partial.SumEvaluator -org.apache.spark.network.netty.FileServerHandler -org.apache.spark.GetMapOutputStatuses -org.apache.spark.tools.SparkMethod -org.apache.spark.storage.ShuffleWriterGroup -org.apache.spark.ui.jobs.StageTable -org.apache.spark.scheduler.TaskDescription -org.apache.spark.deploy.master.Master -org.apache.spark.scheduler.DAGSchedulerSource -org.apache.spark.streaming.util.Clock -org.apache.spark.deploy.master.ui.MasterWebUI -org.apache.spark.CacheManager -org.apache.spark.streaming.util.SystemClock -org.apache.spark.storage.BroadcastHelperBlockId -org.apache.spark.deploy.master.ZooKeeperLeaderElectionAgent -org.apache.spark.storage.FileSegment -org.apache.spark.api.python.PythonPartitioner -org.apache.spark.scheduler.ExecutorLossReason -org.apache.spark.network.ReceivingConnection -org.apache.spark.scheduler.Schedulable -org.apache.spark.scheduler.TaskSet -org.apache.spark.storage.BlockStore -org.apache.spark.streaming.scheduler.StreamingListenerBus -org.apache.spark.deploy.ClientArguments -org.apache.spark.metrics.MetricsConfig -org.apache.spark.serializer.SerializerManager -org.apache.spark.streaming.scheduler.NetworkInputTracker -org.apache.spark.broadcast.HttpBroadcast -org.apache.spark.executor.ExecutorURLClassLoader -org.apache.spark.scheduler.TaskSchedulerImpl -org.apache.spark.streaming.dstream.RawNetworkReceiver -org.apache.spark.util.collection.SizeTrackingAppendOnlyMap -org.apache.spark.rdd.SampledRDDPartition -org.apache.spark.storage.BlockManagerId -org.apache.spark.deploy.master.LeaderElectionAgent -org.apache.spark.streaming.dstream.StopReceiver -org.apache.spark.storage.GotBlock -org.apache.spark.scheduler.Task -org.apache.spark.rdd.CartesianPartition -org.apache.spark.storage.StorageStatus -org.apache.spark.rdd.PartitionwiseSampledRDDPartition -org.apache.spark.graphx.impl.ShuffleSerializationStream -org.apache.spark.scheduler.cluster.mesos.MesosSchedulerBackend -org.apache.spark.deploy.worker.ui.IndexPage -org.apache.spark.partial.ApproximateEvaluator -org.apache.spark.graphx.impl.ShuffleSerializerInstance -org.apache.spark.ui.jobs.ExecutorSummary -org.apache.spark.scheduler.GettingResultEvent -org.apache.spark.graphx.impl.RoutingTable -org.apache.spark.streaming.util.KillingThread -org.apache.spark.GrowableAccumulableParam -org.apache.spark.deploy.master.ApplicationInfo -org.apache.spark.deploy.worker.Worker -org.apache.spark.streaming.dstream.ReportBlock -org.apache.spark.partial.StudentTCacher -org.apache.spark.scheduler.ActiveJob -org.apache.spark.network.netty.FileServer -org.apache.spark.network.netty.ShuffleCopier 
-org.apache.spark.util.collection.PrimitiveVector -org.apache.spark.serializer.KryoSerializerInstance -org.apache.spark.streaming.util.RateLimitedOutputStream -org.apache.spark.network.Message -org.apache.spark.util.InnerClosureFinder -org.apache.spark.util.SerializableHyperLogLog -org.apache.spark.deploy.worker.ProcessBuilderLike -org.apache.spark.streaming.Interval -org.apache.spark.rdd.CheckpointRDD -org.apache.spark.deploy.master.SparkZooKeeperSession -org.apache.spark.graphx.impl.VertexAttributeBlock -org.apache.spark.streaming.dstream.MapValuedDStream -org.apache.spark.util.collection.ExternalAppendOnlyMap -org.apache.spark.deploy.client.AppClient -org.apache.spark.deploy.DriverDescription -org.apache.spark.scheduler.FairSchedulableBuilder -org.apache.spark.executor.MesosExecutorBackend -org.apache.spark.streaming.scheduler.ClearMetadata -org.apache.spark.storage.BroadcastBlockId -org.apache.spark.streaming.scheduler.Job -org.apache.spark.streaming.scheduler.DeregisterReceiver -org.apache.spark.serializer.KryoSerializationStream -org.apache.spark.scheduler.FIFOSchedulableBuilder -org.apache.spark.ui.jobs.JobProgressListener -org.apache.spark.TaskEndReason -org.apache.spark.storage.BlockId -org.apache.spark.streaming.scheduler.JobGenerator -org.apache.spark.deploy.master.WorkerInfo -org.apache.spark.storage.ShuffleBlockId -org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessage -org.apache.spark.storage.ThreadingTest.ConsumerThread -org.apache.spark.rdd.PartitionGroup -org.apache.spark.util.collection.AppendOnlyMap -org.apache.spark.scheduler.ExecutorExited -org.apache.spark.streaming.Checkpoint -org.apache.spark.streaming.scheduler.AddBlocks -org.apache.spark.network.netty.FileClient -org.apache.spark.scheduler.JobListener -org.apache.spark.streaming.scheduler.JobSet -org.apache.spark.scheduler.ExecutorGained -org.apache.spark.partial.CountEvaluator -org.apache.spark.scheduler.Pool -org.apache.spark.deploy.master.FileSystemPersistenceEngine -org.apache.spark.rdd.BlockRDD -org.apache.spark.network.netty.FileHeader -org.apache.spark.rdd.PartitionerAwareUnionRDDPartition -org.apache.spark.graphx.impl.VertexIdMsgSerializer -org.apache.spark.streaming.dstream.FileInputDStream -org.apache.spark.deploy.worker.ui.WorkerWebUI -org.apache.spark.metrics.MetricsSystem -org.apache.spark.scheduler.JobSubmitted -org.apache.spark.graphx.impl.MsgRDDFunctions -org.apache.spark.api.java.function.WrappedFunction3 -org.apache.spark.streaming.CheckpointWriter -org.apache.spark.storage.BlockManager -org.apache.spark.util.CompletionIterator -org.apache.spark.network.MessageChunk -org.apache.spark.rdd.MappedValuesRDD -org.apache.spark.streaming.dstream.PluggableInputDStream -org.apache.spark.rdd.ZippedWithIndexRDDPartition -org.apache.spark.streaming.receivers.ActorReceiver -org.apache.spark.serializer.JavaDeserializationStream -org.apache.spark.rdd.SubtractedRDD -org.apache.spark.streaming.dstream.SocketInputDStream -org.apache.spark.partial.GroupedSumEvaluator -org.apache.spark.broadcast.TorrentInfo -org.apache.spark.storage.BlockException -org.apache.spark.streaming.dstream.FlatMapValuedDStream -org.apache.spark.deploy.worker.DriverRunner -org.apache.spark.deploy.master.ExecutorInfo -org.apache.spark.deploy.master.ui.IndexPage -org.apache.spark.rdd.ParallelCollectionPartition -org.apache.spark.scheduler.DirectTaskResult -org.apache.spark.partial.ApproximateActionListener -org.apache.spark.streaming.DStreamGraph -org.apache.spark.partial.GroupedCountEvaluator 
-org.apache.spark.api.java.JavaSparkContextVarargsWorkaround -org.apache.spark.graphx.util.collection.PrimitiveKeyOpenHashMap -org.apache.spark.streaming.dstream.StateDStream -org.apache.spark.FetchFailedException -org.apache.spark.BlockStoreShuffleFetcher -org.apache.spark.util.Clock -org.apache.spark.storage.DiskBlockManager -org.apache.spark.scheduler.SparkListenerBus -org.apache.spark.rdd.GlommedRDD -org.apache.spark.streaming.dstream.UnionDStream -org.apache.spark.scheduler.JobResult -org.apache.spark.deploy.ApplicationDescription -org.apache.spark.rdd.CoGroupSplitDep -org.apache.spark.api.python.PythonRDD -org.apache.spark.scheduler.cluster.mesos.CoarseMesosSchedulerBackend -org.apache.spark.partial.GroupedMeanEvaluator -org.apache.spark.graphx.impl.VertexBroadcastMsg -org.apache.spark.ShuffleFetcher -org.apache.spark.streaming.dstream.ShuffledDStream -org.apache.spark.scheduler.TaskScheduler -org.apache.spark.streaming.dstream.MappedDStream -org.apache.spark.storage.BlockFetchTracker -org.apache.spark.rdd.ZippedPartitionsPartition -org.apache.spark.rdd.ShuffleCoGroupSplitDep -org.apache.spark.graphx.impl.LongVertexBroadcastMsgSerializer -org.apache.spark.deploy.ExecutorDescription -org.apache.spark.MapOutputTrackerMessage -org.apache.spark.util.MetadataCleaner -org.apache.spark.HttpFileServer -org.apache.spark.streaming.util.ManualClock -org.apache.spark.streaming.scheduler.JobGeneratorEvent -org.apache.spark.scheduler.JobCancelled -org.apache.spark.streaming.scheduler.DoCheckpoint -org.apache.spark.broadcast.TorrentBroadcast -org.apache.spark.scheduler.FIFOSchedulingAlgorithm -org.apache.spark.network.ConnectionManagerId -org.apache.spark.deploy.master.PersistenceEngine -org.apache.spark.mllib.recommendation.InLinkBlock -org.apache.spark.partial.MeanEvaluator -org.apache.spark.streaming.dstream.ReportError -org.apache.spark.storage.RDDBlockId -org.apache.spark.api.java.function.WrappedFunction2 -org.apache.spark.ui.exec.ExecutorsUI -org.apache.spark.network.netty.FileServerChannelInitializer -org.apache.spark.streaming.scheduler.JobStarted -org.apache.spark.streaming.dstream.RawInputDStream -org.apache.spark.storage.GetBlock -org.apache.spark.ui.jobs.IndexPage -org.apache.spark.storage.BlockManagerSource -org.apache.spark.rdd.BlockRDDPartition -org.apache.spark.rdd.PartitionCoalescer -org.apache.spark.network.ConnectionManager -org.apache.spark.deploy.master.MasterArguments -org.apache.spark.graphx.impl.MessageToPartition -org.apache.spark.executor.ExecutorBackend -org.apache.spark.util.NextIterator -org.apache.spark.storage.BlockManagerWorker -org.apache.spark.streaming.dstream.QueueInputDStream -org.apache.spark.streaming.scheduler.JobScheduler -org.apache.spark.streaming.dstream.FlatMappedDStream -org.apache.spark.scheduler.TaskResultGetter -org.apache.spark.network.netty.FileClientChannelInitializer -org.apache.spark.rdd.MappedRDD -org.apache.spark.rdd.PartitionerAwareUnionRDD -org.apache.spark.network.BufferMessage -org.apache.spark.streaming.dstream.DStreamCheckpointData -org.apache.spark.executor.Executor -org.apache.spark.MapOutputTrackerMaster -org.apache.spark.deploy.client.AppClientListener -org.apache.spark.storage.BlockInfo -org.apache.spark.streaming.dstream.ReducedWindowedDStream -org.apache.spark.rdd.JdbcPartition -org.apache.spark.deploy.TestWorkerInfo -org.apache.spark.scheduler.BeginEvent -org.apache.spark.storage.BlockMessage -org.apache.spark.tools.SparkType -org.apache.spark.rdd.NewHadoopPartition -org.apache.spark.streaming.scheduler.JobSchedulerEvent 
-org.apache.spark.streaming.util.TestOutputStream -org.apache.spark.scheduler.local.LocalActor -org.apache.spark.graphx.impl.EdgePartition -org.apache.spark.scheduler.TaskSetFailed -org.apache.spark.ServerStateException -org.apache.spark.network.MessageChunkHeader -org.apache.spark.storage.DiskBlockObjectWriter -org.apache.spark.graphx.impl.VertexPartition -org.apache.spark.MapOutputTracker -org.apache.spark.rdd.CartesianRDD -org.apache.spark.storage.ThreadingTest.ProducerThread -org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend -org.apache.spark.graphx.impl.IntVertexBroadcastMsgSerializer -org.apache.spark.streaming.scheduler.NetworkInputTrackerMessage -org.apache.spark.deploy.worker.WorkerSource -org.apache.spark.graphx.impl.DoubleVertexBroadcastMsgSerializer -org.apache.spark.WritableConverter -org.apache.spark.streaming.util.RecurringTimer -org.apache.spark.util.collection.PrimitiveKeyOpenHashMap -org.apache.spark.scheduler.CompletionEvent -org.apache.spark.streaming.receivers.Data -org.apache.spark.rdd.UnionPartition -org.apache.spark.broadcast.TorrentBlock -org.apache.spark.storage.TestBlockId -org.apache.spark.storage.BlockManagerMasterActor -org.apache.spark.storage.TaskResultBlockId -org.apache.spark.streaming.scheduler.ErrorReported -org.apache.spark.streaming.scheduler.RegisterReceiver -org.apache.spark.scheduler.DAGScheduler -org.apache.spark.deploy.TestMasterInfo -org.apache.spark.deploy.master.DriverInfo -org.apache.spark.ui.storage.BlockManagerUI -org.apache.spark.streaming.scheduler.GenerateJobs -org.apache.spark.storage.StreamBlockId -org.apache.spark.util.FieldAccessFinder -org.apache.spark.scheduler.local.LocalBackend -org.apache.spark.network.netty.FileClientHandler -org.apache.spark.tools.ParameterizedType -org.apache.spark.network.Connection -org.apache.spark.ui.storage.RDDPage -org.apache.spark.HttpServer -org.apache.spark.deploy.DockerId -org.apache.spark.streaming.scheduler.ClearCheckpointData -org.apache.spark.rdd.ZippedPartition -org.apache.spark.FetchFailed -org.apache.spark.serializer.JavaSerializerInstance -org.apache.spark.scheduler.SchedulableBuilder -org.apache.spark.streaming.dstream.TransformedDStream -org.apache.spark.streaming.util.FileGeneratingThread -org.apache.spark.api.java.function.WrappedFunction1 -org.apache.spark.broadcast.BroadcastManager -org.apache.spark.rdd.FilteredRDD -org.apache.spark.graphx.impl.IntAggMsgSerializer -org.apache.spark.scheduler.MapStatus -org.apache.spark.tools.BaseType -org.apache.spark.ui.jobs.PoolTable -org.apache.spark.graphx.impl.ShuffleDeserializationStream -org.apache.spark.scheduler.DAGSchedulerEvent -org.apache.spark.ui.jobs.ExecutorTable -org.apache.spark.deploy.master.MasterSource -org.apache.spark.graphx.impl.VertexBroadcastMsgRDDFunctions -org.apache.spark.deploy.worker.WorkerArguments -org.apache.spark.deploy.worker.WorkerWatcher -org.apache.spark.graphx.impl.EdgePartitionBuilder -org.apache.spark.deploy.DeployMessage -org.apache.spark.streaming.dstream.SocketReceiver -org.apache.spark.scheduler.ResultTask -org.apache.spark.rdd.RDDCheckpointData -org.apache.spark.ui.SparkUI -org.apache.spark.util.collection.OpenHashSet -org.apache.spark.streaming.dstream.NetworkReceiverMessage -org.apache.spark.ui.jobs.PoolPage -org.apache.spark.deploy.worker.Clock -org.apache.spark.scheduler.TaskLocation -org.apache.spark.deploy.worker.Sleeper -org.apache.spark.storage.TempBlockId -org.apache.spark.storage.BlockObjectWriter -org.apache.spark.streaming.dstream.WindowedDStream 
-org.apache.spark.util.random.XORShiftRandom
-org.apache.spark.network.SendingConnection
-org.apache.spark.ui.storage.IndexPage
-org.apache.spark.rdd.HadoopPartition
-org.apache.spark.util.Utils.CallSiteInfo
-org.apache.spark.deploy.LocalSparkCluster
-org.apache.spark.streaming.ObjectInputStreamWithLoader
-org.apache.spark.util.IdGenerator
-org.apache.spark.graphx.impl.DoubleAggMsgSerializer
-org.apache.spark.ExceptionFailure
-org.apache.spark.storage.BlockFetcherIterator
-org.apache.spark.MapOutputTrackerMasterActor
-org.apache.spark.storage.BlockMessageArray
-org.apache.spark.graphx.impl.LongAggMsgSerializer
-org.apache.spark.scheduler.FairSchedulingAlgorithm
-org.apache.spark.scheduler.IndirectTaskResult
-org.apache.spark.storage.PutBlock
-org.apache.spark.util.collection.OpenHashMap
-org.apache.spark.scheduler.ShuffleMapTask
-org.apache.spark.util.ByteBufferInputStream
-org.apache.spark.scheduler.SchedulingAlgorithm
\ No newline at end of file
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 278969655de48..eb1b99933cbd5 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -41,6 +41,7 @@ if [ -f "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-dep
   CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes"

   DEPS_ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-deps.jar`
   CLASSPATH="$CLASSPATH:$DEPS_ASSEMBLY_JAR"
diff --git a/dev/run-tests b/dev/run-tests
index 432563e1ef845..e557057a907c5 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -32,12 +32,6 @@ echo "Running Scala style checks"
 echo "========================================================================="
 sbt/sbt clean scalastyle

-echo "========================================================================="
-echo "Running MiMa for detecting binary incompatibilites."
-echo "Please see MimaBuild.scala for details."
-echo "========================================================================="
-sbt/sbt mima-report-binary-issues
-
 echo "========================================================================="
 echo "Running Spark unit tests"
 echo "========================================================================="
@@ -50,3 +44,10 @@ if [ -z "$PYSPARK_PYTHON" ]; then
     export PYSPARK_PYTHON=/usr/local/bin/python2.7
 fi
 ./python/run-tests
+
+echo "========================================================================="
+echo "Detecting binary incompatibilities with MiMa"
+echo "========================================================================="
+./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
+sbt/sbt mima-report-binary-issues
+
diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala
index d83c7bf092833..d83844d68a5fb 100644
--- a/project/MimaBuild.scala
+++ b/project/MimaBuild.scala
@@ -8,54 +8,50 @@ object MimaBuild {
     import com.typesafe.tools.mima.core._
     import com.typesafe.tools.mima.core.ProblemFilters._

-    IO.read(file(base.getAbsolutePath + "/.mima-exclude")).split("\n").map(excludePackage).toSeq ++
-//    /**
-//     * A: Detections are semi private or likely to become semi private at some point.
-//     */
-//    Seq(exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom"),
-//      exclude[MissingClassProblem]("org.apache.spark.util.XORShiftRandom$"),
-//      exclude[MissingMethodProblem]("org.apache.spark.util.Utils.cloneWritables"),
-//      // Scheduler is not considered a public API.
-//      excludePackage("org.apache.spark.deploy"),
-//      // Was made private in 1.0
-//      excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#DiskMapIterator"),
-//      excludePackage("org.apache.spark.util.collection.ExternalAppendOnlyMap#ExternalIterator"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.cogroupResultToJava"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaPairRDD.groupByResultToJava"),
-//      exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.handleFailedTask"),
-//      exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSchedulerImpl.taskSetTaskIds"),
-//      exclude[IncompatibleMethTypeProblem]("org.apache.spark.scheduler.TaskSetManager.handleFailedTask"),
-//      exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.removeAllRunningTasks"),
-//      exclude[MissingMethodProblem]("org.apache.spark.scheduler.TaskSetManager.runningTasks_="),
-//      exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime"),
-//      exclude[MissingMethodProblem]("org.apache.spark.scheduler.DAGScheduler.lastFetchFailureTime_="),
-//      exclude[MissingMethodProblem]("org.apache.spark.storage.BlockObjectWriter.bytesWritten")) ++
-//    /**
-//     * B: Detections are mostly false +ve.
-//     */
-//    Seq(exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.setGenerator"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.mapPartitions"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.foreachPartition"),
-//      exclude[MissingMethodProblem]("org.apache.spark.api.python.PythonRDD.writeToStream")) ++
-    /**
-     * Detections I am unsure about. Should be either moved to B (false +ve) or A.
-     */
-    Seq(
-//      exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator$"),
-//      exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags"),
-//      exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags$"),
-//      exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$wrapForCompression$1"),
-//      exclude[MissingMethodProblem]("org.apache.spark.util.collection.ExternalAppendOnlyMap.org$apache$spark$util$collection$ExternalAppendOnlyMap$$sparkConf"),
-//      exclude[MissingClassProblem]("org.apache.spark.mllib.recommendation.MFDataGenerator"),
-//      exclude[MissingClassProblem]("org.apache.spark.mllib.optimization.SquaredGradient"),
-//      exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.gradient"),
-//      exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.RidgeRegressionWithSGD.gradient"),
-//      exclude[IncompatibleResultTypeProblem]("org.apache.spark.mllib.regression.LassoWithSGD.gradient"),
-//      exclude[FinalClassProblem]("org.apache.spark.SparkFiles")
+    // Excludes relevant to all Spark versions
+    val defaultExcludes = Seq(excludePackage("org.apache.spark.repl"))
-    )
+    // Read package-private excludes from file
+    val excludeFilePath = (base.getAbsolutePath + "/.mima-excludes")
+    val excludeFile = file(excludeFilePath)
+    val packagePrivateList: Seq[String] =
+      if (!excludeFile.exists()) {
+        Seq()
+      } else {
+        IO.read(excludeFile).split("\n")
+      }
+
+    def excludeClass(className: String) = {
+      Seq(
+        excludePackage(className),
+        ProblemFilters.exclude[MissingClassProblem](className),
+        ProblemFilters.exclude[MissingTypesProblem](className),
+        excludePackage(className + "$"),
+        ProblemFilters.exclude[MissingClassProblem](className + "$"),
+        ProblemFilters.exclude[MissingTypesProblem](className + "$")
+      )
+    }
+    def excludeSparkClass(className: String) = excludeClass("org.apache.spark." + className)
+
+    val packagePrivateExcludes = packagePrivateList.flatMap(excludeClass)
+
+    /* Excludes specific to a given version of Spark. When comparing the given version against
+       its immediate predecessor, the excludes listed here will be applied. */
+    val versionExcludes =
+      SparkBuild.SPARK_VERSION match {
+        case v if v.startsWith("1.0") =>
+          Seq(excludePackage("org.apache.spark.api.java")) ++
+          excludeSparkClass("rdd.ClassTags") ++
+          excludeSparkClass("util.XORShiftRandom") ++
+          excludeSparkClass("mllib.recommendation.MFDataGenerator") ++
+          excludeSparkClass("mllib.optimization.SquaredGradient") ++
+          excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++
+          excludeSparkClass("mllib.regression.LassoWithSGD") ++
+          excludeSparkClass("mllib.regression.LinearRegressionWithSGD")
+        case _ => Seq()
+      }
+
+    packagePrivateExcludes ++ versionExcludes
   }

   def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq(
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index ed5e598d798c2..ca050f9049aa6 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -28,6 +28,8 @@ import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact
 //import com.jsuereth.pgp.sbtplugin.PgpKeys._

 object SparkBuild extends Build {
+  val SPARK_VERSION = "1.0.0-incubating-SNAPSHOT"
+
   // Hadoop version to build against. For example, "1.0.4" for Apache releases, or
   // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set
   // through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN.
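For illustration only (this snippet is not part of any diff above, and the class name is just an example taken from the version excludes): the excludeClass helper added to MimaBuild.scala expands each name read from the generated .mima-excludes file into MiMa filters for both the class and its companion object. Assuming the mima-core classes imported above are on the build classpath, as they are for code under project/, a single entry expands roughly to:

import com.typesafe.tools.mima.core._
import com.typesafe.tools.mima.core.ProblemFilters._

// Approximately what excludeClass("org.apache.spark.rdd.ClassTags") returns:
// filters for the class itself plus its companion object (the "$" suffix).
val sampleExcludes = Seq(
  excludePackage("org.apache.spark.rdd.ClassTags"),
  ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags"),
  ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.rdd.ClassTags"),
  excludePackage("org.apache.spark.rdd.ClassTags$"),
  ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.rdd.ClassTags$"),
  ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.rdd.ClassTags$")
)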
@@ -124,7 +126,7 @@ object SparkBuild extends Build {

   def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq(
     organization := "org.apache.spark",
-    version := "1.0.0-incubating-SNAPSHOT",
+    version := SPARK_VERSION,
     scalaVersion := "2.10.3",
     scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation",
       "-target:" + SCALAC_JVM_VERSION),
diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala
index 3246db98f5df5..44984f8a83d8f 100644
--- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala
+++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala
@@ -21,78 +21,121 @@ import java.io.File
 import java.util.jar.JarFile

 import scala.collection.mutable
-import scala.collection.JavaConversions.enumerationAsScalaIterator
+import scala.collection.JavaConversions._
+import scala.reflect.runtime.universe.runtimeMirror
+import scala.util.Try

 /**
- * Mima(TODO: Paste URL here) generates a lot of false positives as it does not detect
- * private[x] as internal APIs.
+ * A tool for generating classes to be excluded during binary checking with MIMA. It is expected
+ * that this tool is run with ./spark-class.
+ *
+ * MIMA itself only supports JVM-level visibility and doesn't account for package-private classes.
+ * This tool looks at all currently package-private classes and generates exclusions for them. Note
+ * that this approach is not sound. It can lead to false positives if we move or rename a previously
+ * package-private class. It can lead to false negatives if someone explicitly makes a class
+ * package-private that wasn't before. This exists only to help catch certain classes of changes
+ * which might be difficult to catch during review.
  */
 object GenerateMIMAIgnore {
+  private val classLoader = Thread.currentThread().getContextClassLoader
+  private val mirror = runtimeMirror(classLoader)
+
+  private def classesPrivateWithin(packageName: String): Set[String] = {
-  def classesWithPrivateWithin(packageName: String, excludePackages: Seq[String]): Set[String] = {
-    import scala.reflect.runtime.universe.runtimeMirror
-    val classLoader: ClassLoader = Thread.currentThread().getContextClassLoader
-    val mirror = runtimeMirror(classLoader)
-    val classes = Utils.getClasses(packageName, classLoader)
+    val classes = getClasses(packageName, classLoader)
     val privateClasses = mutable.HashSet[String]()
-    for (x <- classes) {
-      try {
-        // some of the classnames throw malformed class name exceptions and weird Match errors.
-        if (excludePackages.forall(!x.startsWith(_)) &&
-          mirror.staticClass(x).privateWithin.toString.trim != "") {
-          privateClasses += x
+
+    def isPackagePrivate(className: String) = {
+      try {
+        /* Couldn't figure out if it's possible to determine a-priori whether a given symbol
+           is a module or class. */
+
+        val privateAsClass = mirror
+          .staticClass(className)
+          .privateWithin
+          .fullName
+          .startsWith(packageName)
+
+        val privateAsModule = mirror
+          .staticModule(className)
+          .privateWithin
+          .fullName
+          .startsWith(packageName)
+
+        privateAsClass || privateAsModule
+      } catch {
+        case _: Throwable => {
+          println("Error determining visibility: " + className)
+          false
+        }
+      }
+    }
+
+    for (className <- classes) {
+      val directlyPrivateSpark = isPackagePrivate(className)
+
+      /* Inner classes defined within a private[spark] class or object are effectively
+         invisible, so we account for them as package private. */
+      val indirectlyPrivateSpark = {
+        val maybeOuter = className.toString.takeWhile(_ != '$')
+        if (maybeOuter != className) {
+          isPackagePrivate(maybeOuter)
+        } else {
+          false
         }
-      } catch {
-        case e: Throwable => // println(e)
       }
+      if (directlyPrivateSpark || indirectlyPrivateSpark) privateClasses += className
     }
-    privateClasses.toSet
+    privateClasses.flatMap(c => Seq(c, c.replace("$", "#"))).toSet
   }

   def main(args: Array[String]) {
-    scala.tools.nsc.io.File(".mima-exclude").
-      writeAll(classesWithPrivateWithin("org.apache.spark", args).mkString("\n"))
-    println("Created : .mima-exclude in current directory.")
+    scala.tools.nsc.io.File(".mima-excludes").
+      writeAll(classesPrivateWithin("org.apache.spark").mkString("\n"))
+    println("Created : .mima-excludes in current directory.")
   }
-}
-
-object Utils {
-
   /**
    * Get all classes in a package from a jar file.
    */
-  def getAllClasses(jarPath: String, packageName: String) = {
+  private def getAllClasses(jarPath: String, packageName: String) = {
     val jar = new JarFile(new File(jarPath))
     val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName))
     val classes = mutable.HashSet[Class[_]]()
     for (entry <- enums) {
       if (!entry.endsWith("/") && !entry.endsWith("MANIFEST.MF") && !entry.endsWith("properties")) {
-        try {
-          classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.'))
-        } catch {
-          case e: Throwable => // println(e) // It may throw a few ClassNotFoundExceptions
-        }
+        classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.'))
       }
     }
     classes
   }

+  private def shouldExclude(name: String) = {
+    // Heuristic to remove JVM classes that do not correspond to user-facing classes in Scala
+    Try(mirror.staticClass(name)).isFailure ||
+      name.contains("anon") ||
+      name.endsWith("class") ||
+      name.contains("$sp")
+  }
+
   /**
    * Scans all classes accessible from the context class loader which belong to the given package
    * and subpackages both from directories and jars present on the classpath.
    */
-  def getClasses(packageName: String,
-      classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Set[String] = {
+  private def getClasses(packageName: String,
+      classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Seq[String] = {
     val path = packageName.replace('.', '/')
-    val resources = classLoader.getResources(path).toArray
+    val resources = classLoader.getResources(path)
+
     val jars = resources.filter(x => x.getProtocol == "jar")
       .map(_.getFile.split(":")(1).split("!")(0))
     val classesFromJars = jars.map(getAllClasses(_, path)).flatten
+
     val dirs = resources.filter(x => x.getProtocol == "file")
-      .map(x => new File(x.getFile.split(":")(1)))
+      .map(x => new File(x.getFile.split(":").last))
     val classFromDirs = dirs.map(findClasses(_, packageName)).flatten
-    (classFromDirs ++ classesFromJars).map(_.getCanonicalName).filter(_ != null).toSet
+
+    (classFromDirs ++ classesFromJars).map(_.getName).filter(!shouldExclude(_)).toSeq
   }

   private def findClasses(directory: File, packageName: String): Seq[Class[_]] = {
@@ -105,8 +148,8 @@ object Utils {
       if (file.isDirectory) {
         classes ++= findClasses(file, packageName + "." + file.getName)
       } else if (file.getName.endsWith(".class")) {
-        classes += Class.forName(packageName + '.' + file.getName.substring(0,
-          file.getName.length() - 6))
+        val className = file.getName.substring(0, file.getName.length() - 6)
+        classes += Class.forName(packageName + '.'
+ className) } } classes From 647c547b59508e17c195a9eb17032774b68ddf8e Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Sun, 23 Mar 2014 13:30:25 -0700 Subject: [PATCH 5/8] Reveiw feedback. --- bin/spark-class | 3 +- project/MimaBuild.scala | 6 +- .../spark/tools/GenerateMIMAIgnore.scala | 80 +++++++++---------- 3 files changed, 43 insertions(+), 46 deletions(-) diff --git a/bin/spark-class b/bin/spark-class index dfa11e854dc89..535a6652baa6d 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -129,8 +129,7 @@ fi # Compute classpath using external script CLASSPATH=`$FWDIR/bin/compute-classpath.sh` -CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR" -if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then +if [[ "$1" =~ org.apache.spark.tools.* ]]; then CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR" fi diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index d83844d68a5fb..9dc9e479f9808 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -8,8 +8,8 @@ object MimaBuild { import com.typesafe.tools.mima.core._ import com.typesafe.tools.mima.core.ProblemFilters._ - // Excludes relevant to all Spark versions - val defaultExcludes = Seq(excludePackage("org.apache.spark.repl")) + // Excludes placed here will be used for all Spark versions + val defaultExcludes = Seq() // Read package-private excludes from file val excludeFilePath = (base.getAbsolutePath + "/.mima-excludes") @@ -51,7 +51,7 @@ object MimaBuild { case _ => Seq() } - packagePrivateExcludes ++ versionExcludes + defaultExcludes ++ packagePrivateExcludes ++ versionExcludes } def mimaSettings(sparkHome: File) = mimaDefaultSettings ++ Seq( diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index 44984f8a83d8f..8390b3301df8e 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -46,24 +46,24 @@ object GenerateMIMAIgnore { val privateClasses = mutable.HashSet[String]() def isPackagePrivate(className: String) = { - try { - /* Couldn't figure out if it's possible to determine a-priori whether a given symbol - is a module or class. */ - - val privateAsClass = mirror - .staticClass(className) - .privateWithin - .fullName - .startsWith(packageName) - - val privateAsModule = mirror - .staticModule(className) - .privateWithin - .fullName - .startsWith(packageName) - - privateAsClass || privateAsModule - } catch { + try { + /* Couldn't figure out if it's possible to determine a-priori whether a given symbol + is a module or class. */ + + val privateAsClass = mirror + .staticClass(className) + .privateWithin + .fullName + .startsWith(packageName) + + val privateAsModule = mirror + .staticModule(className) + .privateWithin + .fullName + .startsWith(packageName) + + privateAsClass || privateAsModule + } catch { case _: Throwable => { println("Error determining visibility: " + className) false @@ -95,26 +95,11 @@ object GenerateMIMAIgnore { println("Created : .mima-excludes in current directory.") } - /** - * Get all classes in a package from a jar file. 
- */ - private def getAllClasses(jarPath: String, packageName: String) = { - val jar = new JarFile(new File(jarPath)) - val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName)) - val classes = mutable.HashSet[Class[_]]() - for (entry <- enums) { - if (!entry.endsWith("/") && !entry.endsWith("MANIFEST.MF") && !entry.endsWith("properties")) { - classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.')) - } - } - classes - } private def shouldExclude(name: String) = { // Heuristic to remove JVM classes that do not correspond to user-facing classes in Scala - Try(mirror.staticClass(name)).isFailure || name.contains("anon") || - name.endsWith("class") || + name.endsWith("$class") || name.contains("$sp") } @@ -123,22 +108,22 @@ object GenerateMIMAIgnore { * and subpackages both from directories and jars present on the classpath. */ private def getClasses(packageName: String, - classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Seq[String] = { + classLoader: ClassLoader = Thread.currentThread().getContextClassLoader): Set[String] = { val path = packageName.replace('.', '/') val resources = classLoader.getResources(path) val jars = resources.filter(x => x.getProtocol == "jar") .map(_.getFile.split(":")(1).split("!")(0)) - val classesFromJars = jars.map(getAllClasses(_, path)).flatten + val classesFromJars = jars.map(getClassesFromJar(_, path)).flatten val dirs = resources.filter(x => x.getProtocol == "file") .map(x => new File(x.getFile.split(":").last)) - val classFromDirs = dirs.map(findClasses(_, packageName)).flatten + val classFromDirs = dirs.map(getClassesFromDir(_, packageName)).flatten - (classFromDirs ++ classesFromJars).map(_.getName).filter(!shouldExclude(_)).toSeq + (classFromDirs ++ classesFromJars).map(_.getName).filterNot(shouldExclude).toSet } - private def findClasses(directory: File, packageName: String): Seq[Class[_]] = { + private def getClassesFromDir(directory: File, packageName: String): Seq[Class[_]] = { val classes = mutable.ArrayBuffer[Class[_]]() if (!directory.exists()) { return classes @@ -146,12 +131,25 @@ object GenerateMIMAIgnore { val files = directory.listFiles() for (file <- files) { if (file.isDirectory) { - classes ++= findClasses(file, packageName + "." + file.getName) + classes ++= getClassesFromDir(file, packageName + "." + file.getName) } else if (file.getName.endsWith(".class")) { - val className = file.getName.substring(0, file.getName.length() - 6) + val className = file.getName.stripSuffix(".class") classes += Class.forName(packageName + '.' + className) } } classes } + + /** + * Get all classes in a package from a jar file. 
+ */ + private def getClassesFromJar(jarPath: String, packageName: String) = { + val jar = new JarFile(new File(jarPath)) + val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName)) + val classes = mutable.HashSet[Class[_]]() + for (entry <- enums if entry.endsWith(".class")) { + classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.')) + } + classes + } } From 0e0f5703090313f458072ac73105eec7a05b1027 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 24 Mar 2014 00:23:36 -0700 Subject: [PATCH 6/8] Small fix and removing directory listings --- .../spark/tools/GenerateMIMAIgnore.scala | 32 +++---------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index 8390b3301df8e..8eb8504ca9120 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -23,7 +23,6 @@ import java.util.jar.JarFile import scala.collection.mutable import scala.collection.JavaConversions._ import scala.reflect.runtime.universe.runtimeMirror -import scala.util.Try /** * A tool for generating classes to be excluded during binary checking with MIMA. It is expected @@ -113,31 +112,10 @@ object GenerateMIMAIgnore { val resources = classLoader.getResources(path) val jars = resources.filter(x => x.getProtocol == "jar") - .map(_.getFile.split(":")(1).split("!")(0)) + .map(_.getFile.split(":")(1).split("!")(0)).toSeq val classesFromJars = jars.map(getClassesFromJar(_, path)).flatten - val dirs = resources.filter(x => x.getProtocol == "file") - .map(x => new File(x.getFile.split(":").last)) - val classFromDirs = dirs.map(getClassesFromDir(_, packageName)).flatten - - (classFromDirs ++ classesFromJars).map(_.getName).filterNot(shouldExclude).toSet - } - - private def getClassesFromDir(directory: File, packageName: String): Seq[Class[_]] = { - val classes = mutable.ArrayBuffer[Class[_]]() - if (!directory.exists()) { - return classes - } - val files = directory.listFiles() - for (file <- files) { - if (file.isDirectory) { - classes ++= getClassesFromDir(file, packageName + "." + file.getName) - } else if (file.getName.endsWith(".class")) { - val className = file.getName.stripSuffix(".class") - classes += Class.forName(packageName + '.' 
+ className) - } - } - classes + classesFromJars.map(_.getName).filterNot(shouldExclude).toSet } /** @@ -146,10 +124,8 @@ object GenerateMIMAIgnore { private def getClassesFromJar(jarPath: String, packageName: String) = { val jar = new JarFile(new File(jarPath)) val enums = jar.entries().map(_.getName).filter(_.startsWith(packageName)) - val classes = mutable.HashSet[Class[_]]() - for (entry <- enums if entry.endsWith(".class")) { - classes += Class.forName(entry.trim.replaceAll(".class", "").replace('/', '.')) - } + val classes = for (entry <- enums if entry.endsWith(".class")) + yield Class.forName(entry.replace('/', '.').stripSuffix(".class")) classes } } From 3666cf14a4ac042c263d03a0ce88758b6664aff7 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 24 Mar 2014 11:00:47 -0700 Subject: [PATCH 7/8] Minor style change --- project/MimaBuild.scala | 17 +++++++++++++++++ .../apache/spark/tools/GenerateMIMAIgnore.scala | 5 +++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index 9dc9e479f9808..041919deaa9a6 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact} import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings import sbt._ diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index 8eb8504ca9120..5547e9fe58fc7 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -113,9 +113,10 @@ object GenerateMIMAIgnore { val jars = resources.filter(x => x.getProtocol == "jar") .map(_.getFile.split(":")(1).split("!")(0)).toSeq - val classesFromJars = jars.map(getClassesFromJar(_, path)).flatten - classesFromJars.map(_.getName).filterNot(shouldExclude).toSet + jars.flatMap(getClassesFromJar(_, path)) + .map(_.getName) + .filterNot(shouldExclude).toSet } /** From 22ae267b368669455666a3f9b18294662b4e3276 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 24 Mar 2014 19:39:46 -0700 Subject: [PATCH 8/8] New binary changes after upmerge --- project/MimaBuild.scala | 6 +++++- project/SparkBuild.scala | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index 041919deaa9a6..e7c9c47c960fa 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -57,7 +57,11 @@ object MimaBuild { val versionExcludes = SparkBuild.SPARK_VERSION match { case v if v.startsWith("1.0") => - Seq(excludePackage("org.apache.spark.api.java")) ++ + Seq( + excludePackage("org.apache.spark.api.java"), + excludePackage("org.apache.spark.streaming.api.java"), + excludePackage("org.apache.spark.mllib") + ) ++ excludeSparkClass("rdd.ClassTags") ++ excludeSparkClass("util.XORShiftRandom") ++ excludeSparkClass("mllib.recommendation.MFDataGenerator") ++ diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index e4883c9c74c9f..21d2779d85b74 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -342,7 +342,6 @@ object SparkBuild extends Build { def examplesSettings = sharedSettings ++ Seq( name := "spark-examples", - previousArtifact := sparkPreviousArtifact("spark-examples"), libraryDependencies ++= Seq( "com.twitter" %% "algebird-core" % "0.1.11", "org.apache.hbase" % "hbase" % HBASE_VERSION excludeAll(excludeNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging), @@ -553,6 +552,7 @@ object SparkBuild extends Build { def mqttSettings() = streamingSettings ++ Seq( name := "spark-streaming-mqtt", + previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"), libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0") ) }
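For illustration only (not part of the patch series): after the follow-up commits above, the shouldExclude heuristic in GenerateMIMAIgnore keeps a class name only if it carries none of the compiler-generated markers, so synthetic classes never reach the generated .mima-excludes file. A self-contained sketch of that final string check, using hypothetical class names:

// Hypothetical class names; only the plain user-facing class survives the filter.
val candidates = Seq(
  "org.apache.spark.rdd.RDD",                 // kept
  "org.apache.spark.rdd.RDD$$anonfun$map$1",  // dropped: anonymous function class
  "org.apache.spark.Logging$class",           // dropped: trait implementation class
  "org.apache.spark.util.Vector$mcD$sp"       // dropped: specialized variant
)
val visible = candidates.filterNot { name =>
  name.contains("anon") || name.endsWith("$class") || name.contains("$sp")
}
// visible == Seq("org.apache.spark.rdd.RDD")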