From 5f28ee4367c8bfe36913051737d8faef08c991d1 Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Sat, 8 Mar 2014 16:29:35 -0800
Subject: [PATCH 1/2] SPARK-1167: Remove metrics-ganglia from default build
 due to LGPL issues.

This patch removes Ganglia integration from the default build. It
allows users willing to link against LGPL code to use Ganglia by adding
build flags or linking against a new Spark artifact called
spark-ganglia-lgpl.

This brings Spark in line with the Apache policy on LGPL code
enumerated here:

https://www.apache.org/legal/3party.html#options-optional
---
 assembly/pom.xml                             | 10 +++++
 core/pom.xml                                 |  4 --
 dev/audit-release/README.md                  | 11 +++++
 .../src/main/scala/SparkApp.scala            | 15 ++++++-
 dev/audit-release/sbt_app_ganglia/build.sbt  | 31 +++++++++++++
 .../src/main/scala/SparkApp.scala            | 39 ++++++++++++++++
 dev/create-release/create-release.sh         |  4 +-
 docs/monitoring.md                           | 13 +++++-
 extras/spark-ganglia-lgpl/pom.xml            | 45 +++++++++++++++++++
 .../spark/metrics/sink/GangliaSink.scala     |  0
 pom.xml                                      |  9 +++-
 project/SparkBuild.scala                     | 25 ++++++++---
 12 files changed, 190 insertions(+), 16 deletions(-)
 create mode 100644 dev/audit-release/README.md
 create mode 100644 dev/audit-release/sbt_app_ganglia/build.sbt
 create mode 100644 dev/audit-release/sbt_app_ganglia/src/main/scala/SparkApp.scala
 create mode 100644 extras/spark-ganglia-lgpl/pom.xml
 rename {core => extras/spark-ganglia-lgpl}/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala (100%)
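As a quick illustration of the opt-in this patch introduces (a sketch, assuming the stock sbt and Maven entry points in the Spark tree at this time; any non-empty value of the environment variable works, since the build only checks that it is defined):

```bash
# sbt: defining SPARK_GANGLIA_LGPL pulls the spark-ganglia-lgpl subproject
# into the assembly.
SPARK_GANGLIA_LGPL=true sbt/sbt assembly

# Maven: activate the new profile added to the root pom.xml.
mvn -Pspark-ganglia-lgpl -DskipTests clean package
```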
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 82a5985504b4e..22bbbc57d81d4 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -158,6 +158,16 @@
       </dependencies>
     </profile>
 
+    <profile>
+      <id>spark-ganglia-lgpl</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-ganglia-lgpl_${scala.binary.version}</artifactId>
+          <version>${project.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
     <profile>
       <id>bigtop-dist</id>
diff --git a/extras/spark-ganglia-lgpl/pom.xml b/extras/spark-ganglia-lgpl/pom.xml
new file mode 100644
--- /dev/null
+++ b/extras/spark-ganglia-lgpl/pom.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.spark</groupId>
+  <artifactId>spark-ganglia-lgpl_2.10</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Ganglia Integration</name>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.codahale.metrics</groupId>
+      <artifactId>metrics-ganglia</artifactId>
+    </dependency>
+  </dependencies>
+</project>
diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala b/extras/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala
similarity index 100%
rename from core/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala
rename to extras/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala
diff --git a/pom.xml b/pom.xml
index 3b863856e4634..7888d1d708054 100644
--- a/pom.xml
+++ b/pom.xml
@@ -725,12 +725,19 @@
         <yarn.version>0.23.7</yarn.version>
       </properties>
-
       <modules>
         <module>yarn</module>
       </modules>
     </profile>
 
+
+    <profile>
+      <id>spark-ganglia-lgpl</id>
+      <modules>
+        <module>extras/spark-ganglia-lgpl</module>
+      </modules>
+    </profile>
+
     <profile>
       <id>java8-tests</id>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 138aad7561043..632b45dea4dfa 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -65,7 +65,7 @@ object SparkBuild extends Build {
   lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core)
 
   lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings)
-    .dependsOn(core, graphx, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*)
+    .dependsOn(core, graphx, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*) dependsOn(maybeGanglia: _*)
 
   lazy val assembleDeps = TaskKey[Unit]("assemble-deps", "Build assembly of dependencies and packages Spark projects")
@@ -91,19 +91,26 @@ object SparkBuild extends Build {
   lazy val hadoopClient = if (hadoopVersion.startsWith("0.20.") || hadoopVersion == "1.0.0") "hadoop-core" else "hadoop-client"
   val maybeAvro = if (hadoopVersion.startsWith("0.23.") && isYarnEnabled) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq()
 
-  // Conditionally include the java 8 sub-project
+  // Include Ganglia integration if the user has enabled Ganglia
+  // This is isolated from the normal build due to LGPL-licensed code in the library
+  lazy val isGangliaEnabled = Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined
+  lazy val gangliaProj = Project("spark-ganglia-lgpl", file("extras/spark-ganglia-lgpl"), settings = gangliaSettings).dependsOn(core)
+  val maybeGanglia: Seq[ClasspathDependency] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
+  val maybeGangliaRef: Seq[ProjectReference] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
+
+  // Include the Java 8 project if the JVM version is 8+
   lazy val javaVersion = System.getProperty("java.specification.version")
   lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble
   val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]()
   lazy val java8Tests = Project("java8-tests", file("extras/java8-tests"), settings = java8TestsSettings).
     dependsOn(core) dependsOn(streaming % "compile->compile;test->test")
 
-  // Conditionally include the yarn sub-project
+  // Include the YARN project if the user has enabled YARN
   lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core)
   lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core)
 
-  lazy val maybeYarn = if (isYarnEnabled) Seq[ClasspathDependency](if (isNewHadoop) yarn else yarnAlpha) else Seq[ClasspathDependency]()
-  lazy val maybeYarnRef = if (isYarnEnabled) Seq[ProjectReference](if (isNewHadoop) yarn else yarnAlpha) else Seq[ProjectReference]()
+  lazy val maybeYarn: Seq[ClasspathDependency] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
+  lazy val maybeYarnRef: Seq[ProjectReference] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
 
   lazy val externalTwitter = Project("external-twitter", file("external/twitter"), settings = twitterSettings)
     .dependsOn(streaming % "compile->compile;test->test")
@@ -127,7 +134,7 @@ object SparkBuild extends Build {
     .dependsOn(core, mllib, graphx, bagel, streaming, externalTwitter) dependsOn(allExternal: _*)
 
   // Everything except assembly, tools, java8Tests and examples belong to packageProjects
-  lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef
+  lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef ++ maybeGangliaRef
 
   lazy val allProjects = packageProjects ++ allExternalRefs ++
     Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests
@@ -296,7 +303,6 @@ object SparkBuild extends Build {
         "com.codahale.metrics" % "metrics-core" % "3.0.0",
         "com.codahale.metrics" % "metrics-jvm" % "3.0.0",
         "com.codahale.metrics" % "metrics-json" % "3.0.0",
-        "com.codahale.metrics" % "metrics-ganglia" % "3.0.0",
         "com.codahale.metrics" % "metrics-graphite" % "3.0.0",
         "com.twitter" %% "chill" % "0.3.1",
         "com.twitter" % "chill-java" % "0.3.1",
@@ -384,6 +390,11 @@ object SparkBuild extends Build {
     name := "spark-yarn"
   )
 
+  def gangliaSettings = sharedSettings ++ Seq(
+    name := "spark-ganglia-lgpl",
+    libraryDependencies += "com.codahale.metrics" % "metrics-ganglia" % "3.0.0"
+  )
+
   def java8TestsSettings = sharedSettings ++ Seq(
     name := "java8-tests",
     javacOptions := Seq("-target", "1.8", "-source", "1.8"),
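Downstream applications opt in at the dependency level rather than at build time. A minimal sketch of an application's build.sbt (a hypothetical project; the version number is illustrative, standing in for whichever Spark release publishes the artifact):

```scala
// Opting in to the LGPL-licensed Ganglia sink from an application build.
// %% appends the Scala binary version, resolving to the
// spark-ganglia-lgpl_2.10 artifact introduced by this patch.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"         % "1.0.0",
  "org.apache.spark" %% "spark-ganglia-lgpl" % "1.0.0"
)
```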
"-source", "1.8"), From 326712a55081f2ddd8b63c2a7a52b2cbb7fceb41 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Sun, 9 Mar 2014 22:40:35 -0700 Subject: [PATCH 2/2] Responding to review feedback --- docs/monitoring.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/monitoring.md b/docs/monitoring.md index 192a957078737..15bfb041780da 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -60,9 +60,9 @@ licensing restrictions: To install the `GangliaSink` you'll need to perform a custom build of Spark. _**Note that by embedding this library you will include [LGPL](http://www.gnu.org/copyleft/lesser.html)-licensed code in your Spark package**_. For sbt users, set the -`SPARK_GANGLIA_LGPL` environment varaible before building. For Maven users, enable -the `-Pspark-ganglia-lgpl` profile. For users linking applications against Spark, link -include the `spark-ganglia-lgpl` artifact as a dependency. +`SPARK_GANGLIA_LGPL` environment variable before building. For Maven users, enable +the `-Pspark-ganglia-lgpl` profile. In addition to modifying the cluster's Spark build +user applications will need to link to the `spark-ganglia-lgpl` artifact. The syntax of the metrics configuration file is defined in an example configuration file, `$SPARK_HOME/conf/metrics.properties.template`.