Skip to content

Commit 3b1b940

Browse files
robert3005 and ash211
authored and committed
[SPARK-7481] Add cloud dependencies (apache-spark-on-k8s#169)
* [SPARK-7481] stripped down packaging only module * add cloud dependency * profiles * Update pom.xml * correct module * test dependencies * jars * whitespace
1 parent d64e290 commit 3b1b940

File tree

14 files changed

+863
-45
lines changed

14 files changed

+863
-45
lines changed

assembly/pom.xml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,5 +222,19 @@
222222
<parquet.deps.scope>provided</parquet.deps.scope>
223223
</properties>
224224
</profile>
225+
226+
<!--
227+
Pull in spark-hadoop-cloud and its associated JARs.
228+
-->
229+
<profile>
230+
<id>cloud</id>
231+
<dependencies>
232+
<dependency>
233+
<groupId>org.apache.spark</groupId>
234+
<artifactId>spark-hadoop-cloud_${scala.binary.version}</artifactId>
235+
<version>${project.version}</version>
236+
</dependency>
237+
</dependencies>
238+
</profile>
225239
</profiles>
226240
</project>

circle.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@ dependencies:
3737
# Copy contents into current build directory
3838
rsync --info=stats2,misc1,flist0 -a build_classes/ .
3939
fi
40-
- ./build/mvn -DskipTests -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pyarn -Phive -Psparkr install
40+
- ./build/mvn -DskipTests -Pcloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pyarn -Phive -Psparkr install
4141
# Copy all of */target/scala_2.11/classes to build_classes/
4242
- >
4343
rsync --info=stats2,misc1,flist0 -a --delete-excluded --prune-empty-dirs --exclude build_classes/ --exclude 'target/streams' --exclude 'assembly/target' --exclude 'common/network-yarn/target' --exclude 'examples/target' --exclude '***/*.jar' --include 'target/***' --include '**/' --exclude '*' . build_classes/
4444
- |
4545
# Make sbt fetch all the external deps to ~/.ivy2 so it gets cached
46-
./build/sbt -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pyarn -Phive -Psparkr externalDependencyClasspath
46+
./build/sbt -Pcloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pyarn -Phive -Psparkr externalDependencyClasspath
4747
cache_directories:
4848
- "build_classes"
4949
- "build"

cloud/pom.xml

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
~ Licensed to the Apache Software Foundation (ASF) under one or more
4+
~ contributor license agreements. See the NOTICE file distributed with
5+
~ this work for additional information regarding copyright ownership.
6+
~ The ASF licenses this file to You under the Apache License, Version 2.0
7+
~ (the "License"); you may not use this file except in compliance with
8+
~ the License. You may obtain a copy of the License at
9+
~
10+
~ http://www.apache.org/licenses/LICENSE-2.0
11+
~
12+
~ Unless required by applicable law or agreed to in writing, software
13+
~ distributed under the License is distributed on an "AS IS" BASIS,
14+
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
~ See the License for the specific language governing permissions and
16+
~ limitations under the License.
17+
-->
18+
<project xmlns="http://maven.apache.org/POM/4.0.0"
19+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
20+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
21+
<modelVersion>4.0.0</modelVersion>
22+
<parent>
23+
<groupId>org.apache.spark</groupId>
24+
<artifactId>spark-parent_2.11</artifactId>
25+
<version>2.2.0-SNAPSHOT</version>
26+
<relativePath>../pom.xml</relativePath>
27+
</parent>
28+
29+
<artifactId>spark-hadoop-cloud_2.11</artifactId>
30+
<packaging>jar</packaging>
31+
<name>Spark Project Cloud Integration</name>
32+
<description>
33+
Contains support for cloud infrastructures, specifically the Hadoop JARs and
34+
transitive dependencies needed to interact with the infrastructures.
35+
36+
Any project which explicitly depends upon the spark-hadoop-cloud artifact will get the
37+
dependencies; the exact versions of which will depend upon the hadoop version Spark was compiled
38+
against.
39+
40+
The imports of transitive dependencies are managed to make them consistent
41+
with those of the Spark build.
42+
43+
WARNING: the signatures of methods in the AWS and Azure SDKs do change between
44+
versions: use exactly the same version with which the Hadoop JARs were
45+
built.
46+
</description>
47+
<properties>
48+
<sbt.project.name>hadoop-cloud</sbt.project.name>
49+
</properties>
50+
51+
<dependencies>
52+
<dependency>
53+
<groupId>org.apache.hadoop</groupId>
54+
<artifactId>hadoop-aws</artifactId>
55+
<scope>${hadoop.deps.scope}</scope>
56+
</dependency>
57+
58+
<dependency>
59+
<groupId>org.apache.hadoop</groupId>
60+
<artifactId>hadoop-openstack</artifactId>
61+
<scope>${hadoop.deps.scope}</scope>
62+
</dependency>
63+
<!--
64+
Add joda time to ensure that anything downstream which doesn't pull in spark-hive
65+
gets the correct joda time artifact, so doesn't have auth failures on later Java 8 JVMs
66+
-->
67+
<dependency>
68+
<groupId>joda-time</groupId>
69+
<artifactId>joda-time</artifactId>
70+
<scope>${hadoop.deps.scope}</scope>
71+
</dependency>
72+
<!-- explicitly declare the jackson artifacts desired -->
73+
<dependency>
74+
<groupId>com.fasterxml.jackson.core</groupId>
75+
<artifactId>jackson-databind</artifactId>
76+
<scope>${hadoop.deps.scope}</scope>
77+
</dependency>
78+
<dependency>
79+
<groupId>com.fasterxml.jackson.core</groupId>
80+
<artifactId>jackson-annotations</artifactId>
81+
<scope>${hadoop.deps.scope}</scope>
82+
</dependency>
83+
<dependency>
84+
<groupId>com.fasterxml.jackson.dataformat</groupId>
85+
<artifactId>jackson-dataformat-cbor</artifactId>
86+
<scope>${hadoop.deps.scope}</scope>
87+
</dependency>
88+
<!-- Explicit declaration to force the Spark version into transitive dependencies -->
89+
<dependency>
90+
<groupId>org.apache.httpcomponents</groupId>
91+
<artifactId>httpclient</artifactId>
92+
<scope>${hadoop.deps.scope}</scope>
93+
</dependency>
94+
<!-- Explicit declaration to force the Spark version into transitive dependencies -->
95+
<dependency>
96+
<groupId>org.apache.httpcomponents</groupId>
97+
<artifactId>httpcore</artifactId>
98+
<scope>${hadoop.deps.scope}</scope>
99+
</dependency>
100+
</dependencies>
101+
102+
<profiles>
103+
<profile>
104+
<id>hadoop-2.7</id>
105+
<dependencies>
106+
<dependency>
107+
<groupId>org.apache.hadoop</groupId>
108+
<artifactId>hadoop-azure</artifactId>
109+
<scope>${hadoop.deps.scope}</scope>
110+
</dependency>
111+
</dependencies>
112+
</profile>
113+
<profile>
114+
<id>hadoop-palantir</id>
115+
<dependencies>
116+
<dependency>
117+
<groupId>org.apache.hadoop</groupId>
118+
<artifactId>hadoop-azure</artifactId>
119+
<scope>${hadoop.deps.scope}</scope>
120+
</dependency>
121+
</dependencies>
122+
</profile>
123+
</profiles>
124+
125+
</project>

dev/deps/spark-deps-hadoop-palantir

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ automaton-1.11-8.jar
1818
avro-1.7.7.jar
1919
avro-ipc-1.7.7.jar
2020
avro-mapred-1.7.7-hadoop2.jar
21+
aws-java-sdk-core-1.11.45.jar
22+
aws-java-sdk-kms-1.11.45.jar
23+
aws-java-sdk-s3-1.11.45.jar
2124
base64-2.3.8.jar
2225
bcpkix-jdk15on-1.54.jar
2326
bcprov-jdk15on-1.54.jar
@@ -70,6 +73,7 @@ guice-3.0.jar
7073
guice-servlet-3.0.jar
7174
hadoop-annotations-2.8.0-palantir3.jar
7275
hadoop-auth-2.8.0-palantir3.jar
76+
hadoop-aws-2.8.0-palantir3.jar
7377
hadoop-client-2.8.0-palantir3.jar
7478
hadoop-common-2.8.0-palantir3.jar
7579
hadoop-hdfs-2.8.0-palantir3.jar
@@ -79,6 +83,7 @@ hadoop-mapreduce-client-common-2.8.0-palantir3.jar
7983
hadoop-mapreduce-client-core-2.8.0-palantir3.jar
8084
hadoop-mapreduce-client-jobclient-2.8.0-palantir3.jar
8185
hadoop-mapreduce-client-shuffle-2.8.0-palantir3.jar
86+
hadoop-openstack-2.8.0-palantir3.jar
8287
hadoop-yarn-api-2.8.0-palantir3.jar
8388
hadoop-yarn-client-2.8.0-palantir3.jar
8489
hadoop-yarn-common-2.8.0-palantir3.jar
@@ -90,11 +95,13 @@ hk2-utils-2.4.0-b34.jar
9095
htrace-core4-4.0.1-incubating.jar
9196
httpclient-4.5.2.jar
9297
httpcore-4.4.4.jar
98+
ion-java-1.0.1.jar
9399
ivy-2.4.0.jar
94100
jackson-annotations-2.6.5.jar
95101
jackson-core-2.6.5.jar
96102
jackson-core-asl-1.9.13.jar
97103
jackson-databind-2.6.5.jar
104+
jackson-dataformat-cbor-2.6.5.jar
98105
jackson-dataformat-yaml-2.6.5.jar
99106
jackson-jaxrs-1.9.13.jar
100107
jackson-jaxrs-base-2.6.5.jar
@@ -129,6 +136,7 @@ jetty-6.1.26.jar
129136
jetty-sslengine-6.1.26.jar
130137
jetty-util-6.1.26.jar
131138
jline-2.12.1.jar
139+
jmespath-java-1.0.jar
132140
joda-time-2.9.3.jar
133141
jodd-core-3.5.2.jar
134142
json-smart-1.1.1.jar

dev/publish.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
set -euo pipefail
44
version=$(git describe --tags)
55

6-
PALANTIR_FLAGS=(-Phadoop-palantir -Pkinesis-asl -Pkubernetes -Phive -Pyarn -Psparkr)
6+
PALANTIR_FLAGS=(-Pcloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Phive -Pyarn -Psparkr)
77

88
publish_artifacts() {
99
tmp_settings="tmp-settings.xml"

dev/test-dependencies.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ export LC_ALL=C
2929
# TODO: This would be much nicer to do in SBT, once SBT supports Maven-style resolution.
3030

3131
# NOTE: These should match those in the release publishing script
32-
HADOOP2_MODULE_PROFILES="-Pkubernetes -Pyarn -Phive"
32+
HADOOP2_MODULE_PROFILES="-Pcloud -Pkubernetes -Pyarn -Phive"
3333
MVN="build/mvn"
3434
HADOOP_PROFILES=(
3535
hadoop-palantir

dists/hadoop-palantir/pom.xml

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@
6565
<artifactId>spark-sql_${scala.binary.version}</artifactId>
6666
<version>${project.version}</version>
6767
</dependency>
68+
<dependency>
69+
<groupId>org.apache.spark</groupId>
70+
<artifactId>spark-hadoop-cloud_${scala.binary.version}</artifactId>
71+
<version>${project.version}</version>
72+
</dependency>
6873
<dependency>
6974
<groupId>org.apache.spark</groupId>
7075
<artifactId>spark-repl_${scala.binary.version}</artifactId>
@@ -110,34 +115,64 @@
110115
<artifactId>spark-dist</artifactId>
111116
<version>${project.version}-hadoop-${hadoop.version}</version>
112117
</dependency>
118+
<!--
119+
Propagate these from spark-parent hadoop-palantir profile
120+
-->
121+
<dependency>
122+
<groupId>org.apache.hadoop</groupId>
123+
<artifactId>hadoop-azure</artifactId>
124+
<version>${hadoop.version}</version>
125+
<scope>${hadoop.deps.scope}</scope>
126+
<exclusions>
127+
<exclusion>
128+
<groupId>org.apache.hadoop</groupId>
129+
<artifactId>hadoop-common</artifactId>
130+
</exclusion>
131+
<exclusion>
132+
<groupId>org.codehaus.jackson</groupId>
133+
<artifactId>jackson-mapper-asl</artifactId>
134+
</exclusion>
135+
<exclusion>
136+
<groupId>com.fasterxml.jackson.core</groupId>
137+
<artifactId>jackson-core</artifactId>
138+
</exclusion>
139+
</exclusions>
140+
</dependency>
113141
</dependencies>
114142
</dependencyManagement>
115143

116144
<dependencies>
117145
<dependency>
118146
<groupId>org.apache.spark</groupId>
119147
<artifactId>spark-core_${scala.binary.version}</artifactId>
120-
<version>${project.version}</version>
121148
</dependency>
122149
<dependency>
123150
<groupId>org.apache.spark</groupId>
124151
<artifactId>spark-mllib_${scala.binary.version}</artifactId>
125-
<version>${project.version}</version>
126152
</dependency>
127153
<dependency>
128154
<groupId>org.apache.spark</groupId>
129155
<artifactId>spark-streaming_${scala.binary.version}</artifactId>
130-
<version>${project.version}</version>
131156
</dependency>
132157
<dependency>
133158
<groupId>org.apache.spark</groupId>
134159
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
135-
<version>${project.version}</version>
136160
</dependency>
137161
<dependency>
138162
<groupId>org.apache.spark</groupId>
139163
<artifactId>spark-sql_${scala.binary.version}</artifactId>
140-
<version>${project.version}</version>
164+
</dependency>
165+
<dependency>
166+
<groupId>org.apache.spark</groupId>
167+
<artifactId>spark-hadoop-cloud_${scala.binary.version}</artifactId>
168+
</dependency>
169+
<!--
170+
Taken from cloud/pom.xml profile dependency
171+
-->
172+
<dependency>
173+
<groupId>org.apache.hadoop</groupId>
174+
<artifactId>hadoop-azure</artifactId>
175+
<scope>${hadoop.deps.scope}</scope>
141176
</dependency>
142177
<!--
143178
Because we don't shade dependencies anymore, we need to restore Guava to compile scope so
@@ -152,7 +187,6 @@
152187
<dependency>
153188
<groupId>org.apache.spark</groupId>
154189
<artifactId>spark-hive_${scala.binary.version}</artifactId>
155-
<version>${project.version}</version>
156190
</dependency>
157191
<dependency>
158192
<groupId>org.apache.spark</groupId>

dists/without-hadoop/pom.xml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,27 +123,22 @@
123123
<dependency>
124124
<groupId>org.apache.spark</groupId>
125125
<artifactId>spark-core_${scala.binary.version}</artifactId>
126-
<version>${project.version}</version>
127126
</dependency>
128127
<dependency>
129128
<groupId>org.apache.spark</groupId>
130129
<artifactId>spark-mllib_${scala.binary.version}</artifactId>
131-
<version>${project.version}</version>
132130
</dependency>
133131
<dependency>
134132
<groupId>org.apache.spark</groupId>
135133
<artifactId>spark-streaming_${scala.binary.version}</artifactId>
136-
<version>${project.version}</version>
137134
</dependency>
138135
<dependency>
139136
<groupId>org.apache.spark</groupId>
140137
<artifactId>spark-graphx_${scala.binary.version}</artifactId>
141-
<version>${project.version}</version>
142138
</dependency>
143139
<dependency>
144140
<groupId>org.apache.spark</groupId>
145141
<artifactId>spark-sql_${scala.binary.version}</artifactId>
146-
<version>${project.version}</version>
147142
</dependency>
148143

149144
<!--

0 commit comments

Comments
 (0)