Skip to content

Commit f11f84a

Browse files
committed
zip pyspark archives
1 parent 5192cca commit f11f84a

File tree

3 files changed

+45
-4
lines changed

3 files changed

+45
-4
lines changed

assembly/pom.xml

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,27 @@
9292
<skip>true</skip>
9393
</configuration>
9494
</plugin>
95+
<!-- zip pyspark archives to run Python applications in YARN mode -->
96+
<plugin>
97+
<groupId>org.apache.maven.plugins</groupId>
98+
<artifactId>maven-antrun-plugin</artifactId>
99+
<executions>
100+
<execution>
101+
<phase>package</phase>
102+
<goals>
103+
<goal>run</goal>
104+
</goals>
105+
</execution>
106+
</executions>
107+
<configuration>
108+
<target>
109+
<delete dir="${basedir}/../python/lib/pyspark.zip"/>
110+
<zip destfile="${basedir}/../python/lib/pyspark.zip">
111+
<fileset dir="${basedir}/../python/" includes="pyspark/**/*"/>
112+
</zip>
113+
</target>
114+
</configuration>
115+
</plugin>
95116
<!-- Use the shade plugin to create a big JAR with all the dependencies -->
96117
<plugin>
97118
<groupId>org.apache.maven.plugins</groupId>
@@ -196,6 +217,19 @@
196217
<artifactId>maven-assembly-plugin</artifactId>
197218
<version>2.4</version>
198219
<executions>
220+
<!--execution>
221+
<id>pyspark-zip</id>
222+
<phase>package</phase>
223+
<goals>
224+
<goal>single</goal>
225+
</goals>
226+
<configuration>
227+
<skipAssembly>true</skipAssembly>
228+
<descriptors>
229+
<descriptor>src/main/assembly/pyspark-assembly.xml</descriptor>
230+
</descriptors>
231+
</configuration>
232+
</execution-->
199233
<execution>
200234
<id>dist</id>
201235
<phase>package</phase>
@@ -208,7 +242,7 @@
208242
</descriptors>
209243
</configuration>
210244
</execution>
211-
</executions>
245+
</executions>
212246
</plugin>
213247
</plugins>
214248
</build>

make-distribution.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,6 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf
228228
cp "$SPARK_HOME/README.md" "$DISTDIR"
229229
cp -r "$SPARK_HOME/bin" "$DISTDIR"
230230
cp -r "$SPARK_HOME/python" "$DISTDIR"
231-
zip -r "$DISTDIR"/python/lib/pyspark.zip "$SPARK_HOME"/python/lib/pyspark
232231
cp -r "$SPARK_HOME/sbin" "$DISTDIR"
233232
cp -r "$SPARK_HOME/ec2" "$DISTDIR"
234233

project/SparkBuild.scala

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,12 +361,20 @@ object PySparkAssembly {
361361
// to be included in the assembly. We can't just add "python/" to the assembly's resource dir
362362
// list since that will copy unneeded / unwanted files.
363363
resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
364+
val src = new File(BuildCommons.sparkHome, "python/pyspark")
365+
366+
val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip")
367+
IO.delete(zipFile)
368+
def entries(f: File):List[File] =
369+
f :: (if (f.isDirectory) IO.listFiles(f).toList.flatMap(entries(_)) else Nil)
370+
IO.zip(entries(src).map(
371+
d => (d, d.getAbsolutePath.substring(src.getParent.length +1))),
372+
zipFile)
373+
364374
val dst = new File(outDir, "pyspark")
365375
if (!dst.isDirectory()) {
366376
require(dst.mkdirs())
367377
}
368-
369-
val src = new File(BuildCommons.sparkHome, "python/pyspark")
370378
copy(src, dst)
371379
}
372380
)

0 commit comments

Comments
 (0)