File tree: 3 files changed, +45 -4 lines changed
Original file line number Diff line number Diff line change
9292 <skip >true</skip >
9393 </configuration >
9494 </plugin >
95+ <!-- Zip the PySpark sources into an archive so that Python applications can run in YARN mode -->
96+ <plugin >
97+ <groupId >org.apache.maven.plugins</groupId >
98+ <artifactId >maven-antrun-plugin</artifactId >
99+ <executions >
100+ <execution >
101+ <phase >package</phase >
102+ <goals >
103+ <goal >run</goal >
104+ </goals >
105+ </execution >
106+ </executions >
107+ <configuration >
108+ <target >
109+ <delete dir =" ${basedir}/../python/lib/pyspark.zip" />
110+ <zip destfile =" ${basedir}/../python/lib/pyspark.zip" >
111+ <fileset dir =" ${basedir}/../python/" includes =" pyspark/**/*" />
112+ </zip >
113+ </target >
114+ </configuration >
115+ </plugin >
95116 <!-- Use the shade plugin to create a big JAR with all the dependencies -->
96117 <plugin >
97118 <groupId >org.apache.maven.plugins</groupId >
196217 <artifactId >maven-assembly-plugin</artifactId >
197218 <version >2.4</version >
198219 <executions >
220+ <!-- execution>
221+ <id>pyspark-zip</id>
222+ <phase>package</phase>
223+ <goals>
224+ <goal>single</goal>
225+ </goals>
226+ <configuration>
227+ <skipAssembly>true</skipAssembly>
228+ <descriptors>
229+ <descriptor>src/main/assembly/pyspark-assembly.xml</descriptor>
230+ </descriptors>
231+ </configuration>
232+ </execution-->
199233 <execution >
200234 <id >dist</id >
201235 <phase >package</phase >
208242 </descriptors >
209243 </configuration >
210244 </execution >
211- </executions >
245+ </executions >
212246 </plugin >
213247 </plugins >
214248 </build >
Original file line number Diff line number Diff line change @@ -228,7 +228,6 @@ cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf
228228cp " $SPARK_HOME /README.md" " $DISTDIR "
229229cp -r " $SPARK_HOME /bin" " $DISTDIR "
230230cp -r " $SPARK_HOME /python" " $DISTDIR "
231- zip -r " $DISTDIR " /python/lib/pyspark.zip " $SPARK_HOME " /python/lib/pyspark
232231cp -r " $SPARK_HOME /sbin" " $DISTDIR "
233232cp -r " $SPARK_HOME /ec2" " $DISTDIR "
234233
Original file line number Diff line number Diff line change @@ -361,12 +361,20 @@ object PySparkAssembly {
361361 // to be included in the assembly. We can't just add "python/" to the assembly's resource dir
362362 // list since that will copy unneeded / unwanted files.
363363 resourceGenerators in Compile <+= resourceManaged in Compile map { outDir : File =>
364+ val src = new File (BuildCommons .sparkHome, " python/pyspark" )
365+
366+ val zipFile = new File (BuildCommons .sparkHome , " python/lib/pyspark.zip" )
367+ IO .delete(zipFile)
368+ def entries (f : File ): List [File ] =
369+ f :: (if (f.isDirectory) IO .listFiles(f).toList.flatMap(entries(_)) else Nil )
370+ IO .zip(entries(src).map(
371+ d => (d, d.getAbsolutePath.substring(src.getParent.length + 1 ))),
372+ zipFile)
373+
364374 val dst = new File (outDir, " pyspark" )
365375 if (! dst.isDirectory()) {
366376 require(dst.mkdirs())
367377 }
368-
369- val src = new File (BuildCommons .sparkHome, " python/pyspark" )
370378 copy(src, dst)
371379 }
372380 )
You can’t perform that action at this time.
0 commit comments