SCALA_VERSION=2.10

# Figure out where Spark is installed (parent of the directory holding this script).
FWDIR="$(cd "$(dirname "$0")"/..; pwd)"

# Load environment variables through the shared helper so every launcher script
# applies the same spark-env.sh handling (replaces the old inline sourcing of
# conf/spark-env.sh that each script used to duplicate).
. "$FWDIR"/bin/load-spark-env.sh

# Build up classpath: user-supplied SPARK_CLASSPATH entries first, then conf/.
CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf"
ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION"

# First check if we have a dependencies jar. If so, include binary classes with the deps jar.
# NOTE(review): the glob inside [ -f ... ] assumes at most one matching deps jar;
# multiple matches would make the test error out — same as the original behavior.
if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
  CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes"
  CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes"
  CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes"
  CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
  CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
  CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
  CLASSPATH="$CLASSPATH:$FWDIR/tools/target/scala-$SCALA_VERSION/classes"
  CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
  CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
  CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"

  DEPS_ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar)
  CLASSPATH="$CLASSPATH:$DEPS_ASSEMBLY_JAR"
else
  # Else use spark-assembly jar from either RELEASE or assembly directory.
  # The looser spark*-assembly* pattern also matches jars renamed by packaging.
  if [ -f "$FWDIR/RELEASE" ]; then
    ASSEMBLY_JAR=$(ls "$FWDIR"/jars/spark*-assembly*.jar)
  else
    ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*.jar)
  fi
  CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
fi
5659
# When Hive support is needed, Datanucleus jars must be included on the classpath.
# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
num_datanucleus_jars=$(ls "$FWDIR"/lib_managed/jars/ 2>/dev/null | grep "datanucleus-.*\\.jar" | wc -l)
if [ $num_datanucleus_jars -gt 0 ]; then
  # Whichever assembly jar was located above; exactly one of the two is set.
  AN_ASSEMBLY_JAR=${ASSEMBLY_JAR:-$DEPS_ASSEMBLY_JAR}
  # The assembly contains Hive classes only when built with the Hive profile.
  num_hive_files=$(jar tvf "$AN_ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null | wc -l)
  if [ $num_hive_files -gt 0 ]; then
    echo "Spark assembly has been built with Hive, including Datanucleus jars on classpath" 1>&2
    # Join the datanucleus jar paths with ':' for CLASSPATH consumption.
    DATANUCLEUSJARS=$(echo "$FWDIR/lib_managed/jars"/datanucleus-*.jar | tr " " :)
    CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
  fi
fi
76+
# Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1
if [[ $SPARK_TESTING == 1 ]]; then
  CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes"
  CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/test-classes"
  CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/test-classes"
  CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/test-classes"
  CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/test-classes"
  CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/test-classes"
  CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/test-classes"
  CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/test-classes"
  CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/test-classes"
fi
6689
# Add hadoop conf dir if given -- otherwise FileSystem.*, etc fail !