
Commit 196121b

Merge remote-tracking branch 'upstream/master'
2 parents 491a1eb + e0f9462 commit 196121b

File tree: 51 files changed, +1314 −353 lines


README.md

Lines changed: 1 addition & 5 deletions
```diff
@@ -118,11 +118,7 @@ If your project is built with Maven, add this to your POM file's `<dependencies>
 ## A Note About Thrift JDBC server and CLI for Spark SQL
 
 Spark SQL supports Thrift JDBC server and CLI.
-See sql-programming-guide.md for more information about those features.
-You can use those features by setting `-Phive-thriftserver` when building Spark as follows.
-
-    $ sbt/sbt -Phive-thriftserver assembly
-
+See sql-programming-guide.md for more information about using the JDBC server.
 
 ## Configuration
 
```

assembly/pom.xml

Lines changed: 18 additions & 5 deletions
```diff
@@ -43,6 +43,12 @@
   </properties>
 
   <dependencies>
+    <!-- Promote Guava to compile scope in this module so it's included while shading. -->
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <scope>compile</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -113,6 +119,18 @@
               <goal>shade</goal>
             </goals>
             <configuration>
+              <relocations>
+                <relocation>
+                  <pattern>com.google</pattern>
+                  <shadedPattern>org.spark-project.guava</shadedPattern>
+                  <includes>
+                    <include>com.google.common.**</include>
+                  </includes>
+                  <excludes>
+                    <exclude>com.google.common.base.Optional**</exclude>
+                  </excludes>
+                </relocation>
+              </relocations>
               <transformers>
                 <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
                 <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
@@ -163,11 +181,6 @@
           <artifactId>spark-hive_${scala.binary.version}</artifactId>
           <version>${project.version}</version>
         </dependency>
-      </dependencies>
-    </profile>
-    <profile>
-      <id>hive-thriftserver</id>
-      <dependencies>
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
```

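The relocation above rewrites Guava classes to the org.spark-project.guava namespace inside the assembly jar, but deliberately excludes com.google.common.base.Optional, which Spark's public Java API exposes in method signatures (for example, the values returned by JavaPairRDD.leftOuterJoin). A minimal sketch of why that exclusion matters; the object name is illustrative and not part of this commit:

```scala
import com.google.common.base.Optional

// Illustrative only: user code compiled against the Spark Java API refers to
// com.google.common.base.Optional by its canonical name, so that one class must
// keep its name in the shaded assembly even though the rest of Guava is relocated.
object OptionalNameSketch {
  def main(args: Array[String]): Unit = {
    val maybeValue: Optional[String] = Optional.absent()
    println(maybeValue.or("default"))
  }
}
```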
bin/spark-class

Lines changed: 38 additions & 11 deletions
```diff
@@ -17,6 +17,8 @@
 # limitations under the License.
 #
 
+# NOTE: Any changes to this file must be reflected in SparkSubmitDriverBootstrapper.scala!
+
 cygwin=false
 case "`uname`" in
     CYGWIN*) cygwin=true;;
@@ -39,7 +41,7 @@ fi
 
 if [ -n "$SPARK_MEM" ]; then
   echo -e "Warning: SPARK_MEM is deprecated, please use a more specific config option" 1>&2
-  echo -e "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)." 1>&2
+  echo -e "(e.g., spark.executor.memory or spark.driver.memory)." 1>&2
 fi
 
 # Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
@@ -73,11 +75,17 @@ case "$1" in
     OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
     ;;
 
-  # Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS
-  'org.apache.spark.deploy.SparkSubmit')
-    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS \
-      -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
+  # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS +
+  # SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY.
+  'org.apache.spark.deploy.SparkSubmit')
+    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS"
     OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
+    if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then
+      OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
+    fi
+    if [ -n "$SPARK_SUBMIT_DRIVER_MEMORY" ]; then
+      OUR_JAVA_MEM="$SPARK_SUBMIT_DRIVER_MEMORY"
+    fi
     ;;
 
   *)
@@ -101,11 +109,12 @@ fi
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
 JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
+
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
   JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
 fi
-export JAVA_OPTS
+
 # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
 
 TOOLS_DIR="$FWDIR"/tools
@@ -146,10 +155,28 @@ if $cygwin; then
 fi
 export CLASSPATH
 
-if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
-  echo -n "Spark Command: " 1>&2
-  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
-  echo -e "========================================\n" 1>&2
+# In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself.
+# Here we must parse the properties file for relevant "spark.driver.*" configs before launching
+# the driver JVM itself. Instead of handling this complexity in Bash, we launch a separate JVM
+# to prepare the launch environment of this driver JVM.
+
+if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
+  # This is used only if the properties file actually contains these special configs
+  # Export the environment variables needed by SparkSubmitDriverBootstrapper
+  export RUNNER
+  export CLASSPATH
+  export JAVA_OPTS
+  export OUR_JAVA_MEM
+  export SPARK_CLASS=1
+  shift # Ignore main class (org.apache.spark.deploy.SparkSubmit) and use our own
+  exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
+else
+  # Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala
+  if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
+    echo -n "Spark Command: " 1>&2
+    echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
+    echo -e "========================================\n" 1>&2
+  fi
+  exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
 fi
 
-exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
```

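When SPARK_SUBMIT_BOOTSTRAP_DRIVER is set, the script hands RUNNER, CLASSPATH, JAVA_OPTS and OUR_JAVA_MEM to SparkSubmitDriverBootstrapper through the environment instead of exec'ing the driver JVM directly. The toy sketch below only illustrates the shape of that hand-off; it is not the actual SparkSubmitDriverBootstrapper implementation, and the BootstrapperSketch name is made up:

```scala
// Toy sketch of the environment-variable hand-off from bin/spark-class
// (not the real SparkSubmitDriverBootstrapper): read the exported variables
// and print the driver command that would be launched.
object BootstrapperSketch {
  def main(args: Array[String]): Unit = {
    val runner    = sys.env.getOrElse("RUNNER", "java")
    val classpath = sys.env.getOrElse("CLASSPATH", "")
    val javaOpts  = sys.env.getOrElse("JAVA_OPTS", "").split("\\s+").filter(_.nonEmpty).toSeq
    val memory    = sys.env.getOrElse("OUR_JAVA_MEM", "512m")

    val command = Seq(runner, "-cp", classpath) ++ javaOpts ++
      Seq(s"-Xms$memory", s"-Xmx$memory", "org.apache.spark.deploy.SparkSubmit") ++ args
    // Echo the command instead of exec'ing it, much like SPARK_PRINT_LAUNCH_COMMAND does.
    println(command.mkString(" "))
  }
}
```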
bin/spark-submit

Lines changed: 23 additions & 5 deletions
```diff
@@ -17,14 +17,18 @@
 # limitations under the License.
 #
 
+# NOTE: Any changes in this file must be reflected in SparkClassLauncher.scala!
+
 export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
 ORIG_ARGS=("$@")
 
 while (($#)); do
   if [ "$1" = "--deploy-mode" ]; then
-    DEPLOY_MODE=$2
+    SPARK_SUBMIT_DEPLOY_MODE=$2
+  elif [ "$1" = "--properties-file" ]; then
+    SPARK_SUBMIT_PROPERTIES_FILE=$2
   elif [ "$1" = "--driver-memory" ]; then
-    DRIVER_MEMORY=$2
+    export SPARK_SUBMIT_DRIVER_MEMORY=$2
   elif [ "$1" = "--driver-library-path" ]; then
     export SPARK_SUBMIT_LIBRARY_PATH=$2
   elif [ "$1" = "--driver-class-path" ]; then
@@ -35,10 +39,24 @@ while (($#)); do
   shift
 done
 
-DEPLOY_MODE=${DEPLOY_MODE:-"client"}
+DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
+export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"}
+export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}
+
+# For client mode, the driver will be launched in the same JVM that launches
+# SparkSubmit, so we may need to read the properties file for any extra class
+# paths, library paths, java options and memory early on. Otherwise, it will
+# be too late by the time the driver JVM has started.
 
-if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then
-  export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
+if [[ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" && -f "$SPARK_SUBMIT_PROPERTIES_FILE" ]]; then
+  # Parse the properties file only if the special configs exist
+  contains_special_configs=$(
+    grep -e "spark.driver.extra*\|spark.driver.memory" "$SPARK_SUBMIT_PROPERTIES_FILE" | \
+    grep -v "^[[:space:]]*#"
+  )
+  if [ -n "$contains_special_configs" ]; then
+    export SPARK_SUBMIT_BOOTSTRAP_DRIVER=1
+  fi
 fi
 
 exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"
```

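The grep pipeline above only decides whether the properties file mentions any "special" driver settings (spark.driver.extra* or spark.driver.memory) outside of comment lines; if so, the bootstrap flag is exported. A rough Scala equivalent of that check, purely for illustration; the object and the needsBootstrap helper are not part of the commit:

```scala
import scala.io.Source

// Rough Scala equivalent of the Bash check in bin/spark-submit (illustrative only):
// does the properties file contain uncommented spark.driver.extra* / spark.driver.memory keys?
object DriverConfigCheckSketch {
  def needsBootstrap(propertiesFile: String): Boolean = {
    val source = Source.fromFile(propertiesFile)
    try {
      source.getLines().map(_.trim).exists { line =>
        !line.startsWith("#") &&
          (line.startsWith("spark.driver.extra") || line.startsWith("spark.driver.memory"))
      }
    } finally {
      source.close()
    }
  }

  def main(args: Array[String]): Unit = {
    val file = args.headOption.getOrElse("conf/spark-defaults.conf")
    println(s"bootstrap needed: ${needsBootstrap(file)}")
  }
}
```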
bin/utils.sh

File mode changed: 100644 → 100755 (no content changes).

conf/spark-defaults.conf.template

Lines changed: 6 additions & 4 deletions
```diff
@@ -2,7 +2,9 @@
 # This is useful for setting default environmental settings.
 
 # Example:
-# spark.master            spark://master:7077
-# spark.eventLog.enabled  true
-# spark.eventLog.dir      hdfs://namenode:8021/directory
-# spark.serializer        org.apache.spark.serializer.KryoSerializer
+# spark.master                     spark://master:7077
+# spark.eventLog.enabled           true
+# spark.eventLog.dir               hdfs://namenode:8021/directory
+# spark.serializer                 org.apache.spark.serializer.KryoSerializer
+# spark.driver.memory              5g
+# spark.executor.extraJavaOptions  -XX:+PrintGCDetail -Dkey=value -Dnumbers="one two three"
```

core/pom.xml

Lines changed: 35 additions & 0 deletions
```diff
@@ -68,9 +68,15 @@
       <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-server</artifactId>
     </dependency>
+    <!--
+      Promote Guava to "compile" so that maven-shade-plugin picks it up (for packaging the Optional
+      class exposed in the Java API). The plugin will then remove this dependency from the published
+      pom, so that Guava does not pollute the client's compilation classpath.
+    -->
     <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
+      <scope>compile</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.commons</groupId>
@@ -322,6 +328,35 @@
           </arguments>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <shadedArtifactAttached>false</shadedArtifactAttached>
+              <artifactSet>
+                <includes>
+                  <include>com.google.guava:guava</include>
+                </includes>
+              </artifactSet>
+              <filters>
+                <!-- See comment in the guava dependency declaration above. -->
+                <filter>
+                  <artifact>com.google.guava:guava</artifact>
+                  <includes>
+                    <include>com/google/common/base/Optional*</include>
+                  </includes>
+                </filter>
+              </filters>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
 
     <resources>
```

core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala

Lines changed: 0 additions & 25 deletions
```diff
@@ -40,28 +40,3 @@ private[spark] object PythonUtils {
     paths.filter(_ != "").mkString(File.pathSeparator)
   }
 }
-
-
-/**
- * A utility class to redirect the child process's stdout or stderr.
- */
-private[spark] class RedirectThread(
-    in: InputStream,
-    out: OutputStream,
-    name: String)
-  extends Thread(name) {
-
-  setDaemon(true)
-  override def run() {
-    scala.util.control.Exception.ignoring(classOf[IOException]) {
-      // FIXME: We copy the stream on the level of bytes to avoid encoding problems.
-      val buf = new Array[Byte](1024)
-      var len = in.read(buf)
-      while (len != -1) {
-        out.write(buf, 0, len)
-        out.flush()
-        len = in.read(buf)
-      }
-    }
-  }
-}
```

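RedirectThread is removed here because it now lives in org.apache.spark.util, as the updated imports in the next two files show. A minimal usage sketch, assuming the constructor keeps the (InputStream, OutputStream, String) signature shown in the removed code and that the caller sits inside the org.apache.spark namespace, since the class is private[spark]; the object name is illustrative:

```scala
package org.apache.spark.util

// Minimal sketch (illustrative, not from the commit): copy a child process's
// stdout into this JVM's stdout on a daemon thread, the same way PythonRunner
// and PythonWorkerFactory forward output from the Python process.
object RedirectThreadSketch {
  def main(args: Array[String]): Unit = {
    val process  = new ProcessBuilder("echo", "hello from the child").start()
    val redirect = new RedirectThread(process.getInputStream, System.out, "redirect child stdout")
    redirect.start()
    process.waitFor()
    redirect.join()
  }
}
```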
core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala

Lines changed: 1 addition & 2 deletions
```diff
@@ -17,15 +17,14 @@
 
 package org.apache.spark.api.python
 
-import java.lang.Runtime
 import java.io.{DataOutputStream, DataInputStream, InputStream, OutputStreamWriter}
 import java.net.{InetAddress, ServerSocket, Socket, SocketException}
 
 import scala.collection.mutable
 import scala.collection.JavaConversions._
 
 import org.apache.spark._
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{RedirectThread, Utils}
 
 private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String, String])
   extends Logging {
```

core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala

Lines changed: 2 additions & 2 deletions
```diff
@@ -22,8 +22,8 @@ import java.net.URI
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.JavaConversions._
 
-import org.apache.spark.api.python.{PythonUtils, RedirectThread}
-import org.apache.spark.util.Utils
+import org.apache.spark.api.python.PythonUtils
+import org.apache.spark.util.{RedirectThread, Utils}
 
 /**
  * A main class used by spark-submit to launch Python applications. It executes python as a
```
