
Commit b4d6912

Author: Marcelo Vanzin
Use spark-submit script in SparkLauncher.
To reduce the surface for backwards compatibility, make SparkLauncher launch child processes through the spark-submit script. This way, the library doesn't need to know about the distribution layout of the Spark version being launched, as long as it has a "bin/spark-submit" script. Do some cleanup and fix a couple of issues that the change unearthed.
1 parent: 28b1434
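For callers, the contract shrinks to "point the launcher at a Spark home that contains bin/spark-submit". A minimal usage sketch of the resulting workflow (the Spark home path, application jar, main class, and master value below are hypothetical, chosen only for illustration):

    import org.apache.spark.launcher.SparkLauncher;

    public class LaunchExample {
      public static void main(String[] args) throws Exception {
        Process spark = new SparkLauncher()
            .setSparkHome("/opt/spark")          // any layout works as long as bin/spark-submit exists
            .setAppResource("/path/to/app.jar")  // hypothetical application jar
            .setMainClass("com.example.MyApp")   // hypothetical main class
            .setMaster("local[2]")
            .launch();
        spark.waitFor();
      }
    }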

File tree

5 files changed: +256 −230 lines


launcher/src/main/java/org/apache/spark/launcher/Main.java

Lines changed: 4 additions & 9 deletions
@@ -88,16 +88,11 @@ public static void main(String[] argsArray) throws Exception {
   /**
    * Prepare a command line for execution from a Windows batch script.
    *
-   * Two things need to be done:
+   * The method quotes all arguments so that spaces are handled as expected. Quotes within arguments
+   * are "double quoted" (which is batch for escaping a quote). This page has more details about
+   * quoting and other batch script fun stuff: http://ss64.com/nt/syntax-esc.html
    *
-   * - If a custom library path is needed, extend PATH to add it. Based on:
-   *   http://superuser.com/questions/223104/setting-environment-variable-for-just-one-command-in-windows-cmd-exe
-   *
-   * - Quote all arguments so that spaces are handled as expected. Quotes within arguments are
-   *   "double quoted" (which is batch for escaping a quote). This page has more details about
-   *   quoting and other batch script fun stuff: http://ss64.com/nt/syntax-esc.html
-   *
-   * The command is executed using "cmd /c" and formatted as single line, since that's the
+   * The command is executed using "cmd /c" and formatted in single line, since that's the
    * easiest way to consume this from a batch script (see spark-class2.cmd).
    */
   private static String prepareForWindows(List<String> cmd, Map<String, String> childEnv) {
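The double-quoting rule described in that comment can be illustrated with a standalone sketch (a simplification; the actual helper in Main.java deals with more than just quote characters):

    // Simplified sketch of batch-style quoting: wrap the argument in quotes and
    // double any embedded quote so cmd.exe reads it back as a literal ".
    // e.g.  say "hello"  ->  "say ""hello"""
    static String quoteForBatch(String arg) {
      StringBuilder quoted = new StringBuilder("\"");
      for (char c : arg.toCharArray()) {
        if (c == '"') {
          quoted.append('"');  // batch escapes a quote by doubling it
        }
        quoted.append(c);
      }
      return quoted.append('"').toString();
    }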

launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java

Lines changed: 6 additions & 46 deletions
@@ -282,10 +282,12 @@ public SparkLauncher setVerbose(boolean verbose) {
    * @return A process handle for the Spark app.
    */
   public Process launch() throws IOException {
-    Map<String, String> procEnv = new HashMap<String, String>(childEnv);
-    List<String> cmd = buildSparkSubmitCommand(procEnv);
+    List<String> cmd = new ArrayList<String>();
+    cmd.add(join(File.separator, getSparkHome(), "bin", "spark-submit"));
+    cmd.addAll(buildSparkSubmitArgs());
+
     ProcessBuilder pb = new ProcessBuilder(cmd.toArray(new String[cmd.size()]));
-    for (Map.Entry<String, String> e : procEnv.entrySet()) {
+    for (Map.Entry<String, String> e : childEnv.entrySet()) {
       pb.environment().put(e.getKey(), e.getValue());
     }
     return pb.start();
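With this change, launch() no longer assembles a java command line itself; the child process is always the distribution's own script plus the submit arguments. A sketch of the composed command, with illustrative values (the real method gets the script path from getSparkHome() and the arguments from buildSparkSubmitArgs()):

    // Illustrative stand-in for what launch() now composes:
    List<String> cmd = new ArrayList<String>();
    cmd.add("/opt/spark" + File.separator + "bin" + File.separator + "spark-submit");  // hypothetical SPARK_HOME
    cmd.addAll(Arrays.asList("--master", "yarn", "--class", "com.example.MyApp", "/path/to/app.jar"));
    Process p = new ProcessBuilder(cmd).start();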
@@ -406,7 +408,7 @@ List<String> buildClassPath(String appClassPath) throws IOException {
       assemblyJar = new JarFile(assembly);
       needsDataNucleus = assemblyJar.getEntry("org/apache/hadoop/hive/ql/exec/") != null;
     } catch (IOException ioe) {
-      if (ioe.getMessage().indexOf("invalid CEN header") > 0) {
+      if (ioe.getMessage().indexOf("invalid CEN header") >= 0) {
         System.err.println(
           "Loading Spark jar failed.\n" +
           "This is likely because Spark was compiled with Java 7 and run\n" +
@@ -565,40 +567,6 @@ List<String> buildSparkSubmitArgs() {
     return args;
   }
 
-  List<String> buildSparkSubmitCommand(Map<String, String> env) throws IOException {
-    // Load the properties file and check whether spark-submit will be running the app's driver
-    // or just launching a cluster app. When running the driver, the JVM's argument will be
-    // modified to cover the driver's configuration.
-    Properties props = loadPropertiesFile();
-    boolean isClientMode = isClientMode(props);
-    String extraClassPath = isClientMode ? find(DRIVER_EXTRA_CLASSPATH, conf, props) : null;
-
-    List<String> cmd = buildJavaCommand(extraClassPath);
-    addOptionString(cmd, System.getenv("SPARK_SUBMIT_OPTS"));
-    addOptionString(cmd, System.getenv("SPARK_JAVA_OPTS"));
-
-    if (isClientMode) {
-      // Figuring out where the memory value come from is a little tricky due to precedence.
-      // Precedence is observed in the following order:
-      // - explicit configuration (setConf()), which also covers --driver-memory cli argument.
-      // - properties file.
-      // - SPARK_DRIVER_MEMORY env variable
-      // - SPARK_MEM env variable
-      // - default value (512m)
-      String memory = firstNonEmpty(find(DRIVER_MEMORY, conf, props),
-        System.getenv("SPARK_DRIVER_MEMORY"), System.getenv("SPARK_MEM"), DEFAULT_MEM);
-      cmd.add("-Xms" + memory);
-      cmd.add("-Xmx" + memory);
-      addOptionString(cmd, find(DRIVER_EXTRA_JAVA_OPTIONS, conf, props));
-      mergeEnvPathList(env, getLibPathEnvName(), find(DRIVER_EXTRA_LIBRARY_PATH, conf, props));
-    }
-
-    addPermGenSizeOpt(cmd);
-    cmd.add("org.apache.spark.deploy.SparkSubmit");
-    cmd.addAll(buildSparkSubmitArgs());
-    return cmd;
-  }
-
   String getSparkHome() {
     String path = getenv(ENV_SPARK_HOME);
     checkState(path != null,
@@ -644,14 +612,6 @@ String getenv(String key) {
     return firstNonEmpty(childEnv.get(key), System.getenv(key));
   }
 
-  private boolean isClientMode(Properties userProps) {
-    String userMaster = firstNonEmpty(master, (String) userProps.get(SPARK_MASTER));
-    return userMaster == null ||
-      "client".equals(deployMode) ||
-      "yarn-client".equals(userMaster) ||
-      (deployMode == null && !userMaster.startsWith("yarn-"));
-  }
-
   private String findAssembly(String scalaVersion) {
     String sparkHome = getSparkHome();
     File libdir;

launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java

Lines changed: 65 additions & 8 deletions
@@ -21,12 +21,9 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 import static org.apache.spark.launcher.CommandBuilderUtils.*;
 
@@ -89,10 +86,44 @@ public List<String> buildCommand(Map<String, String> env) throws IOException {
     if (PYSPARK_SHELL.equals(appResource)) {
       return buildPySparkShellCommand(env);
     } else {
-      return super.buildSparkSubmitCommand(env);
+      return buildSparkSubmitCommand(env);
     }
   }
 
+  private List<String> buildSparkSubmitCommand(Map<String, String> env) throws IOException {
+    // Load the properties file and check whether spark-submit will be running the app's driver
+    // or just launching a cluster app. When running the driver, the JVM's argument will be
+    // modified to cover the driver's configuration.
+    Properties props = loadPropertiesFile();
+    boolean isClientMode = isClientMode(props);
+    String extraClassPath = isClientMode ? find(DRIVER_EXTRA_CLASSPATH, conf, props) : null;
+
+    List<String> cmd = buildJavaCommand(extraClassPath);
+    addOptionString(cmd, System.getenv("SPARK_SUBMIT_OPTS"));
+    addOptionString(cmd, System.getenv("SPARK_JAVA_OPTS"));
+
+    if (isClientMode) {
+      // Figuring out where the memory value come from is a little tricky due to precedence.
+      // Precedence is observed in the following order:
+      // - explicit configuration (setConf()), which also covers --driver-memory cli argument.
+      // - properties file.
+      // - SPARK_DRIVER_MEMORY env variable
+      // - SPARK_MEM env variable
+      // - default value (512m)
+      String memory = firstNonEmpty(find(DRIVER_MEMORY, conf, props),
+        System.getenv("SPARK_DRIVER_MEMORY"), System.getenv("SPARK_MEM"), DEFAULT_MEM);
+      cmd.add("-Xms" + memory);
+      cmd.add("-Xmx" + memory);
+      addOptionString(cmd, find(DRIVER_EXTRA_JAVA_OPTIONS, conf, props));
+      mergeEnvPathList(env, getLibPathEnvName(), find(DRIVER_EXTRA_LIBRARY_PATH, conf, props));
+    }
+
+    addPermGenSizeOpt(cmd);
+    cmd.add("org.apache.spark.deploy.SparkSubmit");
+    cmd.addAll(buildSparkSubmitArgs());
+    return cmd;
+  }
+
   private List<String> buildPySparkShellCommand(Map<String, String> env) throws IOException {
     // For backwards compatibility, if a script is specified in
     // the pyspark command line, then run it using spark-submit.
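The memory precedence described in the comments of buildSparkSubmitCommand above is implemented by the firstNonEmpty chain: the first candidate that is neither null nor empty wins. A standalone sketch of that semantics (firstNonEmpty here is a stand-in for the helper the launcher gets from CommandBuilderUtils):

    // Stand-in with the semantics the memory lookup relies on:
    // return the first value that is neither null nor empty.
    static String firstNonEmpty(String... values) {
      for (String v : values) {
        if (v != null && !v.isEmpty()) {
          return v;
        }
      }
      return null;
    }

    // conf/--driver-memory beats the properties file, which beats
    // SPARK_DRIVER_MEMORY, then SPARK_MEM, then the 512m default:
    String memory = firstNonEmpty(null, "2g", "1g", null, "512m");  // -> "2g"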
@@ -139,6 +170,14 @@ private List<String> buildPySparkShellCommand(Map<String, String> env) throws IO
     return pyargs;
   }
 
+  private boolean isClientMode(Properties userProps) {
+    String userMaster = firstNonEmpty(master, (String) userProps.get(SPARK_MASTER));
+    return userMaster == null ||
+      "client".equals(deployMode) ||
+      "yarn-client".equals(userMaster) ||
+      (deployMode == null && !userMaster.startsWith("yarn-"));
+  }
+
   /**
    * Quotes a string so that it can be used in a command string and be parsed back into a single
    * argument by python's "shlex.split()" function.
@@ -172,19 +211,25 @@ protected boolean handle(String opt, String value) {
         driverArgs.add(opt);
         driverArgs.add(value);
       } else if (opt.equals(DRIVER_MEMORY)) {
-        setConf(DRIVER_MEMORY, value);
+        setConf(SparkLauncher.DRIVER_MEMORY, value);
         driverArgs.add(opt);
         driverArgs.add(value);
       } else if (opt.equals(DRIVER_JAVA_OPTIONS)) {
-        setConf(DRIVER_EXTRA_JAVA_OPTIONS, value);
+        setConf(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, value);
         driverArgs.add(opt);
         driverArgs.add(value);
       } else if (opt.equals(DRIVER_LIBRARY_PATH)) {
-        setConf(DRIVER_EXTRA_LIBRARY_PATH, value);
+        setConf(SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH, value);
         driverArgs.add(opt);
         driverArgs.add(value);
       } else if (opt.equals(DRIVER_CLASS_PATH)) {
-        setConf(DRIVER_EXTRA_CLASSPATH, value);
+        setConf(SparkLauncher.DRIVER_EXTRA_CLASSPATH, value);
+        driverArgs.add(opt);
+        driverArgs.add(value);
+      } else if (opt.equals(CONF)) {
+        String[] conf = value.split("=", 2);
+        checkArgument(conf.length == 2, "Invalid argument to %s: %s", CONF, value);
+        handleConf(conf[0], conf[1]);
         driverArgs.add(opt);
         driverArgs.add(value);
       } else if (opt.equals(CLASS)) {
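The limit argument in value.split("=", 2) is what lets a --conf value itself contain '=' characters; only the first one separates key from value:

    String[] conf = "spark.driver.extraJavaOptions=-Da=b -Dc=d".split("=", 2);
    // conf[0] -> "spark.driver.extraJavaOptions"
    // conf[1] -> "-Da=b -Dc=d"   (later '=' characters are preserved)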
@@ -227,6 +272,18 @@ protected void handleExtraArgs(List<String> extra) {
       }
     }
 
+    private void handleConf(String key, String value) {
+      List<String> driverJvmKeys = Arrays.asList(
+        SparkLauncher.DRIVER_EXTRA_CLASSPATH,
+        SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS,
+        SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH,
+        SparkLauncher.DRIVER_MEMORY);
+
+      if (driverJvmKeys.contains(key)) {
+        setConf(key, value);
+      }
+    }
+
   }
 
 }
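handleConf deliberately records only the four driver JVM settings via setConf; every --conf pair, recognized or not, is still forwarded in driverArgs for spark-submit itself to interpret. An illustrative pair of calls (values hypothetical; the method is private in the real class):

    handleConf("spark.driver.memory", "2g");    // in driverJvmKeys: recorded via setConf
    handleConf("spark.executor.cores", "4");    // not a driver JVM key: ignored here,
                                                // but still passed along in driverArgs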
