From c1f076afb28bc78e3b2d55234eec48c6e69ebeda Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun
Date: Wed, 21 Feb 2024 15:07:07 -0800
Subject: [PATCH] [SPARK-47152][SQL][BUILD] Provide Apache Hive Jackson
 dependency via a new optional directory

---
 .../apache/spark/internal/config/package.scala   | 17 +++++++++++++++++
 dev/make-distribution.sh                         |  6 ++++++
 .../spark/launcher/AbstractCommandBuilder.java   |  2 ++
 .../apache/spark/launcher/SparkLauncher.java     |  8 ++++++++
 .../launcher/SparkSubmitCommandBuilder.java      |  8 ++++++++
 .../spark/launcher/SparkSubmitOptionParser.java  |  2 ++
 6 files changed, 43 insertions(+)

diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index 0b026a888e881..7caac5884c745 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.internal
 
+import java.io.File
 import java.util.Locale
 import java.util.concurrent.TimeUnit
 
@@ -64,8 +65,16 @@ package object config {
       .stringConf
       .createOptional
 
+  private[spark] val DRIVER_DEFAULT_EXTRA_CLASS_PATH =
+    ConfigBuilder(SparkLauncher.DRIVER_DEFAULT_EXTRA_CLASS_PATH)
+      .internal()
+      .version("4.0.0")
+      .stringConf
+      .createWithDefault(SparkLauncher.DRIVER_DEFAULT_EXTRA_CLASS_PATH_VALUE)
+
   private[spark] val DRIVER_CLASS_PATH =
     ConfigBuilder(SparkLauncher.DRIVER_EXTRA_CLASSPATH)
+      .withPrepended(DRIVER_DEFAULT_EXTRA_CLASS_PATH.key, File.pathSeparator)
       .version("1.0.0")
       .stringConf
       .createOptional
@@ -254,8 +263,16 @@ package object config {
   private[spark] val EXECUTOR_ID =
     ConfigBuilder("spark.executor.id").version("1.2.0").stringConf.createOptional
 
+  private[spark] val EXECUTOR_DEFAULT_EXTRA_CLASS_PATH =
+    ConfigBuilder(SparkLauncher.EXECUTOR_DEFAULT_EXTRA_CLASS_PATH)
+      .internal()
+      .version("4.0.0")
+      .stringConf
+      .createWithDefault(SparkLauncher.EXECUTOR_DEFAULT_EXTRA_CLASS_PATH_VALUE)
+
   private[spark] val EXECUTOR_CLASS_PATH =
     ConfigBuilder(SparkLauncher.EXECUTOR_EXTRA_CLASSPATH)
+      .withPrepended(EXECUTOR_DEFAULT_EXTRA_CLASS_PATH.key, File.pathSeparator)
       .version("1.0.0")
       .stringConf
       .createOptional
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index ce5c94197d4ab..5c4c36df37a6b 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -189,6 +189,12 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE"
 # Copy jars
 cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/"
 
+# Only create the hive-jackson directory if Hive Jackson jars exist.
+for f in "$DISTDIR"/jars/jackson-*-asl-*.jar; do
+  mkdir -p "$DISTDIR"/hive-jackson
+  mv "$f" "$DISTDIR"/hive-jackson/
+done
+
 # Only create the yarn directory if the yarn artifacts were built.
if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then mkdir "$DISTDIR/yarn" diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java index 21861bdcb55e5..914f4e4d45705 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java @@ -271,6 +271,8 @@ Map getEffectiveConfig() throws IOException { Properties p = loadPropertiesFile(); p.stringPropertyNames().forEach(key -> effectiveConfig.computeIfAbsent(key, p::getProperty)); + effectiveConfig.putIfAbsent(SparkLauncher.DRIVER_DEFAULT_EXTRA_CLASS_PATH, + SparkLauncher.DRIVER_DEFAULT_EXTRA_CLASS_PATH_VALUE); } return effectiveConfig; } diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java b/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java index 5d36ef56d2cf0..f41474e12df93 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java @@ -54,6 +54,10 @@ public class SparkLauncher extends AbstractLauncher { /** Configuration key for the driver memory. */ public static final String DRIVER_MEMORY = "spark.driver.memory"; + /** Configuration key for the driver default extra class path. */ + public static final String DRIVER_DEFAULT_EXTRA_CLASS_PATH = + "spark.driver.defaultExtraClassPath"; + public static final String DRIVER_DEFAULT_EXTRA_CLASS_PATH_VALUE = "hive-jackson/*"; /** Configuration key for the driver class path. */ public static final String DRIVER_EXTRA_CLASSPATH = "spark.driver.extraClassPath"; /** Configuration key for the default driver VM options. */ @@ -65,6 +69,10 @@ public class SparkLauncher extends AbstractLauncher { /** Configuration key for the executor memory. */ public static final String EXECUTOR_MEMORY = "spark.executor.memory"; + /** Configuration key for the executor default extra class path. */ + public static final String EXECUTOR_DEFAULT_EXTRA_CLASS_PATH = + "spark.executor.defaultExtraClassPath"; + public static final String EXECUTOR_DEFAULT_EXTRA_CLASS_PATH_VALUE = "hive-jackson/*"; /** Configuration key for the executor class path. */ public static final String EXECUTOR_EXTRA_CLASSPATH = "spark.executor.extraClassPath"; /** Configuration key for the default executor VM options. */ diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index 5469b36cf9614..d884f7e474c07 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -267,6 +267,12 @@ private List buildSparkSubmitCommand(Map env) Map config = getEffectiveConfig(); boolean isClientMode = isClientMode(config); String extraClassPath = isClientMode ? 
+    String defaultExtraClassPath = config.get(SparkLauncher.DRIVER_DEFAULT_EXTRA_CLASS_PATH);
+    if (extraClassPath == null || extraClassPath.trim().isEmpty()) {
+      extraClassPath = defaultExtraClassPath;
+    } else {
+      extraClassPath += File.pathSeparator + defaultExtraClassPath;
+    }
 
     List<String> cmd = buildJavaCommand(extraClassPath);
     // Take Thrift/Connect Server as daemon
@@ -498,6 +504,8 @@ protected boolean handle(String opt, String value) {
       case DRIVER_MEMORY -> conf.put(SparkLauncher.DRIVER_MEMORY, value);
       case DRIVER_JAVA_OPTIONS -> conf.put(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, value);
       case DRIVER_LIBRARY_PATH -> conf.put(SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH, value);
+      case DRIVER_DEFAULT_CLASS_PATH ->
+        conf.put(SparkLauncher.DRIVER_DEFAULT_EXTRA_CLASS_PATH, value);
       case DRIVER_CLASS_PATH -> conf.put(SparkLauncher.DRIVER_EXTRA_CLASSPATH, value);
       case CONF -> {
         checkArgument(value != null, "Missing argument to %s", CONF);
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java
index ea54986daab7d..df4fccd0f01e7 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java
@@ -40,6 +40,7 @@ class SparkSubmitOptionParser {
   protected final String CONF = "--conf";
   protected final String DEPLOY_MODE = "--deploy-mode";
   protected final String DRIVER_CLASS_PATH = "--driver-class-path";
+  protected final String DRIVER_DEFAULT_CLASS_PATH = "--driver-default-class-path";
   protected final String DRIVER_CORES = "--driver-cores";
   protected final String DRIVER_JAVA_OPTIONS = "--driver-java-options";
   protected final String DRIVER_LIBRARY_PATH = "--driver-library-path";
@@ -94,6 +95,7 @@ class SparkSubmitOptionParser {
     { DEPLOY_MODE },
     { DRIVER_CLASS_PATH },
     { DRIVER_CORES },
+    { DRIVER_DEFAULT_CLASS_PATH },
     { DRIVER_JAVA_OPTIONS },
     { DRIVER_LIBRARY_PATH },
     { DRIVER_MEMORY },
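
A minimal usage sketch (not part of the patch): how an application started through the launcher API might override the new default class-path entry, for example to keep the optional hive-jackson directory off the driver class path. The app jar, main class, and master below are placeholders, and clearing the default this way is an assumption based on the merging logic in buildSparkSubmitCommand above.

import org.apache.spark.launcher.SparkLauncher;

public class LaunchWithoutHiveJackson {
  public static void main(String[] args) throws Exception {
    // Placeholder application jar, main class, and master.
    Process spark = new SparkLauncher()
      .setAppResource("/path/to/example-app.jar")
      .setMainClass("com.example.ExampleApp")
      .setMaster("local[*]")
      // Clear the "hive-jackson/*" default introduced by this patch; the executor-side
      // key spark.executor.defaultExtraClassPath can be cleared the same way.
      .setConf(SparkLauncher.DRIVER_DEFAULT_EXTRA_CLASS_PATH, "")
      .launch();
    spark.waitFor();
  }
}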