From e7b4de44cc83cf6d9f5d78f3aa0f9c249ec5b129 Mon Sep 17 00:00:00 2001
From: Cheng Pan
Date: Mon, 17 Feb 2025 17:34:58 +0800
Subject: [PATCH 1/2] [SPARK-51243][CORE][ML] Configurable allow native BLAS

---
 .../org/apache/spark/util/SparkEnvUtils.scala |  4 +++
 .../scala/org/apache/spark/SparkContext.scala | 29 ++++++++++-------
 .../spark/internal/config/package.scala       |  8 +++++
 docs/ml-linalg-guide.md                       |  5 +--
 .../launcher/SparkSubmitCommandBuilder.java   |  5 +++
 mllib-local/pom.xml                           |  5 +++
 .../org/apache/spark/ml/linalg/BLAS.scala     | 11 +++++--
 .../apache/spark/mllib/linalg/ARPACK.scala    | 11 +++++--
 .../org/apache/spark/mllib/linalg/BLAS.scala  | 32 +------------------
 .../apache/spark/mllib/linalg/LAPACK.scala    | 11 +++++--
 .../apache/spark/mllib/linalg/Matrices.scala  |  2 +-
 .../MatrixFactorizationModel.scala            |  2 +-
 .../apache/spark/mllib/linalg/BLASSuite.scala |  6 ----
 .../org/apache/spark/deploy/yarn/Client.scala |  2 ++
 14 files changed, 74 insertions(+), 59 deletions(-)

diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala
index 01e3f52de41f3..2a82bbbebeb2a 100644
--- a/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala
+++ b/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala
@@ -25,6 +25,10 @@ private[spark] trait SparkEnvUtils {
    */
   def isTesting: Boolean = JavaUtils.isTesting
 
+  /**
+   * Whether to allow using native BLAS/LAPACK/ARPACK libraries if available.
+   */
+  val allowNativeBlas = "true".equals(System.getProperty("netlib.allowNativeBlas", "true"))
 }
 
 object SparkEnvUtils extends SparkEnvUtils
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index b0ac6d96a0010..e57d304685efc 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -432,6 +432,7 @@ class SparkContext(config: SparkConf) extends Logging {
 
     SparkContext.supplementJavaModuleOptions(_conf)
     SparkContext.supplementJavaIPv6Options(_conf)
+    SparkContext.supplementBlasOptions(_conf)
 
     _driverLogger = DriverLogger(_conf)
 
@@ -3414,26 +3415,30 @@ object SparkContext extends Logging {
     }
   }
 
+  private def supplementJavaOpts(conf: SparkConf, key: String, javaOpts: String): Unit = {
+    conf.set(key, s"$javaOpts ${conf.get(key, "")}".trim())
+  }
+
   /**
    * SPARK-36796: This is a helper function to supplement some JVM runtime options to
   * `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions`.
   */
  private def supplementJavaModuleOptions(conf: SparkConf): Unit = {
-    def supplement(key: String): Unit = {
-      val v = s"${JavaModuleOptions.defaultModuleOptions()} ${conf.get(key, "")}".trim()
-      conf.set(key, v)
-    }
-    supplement(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS)
-    supplement(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS)
+    val opts = JavaModuleOptions.defaultModuleOptions()
+    supplementJavaOpts(conf, SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, opts)
+    supplementJavaOpts(conf, SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS, opts)
   }
 
   private def supplementJavaIPv6Options(conf: SparkConf): Unit = {
-    def supplement(key: String): Unit = {
-      val v = s"-Djava.net.preferIPv6Addresses=${Utils.preferIPv6} ${conf.get(key, "")}".trim()
-      conf.set(key, v)
-    }
-    supplement(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS)
-    supplement(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS)
+    val opts = s"-Djava.net.preferIPv6Addresses=${Utils.preferIPv6}"
+    supplementJavaOpts(conf, SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, opts)
+    supplementJavaOpts(conf, SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS, opts)
+  }
+
+  private def supplementBlasOptions(conf: SparkConf): Unit = {
+    val opts = s"-Dnetlib.allowNativeBlas=${Utils.allowNativeBlas}"
+    supplementJavaOpts(conf, SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, opts)
+    supplementJavaOpts(conf, SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS, opts)
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index d0f4806c49482..21ea53f7a721b 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -2912,4 +2912,12 @@ package object config {
       .checkValue(v => v.forall(Set("stdout", "stderr").contains),
         "The value only can be one or more of 'stdout, stderr'.")
       .createWithDefault(Seq("stdout", "stderr"))
+
+  private[spark] val SPARK_ML_ALLOW_NATIVE_BLAS =
+    ConfigBuilder("spark.ml.allowNativeBlas")
+      .doc("Whether to allow using native BLAS/LAPACK/ARPACK implementations when native " +
+        "libraries are available. If disabled, the Java implementations are always used.")
+      .version("4.1.0")
+      .booleanConf
+      .createWithDefault(true)
 }
diff --git a/docs/ml-linalg-guide.md b/docs/ml-linalg-guide.md
index 6e91d81f49760..aa1471f0df995 100644
--- a/docs/ml-linalg-guide.md
+++ b/docs/ml-linalg-guide.md
@@ -46,8 +46,7 @@ The installation should be done on all nodes of the cluster. Generic version of
 For Debian / Ubuntu:
 ```
-sudo apt-get install libopenblas-base
-sudo update-alternatives --config libblas.so.3
+sudo apt-get install libopenblas-dev
 ```
 For CentOS / RHEL:
 ```
@@ -76,6 +75,8 @@ You can also point `dev.ludovic.netlib` to specific libraries names and paths. F
 
 If native libraries are not properly configured in the system, the Java implementation (javaBLAS) will be used as fallback option.
 
+You can also set the Spark configuration `spark.ml.allowNativeBlas` or the Java system property `netlib.allowNativeBlas` to `false` to disable native BLAS and always use the Java implementation.
+
 ## Spark Configuration
 
 The default behavior of multi-threading in either Intel MKL or OpenBLAS may not be optimal with Spark's execution model [^1].
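As a quick, hypothetical usage sketch (not part of the patch itself): an application can opt out of native BLAS through the new `spark.ml.allowNativeBlas` key documented above. Apart from that key, everything below is ordinary Spark API.

```scala
import org.apache.spark.sql.SparkSession

// Hypothetical example: force the pure-Java BLAS path by disabling
// native BLAS via the configuration key introduced in this patch.
object JavaBlasOnlyApp {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("java-blas-only")
      .config("spark.ml.allowNativeBlas", "false")
      .getOrCreate()

    // ... run MLlib workloads here; BLAS calls stay on the Java implementation ...

    spark.stop()
  }
}
```

The same key can also be passed on the command line as `--conf spark.ml.allowNativeBlas=false`; the launcher change below turns it into `-Dnetlib.allowNativeBlas=false` for the submitted JVM.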
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
index 7b9f90ac7b7a6..477eb470c0577 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
@@ -352,6 +352,11 @@ private List<String> buildSparkSubmitCommand(Map<String, String> env)
         config.get(SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH));
     }
 
+    if (config.containsKey("spark.ml.allowNativeBlas")) {
+      String allowNativeBlas = config.get("spark.ml.allowNativeBlas");
+      addOptionString(cmd, "-Dnetlib.allowNativeBlas=" + allowNativeBlas);
+    }
+
     // SPARK-36796: Always add some JVM runtime default options to submit command
     addOptionString(cmd, JavaModuleOptions.defaultModuleOptions());
     addOptionString(cmd, "-Dderby.connection.requireAuthentication=false");
diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml
index d0d310b9371df..a52f5ae444074 100644
--- a/mllib-local/pom.xml
+++ b/mllib-local/pom.xml
@@ -66,6 +66,11 @@
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-tags_${scala.binary.version}</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-common-utils_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
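The `mllib-local` BLAS.scala hunk itself is not included in this excerpt (it only appears in the diffstat above), so the following is only a rough sketch of the selection logic the new `netlib.allowNativeBlas` property implies. It assumes the `dev.ludovic.netlib` API (`JavaBLAS.getInstance()` / `NativeBLAS.getInstance()`) that Spark already depends on; the real change may differ in detail.

```scala
import dev.ludovic.netlib.blas.{BLAS => NetlibBLAS, JavaBLAS, NativeBLAS}

// Illustrative sketch only: how the "netlib.allowNativeBlas" JVM property
// (injected into driver/executor options by the changes above) could gate
// the choice between the native and the pure-Java BLAS implementations.
object BlasSelectionSketch {

  // Mirrors the new SparkEnvUtils flag: native BLAS is allowed unless the
  // property is explicitly set to a value other than "true".
  private val allowNativeBlas: Boolean =
    "true".equals(System.getProperty("netlib.allowNativeBlas", "true"))

  // Pure-Java implementation, always available.
  private lazy val javaBLAS: NetlibBLAS = JavaBLAS.getInstance()

  // Use the native implementation only when it is allowed and loadable;
  // otherwise fall back to the Java implementation.
  lazy val instance: NetlibBLAS =
    if (!allowNativeBlas) {
      javaBLAS
    } else {
      try {
        NativeBLAS.getInstance()
      } catch {
        case _: Throwable => javaBLAS
      }
    }
}
```

Because `supplementBlasOptions` and the launcher change both inject `-Dnetlib.allowNativeBlas` into the JVM options, the property is in place before any BLAS class is initialized on the driver or the executors.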