From 5de7eb435fd21f8dc29f256c0ce72757a0640cf0 Mon Sep 17 00:00:00 2001 From: nemccarthy Date: Mon, 16 Mar 2015 17:43:30 +1100 Subject: [PATCH 1/2] [SPARK-6313] Add config option to disable file locks/fetchFile cache to support NFS mounts --- .../src/main/scala/org/apache/spark/util/Utils.scala | 3 ++- docs/configuration.md | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index d3dc1d09cb7b4..af8a24553a461 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -403,7 +403,8 @@ private[spark] object Utils extends Logging { useCache: Boolean) { val fileName = url.split("/").last val targetFile = new File(targetDir, fileName) - if (useCache) { + val fetchCacheEnabled = conf.getBoolean("spark.files.useFetchCache", defaultValue = true) + if (useCache && fetchCacheEnabled) { val cachedFileName = s"${url.hashCode}${timestamp}_cache" val lockFileName = s"${url.hashCode}${timestamp}_lock" val localDir = new File(getLocalDir(conf)) diff --git a/docs/configuration.md b/docs/configuration.md index 63fc99e7d3e29..39f7e292121bb 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -745,6 +745,18 @@ Apart from these, the following properties are also available, and may be useful the driver, in seconds. + + spark.files.useFetchCache + true + + If file fetching should use local caching. This improves performance when running multiple + executors on the one host and is enabled by default (see + SPARK-6313 for more details). + When set to true (default) caching is enabled. When set to false, caching optimizations are + switched off and no lock files are created, this allows fetchFiles store to reside on an NFS + mount. 
+ + spark.files.overwrite false From 2eaaf42af1db1f07b8c14ff1c443969969fd839c Mon Sep 17 00:00:00 2001 From: nemccarthy Date: Tue, 17 Mar 2015 10:02:36 +1100 Subject: [PATCH 2/2] [SPARK-6313] Update config wording doc for spark.files.useFetchCache --- docs/configuration.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 39f7e292121bb..7fe11475212b3 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -749,12 +749,12 @@ Apart from these, the following properties are also available, and may be useful spark.files.useFetchCache true - If file fetching should use local caching. This improves performance when running multiple - executors on the one host and is enabled by default (see + If set to true (default), file fetching will use a local cache that is shared by executors + that belong to the same application, which can improve task launching performance when + running many executors on the same host. If set to false, these caching optimizations will + be disabled and all executors will fetch their own copies of files. This optimization may be + disabled in order to use Spark local directories that reside on NFS filesystems (see SPARK-6313 for more details). - When set to true (default) caching is enabled. When set to false, caching optimizations are - switched off and no lock files are created, this allows fetchFiles store to reside on an NFS - mount.