From e74450b82ef0790d88cd871df4299a697ee0635b Mon Sep 17 00:00:00 2001 From: Sandy Ryza Date: Fri, 14 Mar 2014 16:11:03 -0700 Subject: [PATCH 1/4] SPARK-1252. On YARN, use container-log4j.properties for executors --- .../org/apache/spark/deploy/yarn/ClientBase.scala | 2 ++ .../spark/deploy/yarn/ExecutorRunnableUtil.scala | 1 + .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 14 ++++++++++++++ 3 files changed, 17 insertions(+) diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index bc267900fcf1d..93767259c4ee4 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -344,6 +344,8 @@ trait ClientBase extends Logging { JAVA_OPTS += " " + env("SPARK_JAVA_OPTS") } + JAVA_OPTS += " " + YarnSparkHadoopUtil.getLoggingArgsForContainerCommandLine() + // Command for the ApplicationMaster val commands = List[String]( Environment.JAVA_HOME.$() + "/bin/java" + diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala index 2079697d8160e..60dba42d12877 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala @@ -62,6 +62,7 @@ trait ExecutorRunnableUtil extends Logging { JAVA_OPTS += " -Djava.io.tmpdir=" + new Path(Environment.PWD.$(), YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR) + " " + JAVA_OPTS += YarnSparkHadoopUtil.getLoggingArgsForContainerCommandLine() + " " // Commenting it out for now - so that people can refer to the properties if required. Remove // it once cpuset version is pushed out. diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index 4c6e1dcd6dac3..2cd5040b374c8 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -22,6 +22,7 @@ import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.security.Credentials import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.yarn.conf.YarnConfiguration +import org.apache.hadoop.yarn.api.ApplicationConstants import org.apache.hadoop.conf.Configuration import org.apache.spark.deploy.SparkHadoopUtil @@ -67,3 +68,16 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil { } } + +object YarnSparkHadoopUtil { + def getLoggingArgsForContainerCommandLine(): String = { + "-Dlog4j.configuration=container-log4j.properties " + + "-Dhadoop.root.logger=INFO,CLA " + + // for yarn/stable: + "-Dyarn.app.container.log.filesize=0 " + + "-Dyarn.app.container.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + " " + + // for yarn/alpha: + "-Dyarn.app.mapreduce.container.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + " " + + "-Dyarn.app.mapreduce.container.log.filesize=0" + } +} From 10934b88d998ba7c7565c5b7f49fb3f74c8fd6e5 Mon Sep 17 00:00:00 2001 From: Sandy Ryza Date: Wed, 26 Mar 2014 16:05:12 -0700 Subject: [PATCH 2/4] Add log4j-spark-container.properties and support SPARK_LOG4J_CONF --- .../main/resources/log4j-spark-container.properties | 11 +++++++++++ .../apache/spark/deploy/yarn/ExecutorRunnable.scala | 3 ++- .../org/apache/spark/deploy/yarn/ClientBase.scala | 8 +++++--- .../spark/deploy/yarn/ExecutorRunnableUtil.scala | 8 ++++++-- .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 9 +-------- .../main/resources/log4j-spark-container.properties | 11 +++++++++++ .../apache/spark/deploy/yarn/ExecutorRunnable.scala | 3 ++- 7 files changed, 38 insertions(+), 15 deletions(-) create mode 100644 yarn/alpha/src/main/resources/log4j-spark-container.properties create mode 100644 yarn/stable/src/main/resources/log4j-spark-container.properties diff --git a/yarn/alpha/src/main/resources/log4j-spark-container.properties b/yarn/alpha/src/main/resources/log4j-spark-container.properties new file mode 100644 index 0000000000000..f7f853559468a --- /dev/null +++ b/yarn/alpha/src/main/resources/log4j-spark-container.properties @@ -0,0 +1,11 @@ +# Set everything to be logged to the console +log4j.rootCategory=INFO, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Settings to quiet third party logs that are too verbose +log4j.logger.org.eclipse.jetty=WARN +log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO +log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala index 981e8b05f602d..3469b7decedf6 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala @@ -81,7 +81,8 @@ class ExecutorRunnable( credentials.writeTokenStorageToStream(dob) ctx.setContainerTokens(ByteBuffer.wrap(dob.getData())) - val commands = prepareCommand(masterAddress, slaveId, hostname, executorMemory, executorCores) + val commands = prepareCommand(masterAddress, slaveId, hostname, executorMemory, executorCores, + localResources.contains(ClientBase.LOG4J_PROP)) logInfo("Setting up executor with commands: " + commands) ctx.setCommands(commands) diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 93767259c4ee4..eb42922aea228 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -266,11 +266,11 @@ trait ClientBase extends Logging { localResources: HashMap[String, LocalResource], stagingDir: String): HashMap[String, String] = { logInfo("Setting up the launch environment") - val log4jConfLocalRes = localResources.getOrElse(ClientBase.LOG4J_PROP, null) val env = new HashMap[String, String]() - ClientBase.populateClasspath(yarnConf, sparkConf, log4jConfLocalRes != null, env) + ClientBase.populateClasspath(yarnConf, sparkConf, localResources.contains(ClientBase.LOG4J_PROP), + env) env("SPARK_YARN_MODE") = "true" env("SPARK_YARN_STAGING_DIR") = stagingDir env("SPARK_USER") = UserGroupInformation.getCurrentUser().getShortUserName() @@ -344,7 +344,9 @@ trait ClientBase extends Logging { JAVA_OPTS += " " + env("SPARK_JAVA_OPTS") } - JAVA_OPTS += " " + YarnSparkHadoopUtil.getLoggingArgsForContainerCommandLine() + if (!localResources.contains(ClientBase.LOG4J_PROP)) { + JAVA_OPTS += " " + YarnSparkHadoopUtil.getLoggingArgsForContainerCommandLine() + } // Command for the ApplicationMaster val commands = List[String]( diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala index 60dba42d12877..b3696c5fe7183 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala @@ -50,7 +50,8 @@ trait ExecutorRunnableUtil extends Logging { slaveId: String, hostname: String, executorMemory: Int, - executorCores: Int) = { + executorCores: Int, + userSpecifiedLogFile: Boolean) = { // Extra options for the JVM var JAVA_OPTS = "" // Set the JVM memory @@ -62,7 +63,10 @@ trait ExecutorRunnableUtil extends Logging { JAVA_OPTS += " -Djava.io.tmpdir=" + new Path(Environment.PWD.$(), YarnConfiguration.DEFAULT_CONTAINER_TEMP_DIR) + " " - JAVA_OPTS += YarnSparkHadoopUtil.getLoggingArgsForContainerCommandLine() + " " + + if (!userSpecifiedLogFile) { + JAVA_OPTS += " " + YarnSparkHadoopUtil.getLoggingArgsForContainerCommandLine() + } // Commenting it out for now - so that people can refer to the properties if required. Remove // it once cpuset version is pushed out. diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index 2cd5040b374c8..314a7550ada71 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -71,13 +71,6 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil { object YarnSparkHadoopUtil { def getLoggingArgsForContainerCommandLine(): String = { - "-Dlog4j.configuration=container-log4j.properties " + - "-Dhadoop.root.logger=INFO,CLA " + - // for yarn/stable: - "-Dyarn.app.container.log.filesize=0 " + - "-Dyarn.app.container.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + " " + - // for yarn/alpha: - "-Dyarn.app.mapreduce.container.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + " " + - "-Dyarn.app.mapreduce.container.log.filesize=0" + "-Dlog4j.configuration=log4j-spark-container.properties" } } diff --git a/yarn/stable/src/main/resources/log4j-spark-container.properties b/yarn/stable/src/main/resources/log4j-spark-container.properties new file mode 100644 index 0000000000000..f7f853559468a --- /dev/null +++ b/yarn/stable/src/main/resources/log4j-spark-container.properties @@ -0,0 +1,11 @@ +# Set everything to be logged to the console +log4j.rootCategory=INFO, console +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + +# Settings to quiet third party logs that are too verbose +log4j.logger.org.eclipse.jetty=WARN +log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO +log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala index 53c403f7d0913..81d9d1b5c9280 100644 --- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala @@ -78,7 +78,8 @@ class ExecutorRunnable( credentials.writeTokenStorageToStream(dob) ctx.setTokens(ByteBuffer.wrap(dob.getData())) - val commands = prepareCommand(masterAddress, slaveId, hostname, executorMemory, executorCores) + val commands = prepareCommand(masterAddress, slaveId, hostname, executorMemory, executorCores, + localResources.contains(ClientBase.LOG4J_PROP)) logInfo("Setting up executor with commands: " + commands) ctx.setCommands(commands) From 55823daf68a25eeb6667cd7e8df1f81cfb6cbbbd Mon Sep 17 00:00:00 2001 From: Sandy Ryza Date: Wed, 26 Mar 2014 22:10:57 -0700 Subject: [PATCH 3/4] Add license headers to new files --- .../main/resources/log4j-spark-container.properties | 13 +++++++++++++ .../main/resources/log4j-spark-container.properties | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/yarn/alpha/src/main/resources/log4j-spark-container.properties b/yarn/alpha/src/main/resources/log4j-spark-container.properties index f7f853559468a..a1e37a0be27dd 100644 --- a/yarn/alpha/src/main/resources/log4j-spark-container.properties +++ b/yarn/alpha/src/main/resources/log4j-spark-container.properties @@ -1,3 +1,16 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. + # Set everything to be logged to the console log4j.rootCategory=INFO, console log4j.appender.console=org.apache.log4j.ConsoleAppender diff --git a/yarn/stable/src/main/resources/log4j-spark-container.properties b/yarn/stable/src/main/resources/log4j-spark-container.properties index f7f853559468a..a1e37a0be27dd 100644 --- a/yarn/stable/src/main/resources/log4j-spark-container.properties +++ b/yarn/stable/src/main/resources/log4j-spark-container.properties @@ -1,3 +1,16 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. See accompanying LICENSE file. + # Set everything to be logged to the console log4j.rootCategory=INFO, console log4j.appender.console=org.apache.log4j.ConsoleAppender From c0043b877705094c17104d5d232744a92d809dc6 Mon Sep 17 00:00:00 2001 From: Sandy Ryza Date: Fri, 4 Apr 2014 12:42:39 -0700 Subject: [PATCH 4/4] Put log4j.properties file under common --- .../log4j-spark-container.properties | 0 yarn/pom.xml | 6 +++++ .../log4j-spark-container.properties | 24 ------------------- 3 files changed, 6 insertions(+), 24 deletions(-) rename yarn/{alpha => common}/src/main/resources/log4j-spark-container.properties (100%) delete mode 100644 yarn/stable/src/main/resources/log4j-spark-container.properties diff --git a/yarn/alpha/src/main/resources/log4j-spark-container.properties b/yarn/common/src/main/resources/log4j-spark-container.properties similarity index 100% rename from yarn/alpha/src/main/resources/log4j-spark-container.properties rename to yarn/common/src/main/resources/log4j-spark-container.properties diff --git a/yarn/pom.xml b/yarn/pom.xml index 35e31760c1f02..3342cb65edcd1 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -167,6 +167,12 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes + + + + ../common/src/main/resources + + diff --git a/yarn/stable/src/main/resources/log4j-spark-container.properties b/yarn/stable/src/main/resources/log4j-spark-container.properties deleted file mode 100644 index a1e37a0be27dd..0000000000000 --- a/yarn/stable/src/main/resources/log4j-spark-container.properties +++ /dev/null @@ -1,24 +0,0 @@ -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. See accompanying LICENSE file. - -# Set everything to be logged to the console -log4j.rootCategory=INFO, console -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.target=System.err -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n - -# Settings to quiet third party logs that are too verbose -log4j.logger.org.eclipse.jetty=WARN -log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO -log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO