
Commit 108d3d7

Author: Ram Sriharsha
Commit message: merge with master
Parents: 4f74126 + e35cd36

File tree: 172 files changed (+2870, -1177 lines)


.rat-excludes

Lines changed: 3 additions & 0 deletions
@@ -80,5 +80,8 @@ local-1425081759269/*
 local-1426533911241/*
 local-1426633911242/*
 local-1430917381534/*
+local-1430917381535_1
+local-1430917381535_2
 DESCRIPTION
 NAMESPACE
+test_support/*

R/create-docs.sh

Lines changed: 3 additions & 0 deletions
@@ -23,6 +23,9 @@
 # After running this script the html docs can be found in
 # $SPARK_HOME/R/pkg/html

+set -o pipefail
+set -e
+
 # Figure out where the script is
 export FWDIR="$(cd "`dirname "$0"`"; pwd)"
 pushd $FWDIR

R/install-dev.sh

Lines changed: 2 additions & 0 deletions
@@ -26,6 +26,8 @@
 # NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory
 # to load the SparkR package on the worker nodes.

+set -o pipefail
+set -e

 FWDIR="$(cd `dirname $0`; pwd)"
 LIB_DIR="$FWDIR/lib"

R/pkg/inst/profile/shell.R

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@
 old <- getOption("defaultPackages")
 options(defaultPackages = c(old, "SparkR"))

-sc <- SparkR::sparkR.init(Sys.getenv("MASTER", unset = ""))
+sc <- SparkR::sparkR.init()
 assign("sc", sc, envir=.GlobalEnv)
 sqlContext <- SparkR::sparkRSQL.init(sc)
 assign("sqlContext", sqlContext, envir=.GlobalEnv)

assembly/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-SNAPSHOT</version>
+    <version>1.5.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

bagel/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-SNAPSHOT</version>
+    <version>1.5.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

core/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-SNAPSHOT</version>
+    <version>1.5.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala

Lines changed: 7 additions & 3 deletions
@@ -266,10 +266,14 @@ private[spark] class ExecutorAllocationManager(
       // executors and inform the cluster manager to cancel the extra pending requests
       val oldNumExecutorsTarget = numExecutorsTarget
       numExecutorsTarget = math.max(maxNeeded, minNumExecutors)
-      client.requestTotalExecutors(numExecutorsTarget)
       numExecutorsToAdd = 1
-      logInfo(s"Lowering target number of executors to $numExecutorsTarget because " +
-        s"not all requests are actually needed (previously $oldNumExecutorsTarget)")
+
+      // If the new target has not changed, avoid sending a message to the cluster manager
+      if (numExecutorsTarget < oldNumExecutorsTarget) {
+        client.requestTotalExecutors(numExecutorsTarget)
+        logInfo(s"Lowering target number of executors to $numExecutorsTarget (previously " +
+          s"$oldNumExecutorsTarget) because not all requested executors are actually needed")
+      }
       numExecutorsTarget - oldNumExecutorsTarget
     } else if (addTime != NOT_SET && now >= addTime) {
       val delta = addExecutors(maxNeeded)
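
A minimal standalone sketch of the behaviour this hunk introduces (not code from the commit): the executor target is still recomputed every cycle, but the cluster manager is only contacted when the target actually drops. The Client trait and the initial target below are hypothetical stand-ins for ExecutorAllocationClient and the manager's real state.

// Hypothetical sketch: only notify the cluster manager when the recomputed
// executor target is strictly lower than the previous one.
trait Client {
  def requestTotalExecutors(numExecutors: Int): Boolean
}

class TargetSync(client: Client, minNumExecutors: Int) {
  private var numExecutorsTarget = 8 // assumed starting target

  /** Recomputes the target and returns the delta, mirroring the diff's return value. */
  def sync(maxNeeded: Int): Int = {
    val oldNumExecutorsTarget = numExecutorsTarget
    numExecutorsTarget = math.max(maxNeeded, minNumExecutors)
    if (numExecutorsTarget < oldNumExecutorsTarget) {
      // Target went down: cancel the extra pending requests.
      client.requestTotalExecutors(numExecutorsTarget)
    }
    // If the target did not go down, no message is sent here.
    numExecutorsTarget - oldNumExecutorsTarget
  }
}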

core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala

Lines changed: 11 additions & 0 deletions
@@ -17,6 +17,9 @@

 package org.apache.spark.deploy.history

+import java.util.zip.ZipOutputStream
+
+import org.apache.spark.SparkException
 import org.apache.spark.ui.SparkUI

 private[spark] case class ApplicationAttemptInfo(
@@ -62,4 +65,12 @@ private[history] abstract class ApplicationHistoryProvider {
    */
   def getConfig(): Map[String, String] = Map()

+  /**
+   * Writes out the event logs to the output stream provided. The logs will be compressed into a
+   * single zip file and written out.
+   * @throws SparkException if the logs for the app id cannot be found.
+   */
+  @throws(classOf[SparkException])
+  def writeEventLogs(appId: String, attemptId: Option[String], zipStream: ZipOutputStream): Unit
+
 }
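
To make the new contract concrete, here is a rough caller-side sketch (not part of this commit). Because the class is declared private[history], such code would have to live inside the org.apache.spark.deploy.history package; the provider instance and application id are hypothetical.

package org.apache.spark.deploy.history

import java.io.FileOutputStream
import java.util.zip.ZipOutputStream

import org.apache.spark.SparkException

object EventLogDownloadSketch {
  // Zip every attempt of a (hypothetical) application into a local file.
  def saveLogs(provider: ApplicationHistoryProvider, appId: String): Unit = {
    val zipStream = new ZipOutputStream(new FileOutputStream(s"$appId-eventLogs.zip"))
    try {
      // Passing None for attemptId asks for all attempts, per the scaladoc added above.
      provider.writeEventLogs(appId, None, zipStream)
    } catch {
      // Thrown when the provider does not know about appId.
      case e: SparkException => Console.err.println(e.getMessage)
    } finally {
      zipStream.close()
    }
  }
}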

core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala

Lines changed: 59 additions & 4 deletions
@@ -17,16 +17,18 @@

 package org.apache.spark.deploy.history

-import java.io.{BufferedInputStream, FileNotFoundException, IOException, InputStream}
+import java.io.{BufferedInputStream, FileNotFoundException, InputStream, IOException, OutputStream}
 import java.util.concurrent.{ExecutorService, Executors, TimeUnit}
+import java.util.zip.{ZipEntry, ZipOutputStream}

 import scala.collection.mutable

+import com.google.common.io.ByteStreams
 import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder}
-import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
 import org.apache.hadoop.fs.permission.AccessControlException

-import org.apache.spark.{Logging, SecurityManager, SparkConf}
+import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.io.CompressionCodec
 import org.apache.spark.scheduler._
@@ -59,7 +61,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     .map { d => Utils.resolveURI(d).toString }
     .getOrElse(DEFAULT_LOG_DIR)

-  private val fs = Utils.getHadoopFileSystem(logDir, SparkHadoopUtil.get.newConfiguration(conf))
+  private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
+  private val fs = Utils.getHadoopFileSystem(logDir, hadoopConf)

   // Used by check event thread and clean log thread.
   // Scheduled thread pool size must be one, otherwise it will have concurrent issues about fs
@@ -219,6 +222,58 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     }
   }

+  override def writeEventLogs(
+      appId: String,
+      attemptId: Option[String],
+      zipStream: ZipOutputStream): Unit = {
+
+    /**
+     * This method compresses the files passed in, and writes the compressed data out into the
+     * [[OutputStream]] passed in. Each file is written as a new [[ZipEntry]] with its name being
+     * the name of the file being compressed.
+     */
+    def zipFileToStream(file: Path, entryName: String, outputStream: ZipOutputStream): Unit = {
+      val fs = FileSystem.get(hadoopConf)
+      val inputStream = fs.open(file, 1 * 1024 * 1024) // 1MB Buffer
+      try {
+        outputStream.putNextEntry(new ZipEntry(entryName))
+        ByteStreams.copy(inputStream, outputStream)
+        outputStream.closeEntry()
+      } finally {
+        inputStream.close()
+      }
+    }
+
+    applications.get(appId) match {
+      case Some(appInfo) =>
+        try {
+          // If no attempt is specified, or there is no attemptId for attempts, return all attempts
+          appInfo.attempts.filter { attempt =>
+            attempt.attemptId.isEmpty || attemptId.isEmpty || attempt.attemptId.get == attemptId.get
+          }.foreach { attempt =>
+            val logPath = new Path(logDir, attempt.logPath)
+            // If this is a legacy directory, then add the directory to the zipStream and add
+            // each file to that directory.
+            if (isLegacyLogDirectory(fs.getFileStatus(logPath))) {
+              val files = fs.listStatus(logPath)
+              zipStream.putNextEntry(new ZipEntry(attempt.logPath + "/"))
+              zipStream.closeEntry()
+              files.foreach { file =>
+                val path = file.getPath
+                zipFileToStream(path, attempt.logPath + Path.SEPARATOR + path.getName, zipStream)
+              }
+            } else {
+              zipFileToStream(new Path(logDir, attempt.logPath), attempt.logPath, zipStream)
+            }
+          }
+        } finally {
+          zipStream.close()
+        }
+      case None => throw new SparkException(s"Logs for $appId not found.")
+    }
+  }
+
+
   /**
    * Replay the log files in the list and merge the list of old applications with new ones
    */
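
For readers unfamiliar with the ZipOutputStream idiom that zipFileToStream relies on, here is a minimal standalone sketch of the same pattern over local files (not part of the commit). Guava's ByteStreams is assumed to be on the classpath, as it is in Spark, and the file names are placeholders.

import java.io.{FileInputStream, FileOutputStream}
import java.util.zip.{ZipEntry, ZipOutputStream}

import com.google.common.io.ByteStreams

object ZipSketch {
  // Writes each named file into the archive as its own ZipEntry, streaming the
  // bytes instead of buffering whole files in memory.
  def zipLocalFiles(fileNames: Seq[String], zipPath: String): Unit = {
    val zipStream = new ZipOutputStream(new FileOutputStream(zipPath))
    try {
      fileNames.foreach { name =>
        val in = new FileInputStream(name)
        try {
          zipStream.putNextEntry(new ZipEntry(name))
          ByteStreams.copy(in, zipStream)
          zipStream.closeEntry()
        } finally {
          in.close()
        }
      }
    } finally {
      zipStream.close()
    }
  }
}

Calling zipLocalFiles(Seq("eventLog_1", "eventLog_2"), "app.zip") with placeholder names would produce an archive with one entry per input file, which is essentially the layout the provider builds for non-legacy applications.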
