Changes from all commits (30 commits)
8ad83b8
[SPARK-23462][SQL] improve missing field error message in `StructType`
xysun Mar 12, 2018
041a416
[SPARK-23618][K8S][BUILD] Initialize BUILD_ARGS in docker-image-tool.sh
Mar 12, 2018
31814c7
[SPARK-23412][ML] Add cosine distance to BisectingKMeans
mgaido91 Mar 12, 2018
1c05d46
[SPARK-23656][TEST] Perform assertions in XXH64Suite.testKnownByteArr…
kiszk Mar 13, 2018
5348c55
[MINOR][SQL][TEST] Create table using `dataSourceName` in `HadoopFsRe…
jiangxb1987 Mar 13, 2018
0f78b51
[SPARK-23547][SQL] Cleanup the .pipeout file when the Hive Session cl…
Mar 13, 2018
8288d9d
[SPARK-23598][SQL] Make methods in BufferedRowIterator public to avoi…
kiszk Mar 13, 2018
26e5385
[SPARK-22915][MLLIB] Streaming tests for spark.ml.feature, from N to Z
attilapiros Mar 15, 2018
02fb832
[SPARK-23642][DOCS] AccumulatorV2 subclass isZero scaladoc fix
Mar 15, 2018
824ab5c
[SPARK-23533][SS] Add support for changing ContinuousDataReader's sta…
xuanyuanking Mar 15, 2018
a30bd0d
[SPARK-23695][PYTHON] Fix the error message for Kinesis streaming tests
HyukjinKwon Mar 15, 2018
89d4534
[HOT-FIX] Fix SparkOutOfMemoryError: Unable to acquire 262144 bytes o…
wangyum Mar 15, 2018
cbb79a6
[SPARK-23658][LAUNCHER] InProcessAppHandle uses the wrong class in ge…
Mar 16, 2018
e1343b8
[SPARK-23671][CORE] Fix condition to enable the SHS thread pool.
Mar 16, 2018
5dd8fde
[SPARK-23608][CORE][WEBUI] Add synchronization in SHS between attachS…
zhouyejoe Mar 16, 2018
52e4e7e
[SPARK-23670][SQL] Fix memory leak on SparkPlanGraphWrapper
myroslavlisniak Mar 16, 2018
5aa3850
[SPARK-23644][CORE][UI] Use absolute path for REST call in SHS
mgaido91 Mar 16, 2018
fa28e04
[SPARK-23635][YARN] AM env variable should not overwrite same name en…
jerryshao Mar 16, 2018
36166b8
[SPARK-23553][TESTS] Tests should not assume the default value of `sp…
dongjoon-hyun Mar 16, 2018
2295556
[SPARK-18371][STREAMING] Spark Streaming backpressure generates batch…
Mar 16, 2018
cf011c2
[SPARK-23581][SQL] Add interpreted unsafe projection
hvanhovell Mar 16, 2018
7e6a978
[SPARK-23680] Fix entrypoint.sh to properly support Arbitrary UIDs
Mar 16, 2018
43d5f0f
[SPARK-23623][SS] Avoid concurrent use of cached consumers in CachedK…
tdas Mar 16, 2018
269c789
[SPARK-15009][PYTHON][ML] Construct a CountVectorizerModel from a voc…
BryanCutler Mar 16, 2018
ac14bbc
[SPARK-23683][SQL] FileCommitProtocol.instantiate() hardening
steveloughran Mar 16, 2018
f10f59c
[SPARK-23706][PYTHON] spark.conf.get(value, default=None) should prod…
HyukjinKwon Mar 18, 2018
2bcbfd2
[SPARK-23645][PYTHON] Allow python udfs to be called with keyword arg…
mstewart141 Mar 11, 2018
a503af4
Incomplete / Show issue with partial fn in pandas_udf
mstewart141 Mar 18, 2018
e2a90d5
Add note RE no keyword args in python UDFs
mstewart141 Mar 18, 2018
b4f7056
Address comments
mstewart141 Mar 24, 2018
1 change: 1 addition & 0 deletions bin/docker-image-tool.sh
@@ -57,6 +57,7 @@ function build {
else
# Not passed as an argument to docker, but used to validate the Spark directory.
IMG_PATH="kubernetes/dockerfiles"
BUILD_ARGS=()
fi

if [ ! -d "$IMG_PATH" ]; then
@@ -113,7 +113,7 @@ $(document).ready(function() {
status: (requestedIncomplete ? "running" : "completed")
};

$.getJSON("api/v1/applications", appParams, function(response,status,jqXHR) {
$.getJSON(uiRoot + "/api/v1/applications", appParams, function(response,status,jqXHR) {
var array = [];
var hasMultipleAttempts = false;
for (i in response) {
@@ -151,7 +151,7 @@ $(document).ready(function() {
"showCompletedColumns": !requestedIncomplete,
}

$.get("static/historypage-template.html", function(template) {
$.get(uiRoot + "/static/historypage-template.html", function(template) {
var sibling = historySummary.prev();
historySummary.detach();
var apps = $(Mustache.render($(template).filter("#history-summary-template").html(),data));
@@ -173,7 +173,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
* Fixed size thread pool to fetch and parse log files.
*/
private val replayExecutor: ExecutorService = {
if (Utils.isTesting) {
if (!Utils.isTesting) {
ThreadUtils.newDaemonFixedThreadPool(NUM_PROCESSING_THREADS, "log-replay-executor")
} else {
MoreExecutors.sameThreadExecutor()
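
The fix above restores the intended branch: the fixed-size replay pool is the non-test path. A minimal sketch of that selection in isolation, assuming a local isTesting flag standing in for Utils.isTesting and a plain JDK single-thread executor in place of Guava's MoreExecutors.sameThreadExecutor():

import java.util.concurrent.{ExecutorService, Executors}

object ReplayExecutorSketch {
  // Production gets a parallel pool for parsing event logs; tests get a
  // single-thread executor so behaviour stays deterministic.
  def select(isTesting: Boolean, numThreads: Int): ExecutorService =
    if (!isTesting) {
      Executors.newFixedThreadPool(numThreads)
    } else {
      Executors.newSingleThreadExecutor()
    }
}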
@@ -150,14 +150,18 @@ class HistoryServer(
ui: SparkUI,
completed: Boolean) {
assert(serverInfo.isDefined, "HistoryServer must be bound before attaching SparkUIs")
ui.getHandlers.foreach(attachHandler)
addFilters(ui.getHandlers, conf)
handlers.synchronized {
ui.getHandlers.foreach(attachHandler)
addFilters(ui.getHandlers, conf)
}
}

/** Detach a reconstructed UI from this server. Only valid after bind(). */
override def detachSparkUI(appId: String, attemptId: Option[String], ui: SparkUI): Unit = {
assert(serverInfo.isDefined, "HistoryServer must be bound before detaching SparkUIs")
ui.getHandlers.foreach(detachHandler)
handlers.synchronized {
ui.getHandlers.foreach(detachHandler)
}
provider.onUIDetached(appId, attemptId, ui)
}
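
Both attach and detach now mutate the shared handler collection inside handlers.synchronized, closing the race between SparkUIs being attached and detached concurrently. A minimal sketch of the same locking pattern, using simplified stand-in types rather than the real Jetty/SHS handlers:

import scala.collection.mutable.ArrayBuffer

class HandlerRegistrySketch[Handler] {
  private val handlers = ArrayBuffer.empty[Handler]

  // All mutations take the same monitor, so attaching one UI's handlers
  // cannot interleave with detaching another's.
  def attachAll(hs: Seq[Handler]): Unit = handlers.synchronized {
    hs.foreach(handlers += _)
  }

  def detachAll(hs: Seq[Handler]): Unit = handlers.synchronized {
    handlers --= hs
  }
}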

@@ -20,6 +20,7 @@ package org.apache.spark.internal.io
import org.apache.hadoop.fs._
import org.apache.hadoop.mapreduce._

import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils


@@ -132,7 +133,7 @@ abstract class FileCommitProtocol {
}


object FileCommitProtocol {
object FileCommitProtocol extends Logging {
class TaskCommitMessage(val obj: Any) extends Serializable

object EmptyTaskCommitMessage extends TaskCommitMessage(null)
@@ -145,15 +146,23 @@ object FileCommitProtocol {
jobId: String,
outputPath: String,
dynamicPartitionOverwrite: Boolean = false): FileCommitProtocol = {

logDebug(s"Creating committer $className; job $jobId; output=$outputPath;" +
s" dynamic=$dynamicPartitionOverwrite")
val clazz = Utils.classForName(className).asInstanceOf[Class[FileCommitProtocol]]
// First try the constructor with arguments (jobId: String, outputPath: String,
// dynamicPartitionOverwrite: Boolean).
// If that doesn't exist, try the one with (jobId: string, outputPath: String).
try {
val ctor = clazz.getDeclaredConstructor(classOf[String], classOf[String], classOf[Boolean])
logDebug("Using (String, String, Boolean) constructor")
ctor.newInstance(jobId, outputPath, dynamicPartitionOverwrite.asInstanceOf[java.lang.Boolean])
} catch {
case _: NoSuchMethodException =>
logDebug("Falling back to (String, String) constructor")
require(!dynamicPartitionOverwrite,
"Dynamic Partition Overwrite is enabled but" +
s" the committer ${className} does not have the appropriate constructor")
val ctor = clazz.getDeclaredConstructor(classOf[String], classOf[String])
ctor.newInstance(jobId, outputPath)
}
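
The hardened instantiate() prefers the three-argument constructor and only falls back to the two-argument one when dynamic partition overwrite is not requested. A standalone sketch of the same reflection fallback, not the Spark method itself; className is assumed to name a class exposing one of the two constructor shapes:

object CommitterFactorySketch {
  def instantiate[T](
      className: String,
      jobId: String,
      outputPath: String,
      dynamicPartitionOverwrite: Boolean): T = {
    val clazz = Class.forName(className).asInstanceOf[Class[T]]
    try {
      // Preferred: (jobId, outputPath, dynamicPartitionOverwrite)
      val ctor = clazz.getDeclaredConstructor(
        classOf[String], classOf[String], classOf[Boolean])
      ctor.newInstance(jobId, outputPath,
        java.lang.Boolean.valueOf(dynamicPartitionOverwrite))
    } catch {
      case _: NoSuchMethodException =>
        // Fallback: (jobId, outputPath), only legal without dynamic overwrite.
        require(!dynamicPartitionOverwrite,
          s"$className lacks the constructor needed for dynamic partition overwrite")
        clazz.getDeclaredConstructor(classOf[String], classOf[String])
          .newInstance(jobId, outputPath)
    }
  }
}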
@@ -290,7 +290,8 @@ class LongAccumulator extends AccumulatorV2[jl.Long, jl.Long] {
private var _count = 0L

/**
* Adds v to the accumulator, i.e. increment sum by v and count by 1.
* Returns false if this accumulator has had any values added to it or the sum is non-zero.
*
* @since 2.0.0
*/
override def isZero: Boolean = _sum == 0L && _count == 0
@@ -368,6 +369,9 @@ class DoubleAccumulator extends AccumulatorV2[jl.Double, jl.Double] {
private var _sum = 0.0
private var _count = 0L

/**
* Returns false if this accumulator has had any values added to it or the sum is non-zero.
*/
override def isZero: Boolean = _sum == 0.0 && _count == 0

override def copy(): DoubleAccumulator = {
@@ -441,6 +445,9 @@ class DoubleAccumulator extends AccumulatorV2[jl.Double, jl.Double] {
class CollectionAccumulator[T] extends AccumulatorV2[T, java.util.List[T]] {
private val _list: java.util.List[T] = Collections.synchronizedList(new ArrayList[T]())

/**
* Returns false if this accumulator instance has any values in it.
*/
override def isZero: Boolean = _list.isEmpty

override def copyAndReset(): CollectionAccumulator[T] = new CollectionAccumulator
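
The corrected scaladoc matches how isZero actually behaves: it stays true only until any value is added, because both the sum and the element count must be in their initial state. A short usage sketch against the public LongAccumulator API:

import org.apache.spark.util.LongAccumulator

val acc = new LongAccumulator
assert(acc.isZero)    // nothing added yet: sum == 0 and count == 0
acc.add(0L)
assert(!acc.isZero)   // sum is still 0, but count is now 1
acc.reset()
assert(acc.isZero)    // reset returns the accumulator to its zero state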
@@ -0,0 +1,148 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.internal.io

import org.apache.spark.SparkFunSuite

/**
* Unit tests for instantiation of FileCommitProtocol implementations.
*/
class FileCommitProtocolInstantiationSuite extends SparkFunSuite {

test("Dynamic partitions require appropriate constructor") {

// you cannot instantiate a two-arg client with dynamic partitions
// enabled.
val ex = intercept[IllegalArgumentException] {
instantiateClassic(true)
}
// check the contents of the message and rethrow if unexpected.
// this preserves the stack trace of the unexpected
// exception.
if (!ex.toString.contains("Dynamic Partition Overwrite")) {
fail(s"Wrong text in caught exception $ex", ex)
}
}

test("Standard partitions work with classic constructor") {
instantiateClassic(false)
}

test("Three arg constructors have priority") {
assert(3 == instantiateNew(false).argCount,
"Wrong constructor argument count")
}

test("Three arg constructors have priority when dynamic") {
assert(3 == instantiateNew(true).argCount,
"Wrong constructor argument count")
}

test("The protocol must be of the correct class") {
intercept[ClassCastException] {
FileCommitProtocol.instantiate(
classOf[Other].getCanonicalName,
"job",
"path",
false)
}
}

test("If there is no matching constructor, class hierarchy is irrelevant") {
intercept[NoSuchMethodException] {
FileCommitProtocol.instantiate(
classOf[NoMatchingArgs].getCanonicalName,
"job",
"path",
false)
}
}

/**
* Create a classic two-arg protocol instance.
* @param dynamic dynamic partitioning mode
* @return the instance
*/
private def instantiateClassic(dynamic: Boolean): ClassicConstructorCommitProtocol = {
FileCommitProtocol.instantiate(
classOf[ClassicConstructorCommitProtocol].getCanonicalName,
"job",
"path",
dynamic).asInstanceOf[ClassicConstructorCommitProtocol]
}

/**
* Create a three-arg protocol instance.
* @param dynamic dynamic partitioning mode
* @return the instance
*/
private def instantiateNew(
dynamic: Boolean): FullConstructorCommitProtocol = {
FileCommitProtocol.instantiate(
classOf[FullConstructorCommitProtocol].getCanonicalName,
"job",
"path",
dynamic).asInstanceOf[FullConstructorCommitProtocol]
}

}

/**
* This protocol implementation does not have the new three-arg
* constructor.
*/
private class ClassicConstructorCommitProtocol(arg1: String, arg2: String)
extends HadoopMapReduceCommitProtocol(arg1, arg2) {
}

/**
* This protocol implementation does have the new three-arg constructor
* alongside the original, and a 4 arg one for completeness.
* The final value of the real constructor is the number of arguments
* used in the 2- and 3- constructor, for test assertions.
*/
private class FullConstructorCommitProtocol(
arg1: String,
arg2: String,
b: Boolean,
val argCount: Int)
extends HadoopMapReduceCommitProtocol(arg1, arg2, b) {

def this(arg1: String, arg2: String) = {
this(arg1, arg2, false, 2)
}

def this(arg1: String, arg2: String, b: Boolean) = {
this(arg1, arg2, false, 3)
}
}

/**
* This has the 2-arity constructor, but isn't the right class.
*/
private class Other(arg1: String, arg2: String) {

}

/**
* This has no matching arguments as well as being the wrong class.
*/
private class NoMatchingArgs() {

}

@@ -164,7 +164,16 @@ case class KafkaContinuousDataReaderFactory(
startOffset: Long,
kafkaParams: ju.Map[String, Object],
pollTimeoutMs: Long,
failOnDataLoss: Boolean) extends DataReaderFactory[UnsafeRow] {
failOnDataLoss: Boolean) extends ContinuousDataReaderFactory[UnsafeRow] {

override def createDataReaderWithOffset(offset: PartitionOffset): DataReader[UnsafeRow] = {
val kafkaOffset = offset.asInstanceOf[KafkaSourcePartitionOffset]
require(kafkaOffset.topicPartition == topicPartition,
s"Expected topicPartition: $topicPartition, but got: ${kafkaOffset.topicPartition}")
new KafkaContinuousDataReader(
topicPartition, kafkaOffset.partitionOffset, kafkaParams, pollTimeoutMs, failOnDataLoss)
}

override def createDataReader(): KafkaContinuousDataReader = {
new KafkaContinuousDataReader(
topicPartition, startOffset, kafkaParams, pollTimeoutMs, failOnDataLoss)
@@ -187,8 +196,7 @@ class KafkaContinuousDataReader(
kafkaParams: ju.Map[String, Object],
pollTimeoutMs: Long,
failOnDataLoss: Boolean) extends ContinuousDataReader[UnsafeRow] {
private val consumer =
CachedKafkaConsumer.createUncached(topicPartition.topic, topicPartition.partition, kafkaParams)
private val consumer = KafkaDataConsumer.acquire(topicPartition, kafkaParams, useCache = false)
private val converter = new KafkaRecordToUnsafeRowConverter

private var nextKafkaOffset = startOffset
@@ -236,6 +244,6 @@ class KafkaContinuousDataReader(
}

override def close(): Unit = {
consumer.close()
consumer.release()
}
}
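
KafkaContinuousDataReader now borrows its consumer through KafkaDataConsumer.acquire and hands it back with release() instead of closing it, letting the pool decide whether the underlying Kafka consumer survives. A minimal sketch of that borrow-and-release discipline with illustrative stand-in types (the real KafkaDataConsumer is internal to the connector):

trait PooledConsumer {
  def poll(timeoutMs: Long): Unit
  def release(): Unit   // return to the pool; the pool may reuse or close it
}

trait ConsumerPool {
  def acquire(useCache: Boolean): PooledConsumer
}

object ConsumerUseSketch {
  def readOnce(pool: ConsumerPool, pollTimeoutMs: Long): Unit = {
    val consumer = pool.acquire(useCache = false)  // uncached, like the continuous reader
    try {
      consumer.poll(pollTimeoutMs)
    } finally {
      consumer.release()  // never close() a pooled consumer directly
    }
  }
}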