Commit c64d27e

Merge branch 'master' into SPARK-31527
2 parents: c1964a8 + 3e83ccc

File tree: 55 files changed, +1790 -421 lines


core/src/main/scala/org/apache/spark/internal/config/package.scala

Lines changed: 1 addition & 1 deletion
@@ -550,7 +550,7 @@ package object config {
       "anytime a task is scheduled. See Delay Scheduling section of TaskSchedulerImpl's class " +
       "documentation for more details.")
     .internal()
-    .version("3.0.0")
+    .version("3.1.0")
     .booleanConf
     .createWithDefault(false)

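For orientation, the entry edited above follows Spark's internal ConfigBuilder chain; below is a minimal sketch of how such an entry is declared, with a made-up key and doc string (only the builder calls mirror the hunk; nothing here is the actual entry from package.scala):

    package org.apache.spark.internal.config

    // Hypothetical config entry, shown only to illustrate the builder chain the hunk
    // above touches; bumping .version() records the release the entry first ships in.
    private[spark] object ExampleConfigSketch {
      val EXAMPLE_FLAG =
        ConfigBuilder("spark.example.someFlag")   // assumed key, not from this commit
          .doc("Illustrative description of an internal boolean flag.")
          .internal()                             // hidden from user-facing docs
          .version("3.1.0")                       // first release containing the entry
          .booleanConf
          .createWithDefault(false)
    }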
core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala

Lines changed: 36 additions & 18 deletions
@@ -18,7 +18,7 @@
 package org.apache.spark.scheduler

 import java.nio.ByteBuffer
-import java.util.{Locale, Timer, TimerTask}
+import java.util.{Timer, TimerTask}
 import java.util.concurrent.{ConcurrentHashMap, TimeUnit}
 import java.util.concurrent.atomic.AtomicLong

@@ -58,6 +58,11 @@ import org.apache.spark.util.{AccumulatorV2, Clock, SystemClock, ThreadUtils, Ut
  *      scheduling
  *   * task-result-getter threads
  *
+ * CAUTION: Any non fatal exception thrown within Spark RPC framework can be swallowed.
+ * Thus, throwing exception in methods like resourceOffers, statusUpdate won't fail
+ * the application, but could lead to undefined behavior. Instead, we shall use method like
+ * TaskSetManger.abort() to abort a stage and then fail the application (SPARK-31485).
+ *
  * Delay Scheduling:
  *  Delay scheduling is an optimization that sacrifices job fairness for data locality in order to
  *  improve cluster and workload throughput. One useful definition of "delay" is how much time
@@ -356,7 +361,7 @@ private[spark] class TaskSchedulerImpl(
   *                value at index 'i' corresponds to shuffledOffers[i]
   * @param tasks tasks scheduled per offer, value at index 'i' corresponds to shuffledOffers[i]
   * @param addressesWithDescs tasks scheduler per host:port, used for barrier tasks
-  * @return tuple of (had delay schedule rejects?, option of min locality of launched task)
+  * @return tuple of (no delay schedule rejects?, option of min locality of launched task)
   */
  private def resourceOfferSingleTaskSet(
      taskSet: TaskSetManager,
@@ -402,9 +407,7 @@ private[spark] class TaskSchedulerImpl(
          // addresses are the same as that we allocated in taskResourceAssignments since it's
          // synchronized. We don't remove the exact addresses allocated because the current
          // approach produces the identical result with less time complexity.
-         availableResources(i).getOrElse(rName,
-           throw new SparkException(s"Try to acquire resource $rName that doesn't exist."))
-           .remove(0, rInfo.addresses.size)
+         availableResources(i)(rName).remove(0, rInfo.addresses.size)
        }
        // Only update hosts for a barrier task.
        if (taskSet.isBarrier) {
@@ -469,8 +472,9 @@ private[spark] class TaskSchedulerImpl(
      resourceProfileIds: Array[Int],
      availableCpus: Array[Int],
      availableResources: Array[Map[String, Buffer[String]]],
-     rpId: Int): Int = {
-   val resourceProfile = sc.resourceProfileManager.resourceProfileFromId(rpId)
+     taskSet: TaskSetManager): Int = {
+   val resourceProfile = sc.resourceProfileManager.resourceProfileFromId(
+     taskSet.taskSet.resourceProfileId)
    val offersForResourceProfile = resourceProfileIds.zipWithIndex.filter { case (id, _) =>
      (id == resourceProfile.id)
    }
@@ -485,9 +489,12 @@ private[spark] class TaskSchedulerImpl(
        numTasksPerExecCores
      } else {
        val taskLimit = resourceProfile.taskResources.get(limitingResource).map(_.amount)
-         .getOrElse(throw new SparkException("limitingResource returns from ResourceProfile" +
-           s" $resourceProfile doesn't actually contain that task resource!")
-         )
+         .getOrElse {
+           val errorMsg = "limitingResource returns from ResourceProfile " +
+             s"$resourceProfile doesn't actually contain that task resource!"
+           taskSet.abort(errorMsg)
+           throw new SparkException(errorMsg)
+         }
        // available addresses already takes into account if there are fractional
        // task resource requests
        val availAddrs = availableResources(index).get(limitingResource).map(_.size).getOrElse(0)
@@ -583,7 +590,7 @@ private[spark] class TaskSchedulerImpl(
      // value is -1
      val numBarrierSlotsAvailable = if (taskSet.isBarrier) {
        val slots = calculateAvailableSlots(resourceProfileIds, availableCpus, availableResources,
-         taskSet.taskSet.resourceProfileId)
+         taskSet)
        slots
      } else {
        -1
@@ -677,11 +684,18 @@ private[spark] class TaskSchedulerImpl(
        // Check whether the barrier tasks are partially launched.
        // TODO SPARK-24818 handle the assert failure case (that can happen when some locality
        // requirements are not fulfilled, and we should revert the launched tasks).
-       require(addressesWithDescs.size == taskSet.numTasks,
-         s"Skip current round of resource offers for barrier stage ${taskSet.stageId} " +
-           s"because only ${addressesWithDescs.size} out of a total number of " +
-           s"${taskSet.numTasks} tasks got resource offers. The resource offers may have " +
-           "been blacklisted or cannot fulfill task locality requirements.")
+       if (addressesWithDescs.size != taskSet.numTasks) {
+         val errorMsg =
+           s"Fail resource offers for barrier stage ${taskSet.stageId} because only " +
+             s"${addressesWithDescs.size} out of a total number of ${taskSet.numTasks}" +
+             s" tasks got resource offers. This happens because barrier execution currently " +
+             s"does not work gracefully with delay scheduling. We highly recommend you to " +
+             s"disable delay scheduling by setting spark.locality.wait=0 as a workaround if " +
+             s"you see this error frequently."
+         logWarning(errorMsg)
+         taskSet.abort(errorMsg)
+         throw new SparkException(errorMsg)
+       }

        // materialize the barrier coordinator.
        maybeInitBarrierCoordinator()
@@ -743,8 +757,12 @@ private[spark] class TaskSchedulerImpl(
        if (state == TaskState.LOST) {
          // TaskState.LOST is only used by the deprecated Mesos fine-grained scheduling mode,
          // where each executor corresponds to a single task, so mark the executor as failed.
-         val execId = taskIdToExecutorId.getOrElse(tid, throw new IllegalStateException(
-           "taskIdToTaskSetManager.contains(tid) <=> taskIdToExecutorId.contains(tid)"))
+         val execId = taskIdToExecutorId.getOrElse(tid, {
+           val errorMsg =
+             "taskIdToTaskSetManager.contains(tid) <=> taskIdToExecutorId.contains(tid)"
+           taskSet.abort(errorMsg)
+           throw new SparkException(errorMsg)
+         })
          if (executorIdToRunningTaskIds.contains(execId)) {
            reason = Some(
              SlaveLost(s"Task $tid was lost, so marking the executor as lost as well."))

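The common thread in the hunks above is replacing bare `throw`/`require` calls with `TaskSetManager.abort()` followed by a throw, because an exception raised inside the RPC framework can be silently swallowed. A minimal sketch of that pattern, using a hypothetical helper name (not part of the commit) and assuming it lives in the scheduler package:

    package org.apache.spark.scheduler

    import org.apache.spark.SparkException

    // Hypothetical helper illustrating the abort-then-throw pattern: aborting the task
    // set fails the stage (and hence the job) even if the exception thrown afterwards is
    // swallowed by the RPC layer; the throw still signals the immediate caller.
    private[spark] object AbortThenThrowSketch {
      def abortAndThrow(taskSet: TaskSetManager, errorMsg: String): Nothing = {
        taskSet.abort(errorMsg)
        throw new SparkException(errorMsg)
      }
    }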
core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala

Lines changed: 18 additions & 2 deletions
@@ -26,11 +26,11 @@ import org.apache.spark.internal.config.Tests.TEST_NO_STAGE_RETRY

 class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext {

-  def initLocalClusterSparkContext(): Unit = {
+  def initLocalClusterSparkContext(numWorker: Int = 4): Unit = {
     val conf = new SparkConf()
       // Init local cluster here so each barrier task runs in a separated process, thus `barrier()`
       // call is actually useful.
-      .setMaster("local-cluster[4, 1, 1024]")
+      .setMaster(s"local-cluster[$numWorker, 1, 1024]")
       .setAppName("test-cluster")
       .set(TEST_NO_STAGE_RETRY, true)
     sc = new SparkContext(conf)
@@ -276,4 +276,20 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext {
     initLocalClusterSparkContext()
     testBarrierTaskKilled(interruptOnKill = true)
   }
+
+  test("SPARK-31485: barrier stage should fail if only partial tasks are launched") {
+    initLocalClusterSparkContext(2)
+    val rdd0 = sc.parallelize(Seq(0, 1, 2, 3), 2)
+    val dep = new OneToOneDependency[Int](rdd0)
+    // set up a barrier stage with 2 tasks and both tasks prefer executor 0 (only 1 core) for
+    // scheduling. So, one of tasks won't be scheduled in one round of resource offer.
+    val rdd = new MyRDD(sc, 2, List(dep), Seq(Seq("executor_h_0"), Seq("executor_h_0")))
+    val errorMsg = intercept[SparkException] {
+      rdd.barrier().mapPartitions { iter =>
+        BarrierTaskContext.get().barrier()
+        iter
+      }.collect()
+    }.getMessage
+    assert(errorMsg.contains("Fail resource offers for barrier stage"))
+  }
 }

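On the user side, the new error message recommends disabling delay scheduling when barrier stages keep failing to get a full set of resource offers. A minimal sketch of that workaround, mirroring the suite's local-cluster setup (master string and app name are placeholders):

    import org.apache.spark.{SparkConf, SparkContext}

    // spark.locality.wait=0 disables delay scheduling, so barrier tasks are not held
    // back waiting for preferred locations and a full set of offers can be accepted.
    val conf = new SparkConf()
      .setMaster("local-cluster[2, 1, 1024]")   // placeholder, mirrors the test setup
      .setAppName("barrier-delay-scheduling-workaround")
      .set("spark.locality.wait", "0")
    val sc = new SparkContext(conf)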
docs/_data/menu-sql.yaml

Lines changed: 2 additions & 2 deletions
@@ -78,12 +78,12 @@
   subitems:
     - text: Data Types
       url: sql-ref-datatypes.html
+    - text: Identifiers
+      url: sql-ref-identifier.html
     - text: Literals
       url: sql-ref-literals.html
     - text: Null Semantics
       url: sql-ref-null-semantics.html
-    - text: NaN Semantics
-      url: sql-ref-nan-semantics.html
     - text: ANSI Compliance
       url: sql-ref-ansi-compliance.html
       subitems:

docs/core-migration-guide.md

Lines changed: 2 additions & 0 deletions
@@ -35,6 +35,8 @@ license: |

 - Deprecated method `AccumulableInfo.apply` have been removed because creating `AccumulableInfo` is disallowed.

+- Deprecated accumulator v1 APIs have been removed and please use v2 APIs instead.
+
 - Event log file will be written as UTF-8 encoding, and Spark History Server will replay event log files as UTF-8 encoding. Previously Spark wrote the event log file as default charset of driver JVM process, so Spark History Server of Spark 2.x is needed to read the old event log files in case of incompatible encoding.

 - A new protocol for fetching shuffle blocks is used. It's recommended that external shuffle services be upgraded when running Spark 3.0 apps. You can still use old external shuffle services by setting the configuration `spark.shuffle.useOldFetchProtocol` to `true`. Otherwise, Spark may run into errors with messages like `IllegalArgumentException: Unexpected message type: <number>`.

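To illustrate the v2 accumulator API that the new migration note points to, here is a minimal sketch (names and values are illustrative):

    import org.apache.spark.{SparkConf, SparkContext}

    // sc.longAccumulator replaces the removed v1 sc.accumulator API.
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("acc-v2-sketch"))
    val negatives = sc.longAccumulator("negatives")
    sc.parallelize(Seq(1, -2, 3, -4)).foreach { x =>
      if (x < 0) negatives.add(1)   // update inside an action so the count is reliable
    }
    println(negatives.value)        // 2, read back on the driver
    sc.stop()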
docs/monitoring.md

Lines changed: 0 additions & 1 deletion
@@ -1056,7 +1056,6 @@ This is the component with the largest amount of instrumented metrics
   - compilationTime (histogram)
   - generatedClassSize (histogram)
   - generatedMethodSize (histogram)
-  - hiveClientCalls.count
   - sourceCodeSize (histogram)

 - namespace=DAGScheduler

docs/sql-migration-guide.md

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ license: |

 - In Spark version 2.4 and below, you can create a map with duplicated keys via built-in functions like `CreateMap`, `StringToMap`, etc. The behavior of map with duplicated keys is undefined, for example, map look up respects the duplicated key appears first, `Dataset.collect` only keeps the duplicated key appears last, `MapKeys` returns duplicated keys, etc. In Spark 3.0, Spark throws `RuntimeException` when duplicated keys are found. You can set `spark.sql.mapKeyDedupPolicy` to `LAST_WIN` to deduplicate map keys with last wins policy. Users may still read map values with duplicated keys from data sources which do not enforce it (for example, Parquet), the behavior is undefined.

-- In Spark 3.0, using `org.apache.spark.sql.functions.udf(AnyRef, DataType)` is not allowed by default. Set `spark.sql.legacy.allowUntypedScalaUDF` to true to keep using it. In Spark version 2.4 and below, if `org.apache.spark.sql.functions.udf(AnyRef, DataType)` gets a Scala closure with primitive-type argument, the returned UDF returns null if the input values is null. However, in Spark 3.0, the UDF returns the default value of the Java type if the input value is null. For example, `val f = udf((x: Int) => x, IntegerType)`, `f($"x")` returns null in Spark 2.4 and below if column `x` is null, and return 0 in Spark 3.0. This behavior change is introduced because Spark 3.0 is built with Scala 2.12 by default.
+- In Spark 3.0, using `org.apache.spark.sql.functions.udf(AnyRef, DataType)` is not allowed by default. Remove the return type parameter to automatically switch to typed Scala udf is recommended, or set `spark.sql.legacy.allowUntypedScalaUDF` to true to keep using it. In Spark version 2.4 and below, if `org.apache.spark.sql.functions.udf(AnyRef, DataType)` gets a Scala closure with primitive-type argument, the returned UDF returns null if the input values is null. However, in Spark 3.0, the UDF returns the default value of the Java type if the input value is null. For example, `val f = udf((x: Int) => x, IntegerType)`, `f($"x")` returns null in Spark 2.4 and below if column `x` is null, and return 0 in Spark 3.0. This behavior change is introduced because Spark 3.0 is built with Scala 2.12 by default.

 - In Spark 3.0, a higher-order function `exists` follows the three-valued boolean logic, that is, if the `predicate` returns any `null`s and no `true` is obtained, then `exists` returns `null` instead of `false`. For example, `exists(array(1, null, 3), x -> x % 2 == 0)` is `null`. The previous behavior can be restored by setting `spark.sql.legacy.followThreeValuedLogicInArrayExists` to `false`.

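A short sketch of the recommended change from the untyped to the typed Scala UDF (SparkSession setup and column names are illustrative):

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.functions.udf

    val spark = SparkSession.builder().master("local[2]").appName("udf-sketch").getOrCreate()
    import spark.implicits._

    // Untyped form, disallowed by default in Spark 3.0 unless
    // spark.sql.legacy.allowUntypedScalaUDF is set to true:
    //   val f = udf((x: Int) => x, IntegerType)

    // Typed form recommended by the note above: drop the return-type argument.
    val f = udf((x: Int) => x)
    Seq(Some(1), None).toDF("x").select(f($"x")).show()   // the null input stays null
    spark.stop()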
docs/sql-ref-datatypes.md

Lines changed: 119 additions & 0 deletions
@@ -19,6 +19,8 @@ license: |
   limitations under the License.
 ---

+### Supported Data Types
+
 Spark SQL and DataFrames support the following data types:

 * Numeric types
@@ -706,3 +708,120 @@ The following table shows the type names as well as aliases used in Spark SQL pa
 </table>
 </div>
 </div>
+
+### Floating Point Special Values
+
+Spark SQL supports several special floating point values in a case-insensitive manner:
+
+ * Inf/+Inf/Infinity/+Infinity: positive infinity
+   * ```FloatType```: equivalent to Scala <code>Float.PositiveInfinity</code>.
+   * ```DoubleType```: equivalent to Scala <code>Double.PositiveInfinity</code>.
+ * -Inf/-Infinity: negative infinity
+   * ```FloatType```: equivalent to Scala <code>Float.NegativeInfinity</code>.
+   * ```DoubleType```: equivalent to Scala <code>Double.NegativeInfinity</code>.
+ * NaN: not a number
+   * ```FloatType```: equivalent to Scala <code>Float.NaN</code>.
+   * ```DoubleType```: equivalent to Scala <code>Double.NaN</code>.
+
+#### Positive/Negative Infinity Semantics
+
+There is special handling for positive and negative infinity. They have the following semantics:
+
+ * Positive infinity multiplied by any positive value returns positive infinity.
+ * Negative infinity multiplied by any positive value returns negative infinity.
+ * Positive infinity multiplied by any negative value returns negative infinity.
+ * Negative infinity multiplied by any negative value returns positive infinity.
+ * Positive/negative infinity multiplied by 0 returns NaN.
+ * Positive/negative infinity is equal to itself.
+ * In aggregations, all positive infinity values are grouped together. Similarly, all negative infinity values are grouped together.
+ * Positive infinity and negative infinity are treated as normal values in join keys.
+ * Positive infinity sorts lower than NaN and higher than any other values.
+ * Negative infinity sorts lower than any other values.
+
+#### NaN Semantics
+
+There is special handling for not-a-number (NaN) when dealing with `float` or `double` types that
+do not exactly match standard floating point semantics.
+Specifically:
+
+ * NaN = NaN returns true.
+ * In aggregations, all NaN values are grouped together.
+ * NaN is treated as a normal value in join keys.
+ * NaN values go last when in ascending order, larger than any other numeric value.
+
+#### Examples
+
+{% highlight sql %}
+SELECT double('infinity') AS col;
++--------+
+|     col|
++--------+
+|Infinity|
++--------+
+
+SELECT float('-inf') AS col;
++---------+
+|      col|
++---------+
+|-Infinity|
++---------+
+
+SELECT float('NaN') AS col;
++---+
+|col|
++---+
+|NaN|
++---+
+
+SELECT double('infinity') * 0 AS col;
++---+
+|col|
++---+
+|NaN|
++---+
+
+SELECT double('-infinity') * (-1234567) AS col;
++--------+
+|     col|
++--------+
+|Infinity|
++--------+
+
+SELECT double('infinity') < double('NaN') AS col;
++----+
+| col|
++----+
+|true|
++----+
+
+SELECT double('NaN') = double('NaN') AS col;
++----+
+| col|
++----+
+|true|
++----+
+
+SELECT double('inf') = double('infinity') AS col;
++----+
+| col|
++----+
+|true|
++----+
+
+CREATE TABLE test (c1 int, c2 double);
+INSERT INTO test VALUES (1, double('infinity'));
+INSERT INTO test VALUES (2, double('infinity'));
+INSERT INTO test VALUES (3, double('inf'));
+INSERT INTO test VALUES (4, double('-inf'));
+INSERT INTO test VALUES (5, double('NaN'));
+INSERT INTO test VALUES (6, double('NaN'));
+INSERT INTO test VALUES (7, double('-infinity'));
+SELECT COUNT(*), c2 FROM test GROUP BY c2;
++---------+---------+
+| count(1)|       c2|
++---------+---------+
+|        2|      NaN|
+|        2|-Infinity|
+|        3| Infinity|
++---------+---------+
+{% endhighlight %}

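The same special-value semantics are visible through the DataFrame API; a brief Scala sketch (column name is illustrative):

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[2]").appName("float-special-values").getOrCreate()
    import spark.implicits._

    // NaN values group together, and positive infinity sorts above all other values
    // but below NaN, matching the SQL examples above.
    val df = Seq(Double.NaN, Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity, 1.0)
      .toDF("c2")
    df.groupBy("c2").count().show()   // the NaN group has count 2
    df.orderBy("c2").show()           // -Infinity, 1.0, Infinity, NaN, NaN
    spark.stop()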