
Commit 1c98218

Author: Louiszr (committed)
Merge remote-tracking branch 'upstream/master'
2 parents fcfac36 + 3722ed4, commit 1c98218

File tree: 713 files changed (+132803 / -919 lines)


.github/workflows/master.yml renamed to .github/workflows/build_and_test.yml

Lines changed: 6 additions & 0 deletions
@@ -183,6 +183,12 @@ jobs:
       with:
         name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
         path: "**/target/test-reports/*.xml"
+    - name: Upload unit tests log files
+      if: failure()
+      uses: actions/upload-artifact@v2
+      with:
+        name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
+        path: "**/target/unit-tests.log"
 
   # Static analysis, and documentation build
   lint:

.github/workflows/test_report.yml

Lines changed: 3 additions & 14 deletions
@@ -10,26 +10,15 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - name: Download test results to report
-      # TODO(SPARK-32605): It was forked to have a custom fix
-      # https://github.com/HyukjinKwon/action-surefire-report/commit/c96094cc35061fcf154a7cb46807f2f3e2339476
-      # in order to add the support of custom target commit SHA. It should be contributed back to the original
-      # plugin and avoid using the fork.
-      uses: HyukjinKwon/action-download-artifact@master
+      uses: dawidd6/action-download-artifact@v2
       with:
         github_token: ${{ secrets.GITHUB_TOKEN }}
         workflow: ${{ github.event.workflow_run.workflow_id }}
         commit: ${{ github.event.workflow_run.head_commit.id }}
     - name: Publish test report
-      # TODO(SPARK-32606): It was forked to have a custom fix
-      # https://github.com/HyukjinKwon/action-download-artifact/commit/750b71af351aba467757d7be6924199bb08db4ed
-      # in order to add the support to download all artifacts. It should be contributed back to the original
-      # plugin and avoid using the fork.
-      # Alternatively, we can use the official actions/download-artifact once they support to download artifacts
-      # between different workloads, see also https://github.com/actions/download-artifact/issues/3
-      uses: HyukjinKwon/action-surefire-report@master
+      uses: scacap/action-surefire-report@v1
       with:
-        check_name: Test report
+        check_name: Report test results
         github_token: ${{ secrets.GITHUB_TOKEN }}
         report_paths: "**/target/test-reports/*.xml"
         commit: ${{ github.event.workflow_run.head_commit.id }}
-

R/pkg/tests/run-all.R

Lines changed: 6 additions & 3 deletions
@@ -61,15 +61,18 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) {
   set.seed(42)
 
   # TODO (SPARK-30663) To be removed once testthat 1.x is removed from all builds
-  if (grepl("^1\\..*", packageVersion("testthat"))) {
+  if (packageVersion("testthat")$major <= 1) {
     # testthat 1.x
     test_runner <- testthat:::run_tests
     reporter <- "summary"
-
   } else {
     # testthat >= 2.0.0
     test_runner <- testthat:::test_package_dir
-    reporter <- testthat::default_reporter()
+    dir.create("target/test-reports", showWarnings = FALSE)
+    reporter <- MultiReporter$new(list(
+      SummaryReporter$new(),
+      JunitReporter$new(file = "target/test-reports/test-results.xml")
+    ))
   }
 
   test_runner("SparkR",

appveyor.yml

Lines changed: 2 additions & 2 deletions
@@ -41,8 +41,8 @@ cache:
 install:
   # Install maven and dependencies
   - ps: .\dev\appveyor-install-dependencies.ps1
-  # Required package for R unit tests
-  - cmd: Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow'), repos='https://cloud.r-project.org/')"
+  # Required package for R unit tests. xml2 is required to use jUnit reporter in testthat.
+  - cmd: Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'xml2'), repos='https://cloud.r-project.org/')"
   - cmd: Rscript -e "pkg_list <- as.data.frame(installed.packages()[,c(1, 3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]"
 
 build_script:

core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java

Lines changed: 70 additions & 0 deletions
@@ -428,6 +428,68 @@ public MapIterator destructiveIterator() {
     return new MapIterator(numValues, new Location(), true);
   }
 
+  /**
+   * Iterator for the entries of this map. This is to first iterate over key indices in
+   * `longArray` then accessing values in `dataPages`. NOTE: this is different from `MapIterator`
+   * in the sense that key index is preserved here
+   * (See `UnsafeHashedRelation` for example of usage).
+   */
+  public final class MapIteratorWithKeyIndex implements Iterator<Location> {
+
+    /**
+     * The index in `longArray` where the key is stored.
+     */
+    private int keyIndex = 0;
+
+    private int numRecords;
+    private final Location loc;
+
+    private MapIteratorWithKeyIndex() {
+      this.numRecords = numValues;
+      this.loc = new Location();
+    }
+
+    @Override
+    public boolean hasNext() {
+      return numRecords > 0;
+    }
+
+    @Override
+    public Location next() {
+      if (!loc.isDefined() || !loc.nextValue()) {
+        while (longArray.get(keyIndex * 2) == 0) {
+          keyIndex++;
+        }
+        loc.with(keyIndex, 0, true);
+        keyIndex++;
+      }
+      numRecords--;
+      return loc;
+    }
+  }
+
+  /**
+   * Returns an iterator for iterating over the entries of this map,
+   * by first iterating over the key index inside hash map's `longArray`.
+   *
+   * For efficiency, all calls to `next()` will return the same {@link Location} object.
+   *
+   * The returned iterator is NOT thread-safe. If the map is modified while iterating over it,
+   * the behavior of the returned iterator is undefined.
+   */
+  public MapIteratorWithKeyIndex iteratorWithKeyIndex() {
+    return new MapIteratorWithKeyIndex();
+  }
+
+  /**
+   * The maximum number of allowed keys index.
+   *
+   * The value of allowed keys index is in the range of [0, maxNumKeysIndex - 1].
+   */
+  public int maxNumKeysIndex() {
+    return (int) (longArray.size() / 2);
+  }
+
   /**
    * Looks up a key, and return a {@link Location} handle that can be used to test existence
    * and read/write values.
@@ -601,6 +663,14 @@ public boolean isDefined() {
       return isDefined;
     }
 
+    /**
+     * Returns index for key.
+     */
+    public int getKeyIndex() {
+      assert (isDefined);
+      return pos;
+    }
+
    /**
     * Returns the base object for key.
     */
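Note: the iterator above reuses a single Location object and exposes a stable key index per entry. A rough usage sketch in Scala (not part of this commit; it assumes an already-populated map: BytesToBytesMap and only calls the methods shown above):

// Hedged sketch: walk every entry once and record its key index.
val seenKeyIndices = scala.collection.mutable.Set.empty[Int]
val iter = map.iteratorWithKeyIndex()
while (iter.hasNext) {
  val loc = iter.next()              // the same Location instance is returned on every call
  assert(loc.isDefined)
  seenKeyIndices += loc.getKeyIndex  // index into longArray, in [0, maxNumKeysIndex - 1]
}
assert(seenKeyIndices.forall(_ < map.maxNumKeysIndex()))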

core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala

Lines changed: 2 additions & 2 deletions
@@ -27,8 +27,8 @@ import com.codahale.metrics.{Gauge, MetricRegistry}
 
 import org.apache.spark.internal.{config, Logging}
 import org.apache.spark.internal.config._
+import org.apache.spark.internal.config.DECOMMISSION_ENABLED
 import org.apache.spark.internal.config.Tests.TEST_SCHEDULE_INTERVAL
-import org.apache.spark.internal.config.Worker.WORKER_DECOMMISSION_ENABLED
 import org.apache.spark.metrics.source.Source
 import org.apache.spark.resource.ResourceProfile.UNKNOWN_RESOURCE_PROFILE_ID
 import org.apache.spark.resource.ResourceProfileManager
@@ -128,7 +128,7 @@ private[spark] class ExecutorAllocationManager(
   private val executorAllocationRatio =
     conf.get(DYN_ALLOCATION_EXECUTOR_ALLOCATION_RATIO)
 
-  private val decommissionEnabled = conf.get(WORKER_DECOMMISSION_ENABLED)
+  private val decommissionEnabled = conf.get(DECOMMISSION_ENABLED)
 
   private val defaultProfileId = resourceProfileManager.defaultResourceProfile.id

core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala

Lines changed: 2 additions & 2 deletions
@@ -67,7 +67,7 @@ private[deploy] class Worker(
   assert (port > 0)
 
   // If worker decommissioning is enabled register a handler on PWR to shutdown.
-  if (conf.get(WORKER_DECOMMISSION_ENABLED)) {
+  if (conf.get(config.DECOMMISSION_ENABLED)) {
     logInfo("Registering SIGPWR handler to trigger decommissioning.")
     SignalUtils.register("PWR", "Failed to register SIGPWR handler - " +
       "disabling worker decommission feature.")(decommissionSelf)
@@ -769,7 +769,7 @@ private[deploy] class Worker(
   }
 
   private[deploy] def decommissionSelf(): Boolean = {
-    if (conf.get(WORKER_DECOMMISSION_ENABLED)) {
+    if (conf.get(config.DECOMMISSION_ENABLED)) {
       logDebug("Decommissioning self")
      decommissioned = true
      sendToMaster(WorkerDecommission(workerId, self))

core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala

Lines changed: 8 additions & 2 deletions
@@ -294,10 +294,15 @@ private[spark] class CoarseGrainedExecutorBackend(
       override def run(): Unit = {
         var lastTaskRunningTime = System.nanoTime()
         val sleep_time = 1000 // 1s
-
+        // This config is internal and only used by unit tests to force an executor
+        // to hang around for longer when decommissioned.
+        val initialSleepMillis = env.conf.getInt(
+          "spark.test.executor.decommission.initial.sleep.millis", sleep_time)
+        if (initialSleepMillis > 0) {
+          Thread.sleep(initialSleepMillis)
+        }
         while (true) {
           logInfo("Checking to see if we can shutdown.")
-          Thread.sleep(sleep_time)
           if (executor == null || executor.numRunningTasks == 0) {
             if (env.conf.get(STORAGE_DECOMMISSION_ENABLED)) {
               logInfo("No running tasks, checking migrations")
@@ -323,6 +328,7 @@ private[spark] class CoarseGrainedExecutorBackend(
             // move forward.
             lastTaskRunningTime = System.nanoTime()
           }
+          Thread.sleep(sleep_time)
         }
       }
     }
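Because the new initial sleep is read from the executor's SparkConf, tests can tune it directly. A minimal hedged sketch (the key string is taken from the diff above; the rest is standard SparkConf usage and not part of this commit):

import org.apache.spark.SparkConf

// Hedged sketch: a unit test could skip the initial wait entirely, or
// raise the value to keep a decommissioned executor around for longer.
val conf = new SparkConf()
  .set("spark.test.executor.decommission.initial.sleep.millis", "0")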

core/src/main/scala/org/apache/spark/executor/Executor.scala

Lines changed: 15 additions & 16 deletions
@@ -336,8 +336,8 @@ private[spark] class Executor(
     extends Runnable {
 
     val taskId = taskDescription.taskId
-    val threadName = s"Executor task launch worker for task $taskId"
     val taskName = taskDescription.name
+    val threadName = s"Executor task launch worker for $taskName"
     val mdcProperties = taskDescription.properties.asScala
       .filter(_._1.startsWith("mdc.")).toSeq
 
@@ -364,7 +364,7 @@ private[spark] class Executor(
     @volatile var task: Task[Any] = _
 
     def kill(interruptThread: Boolean, reason: String): Unit = {
-      logInfo(s"Executor is trying to kill $taskName (TID $taskId), reason: $reason")
+      logInfo(s"Executor is trying to kill $taskName, reason: $reason")
       reasonIfKilled = Some(reason)
       if (task != null) {
         synchronized {
@@ -425,7 +425,7 @@ private[spark] class Executor(
       } else 0L
       Thread.currentThread.setContextClassLoader(replClassLoader)
       val ser = env.closureSerializer.newInstance()
-      logInfo(s"Running $taskName (TID $taskId)")
+      logInfo(s"Running $taskName")
       execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
       var taskStartTimeNs: Long = 0
       var taskStartCpu: Long = 0
@@ -459,7 +459,7 @@ private[spark] class Executor(
         // MapOutputTrackerMaster and its cache invalidation is not based on epoch numbers so
         // we don't need to make any special calls here.
         if (!isLocal) {
-          logDebug("Task " + taskId + "'s epoch is " + task.epoch)
+          logDebug(s"$taskName's epoch is ${task.epoch}")
           env.mapOutputTracker.asInstanceOf[MapOutputTrackerWorker].updateEpoch(task.epoch)
         }
 
@@ -485,7 +485,7 @@ private[spark] class Executor(
         val freedMemory = taskMemoryManager.cleanUpAllAllocatedMemory()
 
         if (freedMemory > 0 && !threwException) {
-          val errMsg = s"Managed memory leak detected; size = $freedMemory bytes, TID = $taskId"
+          val errMsg = s"Managed memory leak detected; size = $freedMemory bytes, $taskName"
           if (conf.get(UNSAFE_EXCEPTION_ON_MEMORY_LEAK)) {
             throw new SparkException(errMsg)
           } else {
@@ -495,7 +495,7 @@ private[spark] class Executor(
 
         if (releasedLocks.nonEmpty && !threwException) {
           val errMsg =
-            s"${releasedLocks.size} block locks were not released by TID = $taskId:\n" +
+            s"${releasedLocks.size} block locks were not released by $taskName\n" +
             releasedLocks.mkString("[", ", ", "]")
           if (conf.get(STORAGE_EXCEPTION_PIN_LEAK)) {
             throw new SparkException(errMsg)
@@ -508,7 +508,7 @@ private[spark] class Executor(
           // uh-oh. it appears the user code has caught the fetch-failure without throwing any
           // other exceptions. Its *possible* this is what the user meant to do (though highly
           // unlikely). So we will log an error and keep going.
-          logError(s"TID ${taskId} completed successfully though internally it encountered " +
+          logError(s"$taskName completed successfully though internally it encountered " +
            s"unrecoverable fetch failures! Most likely this means user code is incorrectly " +
            s"swallowing Spark's internal ${classOf[FetchFailedException]}", fetchFailure)
         }
@@ -592,7 +592,7 @@ private[spark] class Executor(
         // directSend = sending directly back to the driver
         val serializedResult: ByteBuffer = {
           if (maxResultSize > 0 && resultSize > maxResultSize) {
-            logWarning(s"Finished $taskName (TID $taskId). Result is larger than maxResultSize " +
+            logWarning(s"Finished $taskName. Result is larger than maxResultSize " +
              s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " +
              s"dropping it.")
             ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize))
@@ -602,11 +602,10 @@ private[spark] class Executor(
               blockId,
               new ChunkedByteBuffer(serializedDirectResult.duplicate()),
               StorageLevel.MEMORY_AND_DISK_SER)
-            logInfo(
-              s"Finished $taskName (TID $taskId). $resultSize bytes result sent via BlockManager)")
+            logInfo(s"Finished $taskName. $resultSize bytes result sent via BlockManager)")
             ser.serialize(new IndirectTaskResult[Any](blockId, resultSize))
           } else {
-            logInfo(s"Finished $taskName (TID $taskId). $resultSize bytes result sent to driver")
+            logInfo(s"Finished $taskName. $resultSize bytes result sent to driver")
             serializedDirectResult
           }
         }
@@ -616,7 +615,7 @@ private[spark] class Executor(
         execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult)
       } catch {
         case t: TaskKilledException =>
-          logInfo(s"Executor killed $taskName (TID $taskId), reason: ${t.reason}")
+          logInfo(s"Executor killed $taskName, reason: ${t.reason}")
 
           val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTimeNs)
           // Here and below, put task metric peaks in a WrappedArray to expose them as a Seq
@@ -629,7 +628,7 @@ private[spark] class Executor(
         case _: InterruptedException | NonFatal(_) if
             task != null && task.reasonIfKilled.isDefined =>
           val killReason = task.reasonIfKilled.getOrElse("unknown reason")
-          logInfo(s"Executor interrupted and killed $taskName (TID $taskId), reason: $killReason")
+          logInfo(s"Executor interrupted and killed $taskName, reason: $killReason")
 
           val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTimeNs)
           val metricPeaks = WrappedArray.make(metricsPoller.getTaskMetricPeaks(taskId))
@@ -643,7 +642,7 @@ private[spark] class Executor(
           // there was a fetch failure in the task, but some user code wrapped that exception
           // and threw something else. Regardless, we treat it as a fetch failure.
           val fetchFailedCls = classOf[FetchFailedException].getName
-          logWarning(s"TID ${taskId} encountered a ${fetchFailedCls} and " +
+          logWarning(s"$taskName encountered a ${fetchFailedCls} and " +
            s"failed, but the ${fetchFailedCls} was hidden by another " +
            s"exception. Spark is handling this like a fetch failure and ignoring the " +
            s"other exception: $t")
@@ -659,13 +658,13 @@ private[spark] class Executor(
         case t: Throwable if env.isStopped =>
           // Log the expected exception after executor.stop without stack traces
          // see: SPARK-19147
-          logError(s"Exception in $taskName (TID $taskId): ${t.getMessage}")
+          logError(s"Exception in $taskName: ${t.getMessage}")
 
         case t: Throwable =>
           // Attempt to exit cleanly by informing the driver of our failure.
          // If anything goes wrong (or this was a fatal exception), we will delegate to
          // the default uncaught exception handler, which will terminate the Executor.
-          logError(s"Exception in $taskName (TID $taskId)", t)
+          logError(s"Exception in $taskName", t)
 
           // SPARK-20904: Do not report failure to driver if if happened during shut down. Because
          // libraries may set up shutdown hooks that race with running tasks during shutdown,

core/src/main/scala/org/apache/spark/internal/config/Worker.scala

Lines changed: 0 additions & 6 deletions
@@ -82,10 +82,4 @@ private[spark] object Worker {
       .version("2.0.2")
       .intConf
       .createWithDefault(100)
-
-  private[spark] val WORKER_DECOMMISSION_ENABLED =
-    ConfigBuilder("spark.worker.decommission.enabled")
-      .version("3.1.0")
-      .booleanConf
-      .createWithDefault(false)
 }

0 commit comments
