@@ -25,7 +25,7 @@ import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference}
 import io.fabric8.kubernetes.api.model._
 import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher}
 import io.fabric8.kubernetes.client.Watcher.Action
-import scala.collection.{concurrent, mutable}
+import scala.collection.mutable
 import scala.collection.JavaConverters._
 import scala.concurrent.{ExecutionContext, Future}

@@ -56,14 +56,9 @@ private[spark] class KubernetesClusterSchedulerBackend(
   private val runningExecutorsToPods = new mutable.HashMap[String, Pod]
   // Indexed by executor pod names and guarded by RUNNING_EXECUTOR_PODS_LOCK.
   private val runningPodsToExecutors = new mutable.HashMap[String, String]
-  // TODO(varun): Get rid of this lock object by my making the underlying map a concurrent hash map.
-  private val EXECUTOR_PODS_BY_IPS_LOCK = new Object
-  // Indexed by executor IP addrs and guarded by EXECUTOR_PODS_BY_IPS_LOCK
-  private val executorPodsByIPs = new mutable.HashMap[String, Pod]
-  private val podsWithKnownExitReasons: concurrent.Map[String, ExecutorExited] =
-    new ConcurrentHashMap[String, ExecutorExited]().asScala
-  private val disconnectedPodsByExecutorIdPendingRemoval =
-    new ConcurrentHashMap[String, Pod]().asScala
+  private val executorPodsByIPs = new ConcurrentHashMap[String, Pod]()
+  private val podsWithKnownExitReasons = new ConcurrentHashMap[String, ExecutorExited]()
+  private val disconnectedPodsByExecutorIdPendingRemoval = new ConcurrentHashMap[String, Pod]()

   private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE)

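The core of the change is visible in the hunk above: the `EXECUTOR_PODS_BY_IPS_LOCK` object and its lock-guarded `mutable.HashMap` give way to a plain `java.util.concurrent.ConcurrentHashMap`, and the two maps that were already backed by `ConcurrentHashMap` drop the `.asScala` wrapper. A minimal sketch of the resulting access pattern (not the Spark code itself, and using a hypothetical `Pod` case class instead of the fabric8 model):

```scala
import java.util.concurrent.ConcurrentHashMap

object PodRegistrySketch {
  // Hypothetical stand-in for the fabric8 Pod model used in the patch.
  final case class Pod(name: String, ip: String)

  // Before the patch every access was wrapped in EXECUTOR_PODS_BY_IPS_LOCK.synchronized;
  // with a ConcurrentHashMap each put/get/remove is atomic and thread-safe on its own.
  private val executorPodsByIPs = new ConcurrentHashMap[String, Pod]()

  def register(pod: Pod): Unit = executorPodsByIPs.put(pod.ip, pod)

  // The Java map returns null for a missing key, so callers wrap lookups in Option
  // to stay idiomatic on the Scala side (Option(null) == None).
  def lookup(ip: String): Option[Pod] = Option(executorPodsByIPs.get(ip))

  def unregister(ip: String): Option[Pod] = Option(executorPodsByIPs.remove(ip))
}
```

Note that this only makes individual operations atomic; compound check-then-act sequences still need their own coordination, which is presumably why `RUNNING_EXECUTOR_PODS_LOCK` stays in place for the two maps that are always updated together.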
@@ -102,13 +97,13 @@ private[spark] class KubernetesClusterSchedulerBackend(

   private val podAllocationInterval = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY)
   require(podAllocationInterval > 0, s"Allocation batch delay " +
-    s"${KUBERNETES_ALLOCATION_BATCH_DELAY} " +
-    s"is ${podAllocationInterval}, should be a positive integer")
+    s"$KUBERNETES_ALLOCATION_BATCH_DELAY " +
+    s"is $podAllocationInterval, should be a positive integer")

   private val podAllocationSize = conf.get(KUBERNETES_ALLOCATION_BATCH_SIZE)
   require(podAllocationSize > 0, s"Allocation batch size " +
-    s"${KUBERNETES_ALLOCATION_BATCH_SIZE} " +
-    s"is ${podAllocationSize}, should be a positive integer")
+    s"$KUBERNETES_ALLOCATION_BATCH_SIZE " +
+    s"is $podAllocationSize, should be a positive integer")

   private val allocatorRunnable = new Runnable {

@@ -141,10 +136,10 @@ private[spark] class KubernetesClusterSchedulerBackend(
      // For each disconnected executor, synchronize with the loss reasons that may have been found
      // by the executor pod watcher. If the loss reason was discovered by the watcher,
      // inform the parent class with removeExecutor.
-      val disconnectedPodsByExecutorIdPendingRemovalCopy =
-        Map.empty ++ disconnectedPodsByExecutorIdPendingRemoval
-      disconnectedPodsByExecutorIdPendingRemovalCopy.foreach { case (executorId, executorPod) =>
-        val knownExitReason = podsWithKnownExitReasons.remove(executorPod.getMetadata.getName)
+      disconnectedPodsByExecutorIdPendingRemoval.keys().asScala.foreach { case (executorId) =>
+        val executorPod = disconnectedPodsByExecutorIdPendingRemoval.get(executorId)
+        val knownExitReason = Option(podsWithKnownExitReasons.remove(
+          executorPod.getMetadata.getName))
        knownExitReason.fold {
          removeExecutorOrIncrementLossReasonCheckCount(executorId)
        } { executorExited =>
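The allocator loop above no longer snapshots the pending-removal map (`Map.empty ++ ...`); it walks the `ConcurrentHashMap`'s key enumeration directly via `asScala` and wraps the Java `remove` call in `Option`, since `ConcurrentHashMap.remove` returns the previous value or `null` rather than a Scala `Option`. A sketch of that pattern, with `String` values standing in for the `Pod`/`ExecutorExited` types and a `null` guard added in case an entry vanishes between `keys()` and `get()`:

```scala
import java.util.concurrent.ConcurrentHashMap
import scala.collection.JavaConverters._

object PendingRemovalSketch {
  // executorId -> executor pod name (stands in for the Pod object in the real code).
  private val disconnectedPendingRemoval = new ConcurrentHashMap[String, String]()
  // executor pod name -> exit reason (stands in for ExecutorExited).
  private val podsWithKnownExitReasons = new ConcurrentHashMap[String, String]()

  def drainKnownExitReasons(): Unit = {
    // keys() returns a java.util.Enumeration; asScala turns it into an Iterator.
    disconnectedPendingRemoval.keys().asScala.foreach { executorId =>
      // Guard against the entry disappearing between keys() and get().
      Option(disconnectedPendingRemoval.get(executorId)).foreach { podName =>
        // remove() returns the previous value or null; Option(...) maps null to None.
        Option(podsWithKnownExitReasons.remove(podName)) match {
          case Some(reason) => println(s"executor $executorId exited: $reason")
          case None         => println(s"no exit reason recorded yet for $executorId")
        }
      }
    }
  }
}
```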
@@ -171,7 +166,7 @@ private[spark] class KubernetesClusterSchedulerBackend(
   }

   def deleteExecutorFromClusterAndDataStructures(executorId: String): Unit = {
-    disconnectedPodsByExecutorIdPendingRemoval -= executorId
+    disconnectedPodsByExecutorIdPendingRemoval.remove(executorId)
     executorReasonCheckAttemptCounts -= executorId
     RUNNING_EXECUTOR_PODS_LOCK.synchronized {
       runningExecutorsToPods.remove(executorId).map { pod =>
@@ -239,9 +234,7 @@ private[spark] class KubernetesClusterSchedulerBackend(
       runningExecutorsToPods.clear()
       runningPodsToExecutors.clear()
     }
-    EXECUTOR_PODS_BY_IPS_LOCK.synchronized {
-      executorPodsByIPs.clear()
-    }
+    executorPodsByIPs.clear()
     val resource = executorWatchResource.getAndSet(null)
     if (resource != null) {
       resource.close()
@@ -262,14 +255,11 @@ private[spark] class KubernetesClusterSchedulerBackend(
    * locality if an executor launches on the cluster node.
    */
   private def getNodesWithLocalTaskCounts(): Map[String, Int] = {
-    val executorPodsWithIPs = EXECUTOR_PODS_BY_IPS_LOCK.synchronized {
-      executorPodsByIPs.values.toList // toList makes a defensive copy.
-    }
     val nodeToLocalTaskCount = mutable.Map[String, Int]() ++
       KubernetesClusterSchedulerBackend.this.synchronized {
         hostToLocalTaskCount
       }
-    for (pod <- executorPodsWithIPs) {
+    for (pod <- executorPodsByIPs.values().asScala) {
       // Remove cluster nodes that are running our executors already.
       // TODO: This prefers spreading out executors across nodes. In case users want
       // consolidating executors on fewer nodes, introduce a flag. See the spark.deploy.spreadOut
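`getNodesWithLocalTaskCounts` can also drop the defensive `toList` copy because iterators over a `ConcurrentHashMap`'s views are weakly consistent: concurrent `put`/`remove` calls from the watcher thread cannot make the traversal throw `ConcurrentModificationException`; they are merely not guaranteed to be reflected. A small self-contained illustration of this behavior (hypothetical names, not the Spark code):

```scala
import java.util.concurrent.ConcurrentHashMap
import scala.collection.JavaConverters._

object WeaklyConsistentIterationSketch {
  def main(args: Array[String]): Unit = {
    val podsByIp = new ConcurrentHashMap[String, String]()
    (1 to 100).foreach(i => podsByIp.put(s"10.0.0.$i", s"exec-pod-$i"))

    // A writer thread mutates the map while the main thread iterates over values().
    val writer = new Thread(new Runnable {
      override def run(): Unit = (101 to 200).foreach { i =>
        podsByIp.put(s"10.0.0.$i", s"exec-pod-$i")
        podsByIp.remove(s"10.0.0.${i - 100}")
      }
    })
    writer.start()

    // Safe without a lock or a defensive copy: the weakly consistent iterator sees
    // some, but not necessarily all, of the concurrent updates.
    val seen = podsByIp.values().asScala.count(_ => true)
    writer.join()
    println(s"iterated over $seen pods with no ConcurrentModificationException")
  }
}
```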
@@ -319,7 +309,7 @@ private[spark] class KubernetesClusterSchedulerBackend(
         val maybeRemovedExecutor = runningExecutorsToPods.remove(executor)
         maybeRemovedExecutor.foreach { executorPod =>
           kubernetesClient.pods().delete(executorPod)
-          disconnectedPodsByExecutorIdPendingRemoval(executor) = executorPod
+          disconnectedPodsByExecutorIdPendingRemoval.put(executor, executorPod)
           runningPodsToExecutors.remove(executorPod.getMetadata.getName)
         }
         if (maybeRemovedExecutor.isEmpty) {
@@ -331,9 +321,11 @@ private[spark] class KubernetesClusterSchedulerBackend(
   }

   def getExecutorPodByIP(podIP: String): Option[Pod] = {
-    EXECUTOR_PODS_BY_IPS_LOCK.synchronized {
-      executorPodsByIPs.get(podIP)
-    }
+    // Note: Per https://github.com/databricks/scala-style-guide#concurrency, we don't
+    // want to be switching to scala.collection.concurrent.Map on
+    // executorPodsByIPs.
+    val pod = executorPodsByIPs.get(podIP)
+    Option(pod)
   }

   private class ExecutorPodsWatcher extends Watcher[Pod] {
@@ -346,18 +338,14 @@ private[spark] class KubernetesClusterSchedulerBackend(
         val podIP = pod.getStatus.getPodIP
         val clusterNodeName = pod.getSpec.getNodeName
         logDebug(s"Executor pod $pod ready, launched at $clusterNodeName as IP $podIP.")
-        EXECUTOR_PODS_BY_IPS_LOCK.synchronized {
-          executorPodsByIPs += ((podIP, pod))
-        }
+        executorPodsByIPs.put(podIP, pod)
       } else if ((action == Action.MODIFIED && pod.getMetadata.getDeletionTimestamp != null) ||
           action == Action.DELETED || action == Action.ERROR) {
         val podName = pod.getMetadata.getName
         val podIP = pod.getStatus.getPodIP
         logDebug(s"Executor pod $podName at IP $podIP was at $action.")
         if (podIP != null) {
-          EXECUTOR_PODS_BY_IPS_LOCK.synchronized {
-            executorPodsByIPs -= podIP
-          }
+          executorPodsByIPs.remove(podIP)
         }
         if (action == Action.ERROR) {
           logInfo(s"Received pod $podName exited event. Reason: " + pod.getStatus.getReason)
@@ -445,7 +433,7 @@ private[spark] class KubernetesClusterSchedulerBackend(
       if (disableExecutor(executorId)) {
         RUNNING_EXECUTOR_PODS_LOCK.synchronized {
           runningExecutorsToPods.get(executorId).foreach { pod =>
-            disconnectedPodsByExecutorIdPendingRemoval(executorId) = pod
+            disconnectedPodsByExecutorIdPendingRemoval.put(executorId, pod)
           }
         }
       }