@@ -17,11 +17,15 @@
 
 package org.apache.spark.executor
 
+import java.io.{IOException, ObjectInputStream}
+import java.util.concurrent.ConcurrentHashMap
+
 import scala.collection.mutable.ArrayBuffer
 
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.executor.DataReadMethod.DataReadMethod
 import org.apache.spark.storage.{BlockId, BlockStatus}
+import org.apache.spark.util.Utils
 
 /**
  * :: DeveloperApi ::
@@ -210,10 +214,26 @@ class TaskMetrics extends Serializable { |
   private[spark] def updateInputMetrics(): Unit = synchronized {
     inputMetrics.foreach(_.updateBytesRead())
   }
+
+  @throws(classOf[IOException])
+  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
+    in.defaultReadObject()
+    // Look the hostname up in the cache: the number of distinct hostnames is on the order
+    // of the number of nodes in the cluster, so reusing a cached String reduces the number
+    // of objects created during deserialization and alleviates GC overhead.
+    _hostname = TaskMetrics.getCachedHostName(_hostname)
+  }
 }
 
 private[spark] object TaskMetrics {
+  private val hostNameCache = new ConcurrentHashMap[String, String]()
+
   def empty: TaskMetrics = new TaskMetrics
+
+  def getCachedHostName(host: String): String = {
+    val canonicalHost = hostNameCache.putIfAbsent(host, host)
+    if (canonicalHost != null) canonicalHost else host
+  }
 }
 
 /**
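A note on the readObject hook added above: Java serialization invokes a private readObject method, if one is defined, on each instance it deserializes, and defaultReadObject() restores the non-transient fields before the remaining code runs, which is exactly where the patch swaps in the cached hostname. The following standalone sketch illustrates the mechanism; the Record class and demo names are hypothetical, with String.intern() standing in for getCachedHostName, not code from this patch.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, IOException, ObjectInputStream, ObjectOutputStream}

// Hypothetical class, not from the patch: shows where a private readObject hook
// runs during deserialization and how it can rewrite a field after
// defaultReadObject() has restored it, mirroring the hostname substitution above.
class Record(var payload: String) extends Serializable {
  @throws(classOf[IOException])
  @throws(classOf[ClassNotFoundException])
  private def readObject(in: ObjectInputStream): Unit = {
    in.defaultReadObject()      // restores payload from the stream
    payload = payload.intern()  // canonicalize afterwards (stand-in for getCachedHostName)
  }
}

object ReadObjectDemo extends App {
  val buffer = new ByteArrayOutputStream()
  val out = new ObjectOutputStream(buffer)
  out.writeObject(new Record("node-1"))
  out.close()

  val in = new ObjectInputStream(new ByteArrayInputStream(buffer.toByteArray))
  val copy = in.readObject().asInstanceOf[Record]
  assert(copy.payload == "node-1")  // value restored by defaultReadObject()
  assert(copy.payload eq "node-1")  // and canonicalized by the hook (literals are interned)
}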
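The cache itself relies on ConcurrentHashMap.putIfAbsent, which atomically stores the first String seen for a key and returns null, while every later call returns the stored instance; all TaskMetrics deserialized in one JVM therefore share a single canonical String per hostname. A minimal sketch of the same idiom, using a hypothetical HostNameInterner object that is not part of the patch:

import java.util.concurrent.ConcurrentHashMap

// Hypothetical object, not from the patch: the same interning idiom as
// TaskMetrics.getCachedHostName. putIfAbsent atomically stores the first
// instance seen for a key and returns null; later calls return the stored
// instance, so every caller shares one canonical String per distinct key.
object HostNameInterner {
  private val cache = new ConcurrentHashMap[String, String]()

  def intern(host: String): String = {
    val canonical = cache.putIfAbsent(host, host)
    if (canonical != null) canonical else host
  }
}

object InternerDemo extends App {
  val first  = HostNameInterner.intern(new String("node-1"))  // cache miss: this instance is stored
  val second = HostNameInterner.intern(new String("node-1"))  // cache hit: stored instance returned
  assert(first eq second)  // equal keys always yield the same canonical String
}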