@@ -47,6 +47,11 @@ private[spark] object SizeEstimator extends Logging {
4747 private val FLOAT_SIZE = 4
4848 private val DOUBLE_SIZE = 8
4949
50+ // A field is either a primitive, whose size is 1, 2, 4 or 8 bytes, or a pointer, whose size is
51+ // 4 or 8 bytes depending on the JVM (32-bit or 64-bit) and the UseCompressedOops flag.
52+ // The sizes must be listed in descending order, because field placement relies on that order.
53+ private val fieldSizes = List (8 , 4 , 2 , 1 )
54+
5055 // Alignment boundary for objects
5156 // TODO: Is this arch dependent ?
5257 private val ALIGN_SIZE = 8
@@ -171,7 +176,7 @@ private[spark] object SizeEstimator extends Logging {
171176 // general all ClassLoaders and Classes will be shared between objects anyway.
172177 } else {
173178 val classInfo = getClassInfo(cls)
174- state.size += classInfo.shellSize
179+ state.size += alignSize( classInfo.shellSize)
175180 for (field <- classInfo.pointerFields) {
176181 state.enqueue(field.get(obj))
177182 }
@@ -237,8 +242,8 @@ private[spark] object SizeEstimator extends Logging {
237242 }
238243 size
239244 }
240-
241- private def primitiveSize (cls : Class [_]): Long = {
245+
246+ private def primitiveSize (cls : Class [_]): Int = {
242247 if (cls == classOf [Byte ]) {
243248 BYTE_SIZE
244249 } else if (cls == classOf [Boolean ]) {
@@ -274,30 +279,66 @@ private[spark] object SizeEstimator extends Logging {
274279 val parent = getClassInfo(cls.getSuperclass)
275280 var shellSize = parent.shellSize
276281 var pointerFields = parent.pointerFields
282+ val sizeCount = Array .fill(fieldSizes.max + 1 )(0 )
277283
284+ // iterate through the fields of this class and gather information.
278285 for (field <- cls.getDeclaredFields) {
279286 if (! Modifier .isStatic(field.getModifiers)) {
280287 val fieldClass = field.getType
281288 if (fieldClass.isPrimitive) {
282- shellSize += primitiveSize(fieldClass)
289+ sizeCount( primitiveSize(fieldClass)) += 1
283290 } else {
284291 field.setAccessible(true ) // Enable future get()'s on this field
285- shellSize += pointerSize
292+ sizeCount(pointerSize) += 1
286293 pointerFields = field :: pointerFields
287294 }
288295 }
289296 }
290297
291- shellSize = alignSize(shellSize)
298+ // Based on the simulated field layout code in Aleksey Shipilev's report:
299+ // http://cr.openjdk.java.net/~shade/papers/2013-shipilev-fieldlayout-latest.pdf
300+ // The code is in Figure 9.
301+ // The simplified idea of field layout consists of 4 parts (see more details in the report):
302+ //
303+ // 1. field alignment: HotSpot lays out the fields aligned by their size.
304+ // 2. object alignment: HotSpot rounds instance size up to 8 bytes
305+ // 3. consistent fields layouts throughout the hierarchy: This means we should layout
306+ // superclass first. And we can use superclass's shellSize as a starting point to layout the
307+ // other fields in this class.
308+ // 4. class alignment: HotSpot rounds field blocks up to HeapOopSize, not 4 bytes, confirmed
309+ // with Aleksey. see https://bugs.openjdk.java.net/browse/CODETOOLS-7901322
310+ //
311+ // The real world field layout is much more complicated. There are three kinds of fields
312+ // order in Java 8. And we don't consider the @contended annotation introduced by Java 8.
313+ // see the HotSpot classloader code, layout_fields method for more details.
314+ // hg.openjdk.java.net/jdk8/jdk8/hotspot/file/tip/src/share/vm/classfile/classFileParser.cpp
315+ var alignedSize = shellSize
316+ for (size <- fieldSizes if sizeCount(size) > 0 ) {
317+ val count = sizeCount(size)
318+ // If there are internal gaps, smaller fields can fit into them.
319+ alignedSize = math.max(alignedSize, alignSizeUp(shellSize, size) + size * count)
320+ shellSize += size * count
321+ }
322+
323+ // Take the larger of the two sizes as the new shellSize (alignedSize >= shellSize always
324+ // holds), then round the instance field block up to pointerSize.
325+ shellSize = alignSizeUp(alignedSize, pointerSize)
292326
293327 // Create and cache a new ClassInfo
294328 val newInfo = new ClassInfo (shellSize, pointerFields)
295329 classInfos.put(cls, newInfo)
296330 newInfo
297331 }
298332
299- private def alignSize (size : Long ): Long = {
300- val rem = size % ALIGN_SIZE
301- if (rem == 0 ) size else (size + ALIGN_SIZE - rem)
302- }
/** Rounds `size` up to the object alignment boundary, ALIGN_SIZE. */
private def alignSize(size: Long): Long = {
  alignSizeUp(size, ALIGN_SIZE)
}
334+
/**
 * Rounds `size` up to the nearest multiple of `alignSize`.
 *
 * Only valid when `alignSize` is a power of two (2^n); for any other value the result is wrong.
 * For a power of two, (alignSize - 1) has exactly its n lowest bits set, so its complement is a
 * mask that clears those n bits. Adding (alignSize - 1) first pushes every value that is not
 * already aligned past the next boundary, and the mask then truncates it back onto that
 * boundary, yielding a multiple of alignSize.
 */
private def alignSizeUp(size: Long, alignSize: Int): Long = {
  val bumped = size + alignSize - 1
  val clearLowBits = ~(alignSize - 1)
  bumped & clearLowBits
}
303344}
0 commit comments