Commit d13ac55

Hacky approach to copying of UnsafeRows for sort followed by limit.
1 parent 845bea3 commit d13ac55
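
For context, here is a minimal sketch of the query shape this commit targets, mirroring the DataFrame built in the new test; it assumes the 1.5-era `TestSQLContext` and is illustrative only, not part of the commit:

```scala
import org.apache.spark.sql.test.TestSQLContext.implicits._

// Sort the whole dataset, then keep only the first few rows. The limit stops pulling
// from the sort's output early, which is why rows handed out by the unsafe sorter
// must not point into memory pages that the sorter has already freed.
val df = (1 to 100).map(v => Tuple1(v)).toDF("a")
val topTen = df.sort($"a".asc).limit(10).collect()
```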

6 files changed: +104 additions, -44 deletions

sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeFixedWidthAggregationMap.java

Lines changed: 9 additions & 3 deletions
```diff
@@ -120,9 +120,11 @@ public UnsafeFixedWidthAggregationMap(
     this.bufferPool = new ObjectPool(initialCapacity);
 
     InternalRow initRow = initProjection.apply(emptyRow);
-    this.emptyBuffer = new byte[bufferConverter.getSizeRequirement(initRow)];
+    int emptyBufferSize = bufferConverter.getSizeRequirement(initRow);
+    this.emptyBuffer = new byte[emptyBufferSize];
     int writtenLength = bufferConverter.writeRow(
-      initRow, emptyBuffer, PlatformDependent.BYTE_ARRAY_OFFSET, bufferPool);
+      initRow, emptyBuffer, PlatformDependent.BYTE_ARRAY_OFFSET, emptyBufferSize,
+      bufferPool);
     assert (writtenLength == emptyBuffer.length): "Size requirement calculation was wrong!";
     // re-use the empty buffer only when there is no object saved in pool.
     reuseEmptyBuffer = bufferPool.size() == 0;
@@ -142,6 +144,7 @@ public UnsafeRow getAggregationBuffer(InternalRow groupingKey) {
       groupingKey,
       groupingKeyConversionScratchSpace,
       PlatformDependent.BYTE_ARRAY_OFFSET,
+      groupingKeySize,
       keyPool);
     assert (groupingKeySize == actualGroupingKeySize) : "Size requirement calculation was wrong!";
 
@@ -157,7 +160,7 @@ public UnsafeRow getAggregationBuffer(InternalRow groupingKey) {
       // There is some objects referenced by emptyBuffer, so generate a new one
       InternalRow initRow = initProjection.apply(emptyRow);
       bufferConverter.writeRow(initRow, emptyBuffer, PlatformDependent.BYTE_ARRAY_OFFSET,
-        bufferPool);
+        groupingKeySize, bufferPool);
     }
     loc.putNewKey(
       groupingKeyConversionScratchSpace,
@@ -175,6 +178,7 @@ public UnsafeRow getAggregationBuffer(InternalRow groupingKey) {
       address.getBaseObject(),
       address.getBaseOffset(),
       bufferConverter.numFields(),
+      loc.getValueLength(),
       bufferPool
     );
     return currentBuffer;
@@ -214,12 +218,14 @@ public MapEntry next() {
           keyAddress.getBaseObject(),
           keyAddress.getBaseOffset(),
           keyConverter.numFields(),
+          loc.getKeyLength(),
           keyPool
         );
         entry.value.pointTo(
           valueAddress.getBaseObject(),
           valueAddress.getBaseOffset(),
           bufferConverter.numFields(),
+          loc.getValueLength(),
           bufferPool
         );
         return entry;
```

sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java

Lines changed: 25 additions & 2 deletions
```diff
@@ -63,11 +63,15 @@ public final class UnsafeRow extends MutableRow {
 
   public Object getBaseObject() { return baseObject; }
   public long getBaseOffset() { return baseOffset; }
+  public int getSizeInBytes() { return sizeInBytes; }
   public ObjectPool getPool() { return pool; }
 
   /** The number of fields in this row, used for calculating the bitset width (and in assertions) */
   private int numFields;
 
+  /** The size of this row's backing data, in bytes) */
+  private int sizeInBytes;
+
   public int length() { return numFields; }
 
   /** The width of the null tracking bit set, in bytes */
@@ -95,14 +99,17 @@ public UnsafeRow() { }
   * @param baseObject the base object
   * @param baseOffset the offset within the base object
   * @param numFields the number of fields in this row
+  * @param sizeInBytes the size of this row's backing data, in bytes
   * @param pool the object pool to hold arbitrary objects
   */
-  public void pointTo(Object baseObject, long baseOffset, int numFields, ObjectPool pool) {
+  public void pointTo(
+      Object baseObject, long baseOffset, int numFields, int sizeInBytes, ObjectPool pool) {
     assert numFields >= 0 : "numFields should >= 0";
     this.bitSetWidthInBytes = calculateBitSetWidthInBytes(numFields);
     this.baseObject = baseObject;
     this.baseOffset = baseOffset;
     this.numFields = numFields;
+    this.sizeInBytes = sizeInBytes;
     this.pool = pool;
   }
 
@@ -338,7 +345,23 @@ public double getDouble(int i) {
 
   @Override
   public InternalRow copy() {
-    throw new UnsupportedOperationException();
+    if (pool != null) {
+      throw new UnsupportedOperationException(
+        "Copy is not supported for UnsafeRows that use object pools");
+    } else {
+      UnsafeRow rowCopy = new UnsafeRow();
+      final byte[] rowDataCopy = new byte[sizeInBytes];
+      PlatformDependent.copyMemory(
+        baseObject,
+        baseOffset,
+        rowDataCopy,
+        PlatformDependent.BYTE_ARRAY_OFFSET,
+        sizeInBytes
+      );
+      rowCopy.pointTo(
+        rowDataCopy, PlatformDependent.BYTE_ARRAY_OFFSET, numFields, sizeInBytes, null);
+      return rowCopy;
+    }
   }
 
   @Override
```
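
A minimal usage sketch (not part of the commit) of what the new `copy()` enables for a pool-free row; it mirrors the converter round-trip used in `UnsafeRowConverterSuite`, and the concrete field values are illustrative:

```scala
import org.apache.spark.sql.catalyst.expressions.{SpecificMutableRow, UnsafeRow, UnsafeRowConverter}
import org.apache.spark.sql.types.{DataType, IntegerType, LongType}
import org.apache.spark.unsafe.PlatformDependent

val fieldTypes: Array[DataType] = Array(LongType, IntegerType)
val converter = new UnsafeRowConverter(fieldTypes)
val row = new SpecificMutableRow(fieldTypes)
row.setLong(0, 42L)
row.setInt(1, 7)

// Write the row into a long[] buffer using the widened writeRow signature.
val sizeRequired = converter.getSizeRequirement(row)
val buffer = new Array[Long](sizeRequired / 8)
converter.writeRow(row, buffer, PlatformDependent.LONG_ARRAY_OFFSET, sizeRequired, null)

val unsafeRow = new UnsafeRow()
unsafeRow.pointTo(buffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, sizeRequired, null)

// copy() allocates a fresh byte[] of sizeInBytes and re-points a new UnsafeRow at it,
// so the copy survives even if the original backing buffer is reused or freed.
val detached = unsafeRow.copy()
java.util.Arrays.fill(buffer, 0L) // clobber the original backing data
assert(detached.getLong(0) == 42L)
```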

sql/catalyst/src/main/java/org/apache/spark/sql/execution/UnsafeExternalRowSorter.java

Lines changed: 11 additions & 25 deletions
```diff
@@ -54,7 +54,6 @@ final class UnsafeExternalRowSorter {
   private final StructType schema;
   private final UnsafeRowConverter rowConverter;
   private final PrefixComputer prefixComputer;
-  private final ObjectPool objPool = new ObjectPool(128);
   private final UnsafeExternalSorter sorter;
   private byte[] rowConversionBuffer = new byte[1024 * 8];
 
@@ -77,7 +76,7 @@ public UnsafeExternalRowSorter(
       sparkEnv.shuffleMemoryManager(),
       sparkEnv.blockManager(),
       taskContext,
-      new RowComparator(ordering, schema.length(), objPool),
+      new RowComparator(ordering, schema.length(), null),
       prefixComparator,
       4096,
       sparkEnv.conf()
@@ -100,7 +99,7 @@ void insertRow(InternalRow row) throws IOException {
       rowConversionBuffer = new byte[sizeRequirement];
     }
     final int bytesWritten = rowConverter.writeRow(
-      row, rowConversionBuffer, PlatformDependent.BYTE_ARRAY_OFFSET, objPool);
+      row, rowConversionBuffer, PlatformDependent.BYTE_ARRAY_OFFSET, sizeRequirement, null);
     assert (bytesWritten == sizeRequirement);
     final long prefix = prefixComputer.computePrefix(row);
     sorter.insertRecord(
@@ -143,31 +142,18 @@ public boolean hasNext() {
         return sortedIterator.hasNext();
       }
 
-      /**
-       * Called prior to returning this iterator's last row. This copies the row's data into an
-       * on-heap byte array so that the pointer to the row data will not be dangling after the
-       * sorter's memory pages are freed.
-       */
-      private void detachRowFromPage(UnsafeRow row, int rowLength) {
-        final byte[] rowDataCopy = new byte[rowLength];
-        PlatformDependent.copyMemory(
-          row.getBaseObject(),
-          row.getBaseOffset(),
-          rowDataCopy,
-          PlatformDependent.BYTE_ARRAY_OFFSET,
-          rowLength
-        );
-        row.pointTo(rowDataCopy, PlatformDependent.BYTE_ARRAY_OFFSET, numFields, row.getPool());
-      }
-
       @Override
       public InternalRow next() {
         try {
           sortedIterator.loadNext();
           row.pointTo(
-            sortedIterator.getBaseObject(), sortedIterator.getBaseOffset(), numFields, objPool);
+            sortedIterator.getBaseObject(),
+            sortedIterator.getBaseOffset(),
+            numFields,
+            sortedIterator.getRecordLength(),
+            null);
           if (!hasNext()) {
-            detachRowFromPage(row, sortedIterator.getRecordLength());
+            row.copy(); // so that we don't have dangling pointers to freed page
             cleanupResources();
           }
           return row;
@@ -198,7 +184,7 @@ public Iterator<InternalRow> sort(Iterator<InternalRow> inputIterator) throws IO
    * Return true if UnsafeExternalRowSorter can sort rows with the given schema, false otherwise.
    */
   public static boolean supportsSchema(StructType schema) {
-    // TODO: add spilling note.
+    // TODO: add spilling note to explain why we do this for now:
     for (StructField field : schema.fields()) {
       if (UnsafeColumnWriter.forType(field.dataType()) instanceof ObjectUnsafeColumnWriter) {
         return false;
@@ -222,8 +208,8 @@ public RowComparator(Ordering<InternalRow> ordering, int numFields, ObjectPool o
 
     @Override
     public int compare(Object baseObj1, long baseOff1, Object baseObj2, long baseOff2) {
-      row1.pointTo(baseObj1, baseOff1, numFields, objPool);
-      row2.pointTo(baseObj2, baseOff2, numFields, objPool);
+      row1.pointTo(baseObj1, baseOff1, numFields, -1, objPool);
+      row2.pointTo(baseObj2, baseOff2, numFields, -1, objPool);
       return ordering.compare(row1, row2);
     }
   }
```
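
A hedged consumer-side sketch of why the row copying matters here: the iterator returned by `sort()` re-points a single `UnsafeRow` on every `next()` call, so a caller that stops early and retains rows (as a limit does) must detach them. (`RowComparator` can pass -1 for the size because comparing rows only reads fields and never copies them.) The name `sortedRows` below is an assumed stand-in for that iterator:

```scala
import org.apache.spark.sql.catalyst.InternalRow

// `sortedRows` stands in for the Iterator[InternalRow] returned by
// UnsafeExternalRowSorter.sort(). Each next() re-points one reused UnsafeRow, so any
// row kept past the current iteration step must be detached with copy().
def firstN(sortedRows: Iterator[InternalRow], n: Int): Seq[InternalRow] =
  sortedRows.take(n).map(_.copy()).toVector
```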

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverter.scala

Lines changed: 8 additions & 2 deletions
```diff
@@ -70,10 +70,16 @@ class UnsafeRowConverter(fieldTypes: Array[DataType]) {
    * @param row the row to convert
    * @param baseObject the base object of the destination address
    * @param baseOffset the base offset of the destination address
+   * @param rowLengthInBytes the length calculated by `getSizeRequirement(row)`
    * @return the number of bytes written. This should be equal to `getSizeRequirement(row)`.
    */
-  def writeRow(row: InternalRow, baseObject: Object, baseOffset: Long, pool: ObjectPool): Int = {
-    unsafeRow.pointTo(baseObject, baseOffset, writers.length, pool)
+  def writeRow(
+      row: InternalRow,
+      baseObject: Object,
+      baseOffset: Long,
+      rowLengthInBytes: Int,
+      pool: ObjectPool): Int = {
+    unsafeRow.pointTo(baseObject, baseOffset, writers.length, rowLengthInBytes, pool)
 
     if (writers.length > 0) {
       // zero-out the bitset
```

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/UnsafeRowConverterSuite.scala

Lines changed: 24 additions & 12 deletions
```diff
@@ -44,11 +44,13 @@ class UnsafeRowConverterSuite extends SparkFunSuite with Matchers {
     val sizeRequired: Int = converter.getSizeRequirement(row)
     assert(sizeRequired === 8 + (3 * 8))
     val buffer: Array[Long] = new Array[Long](sizeRequired / 8)
-    val numBytesWritten = converter.writeRow(row, buffer, PlatformDependent.LONG_ARRAY_OFFSET, null)
+    val numBytesWritten =
+      converter.writeRow(row, buffer, PlatformDependent.LONG_ARRAY_OFFSET, sizeRequired, null)
     assert(numBytesWritten === sizeRequired)
 
     val unsafeRow = new UnsafeRow()
-    unsafeRow.pointTo(buffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, null)
+    unsafeRow.pointTo(
+      buffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, sizeRequired, null)
     assert(unsafeRow.getLong(0) === 0)
     assert(unsafeRow.getLong(1) === 1)
     assert(unsafeRow.getInt(2) === 2)
@@ -73,12 +75,14 @@ class UnsafeRowConverterSuite extends SparkFunSuite with Matchers {
       ByteArrayMethods.roundNumberOfBytesToNearestWord("Hello".getBytes.length) +
       ByteArrayMethods.roundNumberOfBytesToNearestWord("World".getBytes.length))
     val buffer: Array[Long] = new Array[Long](sizeRequired / 8)
-    val numBytesWritten = converter.writeRow(row, buffer, PlatformDependent.LONG_ARRAY_OFFSET, null)
+    val numBytesWritten = converter.writeRow(
+      row, buffer, PlatformDependent.LONG_ARRAY_OFFSET, sizeRequired, null)
     assert(numBytesWritten === sizeRequired)
 
     val unsafeRow = new UnsafeRow()
     val pool = new ObjectPool(10)
-    unsafeRow.pointTo(buffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, pool)
+    unsafeRow.pointTo(
+      buffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, sizeRequired, pool)
     assert(unsafeRow.getLong(0) === 0)
     assert(unsafeRow.getString(1) === "Hello")
     assert(unsafeRow.get(2) === "World".getBytes)
@@ -111,12 +115,14 @@ class UnsafeRowConverterSuite extends SparkFunSuite with Matchers {
     val sizeRequired: Int = converter.getSizeRequirement(row)
     assert(sizeRequired === 8 + (8 * 3))
     val buffer: Array[Long] = new Array[Long](sizeRequired / 8)
-    val numBytesWritten = converter.writeRow(row, buffer, PlatformDependent.LONG_ARRAY_OFFSET, pool)
+    val numBytesWritten =
+      converter.writeRow(row, buffer, PlatformDependent.LONG_ARRAY_OFFSET, sizeRequired, pool)
     assert(numBytesWritten === sizeRequired)
     assert(pool.size === 2)
 
     val unsafeRow = new UnsafeRow()
-    unsafeRow.pointTo(buffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, pool)
+    unsafeRow.pointTo(
+      buffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, sizeRequired, pool)
     assert(unsafeRow.getLong(0) === 0)
     assert(unsafeRow.get(1) === Decimal(1))
     assert(unsafeRow.get(2) === Array(2))
@@ -142,11 +148,13 @@ class UnsafeRowConverterSuite extends SparkFunSuite with Matchers {
     assert(sizeRequired === 8 + (8 * 4) +
       ByteArrayMethods.roundNumberOfBytesToNearestWord("Hello".getBytes.length))
     val buffer: Array[Long] = new Array[Long](sizeRequired / 8)
-    val numBytesWritten = converter.writeRow(row, buffer, PlatformDependent.LONG_ARRAY_OFFSET, null)
+    val numBytesWritten =
+      converter.writeRow(row, buffer, PlatformDependent.LONG_ARRAY_OFFSET, sizeRequired, null)
     assert(numBytesWritten === sizeRequired)
 
     val unsafeRow = new UnsafeRow()
-    unsafeRow.pointTo(buffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, null)
+    unsafeRow.pointTo(
+      buffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, sizeRequired, null)
     assert(unsafeRow.getLong(0) === 0)
     assert(unsafeRow.getString(1) === "Hello")
     // Date is represented as Int in unsafeRow
@@ -190,12 +198,14 @@ class UnsafeRowConverterSuite extends SparkFunSuite with Matchers {
     val sizeRequired: Int = converter.getSizeRequirement(rowWithAllNullColumns)
     val createdFromNullBuffer: Array[Long] = new Array[Long](sizeRequired / 8)
     val numBytesWritten = converter.writeRow(
-      rowWithAllNullColumns, createdFromNullBuffer, PlatformDependent.LONG_ARRAY_OFFSET, null)
+      rowWithAllNullColumns, createdFromNullBuffer, PlatformDependent.LONG_ARRAY_OFFSET,
+      sizeRequired, null)
     assert(numBytesWritten === sizeRequired)
 
     val createdFromNull = new UnsafeRow()
     createdFromNull.pointTo(
-      createdFromNullBuffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, null)
+      createdFromNullBuffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length,
+      sizeRequired, null)
     for (i <- 0 to fieldTypes.length - 1) {
       assert(createdFromNull.isNullAt(i))
     }
@@ -233,10 +243,12 @@ class UnsafeRowConverterSuite extends SparkFunSuite with Matchers {
     val pool = new ObjectPool(1)
     val setToNullAfterCreationBuffer: Array[Long] = new Array[Long](sizeRequired / 8 + 2)
     converter.writeRow(
-      rowWithNoNullColumns, setToNullAfterCreationBuffer, PlatformDependent.LONG_ARRAY_OFFSET, pool)
+      rowWithNoNullColumns, setToNullAfterCreationBuffer, PlatformDependent.LONG_ARRAY_OFFSET,
+      sizeRequired, pool)
     val setToNullAfterCreation = new UnsafeRow()
     setToNullAfterCreation.pointTo(
-      setToNullAfterCreationBuffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length, pool)
+      setToNullAfterCreationBuffer, PlatformDependent.LONG_ARRAY_OFFSET, fieldTypes.length,
+      sizeRequired, pool)
 
     assert(setToNullAfterCreation.isNullAt(0) === rowWithNoNullColumns.isNullAt(0))
     assert(setToNullAfterCreation.getBoolean(1) === rowWithNoNullColumns.getBoolean(1))
```

sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeExternalSortSuite.scala

Lines changed: 27 additions & 0 deletions
```diff
@@ -36,6 +36,33 @@ class UnsafeExternalSortSuite extends SparkPlanTest with BeforeAndAfterAll {
     TestSQLContext.conf.setConf(SQLConf.CODEGEN_ENABLED, SQLConf.CODEGEN_ENABLED.defaultValue.get)
   }
 
+  ignore("sort followed by limit should not leak memory") {
+    // TODO: this test is going to fail until we implement a proper iterator interface
+    // with a close() method.
+    TestSQLContext.sparkContext.conf.set("spark.unsafe.exceptionOnMemoryLeak", "true")
+    checkAnswer(
+      (1 to 100).map(v => Tuple1(v)).toDF("a"),
+      (child: SparkPlan) => Limit(10, UnsafeExternalSort('a.asc :: Nil, true, child)),
+      (child: SparkPlan) => Limit(10, Sort('a.asc :: Nil, global = true, child)),
+      sortAnswers = false
+    )
+  }
+
+  test("sort followed by limit") {
+    TestSQLContext.sparkContext.conf.set("spark.unsafe.exceptionOnMemoryLeak", "false")
+    try {
+      checkAnswer(
+        (1 to 100).map(v => Tuple1(v)).toDF("a"),
+        (child: SparkPlan) => Limit(10, UnsafeExternalSort('a.asc :: Nil, true, child)),
+        (child: SparkPlan) => Limit(10, Sort('a.asc :: Nil, global = true, child)),
+        sortAnswers = false
+      )
+    } finally {
+      TestSQLContext.sparkContext.conf.set("spark.unsafe.exceptionOnMemoryLeak", "true")
+
+    }
+  }
+
   // Test sorting on different data types
   for (
     dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType)
```
