From 5cdcd4246e586346a8e1ac2242dd795fdb1ae068 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 20 Feb 2015 16:35:12 -0600 Subject: [PATCH 01/97] add some failing tests, though these probably shouldnt actually get merged --- .../spark/broadcast/BroadcastSuite.scala | 8 +++++ .../rdd/LargePartitionCachingSuite.scala | 36 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala diff --git a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala index af3272692d7a1..49815890623df 100644 --- a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala +++ b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala @@ -179,6 +179,14 @@ class BroadcastSuite extends FunSuite with LocalSparkContext { assert(thrown.getMessage.toLowerCase.contains("stopped")) } + test("large broadcast variable") { + //Note this currently fails by killing the whole test runner + sc = new SparkContext("local", "test", httpConf) + val bigArr = new Array[Long]((2.3e9 / 8).toInt) + val bcArr = sc.broadcast(bigArr) + sc.parallelize(1 to 1).map{x => bcArr.value.size}.count() + } + /** * Verify the persistence of state associated with an HttpBroadcast in either local mode or * local-cluster mode (when distributed = true). diff --git a/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala new file mode 100644 index 0000000000000..21c6e5fe3ab50 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.rdd + +import org.apache.spark.SharedSparkContext +import org.apache.spark.storage.StorageLevel +import org.scalatest.FunSuite + +class LargePartitionCachingSuite extends FunSuite with SharedSparkContext { + + def largePartitionRdd = sc.parallelize(1 to 1e6.toInt, 1).map{i => new Array[Byte](2.2e3.toInt)} + + test("memory serialized cache large partitions") { + //this test doesn't actually work, b/c we'll just think we don't have enough memory, + // and so it won't get persisted :( + largePartitionRdd.persist(StorageLevel.MEMORY_ONLY_SER).count() + } + + test("disk cache large partitions") { + largePartitionRdd.persist(StorageLevel.DISK_ONLY).count() + } +} From 03db862833f3c4feef2d72620bc5c9a893dab2f5 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Mon, 23 Feb 2015 14:28:22 -0600 Subject: [PATCH 02/97] steal some code from earlier work of @mridulm --- .../org/apache/spark/io/LargeByteBuffer.scala | 1505 +++++++++++++++++ .../io/WrappedByteArrayOutputStream.scala | 121 ++ .../org/apache/spark/storage/BlockStore.scala | 22 +- 3 files changed, 1640 insertions(+), 8 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala create mode 100644 core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala diff --git a/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala b/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala new file mode 100644 index 0000000000000..509fe186793b6 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala @@ -0,0 +1,1505 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.io + +import java.io.{RandomAccessFile, DataInput, InputStream, OutputStream} +import java.nio.{ByteBuffer, BufferUnderflowException, BufferOverflowException} +import java.nio.channels.{WritableByteChannel, ReadableByteChannel} + +import scala.collection.mutable.{ArrayBuffer, HashSet} + +import org.apache.spark.Logging +import org.apache.spark.io.IOConfig.BufferType +import org.apache.spark.storage.{FileSegment, BlockManager} + +/** + * This is used to control chaining of cleaners. + * For some usecases, invocation of clean on LargeByteBuffer must not immediately clean + * but be tied to clean of other buffers. + * This allows a way to override how/when clean is invoked + * + * TODO: We should revisit this design and see if we can use ref tracking : might be more general + * at higher cost ? 
Punting on it for now + */ +trait BufferCleaner extends Logging { + @volatile private var invoked = false + + protected def doClean(buffer: LargeByteBuffer) + + final def clean(buffer: LargeByteBuffer) { + if (invoked) return + invoked = true + doClean(buffer) + } +} + +/** + * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G + * which ByteBuffers are limited to. + * Externally, it exposes all the api which java.nio.ByteBuffer exposes. + * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. + * Not all the data might be loaded into memory (like disk or tachyon data) - so actual + * memory footprint - heap and vm could be much lower than capacity. + * + * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this + * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! + * + * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this + * will require the file to be kept open (repeatedly opening/closing file is not good + * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is + * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) + * + * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is + * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some + * cases (when we duplicate/slice them). Currently spark does not need this, but might in future + * so relook at it later. + */ +// We should make this constructor private: but for now, +// leaving it public since TachyonStore needs it +class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], + private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { + + // TODO: TEMP code: to flush out potential resource leaks. REMOVE ME + private val allocateLocationThrowable: Throwable = { + if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { + new Throwable("blockId = " + BlockManager.getLookupBlockId) + } else { + null + } + } + private var disposeLocationThrowable: Throwable = null + + @volatile private var allowCleanerOverride = true + @volatile private var cleaner: BufferCleaner = new BufferCleaner { + override def doClean(buffer: LargeByteBuffer) = { + assert (LargeByteBuffer.this == buffer) + doDispose(needRelease = false) + } + } + + // should not be empty + assert (null != inputContainers && ! inputContainers.isEmpty) + // should not have any null's + assert (inputContainers.find(_ == null).isEmpty) + + // println("Num containers = " + inputContainers.size) + + // Position, limit and capacity relevant over the engire LargeByteBuffer + @volatile private var globalPosition = 0L + @volatile private var globalLimit = 0L + @volatile private var currentContainerIndex = 0 + + // The buffers in which the actual data is held. + private var containers: Array[ByteBufferContainer] = null + + // aggregate capacities of the individual buffers. + // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be + // sum of capacity of 0th and 1st block buffer + private var bufferPositionStart: Array[Long] = null + + // Contains the indices of a containers which requires release before subsequent invocation of + // read/write should be serviced. 
This is required since current read/write might have moved the + // position but since we are returning bytebuffers which depend on the validity of the existing + // bytebuffer, we cant release them yet. + private var needReleaseIndices = new HashSet[Int]() + + private val readable = ! inputContainers.exists(! _.isReadable) + private val writable = ! inputContainers.exists(! _.isWritable) + + + // initialize + @volatile private var globalCapacity = { + + // Ensure that there are no empty buffers : messes up with our code : unless it + // is a single buffer (for empty buffer for marker case) + assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) + + containers = { + if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray + } + containers.foreach(_.validate()) + + def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { + val buff = new ArrayBuffer[Long](arr.length + 1) + buff += 0L + + buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) + assert (buff.length == arr.length + 1) + bufferPositionStart = buff.toArray + } + + initializeBufferPositionStart(containers) + + // remove references from inputBuffers + inputContainers.clear() + + globalLimit = bufferPositionStart(containers.length) + globalPosition = 0L + currentContainerIndex = 0 + + assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) + + globalLimit + } + + final def position(): Long = globalPosition + + final def limit(): Long = globalLimit + + final def capacity(): Long = globalCapacity + + final def limit(newLimit: Long) { + if ((newLimit > capacity()) || (newLimit < 0)) { + throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) + } + + globalLimit = newLimit + if (position() > newLimit) position(newLimit) + } + + def skip(skipBy: Long) = position(position() + skipBy) + + private def releasePendingContainers() { + if (! needReleaseIndices.isEmpty) { + val iter = needReleaseIndices.iterator + while (iter.hasNext) { + val index = iter.next() + assert (index >= 0 && index < containers.length) + // It is possible to move from one container to next before the previous + // container was acquired. For example, get forcing move to next container + // since current was exhausted immediatelly followed by a position() + // so the container we moved to was never acquired. + + // assert (containers(index).isAcquired) + // will this always be satisfied ? + // assert (index != currentContainerIndex) + if (containers(index).isAcquired) containers(index).release() + } + needReleaseIndices.clear() + } + } + + private def toNewContainer(newIndex: Int) { + if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { + + assert (currentContainerIndex >= 0) + needReleaseIndices += currentContainerIndex + } + currentContainerIndex = newIndex + } + + // expensive method, sigh ... optimize it later ? + final def position(newPosition: Long) { + + if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() + + if (currentContainerIndex < bufferPositionStart.length - 1 && + newPosition >= bufferPositionStart(currentContainerIndex) && + newPosition < bufferPositionStart(currentContainerIndex + 1)) { + // Same buffer - easy method ... + globalPosition = newPosition + // Changed position - free previously returned buffers. 
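+      // (releasePendingContainers() releases the containers recorded in needReleaseIndices,
+      // i.e. those whose ByteBuffers were handed out before the position moved; the new
+      // current container is re-acquired lazily by the next fetchCurrentBuffer() call.)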
+ releasePendingContainers() + return + } + + // Find appropriate currentContainerIndex + // Since bufferPositionStart is sorted, can be replaced with binary search if required. + // For now, not in the perf critical path since buffers size is very low typically. + var index = 0 + val cLen = containers.length + while (index < cLen) { + if (newPosition >= bufferPositionStart(index) && + newPosition < bufferPositionStart(index + 1)) { + globalPosition = newPosition + toNewContainer(index) + // Changed position - free earlier and previously returned buffers. + releasePendingContainers() + return + } + index += 1 + } + + if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { + // boundary. + globalPosition = newPosition + toNewContainer(cLen) + // Changed position - free earlier and previously returned buffers. + releasePendingContainers() + return + } + + assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + + ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) + } + + + /** + * Clears this buffer. The position is set to zero, the limit is set to + * the capacity, and the mark is discarded. + * + *

Invoke this method before using a sequence of channel-read or + * put operations to fill this buffer. + * + *

This method does not actually erase the data in the buffer, but it + * is named as if it did because it will most often be used in situations + * in which that might as well be the case.

+ */ + final def clear() { + // if (0 == globalCapacity) return + + needReleaseIndices += 0 + globalPosition = 0L + toNewContainer(0) + globalLimit = globalCapacity + + // Now free all pending containers + releasePendingContainers() + } + + /** + * Flips this buffer. The limit is set to the current position and then + * the position is set to zero. If the mark is defined then it is + * discarded. + * + *

After a sequence of channel-read or put operations, invoke + * this method to prepare for a sequence of channel-write or relative + * get operations. + */ + final def flip() { + needReleaseIndices += 0 + globalLimit = globalPosition + globalPosition = 0L + toNewContainer(0) + + // Now free all pending containers + releasePendingContainers() + } + + /** + * Rewinds this buffer. The position is set to zero and the mark is + * discarded. + * + *

Invoke this method before a sequence of channel-write or get + * operations, assuming that the limit has already been set + * appropriately. + */ + final def rewind() { + needReleaseIndices += 0 + globalPosition = 0L + toNewContainer(0) + + // Now free all pending containers + releasePendingContainers() + } + + /** + * Returns the number of elements between the current position and the + * limit.

+ * + * @return The number of elements remaining in this buffer + */ + final def remaining(): Long = { + globalLimit - globalPosition + } + + /** + * Tells whether there are any elements between the current position and + * the limit.

+ * + * @return true if, and only if, there is at least one element + * remaining in this buffer + */ + final def hasRemaining() = { + globalPosition < globalLimit + } + + // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) + + // number of bytes remaining in currently active underlying buffer + private def currentRemaining(): Int = { + if (hasRemaining()) { + // validate currentContainerIndex is valid + assert (globalPosition >= bufferPositionStart(currentContainerIndex) && + globalPosition < bufferPositionStart(currentContainerIndex + 1), + "globalPosition = " + globalPosition + + ", currentContainerIndex = " + currentContainerIndex + + ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) + + currentRemaining0(currentContainerIndex) + } else 0 + } + + // Without any validation : required when we are bumping the index (when validation will fail) ... + private def currentRemaining0(which: Int): Int = { + // currentBuffer().remaining() + math.max(0, math.min(bufferPositionStart(which + 1), + globalLimit) - globalPosition).asInstanceOf[Int] + } + + // Set the approppriate position/limit for the current underlying buffer to mirror our + // the LargeByteBuffer's state. + private def fetchCurrentBuffer(): ByteBuffer = { + releasePendingContainers() + + assert (currentContainerIndex < containers.length) + + val container = containers(currentContainerIndex) + if (! container.isAcquired) { + container.acquire() + } + + assert (container.isAcquired) + if (LargeByteBuffer.enableExpensiveAssert) { + assert (! containers.exists( b => (b ne container) && b.isAcquired)) + } + + assert (currentContainerIndex < bufferPositionStart.length && + globalPosition < bufferPositionStart(currentContainerIndex + 1), + "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + + bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) + + val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). + asInstanceOf[Int] + + val buffer = container.getByteBuffer + buffer.position(buffPosition) + val diff = buffer.capacity - buffPosition + val left = remaining() + if (diff <= left) { + buffer.limit(buffer.capacity()) + } else { + // Can happen if limit() was called. + buffer.limit(buffPosition + left.asInstanceOf[Int]) + } + + buffer + } + + // To be used ONLY to test in suites. + private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { + if ("1" != System.getProperty("SPARK_TESTING")) { + throw new IllegalStateException("This method is to be used ONLY within spark test suites") + } + + fetchCurrentBuffer() + } + + // Expects that the invoker has ensured that this can be safely invoked. + // That is, it wont be invoked when the loop wont terminate. + private def toNonEmptyBuffer() { + + if (! hasRemaining()) { + var newIndex = currentContainerIndex + // Ensure we are in the right block or not. + while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { + newIndex += 1 + } + toNewContainer(newIndex) + // Do not do this - since we might not yet have consumed the buffer which caused EOF right now + /* + // Add last one also, and release it too - since we are at the end of the buffer with nothing + // more pending. 
+ if (newIndex >= 0 && currentContainerIndex < containers.length) { + needReleaseIndices += newIndex + } + */ + assert (currentContainerIndex >= 0) + // releasePendingContainers() + return + } + + var index = currentContainerIndex + while (0 == currentRemaining0(index) && index < containers.length) { + index += 1 + } + assert (currentContainerIndex < containers.length) + toNewContainer(index) + assert (0 != currentRemaining()) + } + + private def assertPreconditions(containerIndex: Int) { + assert (globalPosition >= bufferPositionStart(containerIndex), + "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + + ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) + assert (globalPosition < bufferPositionStart(containerIndex + 1), + "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + + ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) + + assert (globalLimit <= globalCapacity) + assert (containerIndex < containers.length) + } + + + /** + * Attempts to return a ByteBuffer of the requested size. + * It is possible to return a buffer of size smaller than requested + * even though hasRemaining == true + * + * On return, position would have been moved 'ahead' by the size of the buffer returned : + * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer + * + * + * This is used to primarily retrieve content of this buffer to expose via ByteBuffer + * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the + * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer + * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying + * container is a disk backed container, and we make subsequent calls to get(), the returned + * ByteBuffer can be dispose'ed off + * + * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
+ * @return + */ + + private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { + fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) + } + + private def fetchBufferOfSizeImpl(maxChunkSize: Int, + canReleaseContainers: Boolean): ByteBuffer = { + if (canReleaseContainers) releasePendingContainers() + assert (maxChunkSize > 0) + + // not checking for degenerate case of maxChunkSize == 0 + if (globalPosition >= globalLimit) { + // throw exception + throw new BufferUnderflowException() + } + + // Check preconditions : disable these later, since they might be expensive to + // evaluate for every IO op + assertPreconditions(currentContainerIndex) + + val currentBufferRemaining = currentRemaining() + + assert (currentBufferRemaining > 0) + + val size = math.min(currentBufferRemaining, maxChunkSize) + + val newBuffer = if (currentBufferRemaining > maxChunkSize) { + val currentBuffer = fetchCurrentBuffer() + val buff = ByteBufferContainer.createSlice(currentBuffer, + currentBuffer.position(), maxChunkSize) + assert (buff.remaining() == maxChunkSize) + buff + } else { + val currentBuffer = fetchCurrentBuffer() + val buff = currentBuffer.slice() + assert (buff.remaining() == currentBufferRemaining) + buff + } + + assert (size == newBuffer.remaining()) + assert (0 == newBuffer.position()) + assert (size == newBuffer.limit()) + assert (newBuffer.capacity() == newBuffer.limit()) + + globalPosition += newBuffer.remaining + toNonEmptyBuffer() + + newBuffer + } + + // Can we service the read/write from the currently active (underlying) bytebuffer or not. + // For almost all cases, this will return true allowing us to optimize away the more expensive + // computations. + private def localReadWritePossible(size: Int) = + size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) + + + def getLong(): Long = { + assert (readable) + releasePendingContainers() + + if (remaining() < 8) throw new BufferUnderflowException + + if (localReadWritePossible(8)) { + val buff = fetchCurrentBuffer() + assert (buff.remaining() >= 8) + val retval = buff.getLong + globalPosition += 8 + toNonEmptyBuffer() + return retval + } + + val buff = readFully(8) + buff.getLong + } + + def getInt(): Int = { + assert (readable) + releasePendingContainers() + + if (remaining() < 4) throw new BufferUnderflowException + + if (localReadWritePossible(4)) { + val buff = fetchCurrentBuffer() + assert (buff.remaining() >= 4) + val retval = buff.getInt + globalPosition += 4 + toNonEmptyBuffer() + return retval + } + + val buff = readFully(4) + buff.getInt + } + + def getChar(): Char = { + assert (readable) + releasePendingContainers() + + if (remaining() < 2) throw new BufferUnderflowException + + if (localReadWritePossible(2)) { + val buff = fetchCurrentBuffer() + assert (buff.remaining() >= 2) + val retval = buff.getChar + globalPosition += 2 + toNonEmptyBuffer() + return retval + } + + // if slice is becoming too expensive, revisit this ... + val buff = readFully(2) + buff.getChar + } + + def get(): Byte = { + assert (readable) + releasePendingContainers() + + if (! hasRemaining()) throw new BufferUnderflowException + + // If we have remaining bytes, previous invocations MUST have ensured that we are at + // a buffer which has data to be read. 
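+    // (toNonEmptyBuffer(), run at the end of every read/write, skips past exhausted
+    // containers, so the current container is guaranteed to hold at least one readable byte.)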
+ assert (localReadWritePossible(1)) + + val buff = fetchCurrentBuffer() + assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) + val retval = buff.get() + globalPosition += 1 + toNonEmptyBuffer() + + retval + } + + def get(arr: Array[Byte], offset: Int, size: Int): Int = { + assert (readable) + releasePendingContainers() + + LargeByteBuffer.checkOffsets(arr, offset, size) + + // kyro depends on this it seems ? + // assert (size > 0) + if (0 == size) return 0 + + if (! hasRemaining()) return -1 + + if (localReadWritePossible(size)) { + val buff = fetchCurrentBuffer() + assert (buff.remaining() >= size) + buff.get(arr, offset, size) + globalPosition += size + toNonEmptyBuffer() + return size + } + + var remainingSize = math.min(size, remaining()).asInstanceOf[Int] + var currentOffset = offset + + while (remainingSize > 0) { + val buff = fetchBufferOfSize(remainingSize) + val toCopy = math.min(buff.remaining(), remainingSize) + + buff.get(arr, currentOffset, toCopy) + currentOffset += toCopy + remainingSize -= toCopy + } + + currentOffset - offset + } + + + private def createSlice(size: Long): LargeByteBuffer = { + + releasePendingContainers() + + if (remaining() < size) { + // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) + throw new BufferOverflowException + } + + // kyro depends on this it seems ? + // assert (size > 0) + if (0 == size) return LargeByteBuffer.EMPTY_BUFFER + + val arr = new ArrayBuffer[ByteBufferContainer](2) + var totalLeft = size + + // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) + + var containerIndex = currentContainerIndex + while (totalLeft > 0 && hasRemaining()) { + assertPreconditions(containerIndex) + val container = containers(containerIndex) + val currentLeft = currentRemaining0(containerIndex) + + assert (globalPosition + currentLeft <= globalLimit) + assert (globalPosition >= bufferPositionStart(containerIndex) && + (globalPosition < bufferPositionStart(containerIndex + 1))) + + val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] + val sliceSize = math.min(totalLeft, currentLeft) + assert (from >= 0) + assert (sliceSize > 0 && sliceSize <= Int.MaxValue) + + val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) + arr += slice + + globalPosition += sliceSize + totalLeft -= sliceSize + if (currentLeft == sliceSize) containerIndex += 1 + } + + // Using toNonEmptyBuffer instead of directly moving to next here so that + // other checks can be performed there. + toNonEmptyBuffer() + // force cleanup - this is fine since we are not using the buffers directly + // which are actively needed (the returned value is on containers which can + // recreate) + releasePendingContainers() + // free current container if acquired. + if (currentContainerIndex < containers.length) { + containers(currentContainerIndex).release() + } + assert (currentContainerIndex == containerIndex) + + val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) + retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) + retval + } + + // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers + // This is to be used only for writes : and ensures that writes are done into the appropriate + // underlying bytebuffers. 
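+  // Rough usage sketch (illustrative only; `largeBuffer`, `src` and `n` are made-up names):
+  //   val view = largeBuffer.getCompositeWriteBuffer(n)
+  //   view.put(src)   // src: a ByteBuffer with n bytes remaining
+  // The returned value is a slice over the same underlying storage, so the write lands in the
+  // correct containers even when it straddles a container boundary, and this buffer's position
+  // has already advanced past the reserved region.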
+ def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { + assert(writable) + assert(size >= 0) + + createSlice(size) + } + + // get a buffer which is of the specified size and contains data from the underlying buffers + // Note, the actual data might be spread across the underlying buffers. + // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! + private def readFully(size: Int): ByteBuffer = { + assert (readable) + + if (remaining() < size) { + // throw exception + throw new BufferUnderflowException() + } + + // kyro depends on this it seems ? + // assert (size > 0) + if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER + + // Expected to be handled elsewhere. + assert (! localReadWritePossible(size)) + + val localBuff = { + val buff = fetchBufferOfSize(size) + // assert(buff.remaining() <= size) + // if (buff.remaining() == size) return buff + assert(buff.remaining() < size) + ByteBuffer.allocate(size).put(buff) + } + + // assert (localBuff.hasRemaining) + + while (localBuff.hasRemaining) { + val buff = fetchBufferOfSize(localBuff.remaining()) + localBuff.put(buff) + } + + localBuff.flip() + localBuff + } + + + + def put(b: Byte) { + assert (writable) + if (remaining() < 1) { + // logInfo("put byte. remaining = " + remaining() + ", this = " + this) + throw new BufferOverflowException + } + + assert (currentRemaining() > 0) + + fetchCurrentBuffer().put(b) + globalPosition += 1 + // Check to need to bump the index ? + toNonEmptyBuffer() + } + + + def put(buffer: ByteBuffer) { + assert (writable) + if (remaining() < buffer.remaining()) { + throw new BufferOverflowException + } + + val bufferRemaining = buffer.remaining() + if (localReadWritePossible(bufferRemaining)) { + + assert (currentRemaining() >= bufferRemaining) + + fetchCurrentBuffer().put(buffer) + + globalPosition += bufferRemaining + toNonEmptyBuffer() + return + } + + while (buffer.hasRemaining) { + val currentBufferRemaining = currentRemaining() + val bufferRemaining = buffer.remaining() + + if (currentBufferRemaining >= bufferRemaining) { + fetchCurrentBuffer().put(buffer) + globalPosition += bufferRemaining + } else { + // Split across buffers. + val currentBuffer = fetchCurrentBuffer() + assert (currentBuffer.remaining() >= currentBufferRemaining) + val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), + currentBufferRemaining) + assert (sliced.remaining() == currentBufferRemaining) + currentBuffer.put(sliced) + // move buffer pos + buffer.position(buffer.position() + currentBufferRemaining) + + globalPosition += currentBufferRemaining + } + toNonEmptyBuffer() + } + + assert (! hasRemaining() || currentRemaining() > 0) + } + + def put(other: LargeByteBuffer) { + assert (writable) + if (this.remaining() < other.remaining()) { + throw new BufferOverflowException + } + + while (other.hasRemaining()) { + val buffer = other.fetchBufferOfSize(other.currentRemaining()) + this.put(buffer) + } + } + + + def duplicate(): LargeByteBuffer = { + val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) + // We do a duplicate as part of construction - so avoid double duplicate. + // containersCopy ++= containers.map(_.duplicate()) + containersCopy ++= containers + val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) + + // set limit and position (in that order) ... 
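+    // (limit is restored before position so the position() call below is always in bounds:
+    // position(newPosition) throws if newPosition exceeds the current limit.)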
+ retval.limit(this.limit()) + retval.position(this.position()) + + // Now release our containers - if any had been acquired + releasePendingContainers() + + retval + } + + + /** + * 'read' a LargeByteBuffer of size specified and return that. + * Position will be incremented by size + * + * The name might be slightly confusing : rename ? + * + * @param size Amount of data to be read from this buffer and returned + * @return + */ + def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { + if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException + if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException + + + assert (readable) + assert (size >= 0) + + releasePendingContainers() + + if (0 == size) return LargeByteBuffer.EMPTY_BUFFER + + createSlice(size) + } + + + // This is essentially a workaround to exposing underlying buffers + def readFrom(channel: ReadableByteChannel): Long = { + + assert (writable) + releasePendingContainers() + + // this also allows us to avoid nasty corner cases in the loop. + if (! hasRemaining()) { + // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) + throw new BufferOverflowException + } + + var totalBytesRead = 0L + + while (hasRemaining()) { + // read what we can ... + val buffer = fetchCurrentBuffer() + val bufferRemaining = currentRemaining() + val bytesRead = channel.read(buffer) + + if (bytesRead > 0) { + totalBytesRead += bytesRead + // bump position too .. + globalPosition += bytesRead + if (bytesRead >= bufferRemaining) toNonEmptyBuffer() + } + else if (-1 == bytesRead) { + // if we had already read some data in the loop, return that. + if (totalBytesRead > 0) return totalBytesRead + return -1 + } // nothing available to read, retry later. return + else if (0 == bytesRead) { + return totalBytesRead + } + + // toNonEmptyBuffer() + } + + // Cleanup last buffer ? + toNonEmptyBuffer() + totalBytesRead + } + + // This is essentially a workaround to exposing underlying buffers + def readFrom(inStrm: InputStream): Long = { + + assert (writable) + releasePendingContainers() + + // this also allows us to avoid nasty corner cases in the loop. + // if (! hasRemaining()) throw new BufferOverflowException + if (! hasRemaining()) return 0 + + var totalBytesRead = 0L + + val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) + + while (hasRemaining()) { + // read what we can ... note, since there is no gaurantee that underlying buffer might + // expose array() method, we do double copy - from stream to buff and from buff to bytearray. + // see if we can optimize this later ... + val buffer = fetchCurrentBuffer() + val bufferRemaining = buffer.remaining() + val max = math.min(buff.length, bufferRemaining) + val bytesRead = inStrm.read(buff, 0, max) + + if (bytesRead > 0) { + buffer.put(buff, 0, bytesRead) + totalBytesRead += bytesRead + // bump position too .. + globalPosition += bytesRead + // buffer.position(buffer.position + bytesRead) + if (bytesRead >= bufferRemaining) toNonEmptyBuffer() + } + else if (-1 == bytesRead) { + // if we had already read some data in the loop, return that. + if (totalBytesRead > 0) return totalBytesRead + return -1 + } // nothing available to read, retry later. 
return + else if (0 == bytesRead) { + return totalBytesRead + } + + // toNonEmptyBuffer() + } + + totalBytesRead + } + + // This is essentially a workaround to exposing underlying buffers + // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce + // code for performance reasons. + def readFrom(inStrm: DataInput): Long = { + + assert (writable) + releasePendingContainers() + + // this also allows us to avoid nasty corner cases in the loop. + // if (! hasRemaining()) throw new BufferOverflowException + if (! hasRemaining()) return 0 + + var totalBytesRead = 0L + + val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) + + while (hasRemaining()) { + // read what we can ... note, since there is no gaurantee that underlying buffer might + // expose array() method, we do double copy - from stream to buff and from buff to bytearray. + // see if we can optimize this later ... + val buffer = fetchCurrentBuffer() + val bufferRemaining = buffer.remaining() + val max = math.min(buff.length, bufferRemaining) + inStrm.readFully(buff, 0, max) + val bytesRead = max + + if (bytesRead > 0) { + buffer.put(buff, 0, bytesRead) + totalBytesRead += bytesRead + // bump position too .. + globalPosition += bytesRead + // buffer.position(buffer.position() + bytesRead) + if (bytesRead >= bufferRemaining) toNonEmptyBuffer() + } + else if (-1 == bytesRead) { + // if we had already read some data in the loop, return that. + if (totalBytesRead > 0) return totalBytesRead + return -1 + } // nothing available to read, retry later. return + else if (0 == bytesRead) { + return totalBytesRead + } + + // toNonEmptyBuffer() + } + + totalBytesRead + } + + // This is essentially a workaround to exposing underlying buffers + // Note: tries to do it efficiently without needing to load everything into memory + // (particularly for diskbacked buffers, etc). + def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { + + assert (readable) + releasePendingContainers() + + // this also allows us to avoid nasty corner cases in the loop. + if (! hasRemaining()) throw new BufferUnderflowException + + var totalBytesWritten = 0L + + while (hasRemaining()) { + // Write what we can ... + val buffer = fetchCurrentBuffer() + val bufferRemaining = buffer.remaining() + assert (bufferRemaining > 0) + val bytesWritten = channel.write(buffer) + + if (bytesWritten > 0) { + totalBytesWritten += bytesWritten + // bump position too .. + globalPosition += bytesWritten + if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() + assert (! hasRemaining() || currentRemaining() > 0) + } + else if (0 == bytesWritten) { + return totalBytesWritten + } + + // toNonEmptyBuffer() + } + + assert (! hasRemaining()) + if (cleanup) { + free() + } + totalBytesWritten + } + + // This is essentially a workaround to exposing underlying buffers + def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { + + assert (readable) + releasePendingContainers() + + // this also allows us to avoid nasty corner cases in the loop. + if (! hasRemaining()) throw new BufferUnderflowException + + var totalBytesWritten = 0L + val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) + + while (hasRemaining()) { + // write what we can ... note, since there is no gaurantee that underlying buffer might + // expose array() method, we do double copy - from bytearray to buff and from + // buff to outputstream. see if we can optimize this later ... 
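+      // (buff is the TEMP_ARRAY_SIZE scratch array allocated above; each pass copies at most
+      // min(buff.length, bytes left in the current container) and advances globalPosition.)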
+ val buffer = fetchCurrentBuffer() + val bufferRemaining = buffer.remaining() + val size = math.min(bufferRemaining, buff.length) + buffer.get(buff, 0, size) + outStrm.write(buff, 0, size) + + totalBytesWritten += size + // bump position too .. + globalPosition += size + + if (size >= bufferRemaining) toNonEmptyBuffer() + } + + toNonEmptyBuffer() + if (cleanup) { + free() + } + totalBytesWritten + } + + def asInputStream(): InputStream = { + new InputStream() { + override def read(): Int = { + if (! hasRemaining()) return -1 + get() + } + + override def read(arr: Array[Byte], off: Int, len: Int): Int = { + if (! hasRemaining()) return -1 + + get(arr, off, len) + } + + override def available(): Int = { + // current remaining is what can be read without blocking + // anything higher might need disk access/buffer swapping. + /* + val left = remaining() + math.min(left, Int.MaxValue).asInstanceOf[Int] + */ + currentRemaining() + } + } + } + + def getCleaner() = cleaner + + /** + * @param cleaner The previous cleaner, so that the caller can chain them if required. + * @return + */ + private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { + overrideCleaner(cleaner, allowOverride = true) + } + + private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { + if (! this.allowCleanerOverride) { + // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free + return this.cleaner + } + + this.allowCleanerOverride = allowOverride + assert (null != cleaner) + val prev = this.cleaner + this.cleaner = cleaner + // logInfo("Overriding " + prev + " with " + this.cleaner) + prev + } + + private def doReleaseAll() { + for (container <- containers) { + container.release() + } + } + + def free(invokeCleaner: Boolean = true) { + // logInfo("Free on " + this + ", cleaner = " + cleaner) + // always invoking release + doReleaseAll() + + if (invokeCleaner) cleaner.clean(this) + } + + private def doDispose(needRelease: Boolean) { + + if (disposeLocationThrowable ne null) { + logError("Already free'ed earlier at : ", disposeLocationThrowable) + logError("Current at ", new Throwable) + throw new IllegalStateException("Already freed.") + } + disposeLocationThrowable = new Throwable() + + // Forcefully cleanup all + if (needRelease) doReleaseAll() + + // Free in a different loop, in case different containers refer to same resource + // to release (like file) + for (container <- containers) { + container.free() + } + + needReleaseIndices.clear() + + // We should not use this buffer anymore : set the values such that f + // we dont ... + globalPosition = 0 + globalLimit = 0 + globalCapacity = 0 + } + + // copy data over ... MUST be used only for cases where array is known to be + // small to begin with. slightly risky method due to that assumption + def toByteArray(): Array[Byte] = { + val positionBackup = position() + val size = remaining() + if (size > Int.MaxValue) { + throw new IllegalStateException( + "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") + } + + val retval = new Array[Byte](size.asInstanceOf[Int]) + val readSize = get(retval, 0, retval.length) + assert (readSize == retval.length, + "readSize = " + readSize + ", retval.length = " + retval.length) + + position(positionBackup) + + retval + } + + // copy data over ... MUST be used only for cases where array is known to be + // small to begin with. 
slightly risky method due to that assumption + def toByteBuffer(): ByteBuffer = { + ByteBuffer.wrap(toByteArray()) + } + + def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { + val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) + val currentPosition = position() + retval.put(this) + position(currentPosition) + retval.clear() + retval + } + + + + // This is ONLY used for testing : that too as part of development of this and associated classes + // remove before contributing to spark. + def hexDump(): String = { + if (remaining() * 64 > Int.MaxValue) { + throw new UnsupportedOperationException("buffer too large " + remaining()) + } + + val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) + + var perLine = 0 + var first = true + for (b <- toByteArray()) { + perLine += 1 + if (perLine % 8 == 0) { + sb.append('\n') + first = true + } + if (! first) sb.append(' ') + first = false + sb.append(java.lang.Integer.toHexString(b & 0xff)) + } + sb.append('\n') + sb.toString() + } + + override def toString: String = { + val sb: StringBuffer = new StringBuffer + sb.append(getClass.getName) + sb.append(' ') + sb.append(System.identityHashCode(this)) + sb.append("@[pos=") + sb.append(position()) + sb.append(" lim=") + sb.append(limit()) + sb.append(" cap=") + sb.append(capacity()) + sb.append("]") + sb.toString + } + + + + override def finalize(): Unit = { + var marked = false + if (containers ne null) { + if (containers.exists(container => container.isAcquired && container.requireRelease())) { + marked = true + logError("BUG: buffer was not released - and now going out of scope. " + + "Potential resource leak. Allocated at ", allocateLocationThrowable) + containers.foreach(_.release()) + } + if (containers.exists(container => !container.isFreed && container.requireFree())) { + if (!marked) { + logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", + allocateLocationThrowable) + } + else { + logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak") + } + containers.foreach(_.free()) + } + } + super.finalize() + } +} + + +object LargeByteBuffer extends Logging { + + private val noopDisposeFunction = new BufferCleaner() { + protected def doClean(buffer: LargeByteBuffer) { + buffer.free(invokeCleaner = false) + } + } + + val enableExpensiveAssert = false + private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) + val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( + new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) + // Do not allow anyone else to override cleaner + EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) + + // 8K sufficient ? 
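+  // (Scratch-copy chunk size used by readFrom/writeTo for the stream <-> buffer double copy;
+  // it bounds only the per-iteration copy, not buffer capacity.)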
+ private val TEMP_ARRAY_SIZE = 8192 + + /** + * Create a LargeByteBuffer of specified size which is split across + * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory + * ByteBuffer + * + */ + def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { + if (0 == totalSize) { + return EMPTY_BUFFER + } + + assert (totalSize > 0) + + val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) + val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) + val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) + + assert (lastBlockSize > 0) + + val bufferArray = { + val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) + for (index <- 0 until numBlocks - 1) { + val buff = ByteBuffer.allocate(blockSize) + // buff.clear() + arr += new HeapByteBufferContainer(buff, true) + } + arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) + assert (arr.length == numBlocks) + arr + } + + new LargeByteBuffer(bufferArray, false, false) + } + + /** + * Create a LargeByteBuffer of specified size which is split across + * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk + * + */ + private def allocateDiskBuffer(totalSize: Long, + blockManager: BlockManager): LargeByteBuffer = { + if (0 == totalSize) { + return EMPTY_BUFFER + } + + assert (totalSize > 0) + + // Create a file of the specified size. + val file = blockManager.diskBlockManager.createTempBlock()._2 + val raf = new RandomAccessFile(file, "rw") + try { + raf.setLength(totalSize) + } finally { + raf.close() + } + + readWriteDiskSegment(new FileSegment(file, 0, totalSize), + ephemeralDiskBacked = true, blockManager.ioConf) + } + + // The returned buffer takes up ownership of the underlying buffers + // (including dispos'ing that when done) + def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { + val nonEmpty = buffers.filter(_.hasRemaining) + + // cleanup the empty buffers + buffers.filter(! _.hasRemaining).foreach(b => BlockManager.dispose(b)) + + + if (nonEmpty.isEmpty) { + return EMPTY_BUFFER + } + + // slice so that offsets match our requirement + new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => + new HeapByteBufferContainer(b.slice(), true)), false, false) + } + + def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { + // only non empty arrays + val arrays = byteArrays.filter(_.length > 0) + if (0 == arrays.length) return EMPTY_BUFFER + + new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => + new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) + } + + def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { + + if (inputBuffers.isEmpty) return EMPTY_BUFFER + + if (! inputBuffers.exists(_.hasRemaining())) { + if (canDispose) inputBuffers.map(_.free()) + return EMPTY_BUFFER + } + + // release all temp resources acquired + inputBuffers.foreach(buff => buff.releasePendingContainers()) + // free current container if acquired. + inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { + buff.containers(buff.currentContainerIndex).release() + }) + // inputBuffers.foreach(b => b.doReleaseAll()) + + + // Dispose of any empty buffers + if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) + + // Find all containers we need. + val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) + + val containers = buffers.flatMap(_.containers) + assert (! 
containers.isEmpty) + // The in order containers of "buffers" seq constitute the required return value + val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, + // if you cant dispose, then we dont own the buffers : in which case, need duplicate + ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) + + if (canDispose) { + // override dispose of all other buffers. + val disposeFunctions = inputBuffers.map { + buffer => { + (buffer, buffer.overrideCleaner(noopDisposeFunction)) + } + } + + val cleaner = retval.getCleaner() + val newCleaner = new BufferCleaner { + protected def doClean(buffer: LargeByteBuffer) { + + assert (retval == buffer) + // default cleaner. + cleaner.clean(retval) + // not required, since we are within clean anyway. + // retval.free(invokeCleaner = false) + + // retval.doDispose(needRelease = true) + + // This might actually call dispose twice on some (initially) empty buffers, + // which is fine since we now guard against that. + disposeFunctions.foreach(v => v._2.clean(v._1)) + // Call the free method too : so that buffers are marked free ... + disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) + } + } + + val prev = retval.overrideCleaner(newCleaner) + assert (prev == cleaner) + } + + retval + } + + private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { + if (arr == null) { + throw new NullPointerException + } else if (offset < 0 || size < 0 || offset + size > arr.length) { + throw new IndexOutOfBoundsException + } + } + + def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { + if (size <= blockManager.ioConf.maxInMemSize) { + LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) + } else { + LargeByteBuffer.allocateDiskBuffer(size, blockManager) + } + } + + def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, + ephemeralDiskBacked: Boolean): LargeByteBuffer = { + // Split the block into multiple of BlockStore.maxBlockSize + val segmentSize = segment.length + val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] + val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) + val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) + + val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) + + for (index <- 0 until numBlocks - 1) { + buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, + segment.offset + index * blockSize, blockSize), ioConf) + } + + // Last block + buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, + segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) + + new LargeByteBuffer(buffers, false, ephemeralDiskBacked) + } + + def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, + ioConf: IOConfig): LargeByteBuffer = { + + // Split the block into multiple of BlockStore.maxBlockSize + val segmentSize = segment.length + val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] + val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) + val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) + + logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + + ", lastBlockSize = " + lastBlockSize) + val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) + + for (index <- 0 until numBlocks - 1) { + buffers += new ReadWriteFileContainer(new FileSegment(segment.file, + segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) + } + + // Last block + buffers += new ReadWriteFileContainer(new 
FileSegment(segment.file, + segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) + + new LargeByteBuffer(buffers, false, ephemeralDiskBacked) + } +} diff --git a/core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala b/core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala new file mode 100644 index 0000000000000..0dd7e8e736ad6 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.io + +import java.io.OutputStream +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.Logging +import org.apache.spark.io.IOConfig.BufferType + +/** + * byte array backed streams (FastByteArrayOutputStream, ByteArrayOutputStream, etc) are limited to + * array length of 2 gig - since that is the array size limit. + * + * So we move from one to the next as soon as we hit the limit per stream. + * And once done, asBuffers or toByteArrays can be used to pull data as a sequence of bytebuffers + * or byte arrays. + * @param initialSize initial size for the byte array stream ... + */ +class WrappedByteArrayOutputStream(private val initialSize: Int, + ioConf: IOConfig) extends OutputStream with Logging { + + private val maxStreamSize = ioConf.getMaxBlockSize(BufferType.MEMORY) + + private val allStreams = new ArrayBuffer[SparkByteArrayOutputStream](4) + + private var current: SparkByteArrayOutputStream = null + private var currentWritten = 0 + + nextWriter() + + override def flush(): Unit = { + current.flush() + } + + override def write(b: Int): Unit = { + if (currentWritten >= maxStreamSize) { + nextWriter() + } + current.write(b) + currentWritten += 1 + } + + + override def write(b: Array[Byte], off: Int, len: Int): Unit = { + // invariant checks - from OutputStream.java + if (b == null) { + throw new NullPointerException + } else if ((off < 0) || (off > b.length) || (len < 0) || + ((off + len) > b.length) || ((off + len) < 0)) { + throw new IndexOutOfBoundsException + } else if (len == 0) { + return + } + + // Else, write to stream. + + // common case first + if (currentWritten + len < maxStreamSize) { + current.write(b, off, len) + currentWritten += len + return + } + + // We might need to split the write into two streams. 
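+    // Each pass copies min(remaining, maxStreamSize - currentWritten) bytes into the current
+    // stream and rolls over to a fresh SparkByteArrayOutputStream via nextWriter() once the
+    // per-stream limit is hit, so no single backing array ever exceeds maxStreamSize.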
+ var startOff = off + var remaining = len + + while (remaining > 0) { + var toCurrent = math.min(remaining, maxStreamSize - currentWritten) + if (toCurrent > 0) { + current.write(b, startOff, toCurrent) + currentWritten += toCurrent + remaining -= toCurrent + startOff += toCurrent + } + + if (currentWritten >= maxStreamSize) { + // to next + nextWriter() + } + } + } + + def toLargeByteBuffer(): LargeByteBuffer = { + current.compact() + val seq = allStreams.filter(_.size > 0).map(_.toByteBuffer) + val retval = LargeByteBuffer.fromBuffers(seq:_*) + + retval + } + + private def nextWriter() { + if (null != current) { + current.flush() + current.compact() + current = null + } + + current = new SparkByteArrayOutputStream(initialSize, ioConf) + currentWritten = 0 + allStreams += current + } +} + + diff --git a/core/src/main/scala/org/apache/spark/storage/BlockStore.scala b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala index 69985c9759e2d..b14b5e91d1794 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala @@ -17,18 +17,24 @@ package org.apache.spark.storage -import java.nio.ByteBuffer - import scala.collection.mutable.ArrayBuffer import org.apache.spark.Logging +import org.apache.spark.io.LargeByteBuffer /** * Abstract class to store blocks. */ private[spark] abstract class BlockStore(val blockManager: BlockManager) extends Logging { - - def putBytes(blockId: BlockId, bytes: ByteBuffer, level: StorageLevel): PutResult + // TODO: We have inconsistent usage of the bytes in spark. + // In DiskStore, we simply emit bytes to the file without a rewind + // While in memory and tachyon store, we do a rewind. + // Not sure which is correct - since both seem to be working fine in the tests ! + // There is some underlying assumption which is probably unspecified and incorrect + // in a general case. + // Change: consistently modified to do a rewind before calling this method. + // Now, it validates that position == 0 (and so remaining == limit obviously) + def putBytes(blockId: BlockId, bytes: LargeByteBuffer, level: StorageLevel) : PutResult /** * Put in a block and, possibly, also return its content as either bytes or another Iterator. 
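A minimal sketch of the calling convention described in the putBytes TODO above (illustrative
only; the variable names and the store-side check are assumptions, not code from this patch):

    bytes.rewind()                               // caller rewinds: position == 0, remaining == limit
    blockStore.putBytes(blockId, bytes, level)   // implementation may assert(bytes.position() == 0)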
@@ -37,15 +43,15 @@ private[spark] abstract class BlockStore(val blockManager: BlockManager) extends * @return a PutResult that contains the size of the data, as well as the values put if * returnValues is true (if not, the result's data field can be null) */ - def putIterator( + def putValues( blockId: BlockId, values: Iterator[Any], level: StorageLevel, returnValues: Boolean): PutResult - def putArray( + def putValues( blockId: BlockId, - values: Array[Any], + values: ArrayBuffer[Any], level: StorageLevel, returnValues: Boolean): PutResult @@ -54,7 +60,7 @@ private[spark] abstract class BlockStore(val blockManager: BlockManager) extends */ def getSize(blockId: BlockId): Long - def getBytes(blockId: BlockId): Option[ByteBuffer] + def getBytes(blockId: BlockId): Option[LargeByteBuffer] def getValues(blockId: BlockId): Option[Iterator[Any]] From d6337f03a4ac2971a004ef821281723e857f9008 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Tue, 24 Feb 2015 12:18:38 -0600 Subject: [PATCH 03/97] wip -- changed a bunch of types to LargeByteBuffer; discovered problem on replicate() --- .../org/apache/spark/io/LargeByteBuffer.scala | 2961 +++++++++-------- .../io/WrappedByteArrayOutputStream.scala | 242 +- .../apache/spark/storage/BlockManager.scala | 46 +- .../org/apache/spark/storage/BlockStore.scala | 8 +- .../org/apache/spark/storage/DiskStore.scala | 7 +- .../apache/spark/storage/MemoryStore.scala | 4 +- .../org/apache/spark/storage/PutResult.scala | 4 +- .../apache/spark/storage/TachyonStore.scala | 5 +- .../util/LargeByteBufferInputStream.scala | 82 + .../util/LargeByteBufferOutputStream.scala | 45 + .../spark/util/collection/ChainedBuffer.scala | 126 + .../apache/spark/io/LargeByteBufferTest.scala | 57 + .../util/collection/ChainedBufferTest.scala | 135 + 13 files changed, 2173 insertions(+), 1549 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala create mode 100644 core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala create mode 100644 core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala create mode 100644 core/src/test/scala/org/apache/spark/io/LargeByteBufferTest.scala create mode 100644 core/src/test/scala/org/apache/spark/util/collection/ChainedBufferTest.scala diff --git a/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala b/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala index 509fe186793b6..01bd433f55c78 100644 --- a/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala +++ b/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala @@ -21,275 +21,80 @@ import java.io.{RandomAccessFile, DataInput, InputStream, OutputStream} import java.nio.{ByteBuffer, BufferUnderflowException, BufferOverflowException} import java.nio.channels.{WritableByteChannel, ReadableByteChannel} +import org.apache.spark.util.collection.ChainedBuffer + import scala.collection.mutable.{ArrayBuffer, HashSet} import org.apache.spark.Logging -import org.apache.spark.io.IOConfig.BufferType import org.apache.spark.storage.{FileSegment, BlockManager} -/** - * This is used to control chaining of cleaners. - * For some usecases, invocation of clean on LargeByteBuffer must not immediately clean - * but be tied to clean of other buffers. - * This allows a way to override how/when clean is invoked - * - * TODO: We should revisit this design and see if we can use ref tracking : might be more general - * at higher cost ? 
Punting on it for now - */ -trait BufferCleaner extends Logging { - @volatile private var invoked = false - - protected def doClean(buffer: LargeByteBuffer) - - final def clean(buffer: LargeByteBuffer) { - if (invoked) return - invoked = true - doClean(buffer) - } -} - -/** - * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G - * which ByteBuffers are limited to. - * Externally, it exposes all the api which java.nio.ByteBuffer exposes. - * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. - * Not all the data might be loaded into memory (like disk or tachyon data) - so actual - * memory footprint - heap and vm could be much lower than capacity. - * - * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this - * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! - * - * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this - * will require the file to be kept open (repeatedly opening/closing file is not good - * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is - * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) - * - * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is - * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some - * cases (when we duplicate/slice them). Currently spark does not need this, but might in future - * so relook at it later. - */ -// We should make this constructor private: but for now, -// leaving it public since TachyonStore needs it -class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], - private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { - - // TODO: TEMP code: to flush out potential resource leaks. REMOVE ME - private val allocateLocationThrowable: Throwable = { - if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { - new Throwable("blockId = " + BlockManager.getLookupBlockId) - } else { - null - } - } - private var disposeLocationThrowable: Throwable = null - - @volatile private var allowCleanerOverride = true - @volatile private var cleaner: BufferCleaner = new BufferCleaner { - override def doClean(buffer: LargeByteBuffer) = { - assert (LargeByteBuffer.this == buffer) - doDispose(needRelease = false) - } - } - - // should not be empty - assert (null != inputContainers && ! inputContainers.isEmpty) - // should not have any null's - assert (inputContainers.find(_ == null).isEmpty) - - // println("Num containers = " + inputContainers.size) - - // Position, limit and capacity relevant over the engire LargeByteBuffer - @volatile private var globalPosition = 0L - @volatile private var globalLimit = 0L - @volatile private var currentContainerIndex = 0 - - // The buffers in which the actual data is held. - private var containers: Array[ByteBufferContainer] = null - - // aggregate capacities of the individual buffers. - // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be - // sum of capacity of 0th and 1st block buffer - private var bufferPositionStart: Array[Long] = null - // Contains the indices of a containers which requires release before subsequent invocation of - // read/write should be serviced. 
This is required since current read/write might have moved the - // position but since we are returning bytebuffers which depend on the validity of the existing - // bytebuffer, we cant release them yet. - private var needReleaseIndices = new HashSet[Int]() - private val readable = ! inputContainers.exists(! _.isReadable) - private val writable = ! inputContainers.exists(! _.isWritable) +trait LargeByteBuffer { +// def position(): Long +// +// def limit(): Long + def capacity(): Long - // initialize - @volatile private var globalCapacity = { + def get(): Byte //needed for ByteBufferInputStream - // Ensure that there are no empty buffers : messes up with our code : unless it - // is a single buffer (for empty buffer for marker case) - assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) + def get(dst: Array[Byte], offset: Int, length: Int): Unit // for ByteBufferInputStream - containers = { - if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray - } - containers.foreach(_.validate()) + def position(position: Long): Unit //for ByteBufferInputStream - def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { - val buff = new ArrayBuffer[Long](arr.length + 1) - buff += 0L + def position(): Long //for ByteBufferInputStream - buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) - assert (buff.length == arr.length + 1) - bufferPositionStart = buff.toArray - } + /** doesn't copy data, just copies references & offsets */ + def duplicate(): LargeByteBuffer - initializeBufferPositionStart(containers) + def put(bytes: LargeByteBuffer): Unit - // remove references from inputBuffers - inputContainers.clear() + //also need whatever is necessary for ByteArrayOutputStream for BlockManager#dataSerialize - globalLimit = bufferPositionStart(containers.length) - globalPosition = 0L - currentContainerIndex = 0 - - assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) - - globalLimit - } - - final def position(): Long = globalPosition - - final def limit(): Long = globalLimit - - final def capacity(): Long = globalCapacity - - final def limit(newLimit: Long) { - if ((newLimit > capacity()) || (newLimit < 0)) { - throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) - } - - globalLimit = newLimit - if (position() > newLimit) position(newLimit) - } - - def skip(skipBy: Long) = position(position() + skipBy) - - private def releasePendingContainers() { - if (! needReleaseIndices.isEmpty) { - val iter = needReleaseIndices.iterator - while (iter.hasNext) { - val index = iter.next() - assert (index >= 0 && index < containers.length) - // It is possible to move from one container to next before the previous - // container was acquired. For example, get forcing move to next container - // since current was exhausted immediatelly followed by a position() - // so the container we moved to was never acquired. - - // assert (containers(index).isAcquired) - // will this always be satisfied ? - // assert (index != currentContainerIndex) - if (containers(index).isAcquired) containers(index).release() - } - needReleaseIndices.clear() - } - } - - private def toNewContainer(newIndex: Int) { - if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { - - assert (currentContainerIndex >= 0) - needReleaseIndices += currentContainerIndex - } - currentContainerIndex = newIndex - } - - // expensive method, sigh ... 
optimize it later ? - final def position(newPosition: Long) { - - if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() - - if (currentContainerIndex < bufferPositionStart.length - 1 && - newPosition >= bufferPositionStart(currentContainerIndex) && - newPosition < bufferPositionStart(currentContainerIndex + 1)) { - // Same buffer - easy method ... - globalPosition = newPosition - // Changed position - free previously returned buffers. - releasePendingContainers() - return - } - - // Find appropriate currentContainerIndex - // Since bufferPositionStart is sorted, can be replaced with binary search if required. - // For now, not in the perf critical path since buffers size is very low typically. - var index = 0 - val cLen = containers.length - while (index < cLen) { - if (newPosition >= bufferPositionStart(index) && - newPosition < bufferPositionStart(index + 1)) { - globalPosition = newPosition - toNewContainer(index) - // Changed position - free earlier and previously returned buffers. - releasePendingContainers() - return - } - index += 1 - } - - if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { - // boundary. - globalPosition = newPosition - toNewContainer(cLen) - // Changed position - free earlier and previously returned buffers. - releasePendingContainers() - return - } - - assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + - ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) - } + //TODO checks on limit semantics /** - * Clears this buffer. The position is set to zero, the limit is set to - * the capacity, and the mark is discarded. - * - *
-   * <p> Invoke this method before using a sequence of channel-read or
-   * put operations to fill this buffer.
-   *
-   * <p> This method does not actually erase the data in the buffer, but it
-   * is named as if it did because it will most often be used in situations
-   * in which that might as well be the case. </p>
+ * Sets this buffer's limit. If the position is larger than the new limit then it is set to the + * new limit. If the mark is defined and larger than the new limit then it is discarded. */ - final def clear() { - // if (0 == globalCapacity) return - - needReleaseIndices += 0 - globalPosition = 0L - toNewContainer(0) - globalLimit = globalCapacity - - // Now free all pending containers - releasePendingContainers() - } + def limit(newLimit: Long): Unit /** - * Flips this buffer. The limit is set to the current position and then - * the position is set to zero. If the mark is defined then it is - * discarded. - * - *

After a sequence of channel-read or put operations, invoke - * this method to prepare for a sequence of channel-write or relative - * get operations. + * return this buffer's limit + * @return */ - final def flip() { - needReleaseIndices += 0 - globalLimit = globalPosition - globalPosition = 0L - toNewContainer(0) - - // Now free all pending containers - releasePendingContainers() - } + def limit(): Long + +// +// def skip(skipBy: Long): Unit +// +// def position(newPosition: Long): Unit +// +// /** +// * Clears this buffer. The position is set to zero, the limit is set to +// * the capacity, and the mark is discarded. +// * +// *
+//   * <p> Invoke this method before using a sequence of channel-read or
+//   * put operations to fill this buffer.
+//   *
+//   * <p> This method does not actually erase the data in the buffer, but it
+//   * is named as if it did because it will most often be used in situations
+//   * in which that might as well be the case. </p>
+// */ +// def clear(): Unit +// +// /** +// * Flips this buffer. The limit is set to the current position and then +// * the position is set to zero. If the mark is defined then it is +// * discarded. +// * +// *

After a sequence of channel-read or put operations, invoke +// * this method to prepare for a sequence of channel-write or relative +// * get operations. +// */ +// def flip(): Unit /** * Rewinds this buffer. The position is set to zero and the mark is @@ -299,14 +104,7 @@ class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[By * operations, assuming that the limit has already been set * appropriately. */ - final def rewind() { - needReleaseIndices += 0 - globalPosition = 0L - toNewContainer(0) - - // Now free all pending containers - releasePendingContainers() - } + def rewind(): Unit /** * Returns the number of elements between the current position and the @@ -314,1192 +112,1555 @@ class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[By * * @return The number of elements remaining in this buffer */ - final def remaining(): Long = { - globalLimit - globalPosition - } - - /** - * Tells whether there are any elements between the current position and - * the limit.

- * - * @return true if, and only if, there is at least one element - * remaining in this buffer - */ - final def hasRemaining() = { - globalPosition < globalLimit - } - - // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) - - // number of bytes remaining in currently active underlying buffer - private def currentRemaining(): Int = { - if (hasRemaining()) { - // validate currentContainerIndex is valid - assert (globalPosition >= bufferPositionStart(currentContainerIndex) && - globalPosition < bufferPositionStart(currentContainerIndex + 1), - "globalPosition = " + globalPosition + - ", currentContainerIndex = " + currentContainerIndex + - ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) - - currentRemaining0(currentContainerIndex) - } else 0 - } - - // Without any validation : required when we are bumping the index (when validation will fail) ... - private def currentRemaining0(which: Int): Int = { - // currentBuffer().remaining() - math.max(0, math.min(bufferPositionStart(which + 1), - globalLimit) - globalPosition).asInstanceOf[Int] - } - - // Set the approppriate position/limit for the current underlying buffer to mirror our - // the LargeByteBuffer's state. - private def fetchCurrentBuffer(): ByteBuffer = { - releasePendingContainers() - - assert (currentContainerIndex < containers.length) - - val container = containers(currentContainerIndex) - if (! container.isAcquired) { - container.acquire() - } - - assert (container.isAcquired) - if (LargeByteBuffer.enableExpensiveAssert) { - assert (! containers.exists( b => (b ne container) && b.isAcquired)) - } - - assert (currentContainerIndex < bufferPositionStart.length && - globalPosition < bufferPositionStart(currentContainerIndex + 1), - "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + - bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) - - val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). - asInstanceOf[Int] - - val buffer = container.getByteBuffer - buffer.position(buffPosition) - val diff = buffer.capacity - buffPosition - val left = remaining() - if (diff <= left) { - buffer.limit(buffer.capacity()) - } else { - // Can happen if limit() was called. - buffer.limit(buffPosition + left.asInstanceOf[Int]) - } - - buffer - } - - // To be used ONLY to test in suites. - private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { - if ("1" != System.getProperty("SPARK_TESTING")) { - throw new IllegalStateException("This method is to be used ONLY within spark test suites") - } - - fetchCurrentBuffer() - } - - // Expects that the invoker has ensured that this can be safely invoked. - // That is, it wont be invoked when the loop wont terminate. - private def toNonEmptyBuffer() { - - if (! hasRemaining()) { - var newIndex = currentContainerIndex - // Ensure we are in the right block or not. - while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { - newIndex += 1 - } - toNewContainer(newIndex) - // Do not do this - since we might not yet have consumed the buffer which caused EOF right now - /* - // Add last one also, and release it too - since we are at the end of the buffer with nothing - // more pending. 
- if (newIndex >= 0 && currentContainerIndex < containers.length) { - needReleaseIndices += newIndex - } - */ - assert (currentContainerIndex >= 0) - // releasePendingContainers() - return - } - - var index = currentContainerIndex - while (0 == currentRemaining0(index) && index < containers.length) { - index += 1 - } - assert (currentContainerIndex < containers.length) - toNewContainer(index) - assert (0 != currentRemaining()) - } - - private def assertPreconditions(containerIndex: Int) { - assert (globalPosition >= bufferPositionStart(containerIndex), - "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + - ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) - assert (globalPosition < bufferPositionStart(containerIndex + 1), - "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + - ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) - - assert (globalLimit <= globalCapacity) - assert (containerIndex < containers.length) - } - - - /** - * Attempts to return a ByteBuffer of the requested size. - * It is possible to return a buffer of size smaller than requested - * even though hasRemaining == true - * - * On return, position would have been moved 'ahead' by the size of the buffer returned : - * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer - * - * - * This is used to primarily retrieve content of this buffer to expose via ByteBuffer - * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the - * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer - * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying - * container is a disk backed container, and we make subsequent calls to get(), the returned - * ByteBuffer can be dispose'ed off - * - * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
- * @return - */ - - private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { - fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) - } - - private def fetchBufferOfSizeImpl(maxChunkSize: Int, - canReleaseContainers: Boolean): ByteBuffer = { - if (canReleaseContainers) releasePendingContainers() - assert (maxChunkSize > 0) - - // not checking for degenerate case of maxChunkSize == 0 - if (globalPosition >= globalLimit) { - // throw exception - throw new BufferUnderflowException() - } - - // Check preconditions : disable these later, since they might be expensive to - // evaluate for every IO op - assertPreconditions(currentContainerIndex) - - val currentBufferRemaining = currentRemaining() - - assert (currentBufferRemaining > 0) - - val size = math.min(currentBufferRemaining, maxChunkSize) - - val newBuffer = if (currentBufferRemaining > maxChunkSize) { - val currentBuffer = fetchCurrentBuffer() - val buff = ByteBufferContainer.createSlice(currentBuffer, - currentBuffer.position(), maxChunkSize) - assert (buff.remaining() == maxChunkSize) - buff - } else { - val currentBuffer = fetchCurrentBuffer() - val buff = currentBuffer.slice() - assert (buff.remaining() == currentBufferRemaining) - buff - } - - assert (size == newBuffer.remaining()) - assert (0 == newBuffer.position()) - assert (size == newBuffer.limit()) - assert (newBuffer.capacity() == newBuffer.limit()) - - globalPosition += newBuffer.remaining - toNonEmptyBuffer() - - newBuffer - } - - // Can we service the read/write from the currently active (underlying) bytebuffer or not. - // For almost all cases, this will return true allowing us to optimize away the more expensive - // computations. - private def localReadWritePossible(size: Int) = - size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) - - - def getLong(): Long = { - assert (readable) - releasePendingContainers() - - if (remaining() < 8) throw new BufferUnderflowException - - if (localReadWritePossible(8)) { - val buff = fetchCurrentBuffer() - assert (buff.remaining() >= 8) - val retval = buff.getLong - globalPosition += 8 - toNonEmptyBuffer() - return retval - } - - val buff = readFully(8) - buff.getLong - } - - def getInt(): Int = { - assert (readable) - releasePendingContainers() - - if (remaining() < 4) throw new BufferUnderflowException - - if (localReadWritePossible(4)) { - val buff = fetchCurrentBuffer() - assert (buff.remaining() >= 4) - val retval = buff.getInt - globalPosition += 4 - toNonEmptyBuffer() - return retval - } - - val buff = readFully(4) - buff.getInt - } + def remaining(): Long +} - def getChar(): Char = { - assert (readable) - releasePendingContainers() +class ChainedLargeByteBuffer(private[io] val underlying: ChainedBuffer) extends LargeByteBuffer { - if (remaining() < 2) throw new BufferUnderflowException + def capacity = underlying.capacity - if (localReadWritePossible(2)) { - val buff = fetchCurrentBuffer() - assert (buff.remaining() >= 2) - val retval = buff.getChar - globalPosition += 2 - toNonEmptyBuffer() - return retval - } + var _pos = 0l - // if slice is becoming too expensive, revisit this ... - val buff = readFully(2) - buff.getChar + def get(dst: Array[Byte],offset: Int,length: Int): Unit = { + underlying.read(_pos, dst, offset, length) + _pos += length } def get(): Byte = { - assert (readable) - releasePendingContainers() - - if (! 
hasRemaining()) throw new BufferUnderflowException - - // If we have remaining bytes, previous invocations MUST have ensured that we are at - // a buffer which has data to be read. - assert (localReadWritePossible(1)) - - val buff = fetchCurrentBuffer() - assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) - val retval = buff.get() - globalPosition += 1 - toNonEmptyBuffer() - - retval - } - - def get(arr: Array[Byte], offset: Int, size: Int): Int = { - assert (readable) - releasePendingContainers() - - LargeByteBuffer.checkOffsets(arr, offset, size) - - // kyro depends on this it seems ? - // assert (size > 0) - if (0 == size) return 0 - - if (! hasRemaining()) return -1 - - if (localReadWritePossible(size)) { - val buff = fetchCurrentBuffer() - assert (buff.remaining() >= size) - buff.get(arr, offset, size) - globalPosition += size - toNonEmptyBuffer() - return size - } - - var remainingSize = math.min(size, remaining()).asInstanceOf[Int] - var currentOffset = offset - - while (remainingSize > 0) { - val buff = fetchBufferOfSize(remainingSize) - val toCopy = math.min(buff.remaining(), remainingSize) - - buff.get(arr, currentOffset, toCopy) - currentOffset += toCopy - remainingSize -= toCopy - } - - currentOffset - offset - } - - - private def createSlice(size: Long): LargeByteBuffer = { - - releasePendingContainers() - - if (remaining() < size) { - // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) - throw new BufferOverflowException - } - - // kyro depends on this it seems ? - // assert (size > 0) - if (0 == size) return LargeByteBuffer.EMPTY_BUFFER - - val arr = new ArrayBuffer[ByteBufferContainer](2) - var totalLeft = size - - // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) - - var containerIndex = currentContainerIndex - while (totalLeft > 0 && hasRemaining()) { - assertPreconditions(containerIndex) - val container = containers(containerIndex) - val currentLeft = currentRemaining0(containerIndex) - - assert (globalPosition + currentLeft <= globalLimit) - assert (globalPosition >= bufferPositionStart(containerIndex) && - (globalPosition < bufferPositionStart(containerIndex + 1))) - - val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] - val sliceSize = math.min(totalLeft, currentLeft) - assert (from >= 0) - assert (sliceSize > 0 && sliceSize <= Int.MaxValue) - - val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) - arr += slice - - globalPosition += sliceSize - totalLeft -= sliceSize - if (currentLeft == sliceSize) containerIndex += 1 - } - - // Using toNonEmptyBuffer instead of directly moving to next here so that - // other checks can be performed there. - toNonEmptyBuffer() - // force cleanup - this is fine since we are not using the buffers directly - // which are actively needed (the returned value is on containers which can - // recreate) - releasePendingContainers() - // free current container if acquired. - if (currentContainerIndex < containers.length) { - containers(currentContainerIndex).release() - } - assert (currentContainerIndex == containerIndex) - - val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) - retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) - retval - } - - // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers - // This is to be used only for writes : and ensures that writes are done into the appropriate - // underlying bytebuffers. 
- def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { - assert(writable) - assert(size >= 0) - - createSlice(size) - } - - // get a buffer which is of the specified size and contains data from the underlying buffers - // Note, the actual data might be spread across the underlying buffers. - // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! - private def readFully(size: Int): ByteBuffer = { - assert (readable) - - if (remaining() < size) { - // throw exception - throw new BufferUnderflowException() - } - - // kyro depends on this it seems ? - // assert (size > 0) - if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER - - // Expected to be handled elsewhere. - assert (! localReadWritePossible(size)) - - val localBuff = { - val buff = fetchBufferOfSize(size) - // assert(buff.remaining() <= size) - // if (buff.remaining() == size) return buff - assert(buff.remaining() < size) - ByteBuffer.allocate(size).put(buff) - } - - // assert (localBuff.hasRemaining) - - while (localBuff.hasRemaining) { - val buff = fetchBufferOfSize(localBuff.remaining()) - localBuff.put(buff) - } - - localBuff.flip() - localBuff - } - - - - def put(b: Byte) { - assert (writable) - if (remaining() < 1) { - // logInfo("put byte. remaining = " + remaining() + ", this = " + this) - throw new BufferOverflowException - } - - assert (currentRemaining() > 0) - - fetchCurrentBuffer().put(b) - globalPosition += 1 - // Check to need to bump the index ? - toNonEmptyBuffer() - } - - - def put(buffer: ByteBuffer) { - assert (writable) - if (remaining() < buffer.remaining()) { - throw new BufferOverflowException - } - - val bufferRemaining = buffer.remaining() - if (localReadWritePossible(bufferRemaining)) { - - assert (currentRemaining() >= bufferRemaining) - - fetchCurrentBuffer().put(buffer) - - globalPosition += bufferRemaining - toNonEmptyBuffer() - return - } - - while (buffer.hasRemaining) { - val currentBufferRemaining = currentRemaining() - val bufferRemaining = buffer.remaining() - - if (currentBufferRemaining >= bufferRemaining) { - fetchCurrentBuffer().put(buffer) - globalPosition += bufferRemaining - } else { - // Split across buffers. - val currentBuffer = fetchCurrentBuffer() - assert (currentBuffer.remaining() >= currentBufferRemaining) - val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), - currentBufferRemaining) - assert (sliced.remaining() == currentBufferRemaining) - currentBuffer.put(sliced) - // move buffer pos - buffer.position(buffer.position() + currentBufferRemaining) - - globalPosition += currentBufferRemaining - } - toNonEmptyBuffer() - } - - assert (! hasRemaining() || currentRemaining() > 0) - } - - def put(other: LargeByteBuffer) { - assert (writable) - if (this.remaining() < other.remaining()) { - throw new BufferOverflowException - } - - while (other.hasRemaining()) { - val buffer = other.fetchBufferOfSize(other.currentRemaining()) - this.put(buffer) - } - } - - - def duplicate(): LargeByteBuffer = { - val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) - // We do a duplicate as part of construction - so avoid double duplicate. - // containersCopy ++= containers.map(_.duplicate()) - containersCopy ++= containers - val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) - - // set limit and position (in that order) ... 
- retval.limit(this.limit()) - retval.position(this.position()) - - // Now release our containers - if any had been acquired - releasePendingContainers() - - retval - } - - - /** - * 'read' a LargeByteBuffer of size specified and return that. - * Position will be incremented by size - * - * The name might be slightly confusing : rename ? - * - * @param size Amount of data to be read from this buffer and returned - * @return - */ - def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { - if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException - if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException - - - assert (readable) - assert (size >= 0) - - releasePendingContainers() - - if (0 == size) return LargeByteBuffer.EMPTY_BUFFER - - createSlice(size) - } - - - // This is essentially a workaround to exposing underlying buffers - def readFrom(channel: ReadableByteChannel): Long = { - - assert (writable) - releasePendingContainers() - - // this also allows us to avoid nasty corner cases in the loop. - if (! hasRemaining()) { - // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) - throw new BufferOverflowException - } - - var totalBytesRead = 0L - - while (hasRemaining()) { - // read what we can ... - val buffer = fetchCurrentBuffer() - val bufferRemaining = currentRemaining() - val bytesRead = channel.read(buffer) - - if (bytesRead > 0) { - totalBytesRead += bytesRead - // bump position too .. - globalPosition += bytesRead - if (bytesRead >= bufferRemaining) toNonEmptyBuffer() - } - else if (-1 == bytesRead) { - // if we had already read some data in the loop, return that. - if (totalBytesRead > 0) return totalBytesRead - return -1 - } // nothing available to read, retry later. return - else if (0 == bytesRead) { - return totalBytesRead - } - - // toNonEmptyBuffer() - } - - // Cleanup last buffer ? - toNonEmptyBuffer() - totalBytesRead - } - - // This is essentially a workaround to exposing underlying buffers - def readFrom(inStrm: InputStream): Long = { - - assert (writable) - releasePendingContainers() - - // this also allows us to avoid nasty corner cases in the loop. - // if (! hasRemaining()) throw new BufferOverflowException - if (! hasRemaining()) return 0 - - var totalBytesRead = 0L - - val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) - - while (hasRemaining()) { - // read what we can ... note, since there is no gaurantee that underlying buffer might - // expose array() method, we do double copy - from stream to buff and from buff to bytearray. - // see if we can optimize this later ... - val buffer = fetchCurrentBuffer() - val bufferRemaining = buffer.remaining() - val max = math.min(buff.length, bufferRemaining) - val bytesRead = inStrm.read(buff, 0, max) - - if (bytesRead > 0) { - buffer.put(buff, 0, bytesRead) - totalBytesRead += bytesRead - // bump position too .. - globalPosition += bytesRead - // buffer.position(buffer.position + bytesRead) - if (bytesRead >= bufferRemaining) toNonEmptyBuffer() - } - else if (-1 == bytesRead) { - // if we had already read some data in the loop, return that. - if (totalBytesRead > 0) return totalBytesRead - return -1 - } // nothing available to read, retry later. 
return - else if (0 == bytesRead) { - return totalBytesRead - } - - // toNonEmptyBuffer() - } - - totalBytesRead - } - - // This is essentially a workaround to exposing underlying buffers - // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce - // code for performance reasons. - def readFrom(inStrm: DataInput): Long = { - - assert (writable) - releasePendingContainers() - - // this also allows us to avoid nasty corner cases in the loop. - // if (! hasRemaining()) throw new BufferOverflowException - if (! hasRemaining()) return 0 - - var totalBytesRead = 0L - - val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) - - while (hasRemaining()) { - // read what we can ... note, since there is no gaurantee that underlying buffer might - // expose array() method, we do double copy - from stream to buff and from buff to bytearray. - // see if we can optimize this later ... - val buffer = fetchCurrentBuffer() - val bufferRemaining = buffer.remaining() - val max = math.min(buff.length, bufferRemaining) - inStrm.readFully(buff, 0, max) - val bytesRead = max - - if (bytesRead > 0) { - buffer.put(buff, 0, bytesRead) - totalBytesRead += bytesRead - // bump position too .. - globalPosition += bytesRead - // buffer.position(buffer.position() + bytesRead) - if (bytesRead >= bufferRemaining) toNonEmptyBuffer() - } - else if (-1 == bytesRead) { - // if we had already read some data in the loop, return that. - if (totalBytesRead > 0) return totalBytesRead - return -1 - } // nothing available to read, retry later. return - else if (0 == bytesRead) { - return totalBytesRead - } - - // toNonEmptyBuffer() - } - - totalBytesRead - } - - // This is essentially a workaround to exposing underlying buffers - // Note: tries to do it efficiently without needing to load everything into memory - // (particularly for diskbacked buffers, etc). - def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { - - assert (readable) - releasePendingContainers() - - // this also allows us to avoid nasty corner cases in the loop. - if (! hasRemaining()) throw new BufferUnderflowException - - var totalBytesWritten = 0L - - while (hasRemaining()) { - // Write what we can ... - val buffer = fetchCurrentBuffer() - val bufferRemaining = buffer.remaining() - assert (bufferRemaining > 0) - val bytesWritten = channel.write(buffer) - - if (bytesWritten > 0) { - totalBytesWritten += bytesWritten - // bump position too .. - globalPosition += bytesWritten - if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() - assert (! hasRemaining() || currentRemaining() > 0) - } - else if (0 == bytesWritten) { - return totalBytesWritten - } - - // toNonEmptyBuffer() - } - - assert (! hasRemaining()) - if (cleanup) { - free() - } - totalBytesWritten - } - - // This is essentially a workaround to exposing underlying buffers - def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { - - assert (readable) - releasePendingContainers() - - // this also allows us to avoid nasty corner cases in the loop. - if (! hasRemaining()) throw new BufferUnderflowException - - var totalBytesWritten = 0L - val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) - - while (hasRemaining()) { - // write what we can ... note, since there is no gaurantee that underlying buffer might - // expose array() method, we do double copy - from bytearray to buff and from - // buff to outputstream. see if we can optimize this later ... 
- val buffer = fetchCurrentBuffer() - val bufferRemaining = buffer.remaining() - val size = math.min(bufferRemaining, buff.length) - buffer.get(buff, 0, size) - outStrm.write(buff, 0, size) - - totalBytesWritten += size - // bump position too .. - globalPosition += size - - if (size >= bufferRemaining) toNonEmptyBuffer() - } - - toNonEmptyBuffer() - if (cleanup) { - free() - } - totalBytesWritten + val b = underlying.read(_pos) + _pos += 1 + b } - def asInputStream(): InputStream = { - new InputStream() { - override def read(): Int = { - if (! hasRemaining()) return -1 - get() - } - - override def read(arr: Array[Byte], off: Int, len: Int): Int = { - if (! hasRemaining()) return -1 - - get(arr, off, len) - } - - override def available(): Int = { - // current remaining is what can be read without blocking - // anything higher might need disk access/buffer swapping. - /* - val left = remaining() - math.min(left, Int.MaxValue).asInstanceOf[Int] - */ - currentRemaining() - } - } - } - - def getCleaner() = cleaner - - /** - * @param cleaner The previous cleaner, so that the caller can chain them if required. - * @return - */ - private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { - overrideCleaner(cleaner, allowOverride = true) - } - - private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { - if (! this.allowCleanerOverride) { - // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free - return this.cleaner - } - - this.allowCleanerOverride = allowOverride - assert (null != cleaner) - val prev = this.cleaner - this.cleaner = cleaner - // logInfo("Overriding " + prev + " with " + this.cleaner) - prev - } - - private def doReleaseAll() { - for (container <- containers) { - container.release() - } - } - - def free(invokeCleaner: Boolean = true) { - // logInfo("Free on " + this + ", cleaner = " + cleaner) - // always invoking release - doReleaseAll() - - if (invokeCleaner) cleaner.clean(this) - } - - private def doDispose(needRelease: Boolean) { - - if (disposeLocationThrowable ne null) { - logError("Already free'ed earlier at : ", disposeLocationThrowable) - logError("Current at ", new Throwable) - throw new IllegalStateException("Already freed.") - } - disposeLocationThrowable = new Throwable() - - // Forcefully cleanup all - if (needRelease) doReleaseAll() - - // Free in a different loop, in case different containers refer to same resource - // to release (like file) - for (container <- containers) { - container.free() - } - - needReleaseIndices.clear() - - // We should not use this buffer anymore : set the values such that f - // we dont ... - globalPosition = 0 - globalLimit = 0 - globalCapacity = 0 + def put(bytes: LargeByteBuffer): Unit = { + ??? } - // copy data over ... MUST be used only for cases where array is known to be - // small to begin with. slightly risky method due to that assumption - def toByteArray(): Array[Byte] = { - val positionBackup = position() - val size = remaining() - if (size > Int.MaxValue) { - throw new IllegalStateException( - "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") - } - - val retval = new Array[Byte](size.asInstanceOf[Int]) - val readSize = get(retval, 0, retval.length) - assert (readSize == retval.length, - "readSize = " + readSize + ", retval.length = " + retval.length) - - position(positionBackup) - - retval + def position: Long = _pos + def position(position: Long): Unit = { + _pos = position } - - // copy data over ... 
MUST be used only for cases where array is known to be - // small to begin with. slightly risky method due to that assumption - def toByteBuffer(): ByteBuffer = { - ByteBuffer.wrap(toByteArray()) + def remaining(): Long = { + underlying.size - position } - def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { - val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) - val currentPosition = position() - retval.put(this) - position(currentPosition) - retval.clear() - retval + def duplicate(): ChainedLargeByteBuffer = { + new ChainedLargeByteBuffer(underlying) } - - - // This is ONLY used for testing : that too as part of development of this and associated classes - // remove before contributing to spark. - def hexDump(): String = { - if (remaining() * 64 > Int.MaxValue) { - throw new UnsupportedOperationException("buffer too large " + remaining()) - } - - val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) - - var perLine = 0 - var first = true - for (b <- toByteArray()) { - perLine += 1 - if (perLine % 8 == 0) { - sb.append('\n') - first = true - } - if (! first) sb.append(' ') - first = false - sb.append(java.lang.Integer.toHexString(b & 0xff)) - } - sb.append('\n') - sb.toString() + def rewind(): Unit = { + _pos = 0 } - override def toString: String = { - val sb: StringBuffer = new StringBuffer - sb.append(getClass.getName) - sb.append(' ') - sb.append(System.identityHashCode(this)) - sb.append("@[pos=") - sb.append(position()) - sb.append(" lim=") - sb.append(limit()) - sb.append(" cap=") - sb.append(capacity()) - sb.append("]") - sb.toString + def limit(): Long = { + capacity } - - - override def finalize(): Unit = { - var marked = false - if (containers ne null) { - if (containers.exists(container => container.isAcquired && container.requireRelease())) { - marked = true - logError("BUG: buffer was not released - and now going out of scope. " + - "Potential resource leak. Allocated at ", allocateLocationThrowable) - containers.foreach(_.release()) - } - if (containers.exists(container => !container.isFreed && container.requireFree())) { - if (!marked) { - logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", - allocateLocationThrowable) - } - else { - logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak") - } - containers.foreach(_.free()) - } - } - super.finalize() + def limit(newLimit: Long): Unit = { + ??? } } +class WrappedLargeByteBuffer(private val underlying: ByteBuffer) extends LargeByteBuffer { + def capacity = underlying.capacity -object LargeByteBuffer extends Logging { - - private val noopDisposeFunction = new BufferCleaner() { - protected def doClean(buffer: LargeByteBuffer) { - buffer.free(invokeCleaner = false) - } + def get(dst: Array[Byte], offset: Int, length: Int): Unit = { + underlying.get(dst, offset, length) } - val enableExpensiveAssert = false - private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) - val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( - new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) - // Do not allow anyone else to override cleaner - EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) - - // 8K sufficient ? 
- private val TEMP_ARRAY_SIZE = 8192 - - /** - * Create a LargeByteBuffer of specified size which is split across - * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory - * ByteBuffer - * - */ - def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { - if (0 == totalSize) { - return EMPTY_BUFFER - } - - assert (totalSize > 0) - - val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) - val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) - val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) - - assert (lastBlockSize > 0) - - val bufferArray = { - val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) - for (index <- 0 until numBlocks - 1) { - val buff = ByteBuffer.allocate(blockSize) - // buff.clear() - arr += new HeapByteBufferContainer(buff, true) - } - arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) - assert (arr.length == numBlocks) - arr - } - - new LargeByteBuffer(bufferArray, false, false) + def get(): Byte = { + underlying.get() } - /** - * Create a LargeByteBuffer of specified size which is split across - * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk - * - */ - private def allocateDiskBuffer(totalSize: Long, - blockManager: BlockManager): LargeByteBuffer = { - if (0 == totalSize) { - return EMPTY_BUFFER - } - - assert (totalSize > 0) - - // Create a file of the specified size. - val file = blockManager.diskBlockManager.createTempBlock()._2 - val raf = new RandomAccessFile(file, "rw") - try { - raf.setLength(totalSize) - } finally { - raf.close() - } - - readWriteDiskSegment(new FileSegment(file, 0, totalSize), - ephemeralDiskBacked = true, blockManager.ioConf) + def position: Long = underlying.position + def position(position: Long): Unit = { + //XXX check range? + underlying.position(position.toInt) } - - // The returned buffer takes up ownership of the underlying buffers - // (including dispos'ing that when done) - def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { - val nonEmpty = buffers.filter(_.hasRemaining) - - // cleanup the empty buffers - buffers.filter(! _.hasRemaining).foreach(b => BlockManager.dispose(b)) - - - if (nonEmpty.isEmpty) { - return EMPTY_BUFFER - } - - // slice so that offsets match our requirement - new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => - new HeapByteBufferContainer(b.slice(), true)), false, false) + def remaining(): Long = { + underlying.remaining() } - def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { - // only non empty arrays - val arrays = byteArrays.filter(_.length > 0) - if (0 == arrays.length) return EMPTY_BUFFER - - new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => - new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) + def duplicate(): WrappedLargeByteBuffer = { + new WrappedLargeByteBuffer(underlying.duplicate()) } - def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { - - if (inputBuffers.isEmpty) return EMPTY_BUFFER - - if (! inputBuffers.exists(_.hasRemaining())) { - if (canDispose) inputBuffers.map(_.free()) - return EMPTY_BUFFER - } - - // release all temp resources acquired - inputBuffers.foreach(buff => buff.releasePendingContainers()) - // free current container if acquired. 
- inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { - buff.containers(buff.currentContainerIndex).release() - }) - // inputBuffers.foreach(b => b.doReleaseAll()) - - - // Dispose of any empty buffers - if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) - - // Find all containers we need. - val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) - - val containers = buffers.flatMap(_.containers) - assert (! containers.isEmpty) - // The in order containers of "buffers" seq constitute the required return value - val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, - // if you cant dispose, then we dont own the buffers : in which case, need duplicate - ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) - - if (canDispose) { - // override dispose of all other buffers. - val disposeFunctions = inputBuffers.map { - buffer => { - (buffer, buffer.overrideCleaner(noopDisposeFunction)) - } - } - - val cleaner = retval.getCleaner() - val newCleaner = new BufferCleaner { - protected def doClean(buffer: LargeByteBuffer) { - - assert (retval == buffer) - // default cleaner. - cleaner.clean(retval) - // not required, since we are within clean anyway. - // retval.free(invokeCleaner = false) - - // retval.doDispose(needRelease = true) - - // This might actually call dispose twice on some (initially) empty buffers, - // which is fine since we now guard against that. - disposeFunctions.foreach(v => v._2.clean(v._1)) - // Call the free method too : so that buffers are marked free ... - disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) - } - } - - val prev = retval.overrideCleaner(newCleaner) - assert (prev == cleaner) - } - - retval + def rewind(): Unit = { + underlying.duplicate() } - private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { - if (arr == null) { - throw new NullPointerException - } else if (offset < 0 || size < 0 || offset + size > arr.length) { - throw new IndexOutOfBoundsException - } + def limit(): Long = { + underlying.limit() } - def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { - if (size <= blockManager.ioConf.maxInMemSize) { - LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) - } else { - LargeByteBuffer.allocateDiskBuffer(size, blockManager) - } + def limit(newLimit: Long) = { + //XXX check range? 
+ underlying.limit(newLimit.toInt) } - def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, - ephemeralDiskBacked: Boolean): LargeByteBuffer = { - // Split the block into multiple of BlockStore.maxBlockSize - val segmentSize = segment.length - val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] - val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) - val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) - - val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) - - for (index <- 0 until numBlocks - 1) { - buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, - segment.offset + index * blockSize, blockSize), ioConf) - } - - // Last block - buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, - segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) +} - new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +object LargeByteBuffer { + def allocateOnHeap(size: Long, maxChunk: Int): LargeByteBuffer = { + val buffer = ChainedBuffer.withInitialSize(maxChunk, size) + new ChainedLargeByteBuffer(buffer) } +} - def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, - ioConf: IOConfig): LargeByteBuffer = { - - // Split the block into multiple of BlockStore.maxBlockSize - val segmentSize = segment.length - val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] - val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) - val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) - - logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + - ", lastBlockSize = " + lastBlockSize) - val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) - - for (index <- 0 until numBlocks - 1) { - buffers += new ReadWriteFileContainer(new FileSegment(segment.file, - segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) - } - - // Last block - buffers += new ReadWriteFileContainer(new FileSegment(segment.file, - segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) - new LargeByteBuffer(buffers, false, ephemeralDiskBacked) - } -} +// +///** +// * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G +// * which ByteBuffers are limited to. +// * Externally, it exposes all the api which java.nio.ByteBuffer exposes. +// * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. +// * Not all the data might be loaded into memory (like disk or tachyon data) - so actual +// * memory footprint - heap and vm could be much lower than capacity. +// * +// * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this +// * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! +// * +// * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this +// * will require the file to be kept open (repeatedly opening/closing file is not good +// * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is +// * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) +// * +// * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is +// * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some +// * cases (when we duplicate/slice them). 
Currently spark does not need this, but might in future +// * so relook at it later. +// */ +//// We should make this constructor private: but for now, +//// leaving it public since TachyonStore needs it +//class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], +// private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { +// +// // TODO: TEMP code: to flush out potential resource leaks. REMOVE ME +// private val allocateLocationThrowable: Throwable = { +// if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { +// new Throwable("blockId = " + BlockManager.getLookupBlockId) +// } else { +// null +// } +// } +// private var disposeLocationThrowable: Throwable = null +// +// @volatile private var allowCleanerOverride = true +// @volatile private var cleaner: BufferCleaner = new BufferCleaner { +// override def doClean(buffer: LargeByteBuffer) = { +// assert (LargeByteBuffer.this == buffer) +// doDispose(needRelease = false) +// } +// } +// +// // should not be empty +// assert (null != inputContainers && ! inputContainers.isEmpty) +// // should not have any null's +// assert (inputContainers.find(_ == null).isEmpty) +// +// // println("Num containers = " + inputContainers.size) +// +// // Position, limit and capacity relevant over the engire LargeByteBuffer +// @volatile private var globalPosition = 0L +// @volatile private var globalLimit = 0L +// @volatile private var currentContainerIndex = 0 +// +// // The buffers in which the actual data is held. +// private var containers: Array[ByteBufferContainer] = null +// +// // aggregate capacities of the individual buffers. +// // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be +// // sum of capacity of 0th and 1st block buffer +// private var bufferPositionStart: Array[Long] = null +// +// // Contains the indices of a containers which requires release before subsequent invocation of +// // read/write should be serviced. This is required since current read/write might have moved the +// // position but since we are returning bytebuffers which depend on the validity of the existing +// // bytebuffer, we cant release them yet. +// private var needReleaseIndices = new HashSet[Int]() +// +// private val readable = ! inputContainers.exists(! _.isReadable) +// private val writable = ! inputContainers.exists(! 
_.isWritable) +// +// +// // initialize +// @volatile private var globalCapacity = { +// +// // Ensure that there are no empty buffers : messes up with our code : unless it +// // is a single buffer (for empty buffer for marker case) +// assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) +// +// containers = { +// if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray +// } +// containers.foreach(_.validate()) +// +// def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { +// val buff = new ArrayBuffer[Long](arr.length + 1) +// buff += 0L +// +// buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) +// assert (buff.length == arr.length + 1) +// bufferPositionStart = buff.toArray +// } +// +// initializeBufferPositionStart(containers) +// +// // remove references from inputBuffers +// inputContainers.clear() +// +// globalLimit = bufferPositionStart(containers.length) +// globalPosition = 0L +// currentContainerIndex = 0 +// +// assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) +// +// globalLimit +// } +// +// final def position(): Long = globalPosition +// +// final def limit(): Long = globalLimit +// +// final def capacity(): Long = globalCapacity +// +// final def limit(newLimit: Long) { +// if ((newLimit > capacity()) || (newLimit < 0)) { +// throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) +// } +// +// globalLimit = newLimit +// if (position() > newLimit) position(newLimit) +// } +// +// def skip(skipBy: Long) = position(position() + skipBy) +// +// private def releasePendingContainers() { +// if (! needReleaseIndices.isEmpty) { +// val iter = needReleaseIndices.iterator +// while (iter.hasNext) { +// val index = iter.next() +// assert (index >= 0 && index < containers.length) +// // It is possible to move from one container to next before the previous +// // container was acquired. For example, get forcing move to next container +// // since current was exhausted immediatelly followed by a position() +// // so the container we moved to was never acquired. +// +// // assert (containers(index).isAcquired) +// // will this always be satisfied ? +// // assert (index != currentContainerIndex) +// if (containers(index).isAcquired) containers(index).release() +// } +// needReleaseIndices.clear() +// } +// } +// +// private def toNewContainer(newIndex: Int) { +// if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { +// +// assert (currentContainerIndex >= 0) +// needReleaseIndices += currentContainerIndex +// } +// currentContainerIndex = newIndex +// } +// +// // expensive method, sigh ... optimize it later ? +// final def position(newPosition: Long) { +// +// if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() +// +// if (currentContainerIndex < bufferPositionStart.length - 1 && +// newPosition >= bufferPositionStart(currentContainerIndex) && +// newPosition < bufferPositionStart(currentContainerIndex + 1)) { +// // Same buffer - easy method ... +// globalPosition = newPosition +// // Changed position - free previously returned buffers. +// releasePendingContainers() +// return +// } +// +// // Find appropriate currentContainerIndex +// // Since bufferPositionStart is sorted, can be replaced with binary search if required. +// // For now, not in the perf critical path since buffers size is very low typically. 
+// var index = 0 +// val cLen = containers.length +// while (index < cLen) { +// if (newPosition >= bufferPositionStart(index) && +// newPosition < bufferPositionStart(index + 1)) { +// globalPosition = newPosition +// toNewContainer(index) +// // Changed position - free earlier and previously returned buffers. +// releasePendingContainers() +// return +// } +// index += 1 +// } +// +// if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { +// // boundary. +// globalPosition = newPosition +// toNewContainer(cLen) +// // Changed position - free earlier and previously returned buffers. +// releasePendingContainers() +// return +// } +// +// assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + +// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) +// } +// +// +// /** +// * Clears this buffer. The position is set to zero, the limit is set to +// * the capacity, and the mark is discarded. +// * +// *
+// * Invoke this method before using a sequence of channel-read or
+// * put operations to fill this buffer.
+// *
+// * This method does not actually erase the data in the buffer, but it
+// * is named as if it did because it will most often be used in situations
+// * in which that might as well be the case.
+// */ +// final def clear() { +// // if (0 == globalCapacity) return +// +// needReleaseIndices += 0 +// globalPosition = 0L +// toNewContainer(0) +// globalLimit = globalCapacity +// +// // Now free all pending containers +// releasePendingContainers() +// } +// +// /** +// * Flips this buffer. The limit is set to the current position and then +// * the position is set to zero. If the mark is defined then it is +// * discarded. +// * +// *
+// * After a sequence of channel-read or put operations, invoke
+// * this method to prepare for a sequence of channel-write or relative
+// * get operations.
+// */
+// final def flip() {
+// needReleaseIndices += 0
+// globalLimit = globalPosition
+// globalPosition = 0L
+// toNewContainer(0)
+//
+// // Now free all pending containers
+// releasePendingContainers()
+// }
+//
+// /**
+// * Rewinds this buffer. The position is set to zero and the mark is
+// * discarded.
+// *
+// * Invoke this method before a sequence of channel-write or get
+// * operations, assuming that the limit has already been set
+// * appropriately.
+// */
+// final def rewind() {
+// needReleaseIndices += 0
+// globalPosition = 0L
+// toNewContainer(0)
+//
+// // Now free all pending containers
+// releasePendingContainers()
+// }
+//
+// /**
+// * Returns the number of elements between the current position and the
+// * limit.
+// * +// * @return The number of elements remaining in this buffer +// */ +// final def remaining(): Long = { +// globalLimit - globalPosition +// } +// +// /** +// * Tells whether there are any elements between the current position and +// * the limit.
+// * +// * @return true if, and only if, there is at least one element +// * remaining in this buffer +// */ +// final def hasRemaining() = { +// globalPosition < globalLimit +// } +// +// // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) +// +// // number of bytes remaining in currently active underlying buffer +// private def currentRemaining(): Int = { +// if (hasRemaining()) { +// // validate currentContainerIndex is valid +// assert (globalPosition >= bufferPositionStart(currentContainerIndex) && +// globalPosition < bufferPositionStart(currentContainerIndex + 1), +// "globalPosition = " + globalPosition + +// ", currentContainerIndex = " + currentContainerIndex + +// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +// +// currentRemaining0(currentContainerIndex) +// } else 0 +// } +// +// // Without any validation : required when we are bumping the index (when validation will fail) ... +// private def currentRemaining0(which: Int): Int = { +// // currentBuffer().remaining() +// math.max(0, math.min(bufferPositionStart(which + 1), +// globalLimit) - globalPosition).asInstanceOf[Int] +// } +// +// // Set the approppriate position/limit for the current underlying buffer to mirror our +// // the LargeByteBuffer's state. +// private def fetchCurrentBuffer(): ByteBuffer = { +// releasePendingContainers() +// +// assert (currentContainerIndex < containers.length) +// +// val container = containers(currentContainerIndex) +// if (! container.isAcquired) { +// container.acquire() +// } +// +// assert (container.isAcquired) +// if (LargeByteBuffer.enableExpensiveAssert) { +// assert (! containers.exists( b => (b ne container) && b.isAcquired)) +// } +// +// assert (currentContainerIndex < bufferPositionStart.length && +// globalPosition < bufferPositionStart(currentContainerIndex + 1), +// "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + +// bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) +// +// val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). +// asInstanceOf[Int] +// +// val buffer = container.getByteBuffer +// buffer.position(buffPosition) +// val diff = buffer.capacity - buffPosition +// val left = remaining() +// if (diff <= left) { +// buffer.limit(buffer.capacity()) +// } else { +// // Can happen if limit() was called. +// buffer.limit(buffPosition + left.asInstanceOf[Int]) +// } +// +// buffer +// } +// +// // To be used ONLY to test in suites. +// private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { +// if ("1" != System.getProperty("SPARK_TESTING")) { +// throw new IllegalStateException("This method is to be used ONLY within spark test suites") +// } +// +// fetchCurrentBuffer() +// } +// +// // Expects that the invoker has ensured that this can be safely invoked. +// // That is, it wont be invoked when the loop wont terminate. +// private def toNonEmptyBuffer() { +// +// if (! hasRemaining()) { +// var newIndex = currentContainerIndex +// // Ensure we are in the right block or not. +// while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { +// newIndex += 1 +// } +// toNewContainer(newIndex) +// // Do not do this - since we might not yet have consumed the buffer which caused EOF right now +// /* +// // Add last one also, and release it too - since we are at the end of the buffer with nothing +// // more pending. 
+// if (newIndex >= 0 && currentContainerIndex < containers.length) { +// needReleaseIndices += newIndex +// } +// */ +// assert (currentContainerIndex >= 0) +// // releasePendingContainers() +// return +// } +// +// var index = currentContainerIndex +// while (0 == currentRemaining0(index) && index < containers.length) { +// index += 1 +// } +// assert (currentContainerIndex < containers.length) +// toNewContainer(index) +// assert (0 != currentRemaining()) +// } +// +// private def assertPreconditions(containerIndex: Int) { +// assert (globalPosition >= bufferPositionStart(containerIndex), +// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +// assert (globalPosition < bufferPositionStart(containerIndex + 1), +// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +// +// assert (globalLimit <= globalCapacity) +// assert (containerIndex < containers.length) +// } +// +// +// /** +// * Attempts to return a ByteBuffer of the requested size. +// * It is possible to return a buffer of size smaller than requested +// * even though hasRemaining == true +// * +// * On return, position would have been moved 'ahead' by the size of the buffer returned : +// * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer +// * +// * +// * This is used to primarily retrieve content of this buffer to expose via ByteBuffer +// * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the +// * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer +// * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying +// * container is a disk backed container, and we make subsequent calls to get(), the returned +// * ByteBuffer can be dispose'ed off +// * +// * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
+// * @return +// */ +// +// private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { +// fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) +// } +// +// private def fetchBufferOfSizeImpl(maxChunkSize: Int, +// canReleaseContainers: Boolean): ByteBuffer = { +// if (canReleaseContainers) releasePendingContainers() +// assert (maxChunkSize > 0) +// +// // not checking for degenerate case of maxChunkSize == 0 +// if (globalPosition >= globalLimit) { +// // throw exception +// throw new BufferUnderflowException() +// } +// +// // Check preconditions : disable these later, since they might be expensive to +// // evaluate for every IO op +// assertPreconditions(currentContainerIndex) +// +// val currentBufferRemaining = currentRemaining() +// +// assert (currentBufferRemaining > 0) +// +// val size = math.min(currentBufferRemaining, maxChunkSize) +// +// val newBuffer = if (currentBufferRemaining > maxChunkSize) { +// val currentBuffer = fetchCurrentBuffer() +// val buff = ByteBufferContainer.createSlice(currentBuffer, +// currentBuffer.position(), maxChunkSize) +// assert (buff.remaining() == maxChunkSize) +// buff +// } else { +// val currentBuffer = fetchCurrentBuffer() +// val buff = currentBuffer.slice() +// assert (buff.remaining() == currentBufferRemaining) +// buff +// } +// +// assert (size == newBuffer.remaining()) +// assert (0 == newBuffer.position()) +// assert (size == newBuffer.limit()) +// assert (newBuffer.capacity() == newBuffer.limit()) +// +// globalPosition += newBuffer.remaining +// toNonEmptyBuffer() +// +// newBuffer +// } +// +// // Can we service the read/write from the currently active (underlying) bytebuffer or not. +// // For almost all cases, this will return true allowing us to optimize away the more expensive +// // computations. +// private def localReadWritePossible(size: Int) = +// size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) +// +// +// def getLong(): Long = { +// assert (readable) +// releasePendingContainers() +// +// if (remaining() < 8) throw new BufferUnderflowException +// +// if (localReadWritePossible(8)) { +// val buff = fetchCurrentBuffer() +// assert (buff.remaining() >= 8) +// val retval = buff.getLong +// globalPosition += 8 +// toNonEmptyBuffer() +// return retval +// } +// +// val buff = readFully(8) +// buff.getLong +// } +// +// def getInt(): Int = { +// assert (readable) +// releasePendingContainers() +// +// if (remaining() < 4) throw new BufferUnderflowException +// +// if (localReadWritePossible(4)) { +// val buff = fetchCurrentBuffer() +// assert (buff.remaining() >= 4) +// val retval = buff.getInt +// globalPosition += 4 +// toNonEmptyBuffer() +// return retval +// } +// +// val buff = readFully(4) +// buff.getInt +// } +// +// def getChar(): Char = { +// assert (readable) +// releasePendingContainers() +// +// if (remaining() < 2) throw new BufferUnderflowException +// +// if (localReadWritePossible(2)) { +// val buff = fetchCurrentBuffer() +// assert (buff.remaining() >= 2) +// val retval = buff.getChar +// globalPosition += 2 +// toNonEmptyBuffer() +// return retval +// } +// +// // if slice is becoming too expensive, revisit this ... +// val buff = readFully(2) +// buff.getChar +// } +// +// def get(): Byte = { +// assert (readable) +// releasePendingContainers() +// +// if (! hasRemaining()) throw new BufferUnderflowException +// +// // If we have remaining bytes, previous invocations MUST have ensured that we are at +// // a buffer which has data to be read. 
+// assert (localReadWritePossible(1)) +// +// val buff = fetchCurrentBuffer() +// assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) +// val retval = buff.get() +// globalPosition += 1 +// toNonEmptyBuffer() +// +// retval +// } +// +// def get(arr: Array[Byte], offset: Int, size: Int): Int = { +// assert (readable) +// releasePendingContainers() +// +// LargeByteBuffer.checkOffsets(arr, offset, size) +// +// // kyro depends on this it seems ? +// // assert (size > 0) +// if (0 == size) return 0 +// +// if (! hasRemaining()) return -1 +// +// if (localReadWritePossible(size)) { +// val buff = fetchCurrentBuffer() +// assert (buff.remaining() >= size) +// buff.get(arr, offset, size) +// globalPosition += size +// toNonEmptyBuffer() +// return size +// } +// +// var remainingSize = math.min(size, remaining()).asInstanceOf[Int] +// var currentOffset = offset +// +// while (remainingSize > 0) { +// val buff = fetchBufferOfSize(remainingSize) +// val toCopy = math.min(buff.remaining(), remainingSize) +// +// buff.get(arr, currentOffset, toCopy) +// currentOffset += toCopy +// remainingSize -= toCopy +// } +// +// currentOffset - offset +// } +// +// +// private def createSlice(size: Long): LargeByteBuffer = { +// +// releasePendingContainers() +// +// if (remaining() < size) { +// // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) +// throw new BufferOverflowException +// } +// +// // kyro depends on this it seems ? +// // assert (size > 0) +// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +// +// val arr = new ArrayBuffer[ByteBufferContainer](2) +// var totalLeft = size +// +// // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) +// +// var containerIndex = currentContainerIndex +// while (totalLeft > 0 && hasRemaining()) { +// assertPreconditions(containerIndex) +// val container = containers(containerIndex) +// val currentLeft = currentRemaining0(containerIndex) +// +// assert (globalPosition + currentLeft <= globalLimit) +// assert (globalPosition >= bufferPositionStart(containerIndex) && +// (globalPosition < bufferPositionStart(containerIndex + 1))) +// +// val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] +// val sliceSize = math.min(totalLeft, currentLeft) +// assert (from >= 0) +// assert (sliceSize > 0 && sliceSize <= Int.MaxValue) +// +// val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) +// arr += slice +// +// globalPosition += sliceSize +// totalLeft -= sliceSize +// if (currentLeft == sliceSize) containerIndex += 1 +// } +// +// // Using toNonEmptyBuffer instead of directly moving to next here so that +// // other checks can be performed there. +// toNonEmptyBuffer() +// // force cleanup - this is fine since we are not using the buffers directly +// // which are actively needed (the returned value is on containers which can +// // recreate) +// releasePendingContainers() +// // free current container if acquired. 
+// if (currentContainerIndex < containers.length) { +// containers(currentContainerIndex).release() +// } +// assert (currentContainerIndex == containerIndex) +// +// val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) +// retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) +// retval +// } +// +// // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers +// // This is to be used only for writes : and ensures that writes are done into the appropriate +// // underlying bytebuffers. +// def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { +// assert(writable) +// assert(size >= 0) +// +// createSlice(size) +// } +// +// // get a buffer which is of the specified size and contains data from the underlying buffers +// // Note, the actual data might be spread across the underlying buffers. +// // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! +// private def readFully(size: Int): ByteBuffer = { +// assert (readable) +// +// if (remaining() < size) { +// // throw exception +// throw new BufferUnderflowException() +// } +// +// // kyro depends on this it seems ? +// // assert (size > 0) +// if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER +// +// // Expected to be handled elsewhere. +// assert (! localReadWritePossible(size)) +// +// val localBuff = { +// val buff = fetchBufferOfSize(size) +// // assert(buff.remaining() <= size) +// // if (buff.remaining() == size) return buff +// assert(buff.remaining() < size) +// ByteBuffer.allocate(size).put(buff) +// } +// +// // assert (localBuff.hasRemaining) +// +// while (localBuff.hasRemaining) { +// val buff = fetchBufferOfSize(localBuff.remaining()) +// localBuff.put(buff) +// } +// +// localBuff.flip() +// localBuff +// } +// +// +// +// def put(b: Byte) { +// assert (writable) +// if (remaining() < 1) { +// // logInfo("put byte. remaining = " + remaining() + ", this = " + this) +// throw new BufferOverflowException +// } +// +// assert (currentRemaining() > 0) +// +// fetchCurrentBuffer().put(b) +// globalPosition += 1 +// // Check to need to bump the index ? +// toNonEmptyBuffer() +// } +// +// +// def put(buffer: ByteBuffer) { +// assert (writable) +// if (remaining() < buffer.remaining()) { +// throw new BufferOverflowException +// } +// +// val bufferRemaining = buffer.remaining() +// if (localReadWritePossible(bufferRemaining)) { +// +// assert (currentRemaining() >= bufferRemaining) +// +// fetchCurrentBuffer().put(buffer) +// +// globalPosition += bufferRemaining +// toNonEmptyBuffer() +// return +// } +// +// while (buffer.hasRemaining) { +// val currentBufferRemaining = currentRemaining() +// val bufferRemaining = buffer.remaining() +// +// if (currentBufferRemaining >= bufferRemaining) { +// fetchCurrentBuffer().put(buffer) +// globalPosition += bufferRemaining +// } else { +// // Split across buffers. +// val currentBuffer = fetchCurrentBuffer() +// assert (currentBuffer.remaining() >= currentBufferRemaining) +// val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), +// currentBufferRemaining) +// assert (sliced.remaining() == currentBufferRemaining) +// currentBuffer.put(sliced) +// // move buffer pos +// buffer.position(buffer.position() + currentBufferRemaining) +// +// globalPosition += currentBufferRemaining +// } +// toNonEmptyBuffer() +// } +// +// assert (! 
hasRemaining() || currentRemaining() > 0) +// } +// +// def put(other: LargeByteBuffer) { +// assert (writable) +// if (this.remaining() < other.remaining()) { +// throw new BufferOverflowException +// } +// +// while (other.hasRemaining()) { +// val buffer = other.fetchBufferOfSize(other.currentRemaining()) +// this.put(buffer) +// } +// } +// +// +// def duplicate(): LargeByteBuffer = { +// val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) +// // We do a duplicate as part of construction - so avoid double duplicate. +// // containersCopy ++= containers.map(_.duplicate()) +// containersCopy ++= containers +// val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) +// +// // set limit and position (in that order) ... +// retval.limit(this.limit()) +// retval.position(this.position()) +// +// // Now release our containers - if any had been acquired +// releasePendingContainers() +// +// retval +// } +// +// +// /** +// * 'read' a LargeByteBuffer of size specified and return that. +// * Position will be incremented by size +// * +// * The name might be slightly confusing : rename ? +// * +// * @param size Amount of data to be read from this buffer and returned +// * @return +// */ +// def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { +// if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException +// if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException +// +// +// assert (readable) +// assert (size >= 0) +// +// releasePendingContainers() +// +// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +// +// createSlice(size) +// } +// +// +// // This is essentially a workaround to exposing underlying buffers +// def readFrom(channel: ReadableByteChannel): Long = { +// +// assert (writable) +// releasePendingContainers() +// +// // this also allows us to avoid nasty corner cases in the loop. +// if (! hasRemaining()) { +// // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) +// throw new BufferOverflowException +// } +// +// var totalBytesRead = 0L +// +// while (hasRemaining()) { +// // read what we can ... +// val buffer = fetchCurrentBuffer() +// val bufferRemaining = currentRemaining() +// val bytesRead = channel.read(buffer) +// +// if (bytesRead > 0) { +// totalBytesRead += bytesRead +// // bump position too .. +// globalPosition += bytesRead +// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +// } +// else if (-1 == bytesRead) { +// // if we had already read some data in the loop, return that. +// if (totalBytesRead > 0) return totalBytesRead +// return -1 +// } // nothing available to read, retry later. return +// else if (0 == bytesRead) { +// return totalBytesRead +// } +// +// // toNonEmptyBuffer() +// } +// +// // Cleanup last buffer ? +// toNonEmptyBuffer() +// totalBytesRead +// } +// +// // This is essentially a workaround to exposing underlying buffers +// def readFrom(inStrm: InputStream): Long = { +// +// assert (writable) +// releasePendingContainers() +// +// // this also allows us to avoid nasty corner cases in the loop. +// // if (! hasRemaining()) throw new BufferOverflowException +// if (! hasRemaining()) return 0 +// +// var totalBytesRead = 0L +// +// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +// +// while (hasRemaining()) { +// // read what we can ... 
note, since there is no gaurantee that underlying buffer might +// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +// // see if we can optimize this later ... +// val buffer = fetchCurrentBuffer() +// val bufferRemaining = buffer.remaining() +// val max = math.min(buff.length, bufferRemaining) +// val bytesRead = inStrm.read(buff, 0, max) +// +// if (bytesRead > 0) { +// buffer.put(buff, 0, bytesRead) +// totalBytesRead += bytesRead +// // bump position too .. +// globalPosition += bytesRead +// // buffer.position(buffer.position + bytesRead) +// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +// } +// else if (-1 == bytesRead) { +// // if we had already read some data in the loop, return that. +// if (totalBytesRead > 0) return totalBytesRead +// return -1 +// } // nothing available to read, retry later. return +// else if (0 == bytesRead) { +// return totalBytesRead +// } +// +// // toNonEmptyBuffer() +// } +// +// totalBytesRead +// } +// +// // This is essentially a workaround to exposing underlying buffers +// // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce +// // code for performance reasons. +// def readFrom(inStrm: DataInput): Long = { +// +// assert (writable) +// releasePendingContainers() +// +// // this also allows us to avoid nasty corner cases in the loop. +// // if (! hasRemaining()) throw new BufferOverflowException +// if (! hasRemaining()) return 0 +// +// var totalBytesRead = 0L +// +// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +// +// while (hasRemaining()) { +// // read what we can ... note, since there is no gaurantee that underlying buffer might +// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +// // see if we can optimize this later ... +// val buffer = fetchCurrentBuffer() +// val bufferRemaining = buffer.remaining() +// val max = math.min(buff.length, bufferRemaining) +// inStrm.readFully(buff, 0, max) +// val bytesRead = max +// +// if (bytesRead > 0) { +// buffer.put(buff, 0, bytesRead) +// totalBytesRead += bytesRead +// // bump position too .. +// globalPosition += bytesRead +// // buffer.position(buffer.position() + bytesRead) +// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +// } +// else if (-1 == bytesRead) { +// // if we had already read some data in the loop, return that. +// if (totalBytesRead > 0) return totalBytesRead +// return -1 +// } // nothing available to read, retry later. return +// else if (0 == bytesRead) { +// return totalBytesRead +// } +// +// // toNonEmptyBuffer() +// } +// +// totalBytesRead +// } +// +// // This is essentially a workaround to exposing underlying buffers +// // Note: tries to do it efficiently without needing to load everything into memory +// // (particularly for diskbacked buffers, etc). +// def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { +// +// assert (readable) +// releasePendingContainers() +// +// // this also allows us to avoid nasty corner cases in the loop. +// if (! hasRemaining()) throw new BufferUnderflowException +// +// var totalBytesWritten = 0L +// +// while (hasRemaining()) { +// // Write what we can ... +// val buffer = fetchCurrentBuffer() +// val bufferRemaining = buffer.remaining() +// assert (bufferRemaining > 0) +// val bytesWritten = channel.write(buffer) +// +// if (bytesWritten > 0) { +// totalBytesWritten += bytesWritten +// // bump position too .. 
+// globalPosition += bytesWritten +// if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() +// assert (! hasRemaining() || currentRemaining() > 0) +// } +// else if (0 == bytesWritten) { +// return totalBytesWritten +// } +// +// // toNonEmptyBuffer() +// } +// +// assert (! hasRemaining()) +// if (cleanup) { +// free() +// } +// totalBytesWritten +// } +// +// // This is essentially a workaround to exposing underlying buffers +// def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { +// +// assert (readable) +// releasePendingContainers() +// +// // this also allows us to avoid nasty corner cases in the loop. +// if (! hasRemaining()) throw new BufferUnderflowException +// +// var totalBytesWritten = 0L +// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +// +// while (hasRemaining()) { +// // write what we can ... note, since there is no gaurantee that underlying buffer might +// // expose array() method, we do double copy - from bytearray to buff and from +// // buff to outputstream. see if we can optimize this later ... +// val buffer = fetchCurrentBuffer() +// val bufferRemaining = buffer.remaining() +// val size = math.min(bufferRemaining, buff.length) +// buffer.get(buff, 0, size) +// outStrm.write(buff, 0, size) +// +// totalBytesWritten += size +// // bump position too .. +// globalPosition += size +// +// if (size >= bufferRemaining) toNonEmptyBuffer() +// } +// +// toNonEmptyBuffer() +// if (cleanup) { +// free() +// } +// totalBytesWritten +// } +// +// def asInputStream(): InputStream = { +// new InputStream() { +// override def read(): Int = { +// if (! hasRemaining()) return -1 +// get() +// } +// +// override def read(arr: Array[Byte], off: Int, len: Int): Int = { +// if (! hasRemaining()) return -1 +// +// get(arr, off, len) +// } +// +// override def available(): Int = { +// // current remaining is what can be read without blocking +// // anything higher might need disk access/buffer swapping. +// /* +// val left = remaining() +// math.min(left, Int.MaxValue).asInstanceOf[Int] +// */ +// currentRemaining() +// } +// } +// } +// +// def getCleaner() = cleaner +// +// /** +// * @param cleaner The previous cleaner, so that the caller can chain them if required. +// * @return +// */ +// private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { +// overrideCleaner(cleaner, allowOverride = true) +// } +// +// private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { +// if (! 
this.allowCleanerOverride) { +// // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free +// return this.cleaner +// } +// +// this.allowCleanerOverride = allowOverride +// assert (null != cleaner) +// val prev = this.cleaner +// this.cleaner = cleaner +// // logInfo("Overriding " + prev + " with " + this.cleaner) +// prev +// } +// +// private def doReleaseAll() { +// for (container <- containers) { +// container.release() +// } +// } +// +// def free(invokeCleaner: Boolean = true) { +// // logInfo("Free on " + this + ", cleaner = " + cleaner) +// // always invoking release +// doReleaseAll() +// +// if (invokeCleaner) cleaner.clean(this) +// } +// +// private def doDispose(needRelease: Boolean) { +// +// if (disposeLocationThrowable ne null) { +// logError("Already free'ed earlier at : ", disposeLocationThrowable) +// logError("Current at ", new Throwable) +// throw new IllegalStateException("Already freed.") +// } +// disposeLocationThrowable = new Throwable() +// +// // Forcefully cleanup all +// if (needRelease) doReleaseAll() +// +// // Free in a different loop, in case different containers refer to same resource +// // to release (like file) +// for (container <- containers) { +// container.free() +// } +// +// needReleaseIndices.clear() +// +// // We should not use this buffer anymore : set the values such that f +// // we dont ... +// globalPosition = 0 +// globalLimit = 0 +// globalCapacity = 0 +// } +// +// // copy data over ... MUST be used only for cases where array is known to be +// // small to begin with. slightly risky method due to that assumption +// def toByteArray(): Array[Byte] = { +// val positionBackup = position() +// val size = remaining() +// if (size > Int.MaxValue) { +// throw new IllegalStateException( +// "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") +// } +// +// val retval = new Array[Byte](size.asInstanceOf[Int]) +// val readSize = get(retval, 0, retval.length) +// assert (readSize == retval.length, +// "readSize = " + readSize + ", retval.length = " + retval.length) +// +// position(positionBackup) +// +// retval +// } +// +// // copy data over ... MUST be used only for cases where array is known to be +// // small to begin with. slightly risky method due to that assumption +// def toByteBuffer(): ByteBuffer = { +// ByteBuffer.wrap(toByteArray()) +// } +// +// def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { +// val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) +// val currentPosition = position() +// retval.put(this) +// position(currentPosition) +// retval.clear() +// retval +// } +// +// +// +// // This is ONLY used for testing : that too as part of development of this and associated classes +// // remove before contributing to spark. +// def hexDump(): String = { +// if (remaining() * 64 > Int.MaxValue) { +// throw new UnsupportedOperationException("buffer too large " + remaining()) +// } +// +// val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) +// +// var perLine = 0 +// var first = true +// for (b <- toByteArray()) { +// perLine += 1 +// if (perLine % 8 == 0) { +// sb.append('\n') +// first = true +// } +// if (! 
first) sb.append(' ') +// first = false +// sb.append(java.lang.Integer.toHexString(b & 0xff)) +// } +// sb.append('\n') +// sb.toString() +// } +// +// override def toString: String = { +// val sb: StringBuffer = new StringBuffer +// sb.append(getClass.getName) +// sb.append(' ') +// sb.append(System.identityHashCode(this)) +// sb.append("@[pos=") +// sb.append(position()) +// sb.append(" lim=") +// sb.append(limit()) +// sb.append(" cap=") +// sb.append(capacity()) +// sb.append("]") +// sb.toString +// } +// +// +// +// override def finalize(): Unit = { +// var marked = false +// if (containers ne null) { +// if (containers.exists(container => container.isAcquired && container.requireRelease())) { +// marked = true +// logError("BUG: buffer was not released - and now going out of scope. " + +// "Potential resource leak. Allocated at ", allocateLocationThrowable) +// containers.foreach(_.release()) +// } +// if (containers.exists(container => !container.isFreed && container.requireFree())) { +// if (!marked) { +// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", +// allocateLocationThrowable) +// } +// else { +// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak") +// } +// containers.foreach(_.free()) +// } +// } +// super.finalize() +// } +//} +// +// +//object LargeByteBuffer extends Logging { +// +// private val noopDisposeFunction = new BufferCleaner() { +// protected def doClean(buffer: LargeByteBuffer) { +// buffer.free(invokeCleaner = false) +// } +// } +// +// val enableExpensiveAssert = false +// private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) +// val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( +// new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) +// // Do not allow anyone else to override cleaner +// EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) +// +// // 8K sufficient ? +// private val TEMP_ARRAY_SIZE = 8192 +// +// /** +// * Create a LargeByteBuffer of specified size which is split across +// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory +// * ByteBuffer +// * +// */ +// def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { +// if (0 == totalSize) { +// return EMPTY_BUFFER +// } +// +// assert (totalSize > 0) +// +// val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) +// val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) +// val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) +// +// assert (lastBlockSize > 0) +// +// val bufferArray = { +// val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) +// for (index <- 0 until numBlocks - 1) { +// val buff = ByteBuffer.allocate(blockSize) +// // buff.clear() +// arr += new HeapByteBufferContainer(buff, true) +// } +// arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) +// assert (arr.length == numBlocks) +// arr +// } +// +// new LargeByteBuffer(bufferArray, false, false) +// } +// +// /** +// * Create a LargeByteBuffer of specified size which is split across +// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk +// * +// */ +// private def allocateDiskBuffer(totalSize: Long, +// blockManager: BlockManager): LargeByteBuffer = { +// if (0 == totalSize) { +// return EMPTY_BUFFER +// } +// +// assert (totalSize > 0) +// +// // Create a file of the specified size. 
+// val file = blockManager.diskBlockManager.createTempBlock()._2 +// val raf = new RandomAccessFile(file, "rw") +// try { +// raf.setLength(totalSize) +// } finally { +// raf.close() +// } +// +// readWriteDiskSegment(new FileSegment(file, 0, totalSize), +// ephemeralDiskBacked = true, blockManager.ioConf) +// } +// +// // The returned buffer takes up ownership of the underlying buffers +// // (including dispos'ing that when done) +// def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { +// val nonEmpty = buffers.filter(_.hasRemaining) +// +// // cleanup the empty buffers +// buffers.filter(! _.hasRemaining).foreach(b => BlockManager.dispose(b)) +// +// +// if (nonEmpty.isEmpty) { +// return EMPTY_BUFFER +// } +// +// // slice so that offsets match our requirement +// new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => +// new HeapByteBufferContainer(b.slice(), true)), false, false) +// } +// +// def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { +// // only non empty arrays +// val arrays = byteArrays.filter(_.length > 0) +// if (0 == arrays.length) return EMPTY_BUFFER +// +// new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => +// new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) +// } +// +// def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { +// +// if (inputBuffers.isEmpty) return EMPTY_BUFFER +// +// if (! inputBuffers.exists(_.hasRemaining())) { +// if (canDispose) inputBuffers.map(_.free()) +// return EMPTY_BUFFER +// } +// +// // release all temp resources acquired +// inputBuffers.foreach(buff => buff.releasePendingContainers()) +// // free current container if acquired. +// inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { +// buff.containers(buff.currentContainerIndex).release() +// }) +// // inputBuffers.foreach(b => b.doReleaseAll()) +// +// +// // Dispose of any empty buffers +// if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) +// +// // Find all containers we need. +// val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) +// +// val containers = buffers.flatMap(_.containers) +// assert (! containers.isEmpty) +// // The in order containers of "buffers" seq constitute the required return value +// val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, +// // if you cant dispose, then we dont own the buffers : in which case, need duplicate +// ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) +// +// if (canDispose) { +// // override dispose of all other buffers. +// val disposeFunctions = inputBuffers.map { +// buffer => { +// (buffer, buffer.overrideCleaner(noopDisposeFunction)) +// } +// } +// +// val cleaner = retval.getCleaner() +// val newCleaner = new BufferCleaner { +// protected def doClean(buffer: LargeByteBuffer) { +// +// assert (retval == buffer) +// // default cleaner. +// cleaner.clean(retval) +// // not required, since we are within clean anyway. +// // retval.free(invokeCleaner = false) +// +// // retval.doDispose(needRelease = true) +// +// // This might actually call dispose twice on some (initially) empty buffers, +// // which is fine since we now guard against that. +// disposeFunctions.foreach(v => v._2.clean(v._1)) +// // Call the free method too : so that buffers are marked free ... 
+// disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) +// } +// } +// +// val prev = retval.overrideCleaner(newCleaner) +// assert (prev == cleaner) +// } +// +// retval +// } +// +// private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { +// if (arr == null) { +// throw new NullPointerException +// } else if (offset < 0 || size < 0 || offset + size > arr.length) { +// throw new IndexOutOfBoundsException +// } +// } +// +// def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { +// if (size <= blockManager.ioConf.maxInMemSize) { +// LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) +// } else { +// LargeByteBuffer.allocateDiskBuffer(size, blockManager) +// } +// } +// +// def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, +// ephemeralDiskBacked: Boolean): LargeByteBuffer = { +// // Split the block into multiple of BlockStore.maxBlockSize +// val segmentSize = segment.length +// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +// +// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +// +// for (index <- 0 until numBlocks - 1) { +// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +// segment.offset + index * blockSize, blockSize), ioConf) +// } +// +// // Last block +// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) +// +// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +// } +// +// def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, +// ioConf: IOConfig): LargeByteBuffer = { +// +// // Split the block into multiple of BlockStore.maxBlockSize +// val segmentSize = segment.length +// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +// +// logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + +// ", lastBlockSize = " + lastBlockSize) +// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +// +// for (index <- 0 until numBlocks - 1) { +// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +// segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) +// } +// +// // Last block +// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) +// +// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +// } +//} diff --git a/core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala b/core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala index 0dd7e8e736ad6..6657c4f7efc52 100644 --- a/core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala +++ b/core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala @@ -1,121 +1,121 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.io - -import java.io.OutputStream -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.Logging -import org.apache.spark.io.IOConfig.BufferType - -/** - * byte array backed streams (FastByteArrayOutputStream, ByteArrayOutputStream, etc) are limited to - * array length of 2 gig - since that is the array size limit. - * - * So we move from one to the next as soon as we hit the limit per stream. - * And once done, asBuffers or toByteArrays can be used to pull data as a sequence of bytebuffers - * or byte arrays. - * @param initialSize initial size for the byte array stream ... - */ -class WrappedByteArrayOutputStream(private val initialSize: Int, - ioConf: IOConfig) extends OutputStream with Logging { - - private val maxStreamSize = ioConf.getMaxBlockSize(BufferType.MEMORY) - - private val allStreams = new ArrayBuffer[SparkByteArrayOutputStream](4) - - private var current: SparkByteArrayOutputStream = null - private var currentWritten = 0 - - nextWriter() - - override def flush(): Unit = { - current.flush() - } - - override def write(b: Int): Unit = { - if (currentWritten >= maxStreamSize) { - nextWriter() - } - current.write(b) - currentWritten += 1 - } - - - override def write(b: Array[Byte], off: Int, len: Int): Unit = { - // invariant checks - from OutputStream.java - if (b == null) { - throw new NullPointerException - } else if ((off < 0) || (off > b.length) || (len < 0) || - ((off + len) > b.length) || ((off + len) < 0)) { - throw new IndexOutOfBoundsException - } else if (len == 0) { - return - } - - // Else, write to stream. - - // common case first - if (currentWritten + len < maxStreamSize) { - current.write(b, off, len) - currentWritten += len - return - } - - // We might need to split the write into two streams. - var startOff = off - var remaining = len - - while (remaining > 0) { - var toCurrent = math.min(remaining, maxStreamSize - currentWritten) - if (toCurrent > 0) { - current.write(b, startOff, toCurrent) - currentWritten += toCurrent - remaining -= toCurrent - startOff += toCurrent - } - - if (currentWritten >= maxStreamSize) { - // to next - nextWriter() - } - } - } - - def toLargeByteBuffer(): LargeByteBuffer = { - current.compact() - val seq = allStreams.filter(_.size > 0).map(_.toByteBuffer) - val retval = LargeByteBuffer.fromBuffers(seq:_*) - - retval - } - - private def nextWriter() { - if (null != current) { - current.flush() - current.compact() - current = null - } - - current = new SparkByteArrayOutputStream(initialSize, ioConf) - currentWritten = 0 - allStreams += current - } -} - - +///* +// * Licensed to the Apache Software Foundation (ASF) under one or more +// * contributor license agreements. See the NOTICE file distributed with +// * this work for additional information regarding copyright ownership. 
+// * The ASF licenses this file to You under the Apache License, Version 2.0 +// * (the "License"); you may not use this file except in compliance with +// * the License. You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// */ +// +//package org.apache.spark.io +// +//import java.io.OutputStream +//import scala.collection.mutable.ArrayBuffer +// +//import org.apache.spark.Logging +//import org.apache.spark.io.IOConfig.BufferType +// +///** +// * byte array backed streams (FastByteArrayOutputStream, ByteArrayOutputStream, etc) are limited to +// * array length of 2 gig - since that is the array size limit. +// * +// * So we move from one to the next as soon as we hit the limit per stream. +// * And once done, asBuffers or toByteArrays can be used to pull data as a sequence of bytebuffers +// * or byte arrays. +// * @param initialSize initial size for the byte array stream ... +// */ +//class WrappedByteArrayOutputStream(private val initialSize: Int, +// ioConf: IOConfig) extends OutputStream with Logging { +// +// private val maxStreamSize = ioConf.getMaxBlockSize(BufferType.MEMORY) +// +// private val allStreams = new ArrayBuffer[SparkByteArrayOutputStream](4) +// +// private var current: SparkByteArrayOutputStream = null +// private var currentWritten = 0 +// +// nextWriter() +// +// override def flush(): Unit = { +// current.flush() +// } +// +// override def write(b: Int): Unit = { +// if (currentWritten >= maxStreamSize) { +// nextWriter() +// } +// current.write(b) +// currentWritten += 1 +// } +// +// +// override def write(b: Array[Byte], off: Int, len: Int): Unit = { +// // invariant checks - from OutputStream.java +// if (b == null) { +// throw new NullPointerException +// } else if ((off < 0) || (off > b.length) || (len < 0) || +// ((off + len) > b.length) || ((off + len) < 0)) { +// throw new IndexOutOfBoundsException +// } else if (len == 0) { +// return +// } +// +// // Else, write to stream. +// +// // common case first +// if (currentWritten + len < maxStreamSize) { +// current.write(b, off, len) +// currentWritten += len +// return +// } +// +// // We might need to split the write into two streams. 
+// var startOff = off +// var remaining = len +// +// while (remaining > 0) { +// var toCurrent = math.min(remaining, maxStreamSize - currentWritten) +// if (toCurrent > 0) { +// current.write(b, startOff, toCurrent) +// currentWritten += toCurrent +// remaining -= toCurrent +// startOff += toCurrent +// } +// +// if (currentWritten >= maxStreamSize) { +// // to next +// nextWriter() +// } +// } +// } +// +// def toLargeByteBuffer(): LargeByteBuffer = { +// current.compact() +// val seq = allStreams.filter(_.size > 0).map(_.toByteBuffer) +// val retval = LargeByteBuffer.fromBuffers(seq:_*) +// +// retval +// } +// +// private def nextWriter() { +// if (null != current) { +// current.flush() +// current.compact() +// current = null +// } +// +// current = new SparkByteArrayOutputStream(initialSize, ioConf) +// currentWritten = 0 +// allStreams += current +// } +//} +// +// diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 86dbd89f0ffb8..ad895ff338d54 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -31,7 +31,7 @@ import sun.nio.ch.DirectBuffer import org.apache.spark._ import org.apache.spark.executor._ -import org.apache.spark.io.CompressionCodec +import org.apache.spark.io.{WrappedLargeByteBuffer, ChainedLargeByteBuffer, LargeByteBuffer, CompressionCodec} import org.apache.spark.network._ import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.netty.SparkTransportConf @@ -43,7 +43,7 @@ import org.apache.spark.shuffle.hash.HashShuffleManager import org.apache.spark.util._ private[spark] sealed trait BlockValues -private[spark] case class ByteBufferValues(buffer: ByteBuffer) extends BlockValues +private[spark] case class ByteBufferValues(buffer: LargeByteBuffer) extends BlockValues private[spark] case class IteratorValues(iterator: Iterator[Any]) extends BlockValues private[spark] case class ArrayValues(buffer: Array[Any]) extends BlockValues @@ -78,6 +78,9 @@ private[spark] class BlockManager( val diskBlockManager = new DiskBlockManager(this, conf) + //XXX + val largeByteBufferChunkSize = 65536 + private val blockInfo = new TimeStampedHashMap[BlockId, BlockInfo] // Actual storage of where blocks are kept @@ -318,7 +321,7 @@ private[spark] class BlockManager( * Put the block locally, using the given storage level. */ override def putBlockData(blockId: BlockId, data: ManagedBuffer, level: StorageLevel): Unit = { - putBytes(blockId, data.nioByteBuffer(), level) + putBytes(blockId, new WrappedLargeByteBuffer(data.nioByteBuffer()), level) } /** @@ -513,7 +516,7 @@ private[spark] class BlockManager( // Look for block on disk, potentially storing it back in memory if required if (level.useDisk) { logDebug(s"Getting block $blockId from disk") - val bytes: ByteBuffer = diskStore.getBytes(blockId) match { + val bytes: LargeByteBuffer = diskStore.getBytes(blockId) match { case Some(b) => b case None => throw new BlockException( @@ -535,7 +538,7 @@ private[spark] class BlockManager( /* We'll store the bytes in memory if the block's storage level includes * "memory serialized", or if it should be cached as objects in memory * but we only requested its serialized bytes. 
*/ - val copyForMemory = ByteBuffer.allocate(bytes.limit) + val copyForMemory = LargeByteBuffer.allocateOnHeap(bytes.limit, largeByteBufferChunkSize) copyForMemory.put(bytes) memoryStore.putBytes(blockId, copyForMemory, level) bytes.rewind() @@ -591,8 +594,9 @@ private[spark] class BlockManager( val locations = Random.shuffle(master.getLocations(blockId)) for (loc <- locations) { logDebug(s"Getting remote block $blockId from $loc") - val data = blockTransferService.fetchBlockSync( - loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer() + //the fetch will always be one byte buffer till we fix SPARK-5928 + val data: LargeByteBuffer = new WrappedLargeByteBuffer(blockTransferService.fetchBlockSync( + loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer()) if (data != null) { if (asBlockResult) { @@ -674,7 +678,7 @@ private[spark] class BlockManager( */ def putBytes( blockId: BlockId, - bytes: ByteBuffer, + bytes: LargeByteBuffer, level: StorageLevel, tellMaster: Boolean = true, effectiveStorageLevel: Option[StorageLevel] = None): Seq[(BlockId, BlockStatus)] = { @@ -736,7 +740,7 @@ private[spark] class BlockManager( var valuesAfterPut: Iterator[Any] = null // Ditto for the bytes after the put - var bytesAfterPut: ByteBuffer = null + var bytesAfterPut: LargeByteBuffer = null // Size of the block in bytes var size = 0L @@ -884,7 +888,7 @@ private[spark] class BlockManager( * Replicate block to another node. Not that this is a blocking call that returns after * the block has been replicated. */ - private def replicate(blockId: BlockId, data: ByteBuffer, level: StorageLevel): Unit = { + private def replicate(blockId: BlockId, data: LargeByteBuffer, level: StorageLevel): Unit = { val maxReplicationFailures = conf.getInt("spark.storage.maxReplicationFailures", 1) val numPeersToReplicateTo = level.replication - 1 val peersForReplication = new ArrayBuffer[BlockManagerId] @@ -940,8 +944,11 @@ private[spark] class BlockManager( val onePeerStartTime = System.currentTimeMillis data.rewind() logTrace(s"Trying to replicate $blockId of ${data.limit()} bytes to $peer") - blockTransferService.uploadBlockSync( - peer.host, peer.port, peer.executorId, blockId, new NioManagedBuffer(data), tLevel) + //TODO + //ACK! 
here we're stuck -- we can't replicate a large block until we figure out + // how to deal w/ shuffling more than 2 gb +// blockTransferService.uploadBlockSync( +// peer.host, peer.port, peer.executorId, blockId, new NioManagedBuffer(data), tLevel) logTrace(s"Replicated $blockId of ${data.limit()} bytes to $peer in %s ms" .format(System.currentTimeMillis - onePeerStartTime)) peersReplicatedTo += peer @@ -1180,10 +1187,10 @@ private[spark] class BlockManager( def dataSerialize( blockId: BlockId, values: Iterator[Any], - serializer: Serializer = defaultSerializer): ByteBuffer = { - val byteStream = new ByteArrayOutputStream(4096) + serializer: Serializer = defaultSerializer): LargeByteBuffer = { + val byteStream = new LargeByteBufferOutputStream() dataSerializeStream(blockId, byteStream, values, serializer) - ByteBuffer.wrap(byteStream.toByteArray) + byteStream.largeBuffer } /** @@ -1192,10 +1199,10 @@ private[spark] class BlockManager( */ def dataDeserialize( blockId: BlockId, - bytes: ByteBuffer, + bytes: LargeByteBuffer, serializer: Serializer = defaultSerializer): Iterator[Any] = { bytes.rewind() - val stream = wrapForCompression(blockId, new ByteBufferInputStream(bytes, true)) + val stream = wrapForCompression(blockId, new LargeByteBufferInputStream(bytes, true)) serializer.newInstance().deserializeStream(stream).asIterator } @@ -1245,6 +1252,11 @@ private[spark] object BlockManager extends Logging { } } + def dispose(buffer: LargeByteBuffer): Unit = { + // TODO + ??? + } + def blockIdsToBlockManagers( blockIds: Array[BlockId], env: SparkEnv, diff --git a/core/src/main/scala/org/apache/spark/storage/BlockStore.scala b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala index b14b5e91d1794..38989f0c07681 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala @@ -17,6 +17,8 @@ package org.apache.spark.storage +import java.nio.ByteBuffer + import scala.collection.mutable.ArrayBuffer import org.apache.spark.Logging @@ -43,15 +45,15 @@ private[spark] abstract class BlockStore(val blockManager: BlockManager) extends * @return a PutResult that contains the size of the data, as well as the values put if * returnValues is true (if not, the result's data field can be null) */ - def putValues( + def putIterator( blockId: BlockId, values: Iterator[Any], level: StorageLevel, returnValues: Boolean): PutResult - def putValues( + def putArray( blockId: BlockId, - values: ArrayBuffer[Any], + values: Array[Any], level: StorageLevel, returnValues: Boolean): PutResult diff --git a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala index 61ef5ff168791..18293f3314a5f 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala @@ -22,6 +22,7 @@ import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode import org.apache.spark.Logging +import org.apache.spark.io.LargeByteBuffer import org.apache.spark.serializer.Serializer import org.apache.spark.util.Utils @@ -104,7 +105,7 @@ private[spark] class DiskStore(blockManager: BlockManager, diskManager: DiskBloc } } - private def getBytes(file: File, offset: Long, length: Long): Option[ByteBuffer] = { + private def getBytes(file: File, offset: Long, length: Long): Option[LargeByteBuffer] = { val channel = new RandomAccessFile(file, "r").getChannel try { @@ -128,12 +129,12 @@ private[spark] class 
DiskStore(blockManager: BlockManager, diskManager: DiskBloc } } - override def getBytes(blockId: BlockId): Option[ByteBuffer] = { + override def getBytes(blockId: BlockId): Option[LargeByteBuffer] = { val file = diskManager.getFile(blockId.name) getBytes(file, 0, file.length) } - def getBytes(segment: FileSegment): Option[ByteBuffer] = { + def getBytes(segment: FileSegment): Option[LargeByteBuffer] = { getBytes(segment.file, segment.offset, segment.length) } diff --git a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala index 71305a46bf570..90ced59104432 100644 --- a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala @@ -20,6 +20,8 @@ package org.apache.spark.storage import java.nio.ByteBuffer import java.util.LinkedHashMap +import org.apache.spark.io.LargeByteBuffer + import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -77,7 +79,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) } } - override def putBytes(blockId: BlockId, _bytes: ByteBuffer, level: StorageLevel): PutResult = { + override def putBytes(blockId: BlockId, _bytes: LargeByteBuffer, level: StorageLevel): PutResult = { // Work on a duplicate - since the original input might be used elsewhere. val bytes = _bytes.duplicate() bytes.rewind() diff --git a/core/src/main/scala/org/apache/spark/storage/PutResult.scala b/core/src/main/scala/org/apache/spark/storage/PutResult.scala index f0eac7594ecf6..2e00934bde243 100644 --- a/core/src/main/scala/org/apache/spark/storage/PutResult.scala +++ b/core/src/main/scala/org/apache/spark/storage/PutResult.scala @@ -17,7 +17,7 @@ package org.apache.spark.storage -import java.nio.ByteBuffer +import org.apache.spark.io.LargeByteBuffer /** * Result of adding a block into a BlockStore. This case class contains a few things: @@ -28,5 +28,5 @@ import java.nio.ByteBuffer */ private[spark] case class PutResult( size: Long, - data: Either[Iterator[_], ByteBuffer], + data: Either[Iterator[_], LargeByteBuffer], droppedBlocks: Seq[(BlockId, BlockStatus)] = Seq.empty) diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala index 233d1e2b7c616..fd7cb49ef9d50 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala @@ -21,6 +21,7 @@ import java.io.IOException import java.nio.ByteBuffer import com.google.common.io.ByteStreams +import org.apache.spark.io.LargeByteBuffer import tachyon.client.{ReadType, WriteType} import org.apache.spark.Logging @@ -64,7 +65,7 @@ private[spark] class TachyonStore( private def putIntoTachyonStore( blockId: BlockId, - bytes: ByteBuffer, + bytes: LargeByteBuffer, returnValues: Boolean): PutResult = { // So that we do not modify the input offsets ! 
// duplicate does not copy buffer, so inexpensive @@ -100,7 +101,7 @@ private[spark] class TachyonStore( getBytes(blockId).map(buffer => blockManager.dataDeserialize(blockId, buffer)) } - override def getBytes(blockId: BlockId): Option[ByteBuffer] = { + override def getBytes(blockId: BlockId): Option[LargeByteBuffer] = { val file = tachyonManager.getFile(blockId) if (file == null || file.getLocationHosts.size == 0) { return None diff --git a/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala b/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala new file mode 100644 index 0000000000000..26f2d7848bb29 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.io.InputStream +import java.nio.ByteBuffer + +import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.storage.BlockManager + +/** + * Reads data from a ByteBuffer, and optionally cleans it up using BlockManager.dispose() + * at the end of the stream (e.g. to close a memory-mapped file). + */ +private[spark] +class LargeByteBufferInputStream(private var buffer: LargeByteBuffer, dispose: Boolean = false) + extends InputStream { + + override def read(): Int = { + if (buffer == null || buffer.remaining() == 0) { + cleanUp() + -1 + } else { + buffer.get() & 0xFF + } + } + + override def read(dest: Array[Byte]): Int = { + read(dest, 0, dest.length) + } + + override def read(dest: Array[Byte], offset: Int, length: Int): Int = { + if (buffer == null || buffer.remaining() == 0) { + cleanUp() + -1 + } else { + val amountToGet = math.min(buffer.remaining(), length).toInt + buffer.get(dest, offset, amountToGet) + amountToGet + } + } + + override def skip(bytes: Long): Long = { + if (buffer != null) { + val amountToSkip = math.min(bytes, buffer.remaining).toInt + buffer.position(buffer.position + amountToSkip) + if (buffer.remaining() == 0) { + cleanUp() + } + amountToSkip + } else { + 0L + } + } + + /** + * Clean up the buffer, and potentially dispose of it using BlockManager.dispose(). + */ + private def cleanUp() { + if (buffer != null) { + if (dispose) { + BlockManager.dispose(buffer) + } + buffer = null + } + } +} diff --git a/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala b/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala new file mode 100644 index 0000000000000..246ebca999437 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
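A small caveat on the input stream above: skip narrows the computed byte count with .toInt, so a request to skip more than 2 GB on a buffer that really has that much remaining wraps to a negative value and can even move the position backwards. Since the whole point of LargeByteBuffer is crossing the 2 GB line, a sketch that keeps the arithmetic in Long (same buffer field and cleanUp() as the class above) could look like:

  override def skip(bytes: Long): Long = {
    if (buffer != null) {
      // stay in Long so >2GB skips are not truncated
      val amountToSkip = math.min(bytes, buffer.remaining())
      buffer.position(buffer.position + amountToSkip)
      if (buffer.remaining() == 0) {
        cleanUp()
      }
      amountToSkip
    } else {
      0L
    }
  }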
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.io.OutputStream + +import org.apache.spark.io.{ChainedLargeByteBuffer, LargeByteBuffer} +import org.apache.spark.util.collection.ChainedBuffer + +private[spark] +class LargeByteBufferOutputStream(chunkSize: Int = 65536) + extends OutputStream { + + val buffer = new ChainedBuffer(chunkSize) + + private var _pos = 0 + + override def write(b: Int): Unit = { + throw new UnsupportedOperationException() + } + + override def write(bytes: Array[Byte], offs: Int, len: Int): Unit = { + buffer.write(_pos, bytes, offs, len) + _pos += len + } + + def pos: Int = _pos + + def largeBuffer: LargeByteBuffer = new ChainedLargeByteBuffer(buffer) +} diff --git a/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala b/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala new file mode 100644 index 0000000000000..c39a2fd1f8a11 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util.collection + +import java.io.OutputStream + +import scala.collection.mutable.ArrayBuffer + +/** + * A logical byte buffer that wraps a list of byte arrays. All the byte arrays have equal size. The + * advantage of this over a standard ArrayBuffer is that it can grow without claiming large amounts + * of memory and needing to copy the full contents. + */ +private[spark] class ChainedBuffer private(val chunks: ArrayBuffer[Array[Byte]], chunkSize: Int) { + private val chunkSizeLog2 = (math.log(chunkSize) / math.log(2)).toInt + assert(math.pow(2, chunkSizeLog2).toInt == chunkSize) + private var _size: Long = _ + + /** + * Read bytes from this buffer into a byte array. + * + * @param pos Offset in the buffer to read from. + * @param bytes Byte array to read into. + * @param offs Offset in the byte array to read to. + * @param len Number of bytes to read. 
+ */ + def read(pos: Long, bytes: Array[Byte], offs: Int, len: Int): Unit = { + var chunkIndex = (pos >> chunkSizeLog2).toInt + var posInChunk = (pos - (chunkIndex << chunkSizeLog2)).toInt + var moved = 0 + while (moved < len) { + val toRead = math.min(len - moved, chunkSize - posInChunk) + System.arraycopy(chunks(chunkIndex), posInChunk, bytes, offs + moved, toRead) + moved += toRead + chunkIndex += 1 + posInChunk = 0 + } + } + + def read(pos:Long): Byte = { + val chunkIndex = (pos >> chunkSizeLog2).toInt + val posInChunk = (pos - (chunkIndex << chunkSizeLog2)).toInt + chunks(chunkIndex)(posInChunk) + } + + /** + * Write bytes from a byte array into this buffer. + * + * @param pos Offset in the buffer to write to. + * @param bytes Byte array to write from. + * @param offs Offset in the byte array to write from. + * @param len Number of bytes to write. + */ + def write(pos: Long, bytes: Array[Byte], offs: Int, len: Int): Unit = { + // Grow if needed + val endChunkIndex = ((pos + len - 1) >> chunkSizeLog2).toInt + while (endChunkIndex >= chunks.length) { + chunks += new Array[Byte](chunkSize) + } + + var chunkIndex = (pos >> chunkSizeLog2).toInt + var posInChunk = (pos - (chunkIndex << chunkSizeLog2)).toInt + var moved = 0 + while (moved < len) { + val toWrite = math.min(len - moved, chunkSize - posInChunk) + System.arraycopy(bytes, offs + moved, chunks(chunkIndex), posInChunk, toWrite) + moved += toWrite + chunkIndex += 1 + posInChunk = 0 + } + + _size = math.max(_size, pos + len) + } + + /** + * Total size of buffer that can be written to without allocating additional memory. + */ + def capacity: Int = chunks.size * chunkSize + + /** + * Size of the logical buffer. + */ + def size: Long = _size +} + +private[spark] object ChainedBuffer { + def withInitialSize(chunkSize: Int, minInitialSize: Long = 0): ChainedBuffer = { + val nChunks = (((minInitialSize - 1) / chunkSize).toInt) + 1 + val chunks = new ArrayBuffer[Array[Byte]](nChunks) + (0 until nChunks).foreach{idx => chunks(idx) = new Array[Byte](chunkSize)} + new ChainedBuffer(chunks, chunkSize) + } +} + +/** + * Output stream that writes to a ChainedBuffer. + */ +private[spark] class ChainedBufferOutputStream(chainedBuffer: ChainedBuffer) extends OutputStream { + private var _pos = 0 + + override def write(b: Int): Unit = { + throw new UnsupportedOperationException() + } + + override def write(bytes: Array[Byte], offs: Int, len: Int): Unit = { + chainedBuffer.write(_pos, bytes, offs, len) + _pos += len + } + + def pos: Int = _pos +} \ No newline at end of file diff --git a/core/src/test/scala/org/apache/spark/io/LargeByteBufferTest.scala b/core/src/test/scala/org/apache/spark/io/LargeByteBufferTest.scala new file mode 100644 index 0000000000000..06d3d223c3858 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/io/LargeByteBufferTest.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
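One note on the withInitialSize factory above: new ArrayBuffer[Array[Byte]](nChunks) only pre-sizes the backing capacity, the collection itself is still empty, so the chunks(idx) = ... assignments throw IndexOutOfBoundsException for any nChunks >= 1. A sketch of the same factory that appends instead of assigning by index (keeping the original rounding formula):

  def withInitialSize(chunkSize: Int, minInitialSize: Long = 0): ChainedBuffer = {
    val nChunks = (((minInitialSize - 1) / chunkSize).toInt) + 1
    // ArrayBuffer's Int constructor argument is only a capacity hint, not a size,
    // so build the chunk list by appending
    val chunks = ArrayBuffer.fill(nChunks)(new Array[Byte](chunkSize))
    new ChainedBuffer(chunks, chunkSize)
  }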
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.io + +import java.io.{ObjectInputStream, ObjectOutputStream} + +import org.apache.spark.util.{LargeByteBufferInputStream, LargeByteBufferOutputStream} +import org.scalatest.{Matchers, FunSuite} + +class LargeByteBufferTest extends FunSuite with Matchers { + +// test("allocateOnHeap") { +// val bufs = LargeByteBuffer.allocateOnHeap(10, 3).asInstanceOf[ChainedLargeByteBuffer] +// bufs.underlying.foreach{buf => buf.capacity should be <= 3} +// bufs.underlying.map{_.capacity}.sum should be (10) +// } +// +// test("allocate large") { +// val size = Integer.MAX_VALUE.toLong + 10 +// val bufs = LargeByteBuffer.allocateOnHeap(size, 1e9.toInt).asInstanceOf[WrappedLargeByteBuffer] +// bufs.capacity should be (size) +// bufs.underlying.map{_.capacity.toLong}.sum should be (Integer.MAX_VALUE.toLong + 10) +// } + + + test("io stream roundtrip") { + + val rawOut = new LargeByteBufferOutputStream(128) + val objOut = new ObjectOutputStream(rawOut) + val someObject = (1 to 100).map{x => x -> scala.util.Random.nextInt(x)}.toMap + objOut.writeObject(someObject) + objOut.close() + + rawOut.largeBuffer.asInstanceOf[ChainedLargeByteBuffer].underlying.chunks.size should be > 1 + + val rawIn = new LargeByteBufferInputStream(rawOut.largeBuffer) + val objIn = new ObjectInputStream(rawIn) + val deser = objIn.readObject() + deser should be (someObject) + + } + +} diff --git a/core/src/test/scala/org/apache/spark/util/collection/ChainedBufferTest.scala b/core/src/test/scala/org/apache/spark/util/collection/ChainedBufferTest.scala new file mode 100644 index 0000000000000..e99d5ecc639c9 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/collection/ChainedBufferTest.scala @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
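The output streams introduced a little earlier (LargeByteBufferOutputStream and ChainedBufferOutputStream) currently throw UnsupportedOperationException from the single-byte write(b: Int), which is part of the java.io.OutputStream contract and exactly what wrappers such as DataOutputStream.writeByte call. If such wrappers ever sit on top of these streams, a one-byte delegation is enough; just a sketch of the idea:

  private val singleByte = new Array[Byte](1)

  override def write(b: Int): Unit = {
    // honor the OutputStream contract by routing through the existing array path
    singleByte(0) = b.toByte
    write(singleByte, 0, 1)
  }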
+ */ + +package org.apache.spark.util.collection + +import java.nio.ByteBuffer + +import org.scalatest.FunSuite +import org.scalatest.Matchers._ + +class ChainedBufferSuite extends FunSuite { + test("write and read at start") { + // write from start of source array + val buffer = new ChainedBuffer(8) + buffer.capacity should be (0) + verifyWriteAndRead(buffer, 0, 0, 0, 4) + buffer.capacity should be (8) + + // write from middle of source array + verifyWriteAndRead(buffer, 0, 5, 0, 4) + buffer.capacity should be (8) + + // read to middle of target array + verifyWriteAndRead(buffer, 0, 0, 5, 4) + buffer.capacity should be (8) + + // write up to border + verifyWriteAndRead(buffer, 0, 0, 0, 8) + buffer.capacity should be (8) + + // expand into second buffer + verifyWriteAndRead(buffer, 0, 0, 0, 12) + buffer.capacity should be (16) + + // expand into multiple buffers + verifyWriteAndRead(buffer, 0, 0, 0, 28) + buffer.capacity should be (32) + } + + test("write and read at middle") { + // write from start of source array + val buffer = new ChainedBuffer(8) + verifyWriteAndRead(buffer, 3, 0, 0, 4) + buffer.capacity should be (8) + + // write from middle of source array + verifyWriteAndRead(buffer, 3, 5, 0, 4) + buffer.capacity should be (8) + + // read to middle of target array + verifyWriteAndRead(buffer, 3, 0, 5, 4) + buffer.capacity should be (8) + + // write up to border + verifyWriteAndRead(buffer, 3, 0, 0, 5) + buffer.capacity should be (8) + + // expand into second buffer + verifyWriteAndRead(buffer, 3, 0, 0, 12) + buffer.capacity should be (16) + + // expand into multiple buffers + verifyWriteAndRead(buffer, 3, 0, 0, 28) + buffer.capacity should be (32) + } + + test("write and read at later buffer") { + // write from start of source array + val buffer = new ChainedBuffer(8) + verifyWriteAndRead(buffer, 11, 0, 0, 4) + buffer.capacity should be (16) + + // write from middle of source array + verifyWriteAndRead(buffer, 11, 5, 0, 4) + buffer.capacity should be (16) + + // read to middle of target array + verifyWriteAndRead(buffer, 11, 0, 5, 4) + buffer.capacity should be (16) + + // write up to border + verifyWriteAndRead(buffer, 11, 0, 0, 5) + buffer.capacity should be (16) + + // expand into second buffer + verifyWriteAndRead(buffer, 11, 0, 0, 12) + buffer.capacity should be (24) + + // expand into multiple buffers + verifyWriteAndRead(buffer, 11, 0, 0, 28) + buffer.capacity should be (40) + } + + + // Used to make sure we're writing different bytes each time + var rangeStart = 0 + + /** + * @param buffer The buffer to write to and read from. + * @param offsetInBuffer The offset to write to in the buffer. + * @param offsetInSource The offset in the array that the bytes are written from. + * @param offsetInTarget The offset in the array to read the bytes into. 
+ * @param length The number of bytes to read and write + */ + def verifyWriteAndRead( + buffer: ChainedBuffer, + offsetInBuffer: Int, + offsetInSource: Int, + offsetInTarget: Int, + length: Int): Unit = { + val source = new Array[Byte](offsetInSource + length) + (rangeStart until rangeStart + length).map(_.toByte).copyToArray(source, offsetInSource) + buffer.write(offsetInBuffer, source, offsetInSource, length) + val target = new Array[Byte](offsetInTarget + length) + buffer.read(offsetInBuffer, target, offsetInTarget, length) + ByteBuffer.wrap(source, offsetInSource, length) should be + (ByteBuffer.wrap(target, offsetInTarget, length)) + + rangeStart += 100 + } +} \ No newline at end of file From a139e97fe1aeac279b9c47119745c0f45eb7d8c5 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 25 Feb 2015 13:27:13 -0600 Subject: [PATCH 04/97] compiling but all sorts of bad casting etc. --- .../spark/broadcast/TorrentBroadcast.scala | 22 +-- .../org/apache/spark/executor/Executor.scala | 5 +- .../org/apache/spark/io/LargeByteBuffer.scala | 128 ++++++++++++++++-- .../spark/scheduler/TaskResultGetter.scala | 5 +- .../shuffle/FileShuffleBlockManager.scala | 7 +- .../shuffle/IndexShuffleBlockManager.scala | 5 +- .../spark/shuffle/ShuffleBlockManager.scala | 4 +- .../apache/spark/storage/BlockManager.scala | 16 +-- .../org/apache/spark/storage/DiskStore.scala | 12 +- .../apache/spark/storage/MemoryStore.scala | 11 +- .../apache/spark/storage/TachyonStore.scala | 8 +- .../util/LargeByteBufferOutputStream.scala | 2 +- .../org/apache/spark/DistributedSuite.scala | 3 +- .../spark/storage/BlockManagerSuite.scala | 10 +- .../util/collection/ChainedBufferTest.scala | 6 +- 15 files changed, 177 insertions(+), 67 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index 94142d33369c7..fac7c01e558ff 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -25,10 +25,10 @@ import scala.reflect.ClassTag import scala.util.Random import org.apache.spark.{Logging, SparkConf, SparkEnv, SparkException} -import org.apache.spark.io.CompressionCodec +import org.apache.spark.io.{LargeByteBuffer, CompressionCodec} import org.apache.spark.serializer.Serializer import org.apache.spark.storage.{BroadcastBlockId, StorageLevel} -import org.apache.spark.util.{ByteBufferInputStream, Utils} +import org.apache.spark.util.{LargeByteBufferInputStream, ByteBufferInputStream, Utils} import org.apache.spark.util.io.ByteArrayChunkOutputStream /** @@ -110,10 +110,10 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) } /** Fetch torrent blocks from the driver and/or other executors. */ - private def readBlocks(): Array[ByteBuffer] = { + private def readBlocks(): Array[LargeByteBuffer] = { // Fetch chunks of data. Note that all these chunks are stored in the BlockManager and reported // to the driver, so other executors can pull these chunks from this executor as well. - val blocks = new Array[ByteBuffer](numBlocks) + val blocks = new Array[LargeByteBuffer](numBlocks) val bm = SparkEnv.get.blockManager for (pid <- Random.shuffle(Seq.range(0, numBlocks))) { @@ -122,8 +122,8 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) // First try getLocalBytes because there is a chance that previous attempts to fetch the // broadcast blocks have already fetched some of the blocks. 
In that case, some blocks // would be available locally (on this executor). - def getLocal: Option[ByteBuffer] = bm.getLocalBytes(pieceId) - def getRemote: Option[ByteBuffer] = bm.getRemoteBytes(pieceId).map { block => + def getLocal: Option[LargeByteBuffer] = bm.getLocalBytes(pieceId) + def getRemote: Option[LargeByteBuffer] = bm.getRemoteBytes(pieceId).map { block => // If we found the block from remote executors/driver's BlockManager, put the block // in this executor's BlockManager. SparkEnv.get.blockManager.putBytes( @@ -133,7 +133,7 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) tellMaster = true) block } - val block: ByteBuffer = getLocal.orElse(getRemote).getOrElse( + val block: LargeByteBuffer = getLocal.orElse(getRemote).getOrElse( throw new SparkException(s"Failed to get $pieceId of $broadcastId")) blocks(pid) = block } @@ -194,22 +194,22 @@ private object TorrentBroadcast extends Logging { obj: T, blockSize: Int, serializer: Serializer, - compressionCodec: Option[CompressionCodec]): Array[ByteBuffer] = { + compressionCodec: Option[CompressionCodec]): Array[LargeByteBuffer] = { val bos = new ByteArrayChunkOutputStream(blockSize) val out: OutputStream = compressionCodec.map(c => c.compressedOutputStream(bos)).getOrElse(bos) val ser = serializer.newInstance() val serOut = ser.serializeStream(out) serOut.writeObject[T](obj).close() - bos.toArrays.map(ByteBuffer.wrap) + bos.toArrays.map(LargeByteBuffer.asLargeByteBuffer) } def unBlockifyObject[T: ClassTag]( - blocks: Array[ByteBuffer], + blocks: Array[LargeByteBuffer], serializer: Serializer, compressionCodec: Option[CompressionCodec]): T = { require(blocks.nonEmpty, "Cannot unblockify an empty array of blocks") val is = new SequenceInputStream( - asJavaEnumeration(blocks.iterator.map(block => new ByteBufferInputStream(block)))) + asJavaEnumeration(blocks.iterator.map(block => new LargeByteBufferInputStream(block)))) val in: InputStream = compressionCodec.map(c => c.compressedInputStream(is)).getOrElse(is) val ser = serializer.newInstance() val serIn = ser.deserializeStream(in) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index b684fb704956b..65c74db8ebf3a 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -23,6 +23,8 @@ import java.net.URL import java.nio.ByteBuffer import java.util.concurrent._ +import org.apache.spark.io.LargeByteBuffer + import scala.collection.JavaConversions._ import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.util.control.NonFatal @@ -217,6 +219,7 @@ private[spark] class Executor( val accumUpdates = Accumulators.values val directResult = new DirectTaskResult(valueBytes, accumUpdates, task.metrics.orNull) + //TODO should we allow task results over 2gb? val serializedDirectResult = ser.serialize(directResult) val resultSize = serializedDirectResult.limit @@ -230,7 +233,7 @@ private[spark] class Executor( } else if (resultSize >= akkaFrameSize - AkkaUtils.reservedSizeBytes) { val blockId = TaskResultBlockId(taskId) env.blockManager.putBytes( - blockId, serializedDirectResult, StorageLevel.MEMORY_AND_DISK_SER) + blockId, LargeByteBuffer.asLargeByteBuffer(serializedDirectResult), StorageLevel.MEMORY_AND_DISK_SER) logInfo( s"Finished $taskName (TID $taskId). 
$resultSize bytes result sent via BlockManager)") ser.serialize(new IndirectTaskResult[Any](blockId, resultSize)) diff --git a/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala b/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala index 01bd433f55c78..4bb5b5a101149 100644 --- a/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala +++ b/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala @@ -18,15 +18,14 @@ package org.apache.spark.io import java.io.{RandomAccessFile, DataInput, InputStream, OutputStream} +import java.nio.channels.FileChannel.MapMode import java.nio.{ByteBuffer, BufferUnderflowException, BufferOverflowException} -import java.nio.channels.{WritableByteChannel, ReadableByteChannel} +import java.nio.channels.{FileChannel, WritableByteChannel, ReadableByteChannel} import org.apache.spark.util.collection.ChainedBuffer import scala.collection.mutable.{ArrayBuffer, HashSet} -import org.apache.spark.Logging -import org.apache.spark.storage.{FileSegment, BlockManager} @@ -67,6 +66,10 @@ trait LargeByteBuffer { */ def limit(): Long + + //an alternative to having this method would be having a foreachBuffer(f: Buffer => T) + def writeTo(channel: WritableByteChannel): Long + // // def skip(skipBy: Long): Unit // @@ -159,52 +162,147 @@ class ChainedLargeByteBuffer(private[io] val underlying: ChainedBuffer) extends def limit(newLimit: Long): Unit = { ??? } + + def writeTo(channel:WritableByteChannel): Long = { + var written = 0l + underlying.chunks.foreach{bytes => + //TODO test this + val buffer = ByteBuffer.wrap(bytes) + while (buffer.hasRemaining) + channel.write(buffer) + written += bytes.length + } + written + } } -class WrappedLargeByteBuffer(private val underlying: ByteBuffer) extends LargeByteBuffer { - def capacity = underlying.capacity +class WrappedLargeByteBuffer(private[spark] val underlying: Array[ByteBuffer]) extends LargeByteBuffer { + + val (totalCapacity, chunkOffsets) = { + var sum = 0l + val offsets = new Array[Long](underlying.size) + (0 until underlying.size).foreach{idx => + offsets(idx) = sum + sum += underlying(idx).capacity() + } + (sum, offsets) + } + + private var _pos = 0l + private var currentBufferIdx = 0 + private var currentBuffer = underlying(0) + private var _limit = totalCapacity + + def capacity = totalCapacity def get(dst: Array[Byte], offset: Int, length: Int): Unit = { - underlying.get(dst, offset, length) + var moved = 0 + while (moved < length) { + val toRead = math.min(length - moved, currentBuffer.remaining()) + currentBuffer.get(dst, offset, toRead) + moved += toRead + updateCurrentBuffer() + } } def get(): Byte = { - underlying.get() + val r = currentBuffer.get() + _pos += 1 + updateCurrentBuffer() + r + } + + private def updateCurrentBuffer(): Unit = { + //TODO fix end condition + while(!currentBuffer.hasRemaining()) { + currentBufferIdx += 1 + currentBuffer = underlying(currentBufferIdx) + } + } + + def put(bytes: LargeByteBuffer): Unit = { + ??? } - def position: Long = underlying.position + def position: Long = _pos def position(position: Long): Unit = { //XXX check range? 
- underlying.position(position.toInt) + _pos = position } def remaining(): Long = { - underlying.remaining() + totalCapacity - _pos } def duplicate(): WrappedLargeByteBuffer = { - new WrappedLargeByteBuffer(underlying.duplicate()) + new WrappedLargeByteBuffer(underlying.map{_.duplicate()}) } def rewind(): Unit = { - underlying.duplicate() + _pos = 0 + underlying.foreach{_.rewind()} } def limit(): Long = { - underlying.limit() + totalCapacity } def limit(newLimit: Long) = { - //XXX check range? - underlying.limit(newLimit.toInt) + //XXX check range? set limits in sub buffers? + _limit = newLimit + } + + def writeTo(channel: WritableByteChannel): Long = { + var written = 0l + underlying.foreach{buffer => + //TODO test this + //XXX do we care about respecting the limit here? + written += buffer.remaining() + while (buffer.hasRemaining) + channel.write(buffer) + } + written } } object LargeByteBuffer { + + def asLargeByteBuffer(byteBuffer: ByteBuffer): LargeByteBuffer = { + new WrappedLargeByteBuffer(Array(byteBuffer)) + } + + def asLargeByteBuffer(bytes: Array[Byte]): LargeByteBuffer = { + new WrappedLargeByteBuffer(Array(ByteBuffer.wrap(bytes))) + } + + def allocateOnHeap(size: Long, maxChunk: Int): LargeByteBuffer = { val buffer = ChainedBuffer.withInitialSize(maxChunk, size) new ChainedLargeByteBuffer(buffer) } + + def mapFile( + channel: FileChannel, + mode: MapMode, + offset: Long, + length: Long, + maxChunk: Int = Integer.MAX_VALUE - 1e6.toInt + ): LargeByteBuffer = { + val offsets = new ArrayBuffer[Long]() + var curOffset = offset + val end = offset + length + while (curOffset < end) { + offsets += curOffset + val length = math.min(end - curOffset, maxChunk) + curOffset += length + } + offsets += end + val chunks = new Array[ByteBuffer](offsets.size - 1) + (0 until offsets.size - 1).foreach{idx => + chunks(idx) = channel.map(mode, offsets(idx), offsets(idx + 1) - offsets(idx)) + } + new WrappedLargeByteBuffer(chunks) + } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala index 3938580aeea59..9428273561cd8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala @@ -20,6 +20,8 @@ package org.apache.spark.scheduler import java.nio.ByteBuffer import java.util.concurrent.RejectedExecutionException +import org.apache.spark.io.WrappedLargeByteBuffer + import scala.language.existentials import scala.util.control.NonFatal @@ -72,8 +74,9 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul taskSetManager, tid, TaskState.FINISHED, TaskResultLost) return } + //TODO either change serializer interface, or ... 
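A note on the bulk get in the WrappedLargeByteBuffer above: it copies every chunk into dst at the same offset and never advances _pos, so a read that spans chunk boundaries overwrites the start of the destination array (the Java port introduced later in this series has the same shape). A possible correction, sketched against the fields already defined in that class:

  def get(dst: Array[Byte], offset: Int, length: Int): Unit = {
    var moved = 0
    while (moved < length) {
      val toRead = math.min(length - moved, currentBuffer.remaining())
      // advance both the destination offset and the logical position
      currentBuffer.get(dst, offset + moved, toRead)
      moved += toRead
      _pos += toRead
      updateCurrentBuffer()  // end-of-buffer handling is still the open TODO above
    }
  }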
val deserializedResult = serializer.get().deserialize[DirectTaskResult[_]]( - serializedTaskResult.get) + serializedTaskResult.get.asInstanceOf[WrappedLargeByteBuffer].underlying(0)) sparkEnv.blockManager.master.removeBlock(blockId) (deserializedResult, size) } diff --git a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala index 7de2f9cbb2866..016964fc274d8 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala @@ -22,6 +22,8 @@ import java.nio.ByteBuffer import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicInteger +import org.apache.spark.io.LargeByteBuffer + import scala.collection.JavaConversions._ import org.apache.spark.{Logging, SparkConf, SparkEnv} @@ -171,9 +173,10 @@ class FileShuffleBlockManager(conf: SparkConf) } } - override def getBytes(blockId: ShuffleBlockId): Option[ByteBuffer] = { + override def getBytes(blockId: ShuffleBlockId): Option[LargeByteBuffer] = { + //TODO val segment = getBlockData(blockId) - Some(segment.nioByteBuffer()) + Some(LargeByteBuffer.asLargeByteBuffer(segment.nioByteBuffer())) } override def getBlockData(blockId: ShuffleBlockId): ManagedBuffer = { diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala index b292587d37028..76c2e4180c838 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala @@ -21,6 +21,7 @@ import java.io._ import java.nio.ByteBuffer import com.google.common.io.ByteStreams +import org.apache.spark.io.LargeByteBuffer import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} @@ -97,8 +98,8 @@ class IndexShuffleBlockManager(conf: SparkConf) extends ShuffleBlockManager { } } - override def getBytes(blockId: ShuffleBlockId): Option[ByteBuffer] = { - Some(getBlockData(blockId).nioByteBuffer()) + override def getBytes(blockId: ShuffleBlockId): Option[LargeByteBuffer] = { + Some(LargeByteBuffer.asLargeByteBuffer(getBlockData(blockId).nioByteBuffer())) } override def getBlockData(blockId: ShuffleBlockId): ManagedBuffer = { diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala index b521f0c7fc77e..fa737729b8758 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala @@ -17,7 +17,7 @@ package org.apache.spark.shuffle -import java.nio.ByteBuffer +import org.apache.spark.io.LargeByteBuffer import org.apache.spark.network.buffer.ManagedBuffer import org.apache.spark.storage.ShuffleBlockId @@ -29,7 +29,7 @@ trait ShuffleBlockManager { * Get shuffle block data managed by the local ShuffleBlockManager. * @return Some(ByteBuffer) if block found, otherwise None. 
*/ - def getBytes(blockId: ShuffleBlockId): Option[ByteBuffer] + def getBytes(blockId: ShuffleBlockId): Option[LargeByteBuffer] def getBlockData(blockId: ShuffleBlockId): ManagedBuffer diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index ad895ff338d54..479fcf35283ad 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -321,7 +321,7 @@ private[spark] class BlockManager( * Put the block locally, using the given storage level. */ override def putBlockData(blockId: BlockId, data: ManagedBuffer, level: StorageLevel): Unit = { - putBytes(blockId, new WrappedLargeByteBuffer(data.nioByteBuffer()), level) + putBytes(blockId, LargeByteBuffer.asLargeByteBuffer(data.nioByteBuffer()), level) } /** @@ -437,7 +437,7 @@ private[spark] class BlockManager( /** * Get block from the local block manager as serialized bytes. */ - def getLocalBytes(blockId: BlockId): Option[ByteBuffer] = { + def getLocalBytes(blockId: BlockId): Option[LargeByteBuffer] = { logDebug(s"Getting local block $blockId as bytes") // As an optimization for map output fetches, if the block is for a shuffle, return it // without acquiring a lock; the disk store never deletes (recent) items so this should work @@ -451,7 +451,7 @@ private[spark] class BlockManager( blockId, s"Block $blockId not found on disk, though it should be") } } else { - doGetLocal(blockId, asBlockResult = false).asInstanceOf[Option[ByteBuffer]] + doGetLocal(blockId, asBlockResult = false).asInstanceOf[Option[LargeByteBuffer]] } } @@ -584,9 +584,9 @@ private[spark] class BlockManager( /** * Get block from remote block managers as serialized bytes. 
*/ - def getRemoteBytes(blockId: BlockId): Option[ByteBuffer] = { + def getRemoteBytes(blockId: BlockId): Option[LargeByteBuffer] = { logDebug(s"Getting remote block $blockId as bytes") - doGetRemote(blockId, asBlockResult = false).asInstanceOf[Option[ByteBuffer]] + doGetRemote(blockId, asBlockResult = false).asInstanceOf[Option[LargeByteBuffer]] } private def doGetRemote(blockId: BlockId, asBlockResult: Boolean): Option[Any] = { @@ -594,8 +594,8 @@ private[spark] class BlockManager( val locations = Random.shuffle(master.getLocations(blockId)) for (loc <- locations) { logDebug(s"Getting remote block $blockId from $loc") - //the fetch will always be one byte buffer till we fix SPARK-5928 - val data: LargeByteBuffer = new WrappedLargeByteBuffer(blockTransferService.fetchBlockSync( + //TODO the fetch will always be one byte buffer till we fix SPARK-5928 + val data: LargeByteBuffer = LargeByteBuffer.asLargeByteBuffer(blockTransferService.fetchBlockSync( loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer()) if (data != null) { @@ -1006,7 +1006,7 @@ private[spark] class BlockManager( */ def dropFromMemory( blockId: BlockId, - data: Either[Array[Any], ByteBuffer]): Option[BlockStatus] = { + data: Either[Array[Any], LargeByteBuffer]): Option[BlockStatus] = { logInfo(s"Dropping block $blockId from memory") val info = blockInfo.get(blockId).orNull diff --git a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala index 18293f3314a5f..180b237a1a1f2 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala @@ -22,7 +22,7 @@ import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode import org.apache.spark.Logging -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.io.{WrappedLargeByteBuffer, LargeByteBuffer} import org.apache.spark.serializer.Serializer import org.apache.spark.util.Utils @@ -39,7 +39,7 @@ private[spark] class DiskStore(blockManager: BlockManager, diskManager: DiskBloc diskManager.getFile(blockId.name).length } - override def putBytes(blockId: BlockId, _bytes: ByteBuffer, level: StorageLevel): PutResult = { + override def putBytes(blockId: BlockId, _bytes: LargeByteBuffer, level: StorageLevel): PutResult = { // So that we do not modify the input offsets ! 
// duplicate does not copy buffer, so inexpensive val bytes = _bytes.duplicate() @@ -47,9 +47,7 @@ private[spark] class DiskStore(blockManager: BlockManager, diskManager: DiskBloc val startTime = System.currentTimeMillis val file = diskManager.getFile(blockId) val channel = new FileOutputStream(file).getChannel - while (bytes.remaining > 0) { - channel.write(bytes) - } + bytes.writeTo(channel) channel.close() val finishTime = System.currentTimeMillis logDebug("Block %s stored as %s file on disk in %d ms".format( @@ -120,9 +118,9 @@ private[spark] class DiskStore(blockManager: BlockManager, diskManager: DiskBloc } } buf.flip() - Some(buf) + Some(LargeByteBuffer.asLargeByteBuffer(buf)) } else { - Some(channel.map(MapMode.READ_ONLY, offset, length)) + Some(LargeByteBuffer.mapFile(channel, MapMode.READ_ONLY, offset, length)) } } finally { channel.close() diff --git a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala index 90ced59104432..8fccc0f3e78d2 100644 --- a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala @@ -17,7 +17,6 @@ package org.apache.spark.storage -import java.nio.ByteBuffer import java.util.LinkedHashMap import org.apache.spark.io.LargeByteBuffer @@ -154,7 +153,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) } } - override def getBytes(blockId: BlockId): Option[ByteBuffer] = { + override def getBytes(blockId: BlockId): Option[LargeByteBuffer] = { val entry = entries.synchronized { entries.get(blockId) } @@ -163,7 +162,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) } else if (entry.deserialized) { Some(blockManager.dataSerialize(blockId, entry.value.asInstanceOf[Array[Any]].iterator)) } else { - Some(entry.value.asInstanceOf[ByteBuffer].duplicate()) // Doesn't actually copy the data + Some(entry.value.asInstanceOf[LargeByteBuffer].duplicate()) // Doesn't actually copy the data } } @@ -176,7 +175,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) } else if (entry.deserialized) { Some(entry.value.asInstanceOf[Array[Any]].iterator) } else { - val buffer = entry.value.asInstanceOf[ByteBuffer].duplicate() // Doesn't actually copy data + val buffer = entry.value.asInstanceOf[LargeByteBuffer].duplicate() // Doesn't actually copy data Some(blockManager.dataDeserialize(blockId, buffer)) } } @@ -350,7 +349,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) val data = if (deserialized) { Left(value.asInstanceOf[Array[Any]]) } else { - Right(value.asInstanceOf[ByteBuffer].duplicate()) + Right(value.asInstanceOf[LargeByteBuffer].duplicate()) } val droppedBlockStatus = blockManager.dropFromMemory(blockId, data) droppedBlockStatus.foreach { status => droppedBlocks += ((blockId, status)) } @@ -416,7 +415,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) val data = if (entry.deserialized) { Left(entry.value.asInstanceOf[Array[Any]]) } else { - Right(entry.value.asInstanceOf[ByteBuffer].duplicate()) + Right(entry.value.asInstanceOf[LargeByteBuffer].duplicate()) } val droppedBlockStatus = blockManager.dropFromMemory(blockId, data) droppedBlockStatus.foreach { status => droppedBlocks += ((blockId, status)) } diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala index 
fd7cb49ef9d50..9f964ed456d5e 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala @@ -41,7 +41,7 @@ private[spark] class TachyonStore( tachyonManager.getFile(blockId.name).length } - override def putBytes(blockId: BlockId, bytes: ByteBuffer, level: StorageLevel): PutResult = { + override def putBytes(blockId: BlockId, bytes: LargeByteBuffer, level: StorageLevel): PutResult = { putIntoTachyonStore(blockId, bytes, returnValues = true) } @@ -75,7 +75,8 @@ private[spark] class TachyonStore( val startTime = System.currentTimeMillis val file = tachyonManager.getFile(blockId) val os = file.getOutStream(WriteType.TRY_CACHE) - os.write(byteBuffer.array()) + // TODO need a better fix here for tachyon +// os.write(byteBuffer.array()) os.close() val finishTime = System.currentTimeMillis logDebug("Block %s stored as %s file in Tachyon in %d ms".format( @@ -110,9 +111,10 @@ private[spark] class TachyonStore( assert (is != null) try { val size = file.length + //TODO val bs = new Array[Byte](size.asInstanceOf[Int]) ByteStreams.readFully(is, bs) - Some(ByteBuffer.wrap(bs)) + Some(LargeByteBuffer.asLargeByteBuffer(ByteBuffer.wrap(bs))) } catch { case ioe: IOException => logWarning(s"Failed to fetch the block $blockId from Tachyon", ioe) diff --git a/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala b/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala index 246ebca999437..2fe904b10c53c 100644 --- a/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala @@ -26,7 +26,7 @@ private[spark] class LargeByteBufferOutputStream(chunkSize: Int = 65536) extends OutputStream { - val buffer = new ChainedBuffer(chunkSize) + val buffer = ChainedBuffer.withInitialSize(chunkSize) private var _pos = 0 diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala index 97ea3578aa8ba..b183373de5019 100644 --- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark +import org.apache.spark.io.LargeByteBuffer import org.scalatest.FunSuite import org.scalatest.concurrent.Timeouts._ import org.scalatest.Matchers @@ -195,7 +196,7 @@ class DistributedSuite extends FunSuite with Matchers with LocalSparkContext { blockManager.master.getLocations(blockId).foreach { cmId => val bytes = blockTransfer.fetchBlockSync(cmId.host, cmId.port, cmId.executorId, blockId.toString) - val deserialized = blockManager.dataDeserialize(blockId, bytes.nioByteBuffer()) + val deserialized = blockManager.dataDeserialize(blockId, LargeByteBuffer.asLargeByteBuffer(bytes.nioByteBuffer())) .asInstanceOf[Iterator[Int]].toList assert(deserialized === (1 to 100).toList) } diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index ffe6f039145ea..f692c0ffe967f 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -21,6 +21,8 @@ import java.nio.{ByteBuffer, MappedByteBuffer} import java.util.Arrays import java.util.concurrent.TimeUnit +import org.apache.spark.io.LargeByteBuffer + import scala.collection.mutable.ArrayBuffer import 
scala.concurrent.Await import scala.concurrent.duration._ @@ -809,7 +811,7 @@ class BlockManagerSuite extends FunSuite with Matchers with BeforeAndAfterEach var counter = 0.toByte def incr = {counter = (counter + 1).toByte; counter;} val bytes = Array.fill[Byte](1000)(incr) - val byteBuffer = ByteBuffer.wrap(bytes) + val byteBuffer = LargeByteBuffer.asLargeByteBuffer(bytes) val blockId = BlockId("rdd_1_2") @@ -834,9 +836,9 @@ class BlockManagerSuite extends FunSuite with Matchers with BeforeAndAfterEach "Expected HeapByteBuffer for un-mapped read") assert(mapped.isInstanceOf[MappedByteBuffer], "Expected MappedByteBuffer for mapped read") - def arrayFromByteBuffer(in: ByteBuffer): Array[Byte] = { - val array = new Array[Byte](in.remaining()) - in.get(array) + def arrayFromByteBuffer(in: LargeByteBuffer): Array[Byte] = { + val array = new Array[Byte](in.remaining().toInt) + in.get(array, 0, in.remaining().toInt) array } diff --git a/core/src/test/scala/org/apache/spark/util/collection/ChainedBufferTest.scala b/core/src/test/scala/org/apache/spark/util/collection/ChainedBufferTest.scala index e99d5ecc639c9..cc96e24e3dc03 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/ChainedBufferTest.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/ChainedBufferTest.scala @@ -25,7 +25,7 @@ import org.scalatest.Matchers._ class ChainedBufferSuite extends FunSuite { test("write and read at start") { // write from start of source array - val buffer = new ChainedBuffer(8) + val buffer = ChainedBuffer.withInitialSize(8) buffer.capacity should be (0) verifyWriteAndRead(buffer, 0, 0, 0, 4) buffer.capacity should be (8) @@ -53,7 +53,7 @@ class ChainedBufferSuite extends FunSuite { test("write and read at middle") { // write from start of source array - val buffer = new ChainedBuffer(8) + val buffer = ChainedBuffer.withInitialSize(8) verifyWriteAndRead(buffer, 3, 0, 0, 4) buffer.capacity should be (8) @@ -80,7 +80,7 @@ class ChainedBufferSuite extends FunSuite { test("write and read at later buffer") { // write from start of source array - val buffer = new ChainedBuffer(8) + val buffer = ChainedBuffer.withInitialSize(8) verifyWriteAndRead(buffer, 11, 0, 0, 4) buffer.capacity should be (16) From 4965bad00574a05e133e7caeba56cd6115fe35b6 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 25 Feb 2015 14:28:16 -0600 Subject: [PATCH 05/97] move LargeByteBuffer to network-common, since we need it there for the shuffles --- .../buffer/FileSegmentManagedBuffer.java | 6 +- .../spark/network/buffer/LargeByteBuffer.java | 56 ++++++++ .../network/buffer/LargeByteBufferHelper.java | 65 +++++++++ .../spark/network/buffer/ManagedBuffer.java | 3 +- .../network/buffer/NettyManagedBuffer.java | 4 +- .../network/buffer/NioManagedBuffer.java | 4 +- .../buffer/WrappedLargeByteBuffer.java | 135 ++++++++++++++++++ 7 files changed, 264 insertions(+), 9 deletions(-) create mode 100644 network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java create mode 100644 network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java create mode 100644 network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java index 844eff4f4c701..75c426f379238 100644 --- 
a/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java @@ -55,7 +55,7 @@ public long size() { } @Override - public ByteBuffer nioByteBuffer() throws IOException { + public LargeByteBuffer nioByteBuffer() throws IOException { FileChannel channel = null; try { channel = new RandomAccessFile(file, "r").getChannel(); @@ -71,9 +71,9 @@ public ByteBuffer nioByteBuffer() throws IOException { } } buf.flip(); - return buf; + return LargeByteBufferHelper.asLargeByteBuffer(buf); } else { - return channel.map(FileChannel.MapMode.READ_ONLY, offset, length); + return LargeByteBufferHelper.mapFile(channel, FileChannel.MapMode.READ_ONLY, offset, length); } } catch (IOException e) { try { diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java new file mode 100644 index 0000000000000..c56fdebb887a1 --- /dev/null +++ b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java @@ -0,0 +1,56 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.spark.network.buffer; + +import java.io.IOException; +import java.nio.channels.WritableByteChannel; + +public interface LargeByteBuffer { + public long capacity(); + + public byte get(); + + public void get(byte[] dst,int offset, int length); + + public void position(long position); + + public long position(); + + /** doesn't copy data, just copies references & offsets */ + public LargeByteBuffer duplicate(); + + public void put(LargeByteBuffer bytes); + + //TODO checks on limit semantics + + /** + * Sets this buffer's limit. If the position is larger than the new limit then it is set to the + * new limit. If the mark is defined and larger than the new limit then it is discarded. + */ + public void limit(long newLimit); + + /** + * return this buffer's limit + * @return + */ + public long limit(); + + //an alternative to having this method would be having a foreachBuffer(f: Buffer => T) + public long writeTo(WritableByteChannel channel) throws IOException; + + +} diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java new file mode 100644 index 0000000000000..2585b65b1f969 --- /dev/null +++ b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
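Since a few call sites still hand a single java.nio.ByteBuffer to downstream code (the task-result path above simply reaches into underlying(0)), a small adapter that drains a LargeByteBuffer known to be under 2 GB into a plain array can bridge those seams while the interfaces settle. A sketch using only the methods declared on the interface above; toByteArray is an illustrative name, not something this patch adds:

  import org.apache.spark.network.buffer.LargeByteBuffer

  def toByteArray(buf: LargeByteBuffer): Array[Byte] = {
    val len = buf.limit() - buf.position()
    require(len <= Int.MaxValue, s"$len remaining bytes do not fit in one array")
    val arr = new Array[Byte](len.toInt)
    buf.get(arr, 0, len.toInt)
    arr
  }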
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.network.buffer; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.util.ArrayList; + +public class LargeByteBufferHelper { + + static final int DEFAULT_MAX_CHUNK = Integer.MAX_VALUE - 1000000; + + public static LargeByteBuffer asLargeByteBuffer(ByteBuffer buffer) { + return new WrappedLargeByteBuffer(new ByteBuffer[]{buffer}); + } + + public static LargeByteBuffer mapFile( + FileChannel channel, + FileChannel.MapMode mode, + long offset, + long length + ) throws IOException { + return mapFile(channel, mode, offset, length, DEFAULT_MAX_CHUNK); + } + + public static LargeByteBuffer mapFile( + FileChannel channel, + FileChannel.MapMode mode, + long offset, + long length, + int maxChunk + ) throws IOException { + ArrayList offsets = new ArrayList(); + long curOffset = offset; + long end = offset + length; + while (curOffset < end) { + offsets.add(curOffset); + int chunkLength = Math.min((int) (end - curOffset), maxChunk); + curOffset += chunkLength; + } + offsets.add(end); + ByteBuffer[] chunks = new ByteBuffer[offsets.size() - 1]; + for (int i = 0; i< offsets.size() - 1; i++) { + chunks[i] = channel.map(mode, offsets.get(i), offsets.get(i+ 1) - offsets.get(i)); + } + return new WrappedLargeByteBuffer(chunks); + } + + +} diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/ManagedBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/ManagedBuffer.java index a415db593a788..8ebf72feef3ed 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/ManagedBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/ManagedBuffer.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.InputStream; -import java.nio.ByteBuffer; /** * This interface provides an immutable view for data in the form of bytes. The implementation @@ -44,7 +43,7 @@ public abstract class ManagedBuffer { * returned ByteBuffer should not affect the content of this buffer. */ // TODO: Deprecate this, usage may require expensive memory mapping or allocation. - public abstract ByteBuffer nioByteBuffer() throws IOException; + public abstract LargeByteBuffer nioByteBuffer() throws IOException; /** * Exposes this buffer's data as an InputStream. 
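To get a feel for what the mapping helper above does with very large segments: it walks the requested range in steps of at most maxChunk (Integer.MAX_VALUE minus one million by default) and maps one buffer per step. The same arithmetic, pulled out as a throwaway sketch for reasoning about chunk counts:

  def chunkLengths(length: Long, maxChunk: Int): Seq[Long] = {
    val lengths = scala.collection.mutable.ArrayBuffer[Long]()
    var remaining = length
    while (remaining > 0) {
      lengths += math.min(remaining, maxChunk.toLong)
      remaining -= lengths.last
    }
    lengths
  }

  // e.g. a 5 GB segment with the default max chunk maps as two full ~2 GB buffers
  // plus a ~1 GB tail:
  //   chunkLengths(5L * 1024 * 1024 * 1024, Integer.MAX_VALUE - 1000000)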
The underlying implementation does not diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java index c806bfa45bef3..aa0fc74f00fd7 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java @@ -41,8 +41,8 @@ public long size() { } @Override - public ByteBuffer nioByteBuffer() throws IOException { - return buf.nioBuffer(); + public LargeByteBuffer nioByteBuffer() throws IOException { + return LargeByteBufferHelper.asLargeByteBuffer(buf.nioBuffer()); } @Override diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java index f55b884bc45ce..d4746a76e7d13 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java @@ -41,8 +41,8 @@ public long size() { } @Override - public ByteBuffer nioByteBuffer() throws IOException { - return buf.duplicate(); + public LargeByteBuffer nioByteBuffer() throws IOException { + return LargeByteBufferHelper.asLargeByteBuffer(buf.duplicate()); } @Override diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java new file mode 100644 index 0000000000000..bbe73a87e9105 --- /dev/null +++ b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java @@ -0,0 +1,135 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ +package org.apache.spark.network.buffer; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; + +public class WrappedLargeByteBuffer implements LargeByteBuffer { + + private final ByteBuffer[] underlying; + private final Long totalCapacity; + private final long[] chunkOffsets; + + private long _pos; + private int currentBufferIdx; + private ByteBuffer currentBuffer; + private long limit; + + + public WrappedLargeByteBuffer(ByteBuffer[] underlying) { + this.underlying = underlying; + long sum = 0l; + chunkOffsets = new long[underlying.length]; + for (int i = 0; i < underlying.length; i++) { + chunkOffsets[i] = sum; + sum += underlying[i].capacity(); + } + totalCapacity = sum; + _pos = 0l; + currentBufferIdx = 0; + currentBuffer = underlying[0]; + limit = totalCapacity; + } + + @Override + public long capacity() {return totalCapacity;} + + @Override + public void get(byte[] dest, int offset, int length){ + int moved = 0; + while (moved < length) { + int toRead = Math.min(length - moved, currentBuffer.remaining()); + currentBuffer.get(dest, offset, toRead); + moved += toRead; + updateCurrentBuffer(); + } + } + + @Override + public byte get() { + byte r = currentBuffer.get(); + _pos += 1; + updateCurrentBuffer(); + return r; + } + + private void updateCurrentBuffer() { + //TODO fix end condition + while(!currentBuffer.hasRemaining()) { + currentBufferIdx += 1; + currentBuffer = underlying[currentBufferIdx]; + } + } + + @Override + public void put(LargeByteBuffer bytes) { + throw new RuntimeException("not yet implemented"); + } + + @Override + public long position() { return _pos;} + + @Override + public void position(long newPosition) { + //XXX check range? + _pos = newPosition; + } + + @Override + public WrappedLargeByteBuffer duplicate() { + ByteBuffer[] duplicates = new ByteBuffer[underlying.length]; + for (int i = 0; i < underlying.length; i++) { + duplicates[i] = underlying[i].duplicate(); + } + //we could also avoid initializing offsets here, if we cared ... + return new WrappedLargeByteBuffer(duplicates); + } + +// @Override +// public void rewind() { +// _pos = 0; +// for (ByteBuffer buf: underlying) { +// buf.rewind(); +// } +// } + + @Override + public long limit() { + return limit; + } + + @Override + public void limit(long newLimit) { + //XXX check range? set limits in sub buffers? + limit = newLimit; + } + + @Override + public long writeTo(WritableByteChannel channel) throws IOException { + long written = 0l; + for(ByteBuffer buffer: underlying) { + //TODO test this + //XXX do we care about respecting the limit here? + written += buffer.remaining(); + while (buffer.hasRemaining()) + channel.write(buffer); + } + return written; + } +} From 149d4fa3fa55403df90109b440a3523d3f4ab92b Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 25 Feb 2015 15:50:33 -0600 Subject: [PATCH 06/97] move large byte buffer to network/common ... 
still lots of crud --- .../spark/broadcast/TorrentBroadcast.scala | 9 +- .../org/apache/spark/executor/Executor.scala | 4 +- .../spark/io/ChainedLargeByteBuffer.scala | 85 + .../org/apache/spark/io/LargeByteBuffer.scala | 3304 ++++++++--------- .../spark/network/BlockTransferService.scala | 7 +- .../network/netty/NettyBlockRpcServer.scala | 4 +- .../netty/NettyBlockTransferService.scala | 11 +- .../network/nio/NioBlockTransferService.scala | 12 +- .../spark/scheduler/TaskResultGetter.scala | 4 +- .../shuffle/FileShuffleBlockManager.scala | 7 +- .../shuffle/IndexShuffleBlockManager.scala | 6 +- .../spark/shuffle/ShuffleBlockManager.scala | 3 +- .../apache/spark/storage/BlockManager.scala | 24 +- .../org/apache/spark/storage/BlockStore.scala | 6 +- .../org/apache/spark/storage/DiskStore.scala | 6 +- .../apache/spark/storage/MemoryStore.scala | 4 +- .../org/apache/spark/storage/PutResult.scala | 2 +- .../apache/spark/storage/TachyonStore.scala | 6 +- .../util/LargeByteBufferInputStream.scala | 3 +- .../util/LargeByteBufferOutputStream.scala | 3 +- .../NettyBlockTransferSecuritySuite.scala | 4 +- .../netty/NettyBlockTransferSuite.scala | 80 + .../buffer/FileSegmentManagedBuffer.java | 34 +- .../spark/network/buffer/LargeByteBuffer.java | 6 + .../network/buffer/LargeByteBufferHelper.java | 17 + .../network/buffer/NioManagedBuffer.java | 14 +- .../buffer/WrappedLargeByteBuffer.java | 12 +- .../network/buffer/LargeByteBuffer.scala | 1760 +++++++++ 28 files changed, 3694 insertions(+), 1743 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/io/ChainedLargeByteBuffer.scala create mode 100644 core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala create mode 100644 network/common/src/main/scala/org/apache/spark/network/buffer/LargeByteBuffer.scala diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index fac7c01e558ff..2b166439485b0 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -18,17 +18,18 @@ package org.apache.spark.broadcast import java.io._ -import java.nio.ByteBuffer + +import org.apache.spark.network.buffer.{LargeByteBufferHelper, LargeByteBuffer} import scala.collection.JavaConversions.asJavaEnumeration import scala.reflect.ClassTag import scala.util.Random import org.apache.spark.{Logging, SparkConf, SparkEnv, SparkException} -import org.apache.spark.io.{LargeByteBuffer, CompressionCodec} +import org.apache.spark.io.CompressionCodec import org.apache.spark.serializer.Serializer import org.apache.spark.storage.{BroadcastBlockId, StorageLevel} -import org.apache.spark.util.{LargeByteBufferInputStream, ByteBufferInputStream, Utils} +import org.apache.spark.util.{LargeByteBufferInputStream, Utils} import org.apache.spark.util.io.ByteArrayChunkOutputStream /** @@ -200,7 +201,7 @@ private object TorrentBroadcast extends Logging { val ser = serializer.newInstance() val serOut = ser.serializeStream(out) serOut.writeObject[T](obj).close() - bos.toArrays.map(LargeByteBuffer.asLargeByteBuffer) + bos.toArrays.map(LargeByteBufferHelper.asLargeByteBuffer) } def unBlockifyObject[T: ClassTag]( diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 65c74db8ebf3a..faec7463d9f5f 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ 
b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -23,7 +23,7 @@ import java.net.URL import java.nio.ByteBuffer import java.util.concurrent._ -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.LargeByteBufferHelper import scala.collection.JavaConversions._ import scala.collection.mutable.{ArrayBuffer, HashMap} @@ -233,7 +233,7 @@ private[spark] class Executor( } else if (resultSize >= akkaFrameSize - AkkaUtils.reservedSizeBytes) { val blockId = TaskResultBlockId(taskId) env.blockManager.putBytes( - blockId, LargeByteBuffer.asLargeByteBuffer(serializedDirectResult), StorageLevel.MEMORY_AND_DISK_SER) + blockId, LargeByteBufferHelper.asLargeByteBuffer(serializedDirectResult), StorageLevel.MEMORY_AND_DISK_SER) logInfo( s"Finished $taskName (TID $taskId). $resultSize bytes result sent via BlockManager)") ser.serialize(new IndirectTaskResult[Any](blockId, resultSize)) diff --git a/core/src/main/scala/org/apache/spark/io/ChainedLargeByteBuffer.scala b/core/src/main/scala/org/apache/spark/io/ChainedLargeByteBuffer.scala new file mode 100644 index 0000000000000..eea1114ec35ca --- /dev/null +++ b/core/src/main/scala/org/apache/spark/io/ChainedLargeByteBuffer.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.io + +import java.nio.ByteBuffer +import java.nio.channels.WritableByteChannel + +import org.apache.spark.network.buffer.LargeByteBuffer +import org.apache.spark.util.collection.ChainedBuffer + +class ChainedLargeByteBuffer(private[io] val underlying: ChainedBuffer) extends LargeByteBuffer { + + def capacity = underlying.capacity + + var _pos = 0l + + def get(dst: Array[Byte],offset: Int,length: Int): Unit = { + underlying.read(_pos, dst, offset, length) + _pos += length + } + + def get(): Byte = { + val b = underlying.read(_pos) + _pos += 1 + b + } + + def put(bytes: LargeByteBuffer): Unit = { + ??? + } + + def position: Long = _pos + def position(position: Long): Unit = { + _pos = position + } + def remaining(): Long = { + underlying.size - position + } + + def duplicate(): ChainedLargeByteBuffer = { + new ChainedLargeByteBuffer(underlying) + } + + def rewind(): Unit = { + _pos = 0 + } + + def limit(): Long = { + capacity + } + + def limit(newLimit: Long): Unit = { + ??? 
+ } + + def writeTo(channel:WritableByteChannel): Long = { + var written = 0l + underlying.chunks.foreach{bytes => + //TODO test this + val buffer = ByteBuffer.wrap(bytes) + while (buffer.hasRemaining) + channel.write(buffer) + written += bytes.length + } + written + } + + override def firstByteBuffer(): ByteBuffer = { + ByteBuffer.wrap(underlying.chunks(0)) + } +} diff --git a/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala b/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala index 4bb5b5a101149..56cdd9e6938b3 100644 --- a/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala +++ b/core/src/main/scala/org/apache/spark/io/LargeByteBuffer.scala @@ -1,554 +1,103 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.io - -import java.io.{RandomAccessFile, DataInput, InputStream, OutputStream} -import java.nio.channels.FileChannel.MapMode -import java.nio.{ByteBuffer, BufferUnderflowException, BufferOverflowException} -import java.nio.channels.{FileChannel, WritableByteChannel, ReadableByteChannel} - -import org.apache.spark.util.collection.ChainedBuffer - -import scala.collection.mutable.{ArrayBuffer, HashSet} - - - - -trait LargeByteBuffer { -// def position(): Long -// -// def limit(): Long - - def capacity(): Long - - def get(): Byte //needed for ByteBufferInputStream - - def get(dst: Array[Byte], offset: Int, length: Int): Unit // for ByteBufferInputStream - - def position(position: Long): Unit //for ByteBufferInputStream - - def position(): Long //for ByteBufferInputStream - - /** doesn't copy data, just copies references & offsets */ - def duplicate(): LargeByteBuffer - - def put(bytes: LargeByteBuffer): Unit - - //also need whatever is necessary for ByteArrayOutputStream for BlockManager#dataSerialize - - - //TODO checks on limit semantics - - /** - * Sets this buffer's limit. If the position is larger than the new limit then it is set to the - * new limit. If the mark is defined and larger than the new limit then it is discarded. - */ - def limit(newLimit: Long): Unit - - /** - * return this buffer's limit - * @return - */ - def limit(): Long - - - //an alternative to having this method would be having a foreachBuffer(f: Buffer => T) - def writeTo(channel: WritableByteChannel): Long - -// -// def skip(skipBy: Long): Unit -// -// def position(newPosition: Long): Unit -// -// /** -// * Clears this buffer. The position is set to zero, the limit is set to -// * the capacity, and the mark is discarded. -// * -// *
Invoke this method before using a sequence of channel-read or -// * put operations to fill this buffer. -// * -// *
This method does not actually erase the data in the buffer, but it -// * is named as if it did because it will most often be used in situations -// * in which that might as well be the case.
-// */ -// def clear(): Unit -// -// /** -// * Flips this buffer. The limit is set to the current position and then -// * the position is set to zero. If the mark is defined then it is -// * discarded. -// * -// *
After a sequence of channel-read or put operations, invoke -// * this method to prepare for a sequence of channel-write or relative -// * get operations. -// */ -// def flip(): Unit - - /** - * Rewinds this buffer. The position is set to zero and the mark is - * discarded. - * - *
Invoke this method before a sequence of channel-write or get - * operations, assuming that the limit has already been set - * appropriately. - */ - def rewind(): Unit - - /** - * Returns the number of elements between the current position and the - * limit.
- * - * @return The number of elements remaining in this buffer - */ - def remaining(): Long -} - -class ChainedLargeByteBuffer(private[io] val underlying: ChainedBuffer) extends LargeByteBuffer { - - def capacity = underlying.capacity - - var _pos = 0l - - def get(dst: Array[Byte],offset: Int,length: Int): Unit = { - underlying.read(_pos, dst, offset, length) - _pos += length - } - - def get(): Byte = { - val b = underlying.read(_pos) - _pos += 1 - b - } - - def put(bytes: LargeByteBuffer): Unit = { - ??? - } - - def position: Long = _pos - def position(position: Long): Unit = { - _pos = position - } - def remaining(): Long = { - underlying.size - position - } - - def duplicate(): ChainedLargeByteBuffer = { - new ChainedLargeByteBuffer(underlying) - } - - def rewind(): Unit = { - _pos = 0 - } - - def limit(): Long = { - capacity - } - - def limit(newLimit: Long): Unit = { - ??? - } - - def writeTo(channel:WritableByteChannel): Long = { - var written = 0l - underlying.chunks.foreach{bytes => - //TODO test this - val buffer = ByteBuffer.wrap(bytes) - while (buffer.hasRemaining) - channel.write(buffer) - written += bytes.length - } - written - } -} - -class WrappedLargeByteBuffer(private[spark] val underlying: Array[ByteBuffer]) extends LargeByteBuffer { - - val (totalCapacity, chunkOffsets) = { - var sum = 0l - val offsets = new Array[Long](underlying.size) - (0 until underlying.size).foreach{idx => - offsets(idx) = sum - sum += underlying(idx).capacity() - } - (sum, offsets) - } - - private var _pos = 0l - private var currentBufferIdx = 0 - private var currentBuffer = underlying(0) - private var _limit = totalCapacity - - def capacity = totalCapacity - - def get(dst: Array[Byte], offset: Int, length: Int): Unit = { - var moved = 0 - while (moved < length) { - val toRead = math.min(length - moved, currentBuffer.remaining()) - currentBuffer.get(dst, offset, toRead) - moved += toRead - updateCurrentBuffer() - } - } - - def get(): Byte = { - val r = currentBuffer.get() - _pos += 1 - updateCurrentBuffer() - r - } - - private def updateCurrentBuffer(): Unit = { - //TODO fix end condition - while(!currentBuffer.hasRemaining()) { - currentBufferIdx += 1 - currentBuffer = underlying(currentBufferIdx) - } - } - - def put(bytes: LargeByteBuffer): Unit = { - ??? - } - - def position: Long = _pos - def position(position: Long): Unit = { - //XXX check range? - _pos = position - } - def remaining(): Long = { - totalCapacity - _pos - } - - def duplicate(): WrappedLargeByteBuffer = { - new WrappedLargeByteBuffer(underlying.map{_.duplicate()}) - } - - def rewind(): Unit = { - _pos = 0 - underlying.foreach{_.rewind()} - } - - def limit(): Long = { - totalCapacity - } - - def limit(newLimit: Long) = { - //XXX check range? set limits in sub buffers? - _limit = newLimit - } - - def writeTo(channel: WritableByteChannel): Long = { - var written = 0l - underlying.foreach{buffer => - //TODO test this - //XXX do we care about respecting the limit here? 
- written += buffer.remaining() - while (buffer.hasRemaining) - channel.write(buffer) - } - written - } - -} - -object LargeByteBuffer { - - def asLargeByteBuffer(byteBuffer: ByteBuffer): LargeByteBuffer = { - new WrappedLargeByteBuffer(Array(byteBuffer)) - } - - def asLargeByteBuffer(bytes: Array[Byte]): LargeByteBuffer = { - new WrappedLargeByteBuffer(Array(ByteBuffer.wrap(bytes))) - } - - - def allocateOnHeap(size: Long, maxChunk: Int): LargeByteBuffer = { - val buffer = ChainedBuffer.withInitialSize(maxChunk, size) - new ChainedLargeByteBuffer(buffer) - } - - def mapFile( - channel: FileChannel, - mode: MapMode, - offset: Long, - length: Long, - maxChunk: Int = Integer.MAX_VALUE - 1e6.toInt - ): LargeByteBuffer = { - val offsets = new ArrayBuffer[Long]() - var curOffset = offset - val end = offset + length - while (curOffset < end) { - offsets += curOffset - val length = math.min(end - curOffset, maxChunk) - curOffset += length - } - offsets += end - val chunks = new Array[ByteBuffer](offsets.size - 1) - (0 until offsets.size - 1).foreach{idx => - chunks(idx) = channel.map(mode, offsets(idx), offsets(idx + 1) - offsets(idx)) - } - new WrappedLargeByteBuffer(chunks) - } -} - - -// -///** -// * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G -// * which ByteBuffers are limited to. -// * Externally, it exposes all the api which java.nio.ByteBuffer exposes. -// * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. -// * Not all the data might be loaded into memory (like disk or tachyon data) - so actual -// * memory footprint - heap and vm could be much lower than capacity. +///* +// * Licensed to the Apache Software Foundation (ASF) under one or more +// * contributor license agreements. See the NOTICE file distributed with +// * this work for additional information regarding copyright ownership. +// * The ASF licenses this file to You under the Apache License, Version 2.0 +// * (the "License"); you may not use this file except in compliance with +// * the License. You may obtain a copy of the License at // * -// * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this -// * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! +// * http://www.apache.org/licenses/LICENSE-2.0 // * -// * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this -// * will require the file to be kept open (repeatedly opening/closing file is not good -// * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is -// * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) -// * -// * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is -// * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some -// * cases (when we duplicate/slice them). Currently spark does not need this, but might in future -// * so relook at it later. +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. 
// */ -//// We should make this constructor private: but for now, -//// leaving it public since TachyonStore needs it -//class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], -// private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { -// -// // TODO: TEMP code: to flush out potential resource leaks. REMOVE ME -// private val allocateLocationThrowable: Throwable = { -// if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { -// new Throwable("blockId = " + BlockManager.getLookupBlockId) -// } else { -// null -// } -// } -// private var disposeLocationThrowable: Throwable = null -// -// @volatile private var allowCleanerOverride = true -// @volatile private var cleaner: BufferCleaner = new BufferCleaner { -// override def doClean(buffer: LargeByteBuffer) = { -// assert (LargeByteBuffer.this == buffer) -// doDispose(needRelease = false) -// } -// } // -// // should not be empty -// assert (null != inputContainers && ! inputContainers.isEmpty) -// // should not have any null's -// assert (inputContainers.find(_ == null).isEmpty) +//package org.apache.spark.io // -// // println("Num containers = " + inputContainers.size) +//import java.io.{RandomAccessFile, DataInput, InputStream, OutputStream} +//import java.nio.channels.FileChannel.MapMode +//import java.nio.{ByteBuffer, BufferUnderflowException, BufferOverflowException} +//import java.nio.channels.{FileChannel, WritableByteChannel, ReadableByteChannel} // -// // Position, limit and capacity relevant over the engire LargeByteBuffer -// @volatile private var globalPosition = 0L -// @volatile private var globalLimit = 0L -// @volatile private var currentContainerIndex = 0 +//import org.apache.spark.util.collection.ChainedBuffer // -// // The buffers in which the actual data is held. -// private var containers: Array[ByteBufferContainer] = null +//import scala.collection.mutable.{ArrayBuffer, HashSet} // -// // aggregate capacities of the individual buffers. -// // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be -// // sum of capacity of 0th and 1st block buffer -// private var bufferPositionStart: Array[Long] = null // -// // Contains the indices of a containers which requires release before subsequent invocation of -// // read/write should be serviced. This is required since current read/write might have moved the -// // position but since we are returning bytebuffers which depend on the validity of the existing -// // bytebuffer, we cant release them yet. -// private var needReleaseIndices = new HashSet[Int]() // -// private val readable = ! inputContainers.exists(! _.isReadable) -// private val writable = ! inputContainers.exists(! 
_.isWritable) // +//trait LargeByteBuffer { +//// def position(): Long +//// +//// def limit(): Long // -// // initialize -// @volatile private var globalCapacity = { +// def capacity(): Long // -// // Ensure that there are no empty buffers : messes up with our code : unless it -// // is a single buffer (for empty buffer for marker case) -// assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) +// def get(): Byte //needed for ByteBufferInputStream // -// containers = { -// if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray -// } -// containers.foreach(_.validate()) +// def get(dst: Array[Byte], offset: Int, length: Int): Unit // for ByteBufferInputStream // -// def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { -// val buff = new ArrayBuffer[Long](arr.length + 1) -// buff += 0L +// def position(position: Long): Unit //for ByteBufferInputStream // -// buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) -// assert (buff.length == arr.length + 1) -// bufferPositionStart = buff.toArray -// } +// def position(): Long //for ByteBufferInputStream // -// initializeBufferPositionStart(containers) +// /** doesn't copy data, just copies references & offsets */ +// def duplicate(): LargeByteBuffer // -// // remove references from inputBuffers -// inputContainers.clear() +// def put(bytes: LargeByteBuffer): Unit // -// globalLimit = bufferPositionStart(containers.length) -// globalPosition = 0L -// currentContainerIndex = 0 +// //also need whatever is necessary for ByteArrayOutputStream for BlockManager#dataSerialize // -// assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) -// -// globalLimit -// } -// -// final def position(): Long = globalPosition -// -// final def limit(): Long = globalLimit -// -// final def capacity(): Long = globalCapacity -// -// final def limit(newLimit: Long) { -// if ((newLimit > capacity()) || (newLimit < 0)) { -// throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) -// } -// -// globalLimit = newLimit -// if (position() > newLimit) position(newLimit) -// } -// -// def skip(skipBy: Long) = position(position() + skipBy) -// -// private def releasePendingContainers() { -// if (! needReleaseIndices.isEmpty) { -// val iter = needReleaseIndices.iterator -// while (iter.hasNext) { -// val index = iter.next() -// assert (index >= 0 && index < containers.length) -// // It is possible to move from one container to next before the previous -// // container was acquired. For example, get forcing move to next container -// // since current was exhausted immediatelly followed by a position() -// // so the container we moved to was never acquired. -// -// // assert (containers(index).isAcquired) -// // will this always be satisfied ? -// // assert (index != currentContainerIndex) -// if (containers(index).isAcquired) containers(index).release() -// } -// needReleaseIndices.clear() -// } -// } -// -// private def toNewContainer(newIndex: Int) { -// if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { -// -// assert (currentContainerIndex >= 0) -// needReleaseIndices += currentContainerIndex -// } -// currentContainerIndex = newIndex -// } -// -// // expensive method, sigh ... optimize it later ? 
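The removed position(newPosition) implementation just below finds the chunk holding a given global position by scanning the cumulative bufferPositionStart offsets linearly; the comment above it notes this could be optimized. A minimal sketch of the same chunk lookup done with a binary search over cumulative offsets (hypothetical helper, not part of this patch):

import java.util.Arrays

// offsets(i) is the global position at which chunk i starts; offsets(0) == 0
// and the array is sorted ascending. Returns the index of the chunk that
// contains `position`, assuming 0 <= position < total capacity.
def chunkIndexFor(offsets: Array[Long], position: Long): Int = {
  val idx = Arrays.binarySearch(offsets, position)
  if (idx >= 0) idx   // position lands exactly on a chunk boundary
  else -idx - 2       // otherwise (insertion point) - 1 is the containing chunk
}

// Example: chunks of 4, 4 and 2 bytes give offsets Array(0L, 4L, 8L);
// chunkIndexFor(Array(0L, 4L, 8L), 5L) == 1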
-// final def position(newPosition: Long) { -// -// if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() -// -// if (currentContainerIndex < bufferPositionStart.length - 1 && -// newPosition >= bufferPositionStart(currentContainerIndex) && -// newPosition < bufferPositionStart(currentContainerIndex + 1)) { -// // Same buffer - easy method ... -// globalPosition = newPosition -// // Changed position - free previously returned buffers. -// releasePendingContainers() -// return -// } -// -// // Find appropriate currentContainerIndex -// // Since bufferPositionStart is sorted, can be replaced with binary search if required. -// // For now, not in the perf critical path since buffers size is very low typically. -// var index = 0 -// val cLen = containers.length -// while (index < cLen) { -// if (newPosition >= bufferPositionStart(index) && -// newPosition < bufferPositionStart(index + 1)) { -// globalPosition = newPosition -// toNewContainer(index) -// // Changed position - free earlier and previously returned buffers. -// releasePendingContainers() -// return -// } -// index += 1 -// } -// -// if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { -// // boundary. -// globalPosition = newPosition -// toNewContainer(cLen) -// // Changed position - free earlier and previously returned buffers. -// releasePendingContainers() -// return -// } -// -// assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + -// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) -// } // +// //TODO checks on limit semantics // // /** -// * Clears this buffer. The position is set to zero, the limit is set to -// * the capacity, and the mark is discarded. -// * -// *
Invoke this method before using a sequence of channel-read or -// * put operations to fill this buffer. -// * -// *
This method does not actually erase the data in the buffer, but it -// * is named as if it did because it will most often be used in situations -// * in which that might as well be the case.
+// * Sets this buffer's limit. If the position is larger than the new limit then it is set to the +// * new limit. If the mark is defined and larger than the new limit then it is discarded. // */ -// final def clear() { -// // if (0 == globalCapacity) return -// -// needReleaseIndices += 0 -// globalPosition = 0L -// toNewContainer(0) -// globalLimit = globalCapacity -// -// // Now free all pending containers -// releasePendingContainers() -// } +// def limit(newLimit: Long): Unit // // /** -// * Flips this buffer. The limit is set to the current position and then -// * the position is set to zero. If the mark is defined then it is -// * discarded. -// * -// *
After a sequence of channel-read or put operations, invoke -// * this method to prepare for a sequence of channel-write or relative -// * get operations. +// * return this buffer's limit +// * @return // */ -// final def flip() { -// needReleaseIndices += 0 -// globalLimit = globalPosition -// globalPosition = 0L -// toNewContainer(0) +// def limit(): Long // -// // Now free all pending containers -// releasePendingContainers() -// } +// +// //an alternative to having this method would be having a foreachBuffer(f: Buffer => T) +// def writeTo(channel: WritableByteChannel): Long +// +//// +//// def skip(skipBy: Long): Unit +//// +//// def position(newPosition: Long): Unit +//// +//// /** +//// * Clears this buffer. The position is set to zero, the limit is set to +//// * the capacity, and the mark is discarded. +//// * +//// *
Invoke this method before using a sequence of channel-read or +//// * put operations to fill this buffer. +//// * +//// *
This method does not actually erase the data in the buffer, but it +//// * is named as if it did because it will most often be used in situations +//// * in which that might as well be the case.
+//// */ +//// def clear(): Unit +//// +//// /** +//// * Flips this buffer. The limit is set to the current position and then +//// * the position is set to zero. If the mark is defined then it is +//// * discarded. +//// * +//// *
After a sequence of channel-read or put operations, invoke +//// * this method to prepare for a sequence of channel-write or relative +//// * get operations. +//// */ +//// def flip(): Unit // // /** // * Rewinds this buffer. The position is set to zero and the mark is @@ -558,14 +107,7 @@ object LargeByteBuffer { // * operations, assuming that the limit has already been set // * appropriately. // */ -// final def rewind() { -// needReleaseIndices += 0 -// globalPosition = 0L -// toNewContainer(0) -// -// // Now free all pending containers -// releasePendingContainers() -// } +// def rewind(): Unit // // /** // * Returns the number of elements between the current position and the @@ -573,1192 +115,1650 @@ object LargeByteBuffer { // * // * @return The number of elements remaining in this buffer // */ -// final def remaining(): Long = { -// globalLimit - globalPosition -// } -// -// /** -// * Tells whether there are any elements between the current position and -// * the limit.
-// * -// * @return true if, and only if, there is at least one element -// * remaining in this buffer -// */ -// final def hasRemaining() = { -// globalPosition < globalLimit -// } -// -// // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) -// -// // number of bytes remaining in currently active underlying buffer -// private def currentRemaining(): Int = { -// if (hasRemaining()) { -// // validate currentContainerIndex is valid -// assert (globalPosition >= bufferPositionStart(currentContainerIndex) && -// globalPosition < bufferPositionStart(currentContainerIndex + 1), -// "globalPosition = " + globalPosition + -// ", currentContainerIndex = " + currentContainerIndex + -// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -// -// currentRemaining0(currentContainerIndex) -// } else 0 -// } -// -// // Without any validation : required when we are bumping the index (when validation will fail) ... -// private def currentRemaining0(which: Int): Int = { -// // currentBuffer().remaining() -// math.max(0, math.min(bufferPositionStart(which + 1), -// globalLimit) - globalPosition).asInstanceOf[Int] -// } -// -// // Set the approppriate position/limit for the current underlying buffer to mirror our -// // the LargeByteBuffer's state. -// private def fetchCurrentBuffer(): ByteBuffer = { -// releasePendingContainers() -// -// assert (currentContainerIndex < containers.length) -// -// val container = containers(currentContainerIndex) -// if (! container.isAcquired) { -// container.acquire() -// } -// -// assert (container.isAcquired) -// if (LargeByteBuffer.enableExpensiveAssert) { -// assert (! containers.exists( b => (b ne container) && b.isAcquired)) -// } -// -// assert (currentContainerIndex < bufferPositionStart.length && -// globalPosition < bufferPositionStart(currentContainerIndex + 1), -// "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + -// bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) -// -// val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). -// asInstanceOf[Int] -// -// val buffer = container.getByteBuffer -// buffer.position(buffPosition) -// val diff = buffer.capacity - buffPosition -// val left = remaining() -// if (diff <= left) { -// buffer.limit(buffer.capacity()) -// } else { -// // Can happen if limit() was called. -// buffer.limit(buffPosition + left.asInstanceOf[Int]) -// } -// -// buffer -// } -// -// // To be used ONLY to test in suites. -// private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { -// if ("1" != System.getProperty("SPARK_TESTING")) { -// throw new IllegalStateException("This method is to be used ONLY within spark test suites") -// } -// -// fetchCurrentBuffer() -// } -// -// // Expects that the invoker has ensured that this can be safely invoked. -// // That is, it wont be invoked when the loop wont terminate. -// private def toNonEmptyBuffer() { -// -// if (! hasRemaining()) { -// var newIndex = currentContainerIndex -// // Ensure we are in the right block or not. -// while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { -// newIndex += 1 -// } -// toNewContainer(newIndex) -// // Do not do this - since we might not yet have consumed the buffer which caused EOF right now -// /* -// // Add last one also, and release it too - since we are at the end of the buffer with nothing -// // more pending. 
-// if (newIndex >= 0 && currentContainerIndex < containers.length) { -// needReleaseIndices += newIndex -// } -// */ -// assert (currentContainerIndex >= 0) -// // releasePendingContainers() -// return -// } -// -// var index = currentContainerIndex -// while (0 == currentRemaining0(index) && index < containers.length) { -// index += 1 -// } -// assert (currentContainerIndex < containers.length) -// toNewContainer(index) -// assert (0 != currentRemaining()) -// } -// -// private def assertPreconditions(containerIndex: Int) { -// assert (globalPosition >= bufferPositionStart(containerIndex), -// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + -// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -// assert (globalPosition < bufferPositionStart(containerIndex + 1), -// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + -// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -// -// assert (globalLimit <= globalCapacity) -// assert (containerIndex < containers.length) -// } -// -// -// /** -// * Attempts to return a ByteBuffer of the requested size. -// * It is possible to return a buffer of size smaller than requested -// * even though hasRemaining == true -// * -// * On return, position would have been moved 'ahead' by the size of the buffer returned : -// * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer -// * -// * -// * This is used to primarily retrieve content of this buffer to expose via ByteBuffer -// * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the -// * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer -// * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying -// * container is a disk backed container, and we make subsequent calls to get(), the returned -// * ByteBuffer can be dispose'ed off -// * -// * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
-// * @return -// */ -// -// private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { -// fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) -// } -// -// private def fetchBufferOfSizeImpl(maxChunkSize: Int, -// canReleaseContainers: Boolean): ByteBuffer = { -// if (canReleaseContainers) releasePendingContainers() -// assert (maxChunkSize > 0) -// -// // not checking for degenerate case of maxChunkSize == 0 -// if (globalPosition >= globalLimit) { -// // throw exception -// throw new BufferUnderflowException() -// } -// -// // Check preconditions : disable these later, since they might be expensive to -// // evaluate for every IO op -// assertPreconditions(currentContainerIndex) -// -// val currentBufferRemaining = currentRemaining() -// -// assert (currentBufferRemaining > 0) -// -// val size = math.min(currentBufferRemaining, maxChunkSize) -// -// val newBuffer = if (currentBufferRemaining > maxChunkSize) { -// val currentBuffer = fetchCurrentBuffer() -// val buff = ByteBufferContainer.createSlice(currentBuffer, -// currentBuffer.position(), maxChunkSize) -// assert (buff.remaining() == maxChunkSize) -// buff -// } else { -// val currentBuffer = fetchCurrentBuffer() -// val buff = currentBuffer.slice() -// assert (buff.remaining() == currentBufferRemaining) -// buff -// } -// -// assert (size == newBuffer.remaining()) -// assert (0 == newBuffer.position()) -// assert (size == newBuffer.limit()) -// assert (newBuffer.capacity() == newBuffer.limit()) -// -// globalPosition += newBuffer.remaining -// toNonEmptyBuffer() -// -// newBuffer -// } -// -// // Can we service the read/write from the currently active (underlying) bytebuffer or not. -// // For almost all cases, this will return true allowing us to optimize away the more expensive -// // computations. -// private def localReadWritePossible(size: Int) = -// size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) -// -// -// def getLong(): Long = { -// assert (readable) -// releasePendingContainers() -// -// if (remaining() < 8) throw new BufferUnderflowException -// -// if (localReadWritePossible(8)) { -// val buff = fetchCurrentBuffer() -// assert (buff.remaining() >= 8) -// val retval = buff.getLong -// globalPosition += 8 -// toNonEmptyBuffer() -// return retval -// } -// -// val buff = readFully(8) -// buff.getLong -// } -// -// def getInt(): Int = { -// assert (readable) -// releasePendingContainers() -// -// if (remaining() < 4) throw new BufferUnderflowException -// -// if (localReadWritePossible(4)) { -// val buff = fetchCurrentBuffer() -// assert (buff.remaining() >= 4) -// val retval = buff.getInt -// globalPosition += 4 -// toNonEmptyBuffer() -// return retval -// } -// -// val buff = readFully(4) -// buff.getInt -// } +// def remaining(): Long +//} // -// def getChar(): Char = { -// assert (readable) -// releasePendingContainers() +//class ChainedLargeByteBuffer(private[io] val underlying: ChainedBuffer) extends LargeByteBuffer { // -// if (remaining() < 2) throw new BufferUnderflowException +// def capacity = underlying.capacity // -// if (localReadWritePossible(2)) { -// val buff = fetchCurrentBuffer() -// assert (buff.remaining() >= 2) -// val retval = buff.getChar -// globalPosition += 2 -// toNonEmptyBuffer() -// return retval -// } +// var _pos = 0l // -// // if slice is becoming too expensive, revisit this ... 
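In the removed code that follows, getChar (and likewise getInt and getLong above) falls back to readFully(n) whenever the requested primitive straddles two underlying chunks: the n bytes are first staged into a small contiguous ByteBuffer and then decoded from the copy. A rough sketch of that staging idea, using hypothetical names rather than the patch's API:

import java.nio.ByteBuffer

// Stage 8 bytes that may span several source chunks into one small buffer,
// then decode the long from the contiguous copy. Assumes at least 8 bytes
// remain across the chunks in total.
def readLongAcross(chunks: Iterator[ByteBuffer]): Long = {
  val staging = ByteBuffer.allocate(8)
  var chunk = chunks.next()
  while (staging.hasRemaining) {
    while (!chunk.hasRemaining) chunk = chunks.next()
    staging.put(chunk.get())
  }
  staging.flip()
  staging.getLong
}

The double copy only happens on the rare reads that cross a chunk boundary; reads that fit inside the current chunk go straight through the underlying ByteBuffer.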
-// val buff = readFully(2) -// buff.getChar +// def get(dst: Array[Byte],offset: Int,length: Int): Unit = { +// underlying.read(_pos, dst, offset, length) +// _pos += length // } // // def get(): Byte = { -// assert (readable) -// releasePendingContainers() -// -// if (! hasRemaining()) throw new BufferUnderflowException -// -// // If we have remaining bytes, previous invocations MUST have ensured that we are at -// // a buffer which has data to be read. -// assert (localReadWritePossible(1)) -// -// val buff = fetchCurrentBuffer() -// assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) -// val retval = buff.get() -// globalPosition += 1 -// toNonEmptyBuffer() -// -// retval -// } -// -// def get(arr: Array[Byte], offset: Int, size: Int): Int = { -// assert (readable) -// releasePendingContainers() -// -// LargeByteBuffer.checkOffsets(arr, offset, size) -// -// // kyro depends on this it seems ? -// // assert (size > 0) -// if (0 == size) return 0 -// -// if (! hasRemaining()) return -1 -// -// if (localReadWritePossible(size)) { -// val buff = fetchCurrentBuffer() -// assert (buff.remaining() >= size) -// buff.get(arr, offset, size) -// globalPosition += size -// toNonEmptyBuffer() -// return size -// } -// -// var remainingSize = math.min(size, remaining()).asInstanceOf[Int] -// var currentOffset = offset -// -// while (remainingSize > 0) { -// val buff = fetchBufferOfSize(remainingSize) -// val toCopy = math.min(buff.remaining(), remainingSize) -// -// buff.get(arr, currentOffset, toCopy) -// currentOffset += toCopy -// remainingSize -= toCopy -// } -// -// currentOffset - offset -// } -// -// -// private def createSlice(size: Long): LargeByteBuffer = { -// -// releasePendingContainers() -// -// if (remaining() < size) { -// // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) -// throw new BufferOverflowException -// } -// -// // kyro depends on this it seems ? -// // assert (size > 0) -// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER -// -// val arr = new ArrayBuffer[ByteBufferContainer](2) -// var totalLeft = size -// -// // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) -// -// var containerIndex = currentContainerIndex -// while (totalLeft > 0 && hasRemaining()) { -// assertPreconditions(containerIndex) -// val container = containers(containerIndex) -// val currentLeft = currentRemaining0(containerIndex) -// -// assert (globalPosition + currentLeft <= globalLimit) -// assert (globalPosition >= bufferPositionStart(containerIndex) && -// (globalPosition < bufferPositionStart(containerIndex + 1))) -// -// val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] -// val sliceSize = math.min(totalLeft, currentLeft) -// assert (from >= 0) -// assert (sliceSize > 0 && sliceSize <= Int.MaxValue) -// -// val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) -// arr += slice -// -// globalPosition += sliceSize -// totalLeft -= sliceSize -// if (currentLeft == sliceSize) containerIndex += 1 -// } -// -// // Using toNonEmptyBuffer instead of directly moving to next here so that -// // other checks can be performed there. -// toNonEmptyBuffer() -// // force cleanup - this is fine since we are not using the buffers directly -// // which are actively needed (the returned value is on containers which can -// // recreate) -// releasePendingContainers() -// // free current container if acquired. 
-// if (currentContainerIndex < containers.length) { -// containers(currentContainerIndex).release() -// } -// assert (currentContainerIndex == containerIndex) -// -// val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) -// retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) -// retval -// } -// -// // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers -// // This is to be used only for writes : and ensures that writes are done into the appropriate -// // underlying bytebuffers. -// def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { -// assert(writable) -// assert(size >= 0) -// -// createSlice(size) +// val b = underlying.read(_pos) +// _pos += 1 +// b // } // -// // get a buffer which is of the specified size and contains data from the underlying buffers -// // Note, the actual data might be spread across the underlying buffers. -// // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! -// private def readFully(size: Int): ByteBuffer = { -// assert (readable) -// -// if (remaining() < size) { -// // throw exception -// throw new BufferUnderflowException() -// } -// -// // kyro depends on this it seems ? -// // assert (size > 0) -// if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER -// -// // Expected to be handled elsewhere. -// assert (! localReadWritePossible(size)) -// -// val localBuff = { -// val buff = fetchBufferOfSize(size) -// // assert(buff.remaining() <= size) -// // if (buff.remaining() == size) return buff -// assert(buff.remaining() < size) -// ByteBuffer.allocate(size).put(buff) -// } -// -// // assert (localBuff.hasRemaining) -// -// while (localBuff.hasRemaining) { -// val buff = fetchBufferOfSize(localBuff.remaining()) -// localBuff.put(buff) -// } -// -// localBuff.flip() -// localBuff +// def put(bytes: LargeByteBuffer): Unit = { +// ??? // } // -// -// -// def put(b: Byte) { -// assert (writable) -// if (remaining() < 1) { -// // logInfo("put byte. remaining = " + remaining() + ", this = " + this) -// throw new BufferOverflowException -// } -// -// assert (currentRemaining() > 0) -// -// fetchCurrentBuffer().put(b) -// globalPosition += 1 -// // Check to need to bump the index ? -// toNonEmptyBuffer() +// def position: Long = _pos +// def position(position: Long): Unit = { +// _pos = position // } -// -// -// def put(buffer: ByteBuffer) { -// assert (writable) -// if (remaining() < buffer.remaining()) { -// throw new BufferOverflowException -// } -// -// val bufferRemaining = buffer.remaining() -// if (localReadWritePossible(bufferRemaining)) { -// -// assert (currentRemaining() >= bufferRemaining) -// -// fetchCurrentBuffer().put(buffer) -// -// globalPosition += bufferRemaining -// toNonEmptyBuffer() -// return -// } -// -// while (buffer.hasRemaining) { -// val currentBufferRemaining = currentRemaining() -// val bufferRemaining = buffer.remaining() -// -// if (currentBufferRemaining >= bufferRemaining) { -// fetchCurrentBuffer().put(buffer) -// globalPosition += bufferRemaining -// } else { -// // Split across buffers. 
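The surrounding put(ByteBuffer) implementation handles a write that straddles an underlying chunk boundary by slicing the source down to the space left in the current chunk (the createSlice call just below), copying that slice, and then advancing to the next chunk. A condensed sketch of that splitting loop, again with hypothetical names rather than the patch's container API:

import java.nio.ByteBuffer

// Copy everything remaining in `src` into the destination chunks in order,
// splitting the write wherever a destination chunk runs out of space.
// Assumes the chunks provide enough space in total.
def putAcross(src: ByteBuffer, dests: Iterator[ByteBuffer]): Unit = {
  while (src.hasRemaining) {
    val dst = dests.next()
    val n = math.min(src.remaining(), dst.remaining())
    val slice = src.duplicate()
    slice.limit(slice.position() + n)
    dst.put(slice)                      // copies exactly n bytes
    src.position(src.position() + n)    // advance past what was copied
  }
}

Duplicating the source keeps its own position and limit untouched until the copy has actually happened, which is the same reason the patch slices rather than mutating the caller's buffer directly.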
-// val currentBuffer = fetchCurrentBuffer() -// assert (currentBuffer.remaining() >= currentBufferRemaining) -// val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), -// currentBufferRemaining) -// assert (sliced.remaining() == currentBufferRemaining) -// currentBuffer.put(sliced) -// // move buffer pos -// buffer.position(buffer.position() + currentBufferRemaining) -// -// globalPosition += currentBufferRemaining -// } -// toNonEmptyBuffer() -// } -// -// assert (! hasRemaining() || currentRemaining() > 0) +// def remaining(): Long = { +// underlying.size - position // } // -// def put(other: LargeByteBuffer) { -// assert (writable) -// if (this.remaining() < other.remaining()) { -// throw new BufferOverflowException -// } -// -// while (other.hasRemaining()) { -// val buffer = other.fetchBufferOfSize(other.currentRemaining()) -// this.put(buffer) -// } +// def duplicate(): ChainedLargeByteBuffer = { +// new ChainedLargeByteBuffer(underlying) // } // -// -// def duplicate(): LargeByteBuffer = { -// val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) -// // We do a duplicate as part of construction - so avoid double duplicate. -// // containersCopy ++= containers.map(_.duplicate()) -// containersCopy ++= containers -// val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) -// -// // set limit and position (in that order) ... -// retval.limit(this.limit()) -// retval.position(this.position()) -// -// // Now release our containers - if any had been acquired -// releasePendingContainers() -// -// retval +// def rewind(): Unit = { +// _pos = 0 // } // -// -// /** -// * 'read' a LargeByteBuffer of size specified and return that. -// * Position will be incremented by size -// * -// * The name might be slightly confusing : rename ? -// * -// * @param size Amount of data to be read from this buffer and returned -// * @return -// */ -// def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { -// if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException -// if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException -// -// -// assert (readable) -// assert (size >= 0) -// -// releasePendingContainers() -// -// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER -// -// createSlice(size) +// def limit(): Long = { +// capacity // } // -// -// // This is essentially a workaround to exposing underlying buffers -// def readFrom(channel: ReadableByteChannel): Long = { -// -// assert (writable) -// releasePendingContainers() -// -// // this also allows us to avoid nasty corner cases in the loop. -// if (! hasRemaining()) { -// // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) -// throw new BufferOverflowException -// } -// -// var totalBytesRead = 0L -// -// while (hasRemaining()) { -// // read what we can ... -// val buffer = fetchCurrentBuffer() -// val bufferRemaining = currentRemaining() -// val bytesRead = channel.read(buffer) -// -// if (bytesRead > 0) { -// totalBytesRead += bytesRead -// // bump position too .. -// globalPosition += bytesRead -// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -// } -// else if (-1 == bytesRead) { -// // if we had already read some data in the loop, return that. -// if (totalBytesRead > 0) return totalBytesRead -// return -1 -// } // nothing available to read, retry later. 
return -// else if (0 == bytesRead) { -// return totalBytesRead -// } -// -// // toNonEmptyBuffer() -// } -// -// // Cleanup last buffer ? -// toNonEmptyBuffer() -// totalBytesRead +// def limit(newLimit: Long): Unit = { +// ??? // } // -// // This is essentially a workaround to exposing underlying buffers -// def readFrom(inStrm: InputStream): Long = { -// -// assert (writable) -// releasePendingContainers() -// -// // this also allows us to avoid nasty corner cases in the loop. -// // if (! hasRemaining()) throw new BufferOverflowException -// if (! hasRemaining()) return 0 -// -// var totalBytesRead = 0L -// -// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -// -// while (hasRemaining()) { -// // read what we can ... note, since there is no gaurantee that underlying buffer might -// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. -// // see if we can optimize this later ... -// val buffer = fetchCurrentBuffer() -// val bufferRemaining = buffer.remaining() -// val max = math.min(buff.length, bufferRemaining) -// val bytesRead = inStrm.read(buff, 0, max) -// -// if (bytesRead > 0) { -// buffer.put(buff, 0, bytesRead) -// totalBytesRead += bytesRead -// // bump position too .. -// globalPosition += bytesRead -// // buffer.position(buffer.position + bytesRead) -// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -// } -// else if (-1 == bytesRead) { -// // if we had already read some data in the loop, return that. -// if (totalBytesRead > 0) return totalBytesRead -// return -1 -// } // nothing available to read, retry later. return -// else if (0 == bytesRead) { -// return totalBytesRead -// } -// -// // toNonEmptyBuffer() +// def writeTo(channel:WritableByteChannel): Long = { +// var written = 0l +// underlying.chunks.foreach{bytes => +// //TODO test this +// val buffer = ByteBuffer.wrap(bytes) +// while (buffer.hasRemaining) +// channel.write(buffer) +// written += bytes.length // } -// -// totalBytesRead +// written // } +//} // -// // This is essentially a workaround to exposing underlying buffers -// // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce -// // code for performance reasons. -// def readFrom(inStrm: DataInput): Long = { -// -// assert (writable) -// releasePendingContainers() -// -// // this also allows us to avoid nasty corner cases in the loop. -// // if (! hasRemaining()) throw new BufferOverflowException -// if (! hasRemaining()) return 0 -// -// var totalBytesRead = 0L -// -// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -// -// while (hasRemaining()) { -// // read what we can ... note, since there is no gaurantee that underlying buffer might -// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. -// // see if we can optimize this later ... -// val buffer = fetchCurrentBuffer() -// val bufferRemaining = buffer.remaining() -// val max = math.min(buff.length, bufferRemaining) -// inStrm.readFully(buff, 0, max) -// val bytesRead = max -// -// if (bytesRead > 0) { -// buffer.put(buff, 0, bytesRead) -// totalBytesRead += bytesRead -// // bump position too .. -// globalPosition += bytesRead -// // buffer.position(buffer.position() + bytesRead) -// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -// } -// else if (-1 == bytesRead) { -// // if we had already read some data in the loop, return that. -// if (totalBytesRead > 0) return totalBytesRead -// return -1 -// } // nothing available to read, retry later. 
return -// else if (0 == bytesRead) { -// return totalBytesRead -// } +//class WrappedLargeByteBuffer(private[spark] val underlying: Array[ByteBuffer]) extends LargeByteBuffer { // -// // toNonEmptyBuffer() +// val (totalCapacity, chunkOffsets) = { +// var sum = 0l +// val offsets = new Array[Long](underlying.size) +// (0 until underlying.size).foreach{idx => +// offsets(idx) = sum +// sum += underlying(idx).capacity() // } -// -// totalBytesRead +// (sum, offsets) // } // -// // This is essentially a workaround to exposing underlying buffers -// // Note: tries to do it efficiently without needing to load everything into memory -// // (particularly for diskbacked buffers, etc). -// def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { -// -// assert (readable) -// releasePendingContainers() -// -// // this also allows us to avoid nasty corner cases in the loop. -// if (! hasRemaining()) throw new BufferUnderflowException -// -// var totalBytesWritten = 0L -// -// while (hasRemaining()) { -// // Write what we can ... -// val buffer = fetchCurrentBuffer() -// val bufferRemaining = buffer.remaining() -// assert (bufferRemaining > 0) -// val bytesWritten = channel.write(buffer) -// -// if (bytesWritten > 0) { -// totalBytesWritten += bytesWritten -// // bump position too .. -// globalPosition += bytesWritten -// if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() -// assert (! hasRemaining() || currentRemaining() > 0) -// } -// else if (0 == bytesWritten) { -// return totalBytesWritten -// } +// private var _pos = 0l +// private var currentBufferIdx = 0 +// private var currentBuffer = underlying(0) +// private var _limit = totalCapacity // -// // toNonEmptyBuffer() -// } +// def capacity = totalCapacity // -// assert (! hasRemaining()) -// if (cleanup) { -// free() +// def get(dst: Array[Byte], offset: Int, length: Int): Unit = { +// var moved = 0 +// while (moved < length) { +// val toRead = math.min(length - moved, currentBuffer.remaining()) +// currentBuffer.get(dst, offset, toRead) +// moved += toRead +// updateCurrentBuffer() // } -// totalBytesWritten // } // -// // This is essentially a workaround to exposing underlying buffers -// def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { -// -// assert (readable) -// releasePendingContainers() -// -// // this also allows us to avoid nasty corner cases in the loop. -// if (! hasRemaining()) throw new BufferUnderflowException -// -// var totalBytesWritten = 0L -// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -// -// while (hasRemaining()) { -// // write what we can ... note, since there is no gaurantee that underlying buffer might -// // expose array() method, we do double copy - from bytearray to buff and from -// // buff to outputstream. see if we can optimize this later ... -// val buffer = fetchCurrentBuffer() -// val bufferRemaining = buffer.remaining() -// val size = math.min(bufferRemaining, buff.length) -// buffer.get(buff, 0, size) -// outStrm.write(buff, 0, size) -// -// totalBytesWritten += size -// // bump position too .. -// globalPosition += size -// -// if (size >= bufferRemaining) toNonEmptyBuffer() -// } -// -// toNonEmptyBuffer() -// if (cleanup) { -// free() -// } -// totalBytesWritten +// def get(): Byte = { +// val r = currentBuffer.get() +// _pos += 1 +// updateCurrentBuffer() +// r // } // -// def asInputStream(): InputStream = { -// new InputStream() { -// override def read(): Int = { -// if (! 
hasRemaining()) return -1 -// get() -// } -// -// override def read(arr: Array[Byte], off: Int, len: Int): Int = { -// if (! hasRemaining()) return -1 -// -// get(arr, off, len) -// } -// -// override def available(): Int = { -// // current remaining is what can be read without blocking -// // anything higher might need disk access/buffer swapping. -// /* -// val left = remaining() -// math.min(left, Int.MaxValue).asInstanceOf[Int] -// */ -// currentRemaining() -// } +// private def updateCurrentBuffer(): Unit = { +// //TODO fix end condition +// while(!currentBuffer.hasRemaining()) { +// currentBufferIdx += 1 +// currentBuffer = underlying(currentBufferIdx) // } // } // -// def getCleaner() = cleaner -// -// /** -// * @param cleaner The previous cleaner, so that the caller can chain them if required. -// * @return -// */ -// private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { -// overrideCleaner(cleaner, allowOverride = true) +// def put(bytes: LargeByteBuffer): Unit = { +// ??? // } // -// private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { -// if (! this.allowCleanerOverride) { -// // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free -// return this.cleaner -// } -// -// this.allowCleanerOverride = allowOverride -// assert (null != cleaner) -// val prev = this.cleaner -// this.cleaner = cleaner -// // logInfo("Overriding " + prev + " with " + this.cleaner) -// prev +// def position: Long = _pos +// def position(position: Long): Unit = { +// //XXX check range? +// _pos = position // } -// -// private def doReleaseAll() { -// for (container <- containers) { -// container.release() -// } +// def remaining(): Long = { +// totalCapacity - _pos // } // -// def free(invokeCleaner: Boolean = true) { -// // logInfo("Free on " + this + ", cleaner = " + cleaner) -// // always invoking release -// doReleaseAll() -// -// if (invokeCleaner) cleaner.clean(this) +// def duplicate(): WrappedLargeByteBuffer = { +// new WrappedLargeByteBuffer(underlying.map{_.duplicate()}) // } // -// private def doDispose(needRelease: Boolean) { -// -// if (disposeLocationThrowable ne null) { -// logError("Already free'ed earlier at : ", disposeLocationThrowable) -// logError("Current at ", new Throwable) -// throw new IllegalStateException("Already freed.") -// } -// disposeLocationThrowable = new Throwable() -// -// // Forcefully cleanup all -// if (needRelease) doReleaseAll() -// -// // Free in a different loop, in case different containers refer to same resource -// // to release (like file) -// for (container <- containers) { -// container.free() -// } -// -// needReleaseIndices.clear() -// -// // We should not use this buffer anymore : set the values such that f -// // we dont ... -// globalPosition = 0 -// globalLimit = 0 -// globalCapacity = 0 +// def rewind(): Unit = { +// _pos = 0 +// underlying.foreach{_.rewind()} // } // -// // copy data over ... MUST be used only for cases where array is known to be -// // small to begin with. 
slightly risky method due to that assumption -// def toByteArray(): Array[Byte] = { -// val positionBackup = position() -// val size = remaining() -// if (size > Int.MaxValue) { -// throw new IllegalStateException( -// "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") -// } -// -// val retval = new Array[Byte](size.asInstanceOf[Int]) -// val readSize = get(retval, 0, retval.length) -// assert (readSize == retval.length, -// "readSize = " + readSize + ", retval.length = " + retval.length) -// -// position(positionBackup) -// -// retval +// def limit(): Long = { +// totalCapacity // } // -// // copy data over ... MUST be used only for cases where array is known to be -// // small to begin with. slightly risky method due to that assumption -// def toByteBuffer(): ByteBuffer = { -// ByteBuffer.wrap(toByteArray()) +// def limit(newLimit: Long) = { +// //XXX check range? set limits in sub buffers? +// _limit = newLimit // } // -// def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { -// val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) -// val currentPosition = position() -// retval.put(this) -// position(currentPosition) -// retval.clear() -// retval -// } -// -// -// -// // This is ONLY used for testing : that too as part of development of this and associated classes -// // remove before contributing to spark. -// def hexDump(): String = { -// if (remaining() * 64 > Int.MaxValue) { -// throw new UnsupportedOperationException("buffer too large " + remaining()) -// } -// -// val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) -// -// var perLine = 0 -// var first = true -// for (b <- toByteArray()) { -// perLine += 1 -// if (perLine % 8 == 0) { -// sb.append('\n') -// first = true -// } -// if (! first) sb.append(' ') -// first = false -// sb.append(java.lang.Integer.toHexString(b & 0xff)) +// def writeTo(channel: WritableByteChannel): Long = { +// var written = 0l +// underlying.foreach{buffer => +// //TODO test this +// //XXX do we care about respecting the limit here? +// written += buffer.remaining() +// while (buffer.hasRemaining) +// channel.write(buffer) // } -// sb.append('\n') -// sb.toString() +// written // } // -// override def toString: String = { -// val sb: StringBuffer = new StringBuffer -// sb.append(getClass.getName) -// sb.append(' ') -// sb.append(System.identityHashCode(this)) -// sb.append("@[pos=") -// sb.append(position()) -// sb.append(" lim=") -// sb.append(limit()) -// sb.append(" cap=") -// sb.append(capacity()) -// sb.append("]") -// sb.toString -// } -// -// -// -// override def finalize(): Unit = { -// var marked = false -// if (containers ne null) { -// if (containers.exists(container => container.isAcquired && container.requireRelease())) { -// marked = true -// logError("BUG: buffer was not released - and now going out of scope. " + -// "Potential resource leak. Allocated at ", allocateLocationThrowable) -// containers.foreach(_.release()) -// } -// if (containers.exists(container => !container.isFreed && container.requireFree())) { -// if (!marked) { -// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", -// allocateLocationThrowable) -// } -// else { -// logError("BUG: buffer was not freed - and now going out of scope. 
Potential resource leak") -// } -// containers.foreach(_.free()) -// } -// } -// super.finalize() -// } //} // +//object LargeByteBuffer { // -//object LargeByteBuffer extends Logging { -// -// private val noopDisposeFunction = new BufferCleaner() { -// protected def doClean(buffer: LargeByteBuffer) { -// buffer.free(invokeCleaner = false) -// } -// } -// -// val enableExpensiveAssert = false -// private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) -// val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( -// new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) -// // Do not allow anyone else to override cleaner -// EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) -// -// // 8K sufficient ? -// private val TEMP_ARRAY_SIZE = 8192 -// -// /** -// * Create a LargeByteBuffer of specified size which is split across -// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory -// * ByteBuffer -// * -// */ -// def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { -// if (0 == totalSize) { -// return EMPTY_BUFFER -// } -// -// assert (totalSize > 0) -// -// val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) -// val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) -// val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) -// -// assert (lastBlockSize > 0) -// -// val bufferArray = { -// val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) -// for (index <- 0 until numBlocks - 1) { -// val buff = ByteBuffer.allocate(blockSize) -// // buff.clear() -// arr += new HeapByteBufferContainer(buff, true) -// } -// arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) -// assert (arr.length == numBlocks) -// arr -// } -// -// new LargeByteBuffer(bufferArray, false, false) -// } -// -// /** -// * Create a LargeByteBuffer of specified size which is split across -// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk -// * -// */ -// private def allocateDiskBuffer(totalSize: Long, -// blockManager: BlockManager): LargeByteBuffer = { -// if (0 == totalSize) { -// return EMPTY_BUFFER -// } -// -// assert (totalSize > 0) -// -// // Create a file of the specified size. -// val file = blockManager.diskBlockManager.createTempBlock()._2 -// val raf = new RandomAccessFile(file, "rw") -// try { -// raf.setLength(totalSize) -// } finally { -// raf.close() -// } -// -// readWriteDiskSegment(new FileSegment(file, 0, totalSize), -// ephemeralDiskBacked = true, blockManager.ioConf) -// } -// -// // The returned buffer takes up ownership of the underlying buffers -// // (including dispos'ing that when done) -// def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { -// val nonEmpty = buffers.filter(_.hasRemaining) -// -// // cleanup the empty buffers -// buffers.filter(! 
_.hasRemaining).foreach(b => BlockManager.dispose(b)) -// -// -// if (nonEmpty.isEmpty) { -// return EMPTY_BUFFER -// } -// -// // slice so that offsets match our requirement -// new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => -// new HeapByteBufferContainer(b.slice(), true)), false, false) +// def asLargeByteBuffer(byteBuffer: ByteBuffer): LargeByteBuffer = { +// new WrappedLargeByteBuffer(Array(byteBuffer)) // } // -// def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { -// // only non empty arrays -// val arrays = byteArrays.filter(_.length > 0) -// if (0 == arrays.length) return EMPTY_BUFFER -// -// new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => -// new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) -// } -// -// def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { -// -// if (inputBuffers.isEmpty) return EMPTY_BUFFER -// -// if (! inputBuffers.exists(_.hasRemaining())) { -// if (canDispose) inputBuffers.map(_.free()) -// return EMPTY_BUFFER -// } -// -// // release all temp resources acquired -// inputBuffers.foreach(buff => buff.releasePendingContainers()) -// // free current container if acquired. -// inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { -// buff.containers(buff.currentContainerIndex).release() -// }) -// // inputBuffers.foreach(b => b.doReleaseAll()) -// -// -// // Dispose of any empty buffers -// if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) -// -// // Find all containers we need. -// val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) -// -// val containers = buffers.flatMap(_.containers) -// assert (! containers.isEmpty) -// // The in order containers of "buffers" seq constitute the required return value -// val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, -// // if you cant dispose, then we dont own the buffers : in which case, need duplicate -// ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) -// -// if (canDispose) { -// // override dispose of all other buffers. -// val disposeFunctions = inputBuffers.map { -// buffer => { -// (buffer, buffer.overrideCleaner(noopDisposeFunction)) -// } -// } -// -// val cleaner = retval.getCleaner() -// val newCleaner = new BufferCleaner { -// protected def doClean(buffer: LargeByteBuffer) { -// -// assert (retval == buffer) -// // default cleaner. -// cleaner.clean(retval) -// // not required, since we are within clean anyway. -// // retval.free(invokeCleaner = false) -// -// // retval.doDispose(needRelease = true) -// -// // This might actually call dispose twice on some (initially) empty buffers, -// // which is fine since we now guard against that. -// disposeFunctions.foreach(v => v._2.clean(v._1)) -// // Call the free method too : so that buffers are marked free ... 
-// disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) -// } -// } -// -// val prev = retval.overrideCleaner(newCleaner) -// assert (prev == cleaner) -// } -// -// retval +// def asLargeByteBuffer(bytes: Array[Byte]): LargeByteBuffer = { +// new WrappedLargeByteBuffer(Array(ByteBuffer.wrap(bytes))) // } // -// private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { -// if (arr == null) { -// throw new NullPointerException -// } else if (offset < 0 || size < 0 || offset + size > arr.length) { -// throw new IndexOutOfBoundsException -// } -// } // -// def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { -// if (size <= blockManager.ioConf.maxInMemSize) { -// LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) -// } else { -// LargeByteBuffer.allocateDiskBuffer(size, blockManager) -// } +// def allocateOnHeap(size: Long, maxChunk: Int): LargeByteBuffer = { +// val buffer = ChainedBuffer.withInitialSize(maxChunk, size) +// new ChainedLargeByteBuffer(buffer) // } // -// def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, -// ephemeralDiskBacked: Boolean): LargeByteBuffer = { -// // Split the block into multiple of BlockStore.maxBlockSize -// val segmentSize = segment.length -// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] -// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) -// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) -// -// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) -// -// for (index <- 0 until numBlocks - 1) { -// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, -// segment.offset + index * blockSize, blockSize), ioConf) -// } -// -// // Last block -// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, -// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) -// -// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +// def mapFile( +// channel: FileChannel, +// mode: MapMode, +// offset: Long, +// length: Long, +// maxChunk: Int = Integer.MAX_VALUE - 1e6.toInt +// ): LargeByteBuffer = { +// val offsets = new ArrayBuffer[Long]() +// var curOffset = offset +// val end = offset + length +// while (curOffset < end) { +// offsets += curOffset +// val length = math.min(end - curOffset, maxChunk) +// curOffset += length +// } +// offsets += end +// val chunks = new Array[ByteBuffer](offsets.size - 1) +// (0 until offsets.size - 1).foreach{idx => +// chunks(idx) = channel.map(mode, offsets(idx), offsets(idx + 1) - offsets(idx)) +// } +// new WrappedLargeByteBuffer(chunks) // } +//} // -// def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, -// ioConf: IOConfig): LargeByteBuffer = { -// -// // Split the block into multiple of BlockStore.maxBlockSize -// val segmentSize = segment.length -// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] -// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) -// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) -// -// logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + -// ", lastBlockSize = " + lastBlockSize) -// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) -// -// for (index <- 0 until numBlocks - 1) { -// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, -// segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) -// } -// -// // Last block -// buffers += new ReadWriteFileContainer(new 
FileSegment(segment.file, -// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) // -// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) -// } -//} +//// +/////** +//// * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G +//// * which ByteBuffers are limited to. +//// * Externally, it exposes all the api which java.nio.ByteBuffer exposes. +//// * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. +//// * Not all the data might be loaded into memory (like disk or tachyon data) - so actual +//// * memory footprint - heap and vm could be much lower than capacity. +//// * +//// * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this +//// * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! +//// * +//// * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this +//// * will require the file to be kept open (repeatedly opening/closing file is not good +//// * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is +//// * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) +//// * +//// * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is +//// * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some +//// * cases (when we duplicate/slice them). Currently spark does not need this, but might in future +//// * so relook at it later. +//// */ +////// We should make this constructor private: but for now, +////// leaving it public since TachyonStore needs it +////class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], +//// private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { +//// +//// // TODO: TEMP code: to flush out potential resource leaks. REMOVE ME +//// private val allocateLocationThrowable: Throwable = { +//// if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { +//// new Throwable("blockId = " + BlockManager.getLookupBlockId) +//// } else { +//// null +//// } +//// } +//// private var disposeLocationThrowable: Throwable = null +//// +//// @volatile private var allowCleanerOverride = true +//// @volatile private var cleaner: BufferCleaner = new BufferCleaner { +//// override def doClean(buffer: LargeByteBuffer) = { +//// assert (LargeByteBuffer.this == buffer) +//// doDispose(needRelease = false) +//// } +//// } +//// +//// // should not be empty +//// assert (null != inputContainers && ! inputContainers.isEmpty) +//// // should not have any null's +//// assert (inputContainers.find(_ == null).isEmpty) +//// +//// // println("Num containers = " + inputContainers.size) +//// +//// // Position, limit and capacity relevant over the engire LargeByteBuffer +//// @volatile private var globalPosition = 0L +//// @volatile private var globalLimit = 0L +//// @volatile private var currentContainerIndex = 0 +//// +//// // The buffers in which the actual data is held. +//// private var containers: Array[ByteBufferContainer] = null +//// +//// // aggregate capacities of the individual buffers. 
+//// // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be +//// // sum of capacity of 0th and 1st block buffer +//// private var bufferPositionStart: Array[Long] = null +//// +//// // Contains the indices of a containers which requires release before subsequent invocation of +//// // read/write should be serviced. This is required since current read/write might have moved the +//// // position but since we are returning bytebuffers which depend on the validity of the existing +//// // bytebuffer, we cant release them yet. +//// private var needReleaseIndices = new HashSet[Int]() +//// +//// private val readable = ! inputContainers.exists(! _.isReadable) +//// private val writable = ! inputContainers.exists(! _.isWritable) +//// +//// +//// // initialize +//// @volatile private var globalCapacity = { +//// +//// // Ensure that there are no empty buffers : messes up with our code : unless it +//// // is a single buffer (for empty buffer for marker case) +//// assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) +//// +//// containers = { +//// if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray +//// } +//// containers.foreach(_.validate()) +//// +//// def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { +//// val buff = new ArrayBuffer[Long](arr.length + 1) +//// buff += 0L +//// +//// buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) +//// assert (buff.length == arr.length + 1) +//// bufferPositionStart = buff.toArray +//// } +//// +//// initializeBufferPositionStart(containers) +//// +//// // remove references from inputBuffers +//// inputContainers.clear() +//// +//// globalLimit = bufferPositionStart(containers.length) +//// globalPosition = 0L +//// currentContainerIndex = 0 +//// +//// assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) +//// +//// globalLimit +//// } +//// +//// final def position(): Long = globalPosition +//// +//// final def limit(): Long = globalLimit +//// +//// final def capacity(): Long = globalCapacity +//// +//// final def limit(newLimit: Long) { +//// if ((newLimit > capacity()) || (newLimit < 0)) { +//// throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) +//// } +//// +//// globalLimit = newLimit +//// if (position() > newLimit) position(newLimit) +//// } +//// +//// def skip(skipBy: Long) = position(position() + skipBy) +//// +//// private def releasePendingContainers() { +//// if (! needReleaseIndices.isEmpty) { +//// val iter = needReleaseIndices.iterator +//// while (iter.hasNext) { +//// val index = iter.next() +//// assert (index >= 0 && index < containers.length) +//// // It is possible to move from one container to next before the previous +//// // container was acquired. For example, get forcing move to next container +//// // since current was exhausted immediatelly followed by a position() +//// // so the container we moved to was never acquired. +//// +//// // assert (containers(index).isAcquired) +//// // will this always be satisfied ? 
+//// // assert (index != currentContainerIndex) +//// if (containers(index).isAcquired) containers(index).release() +//// } +//// needReleaseIndices.clear() +//// } +//// } +//// +//// private def toNewContainer(newIndex: Int) { +//// if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { +//// +//// assert (currentContainerIndex >= 0) +//// needReleaseIndices += currentContainerIndex +//// } +//// currentContainerIndex = newIndex +//// } +//// +//// // expensive method, sigh ... optimize it later ? +//// final def position(newPosition: Long) { +//// +//// if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() +//// +//// if (currentContainerIndex < bufferPositionStart.length - 1 && +//// newPosition >= bufferPositionStart(currentContainerIndex) && +//// newPosition < bufferPositionStart(currentContainerIndex + 1)) { +//// // Same buffer - easy method ... +//// globalPosition = newPosition +//// // Changed position - free previously returned buffers. +//// releasePendingContainers() +//// return +//// } +//// +//// // Find appropriate currentContainerIndex +//// // Since bufferPositionStart is sorted, can be replaced with binary search if required. +//// // For now, not in the perf critical path since buffers size is very low typically. +//// var index = 0 +//// val cLen = containers.length +//// while (index < cLen) { +//// if (newPosition >= bufferPositionStart(index) && +//// newPosition < bufferPositionStart(index + 1)) { +//// globalPosition = newPosition +//// toNewContainer(index) +//// // Changed position - free earlier and previously returned buffers. +//// releasePendingContainers() +//// return +//// } +//// index += 1 +//// } +//// +//// if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { +//// // boundary. +//// globalPosition = newPosition +//// toNewContainer(cLen) +//// // Changed position - free earlier and previously returned buffers. +//// releasePendingContainers() +//// return +//// } +//// +//// assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) +//// } +//// +//// +//// /** +//// * Clears this buffer. The position is set to zero, the limit is set to +//// * the capacity, and the mark is discarded. +//// * +//// *

Invoke this method before using a sequence of channel-read or +//// * put operations to fill this buffer. +//// * +//// *

This method does not actually erase the data in the buffer, but it +//// * is named as if it did because it will most often be used in situations +//// * in which that might as well be the case.

+//// */ +//// final def clear() { +//// // if (0 == globalCapacity) return +//// +//// needReleaseIndices += 0 +//// globalPosition = 0L +//// toNewContainer(0) +//// globalLimit = globalCapacity +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Flips this buffer. The limit is set to the current position and then +//// * the position is set to zero. If the mark is defined then it is +//// * discarded. +//// * +//// *

After a sequence of channel-read or put operations, invoke +//// * this method to prepare for a sequence of channel-write or relative +//// * get operations. +//// */ +//// final def flip() { +//// needReleaseIndices += 0 +//// globalLimit = globalPosition +//// globalPosition = 0L +//// toNewContainer(0) +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Rewinds this buffer. The position is set to zero and the mark is +//// * discarded. +//// * +//// *

Invoke this method before a sequence of channel-write or get +//// * operations, assuming that the limit has already been set +//// * appropriately. +//// */ +//// final def rewind() { +//// needReleaseIndices += 0 +//// globalPosition = 0L +//// toNewContainer(0) +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Returns the number of elements between the current position and the +//// * limit.

+//// * +//// * @return The number of elements remaining in this buffer +//// */ +//// final def remaining(): Long = { +//// globalLimit - globalPosition +//// } +//// +//// /** +//// * Tells whether there are any elements between the current position and +//// * the limit.

+//// * +//// * @return true if, and only if, there is at least one element +//// * remaining in this buffer +//// */ +//// final def hasRemaining() = { +//// globalPosition < globalLimit +//// } +//// +//// // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) +//// +//// // number of bytes remaining in currently active underlying buffer +//// private def currentRemaining(): Int = { +//// if (hasRemaining()) { +//// // validate currentContainerIndex is valid +//// assert (globalPosition >= bufferPositionStart(currentContainerIndex) && +//// globalPosition < bufferPositionStart(currentContainerIndex + 1), +//// "globalPosition = " + globalPosition + +//// ", currentContainerIndex = " + currentContainerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// +//// currentRemaining0(currentContainerIndex) +//// } else 0 +//// } +//// +//// // Without any validation : required when we are bumping the index (when validation will fail) ... +//// private def currentRemaining0(which: Int): Int = { +//// // currentBuffer().remaining() +//// math.max(0, math.min(bufferPositionStart(which + 1), +//// globalLimit) - globalPosition).asInstanceOf[Int] +//// } +//// +//// // Set the approppriate position/limit for the current underlying buffer to mirror our +//// // the LargeByteBuffer's state. +//// private def fetchCurrentBuffer(): ByteBuffer = { +//// releasePendingContainers() +//// +//// assert (currentContainerIndex < containers.length) +//// +//// val container = containers(currentContainerIndex) +//// if (! container.isAcquired) { +//// container.acquire() +//// } +//// +//// assert (container.isAcquired) +//// if (LargeByteBuffer.enableExpensiveAssert) { +//// assert (! containers.exists( b => (b ne container) && b.isAcquired)) +//// } +//// +//// assert (currentContainerIndex < bufferPositionStart.length && +//// globalPosition < bufferPositionStart(currentContainerIndex + 1), +//// "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + +//// bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) +//// +//// val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). +//// asInstanceOf[Int] +//// +//// val buffer = container.getByteBuffer +//// buffer.position(buffPosition) +//// val diff = buffer.capacity - buffPosition +//// val left = remaining() +//// if (diff <= left) { +//// buffer.limit(buffer.capacity()) +//// } else { +//// // Can happen if limit() was called. +//// buffer.limit(buffPosition + left.asInstanceOf[Int]) +//// } +//// +//// buffer +//// } +//// +//// // To be used ONLY to test in suites. +//// private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { +//// if ("1" != System.getProperty("SPARK_TESTING")) { +//// throw new IllegalStateException("This method is to be used ONLY within spark test suites") +//// } +//// +//// fetchCurrentBuffer() +//// } +//// +//// // Expects that the invoker has ensured that this can be safely invoked. +//// // That is, it wont be invoked when the loop wont terminate. +//// private def toNonEmptyBuffer() { +//// +//// if (! hasRemaining()) { +//// var newIndex = currentContainerIndex +//// // Ensure we are in the right block or not. 
+//// while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { +//// newIndex += 1 +//// } +//// toNewContainer(newIndex) +//// // Do not do this - since we might not yet have consumed the buffer which caused EOF right now +//// /* +//// // Add last one also, and release it too - since we are at the end of the buffer with nothing +//// // more pending. +//// if (newIndex >= 0 && currentContainerIndex < containers.length) { +//// needReleaseIndices += newIndex +//// } +//// */ +//// assert (currentContainerIndex >= 0) +//// // releasePendingContainers() +//// return +//// } +//// +//// var index = currentContainerIndex +//// while (0 == currentRemaining0(index) && index < containers.length) { +//// index += 1 +//// } +//// assert (currentContainerIndex < containers.length) +//// toNewContainer(index) +//// assert (0 != currentRemaining()) +//// } +//// +//// private def assertPreconditions(containerIndex: Int) { +//// assert (globalPosition >= bufferPositionStart(containerIndex), +//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// assert (globalPosition < bufferPositionStart(containerIndex + 1), +//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// +//// assert (globalLimit <= globalCapacity) +//// assert (containerIndex < containers.length) +//// } +//// +//// +//// /** +//// * Attempts to return a ByteBuffer of the requested size. +//// * It is possible to return a buffer of size smaller than requested +//// * even though hasRemaining == true +//// * +//// * On return, position would have been moved 'ahead' by the size of the buffer returned : +//// * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer +//// * +//// * +//// * This is used to primarily retrieve content of this buffer to expose via ByteBuffer +//// * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the +//// * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer +//// * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying +//// * container is a disk backed container, and we make subsequent calls to get(), the returned +//// * ByteBuffer can be dispose'ed off +//// * +//// * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
+//// * @return +//// */ +//// +//// private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { +//// fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) +//// } +//// +//// private def fetchBufferOfSizeImpl(maxChunkSize: Int, +//// canReleaseContainers: Boolean): ByteBuffer = { +//// if (canReleaseContainers) releasePendingContainers() +//// assert (maxChunkSize > 0) +//// +//// // not checking for degenerate case of maxChunkSize == 0 +//// if (globalPosition >= globalLimit) { +//// // throw exception +//// throw new BufferUnderflowException() +//// } +//// +//// // Check preconditions : disable these later, since they might be expensive to +//// // evaluate for every IO op +//// assertPreconditions(currentContainerIndex) +//// +//// val currentBufferRemaining = currentRemaining() +//// +//// assert (currentBufferRemaining > 0) +//// +//// val size = math.min(currentBufferRemaining, maxChunkSize) +//// +//// val newBuffer = if (currentBufferRemaining > maxChunkSize) { +//// val currentBuffer = fetchCurrentBuffer() +//// val buff = ByteBufferContainer.createSlice(currentBuffer, +//// currentBuffer.position(), maxChunkSize) +//// assert (buff.remaining() == maxChunkSize) +//// buff +//// } else { +//// val currentBuffer = fetchCurrentBuffer() +//// val buff = currentBuffer.slice() +//// assert (buff.remaining() == currentBufferRemaining) +//// buff +//// } +//// +//// assert (size == newBuffer.remaining()) +//// assert (0 == newBuffer.position()) +//// assert (size == newBuffer.limit()) +//// assert (newBuffer.capacity() == newBuffer.limit()) +//// +//// globalPosition += newBuffer.remaining +//// toNonEmptyBuffer() +//// +//// newBuffer +//// } +//// +//// // Can we service the read/write from the currently active (underlying) bytebuffer or not. +//// // For almost all cases, this will return true allowing us to optimize away the more expensive +//// // computations. +//// private def localReadWritePossible(size: Int) = +//// size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) +//// +//// +//// def getLong(): Long = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 8) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(8)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 8) +//// val retval = buff.getLong +//// globalPosition += 8 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// val buff = readFully(8) +//// buff.getLong +//// } +//// +//// def getInt(): Int = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 4) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(4)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 4) +//// val retval = buff.getInt +//// globalPosition += 4 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// val buff = readFully(4) +//// buff.getInt +//// } +//// +//// def getChar(): Char = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 2) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(2)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 2) +//// val retval = buff.getChar +//// globalPosition += 2 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// // if slice is becoming too expensive, revisit this ... 
+//// val buff = readFully(2) +//// buff.getChar +//// } +//// +//// def get(): Byte = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// // If we have remaining bytes, previous invocations MUST have ensured that we are at +//// // a buffer which has data to be read. +//// assert (localReadWritePossible(1)) +//// +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) +//// val retval = buff.get() +//// globalPosition += 1 +//// toNonEmptyBuffer() +//// +//// retval +//// } +//// +//// def get(arr: Array[Byte], offset: Int, size: Int): Int = { +//// assert (readable) +//// releasePendingContainers() +//// +//// LargeByteBuffer.checkOffsets(arr, offset, size) +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return 0 +//// +//// if (! hasRemaining()) return -1 +//// +//// if (localReadWritePossible(size)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= size) +//// buff.get(arr, offset, size) +//// globalPosition += size +//// toNonEmptyBuffer() +//// return size +//// } +//// +//// var remainingSize = math.min(size, remaining()).asInstanceOf[Int] +//// var currentOffset = offset +//// +//// while (remainingSize > 0) { +//// val buff = fetchBufferOfSize(remainingSize) +//// val toCopy = math.min(buff.remaining(), remainingSize) +//// +//// buff.get(arr, currentOffset, toCopy) +//// currentOffset += toCopy +//// remainingSize -= toCopy +//// } +//// +//// currentOffset - offset +//// } +//// +//// +//// private def createSlice(size: Long): LargeByteBuffer = { +//// +//// releasePendingContainers() +//// +//// if (remaining() < size) { +//// // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +//// +//// val arr = new ArrayBuffer[ByteBufferContainer](2) +//// var totalLeft = size +//// +//// // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) +//// +//// var containerIndex = currentContainerIndex +//// while (totalLeft > 0 && hasRemaining()) { +//// assertPreconditions(containerIndex) +//// val container = containers(containerIndex) +//// val currentLeft = currentRemaining0(containerIndex) +//// +//// assert (globalPosition + currentLeft <= globalLimit) +//// assert (globalPosition >= bufferPositionStart(containerIndex) && +//// (globalPosition < bufferPositionStart(containerIndex + 1))) +//// +//// val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] +//// val sliceSize = math.min(totalLeft, currentLeft) +//// assert (from >= 0) +//// assert (sliceSize > 0 && sliceSize <= Int.MaxValue) +//// +//// val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) +//// arr += slice +//// +//// globalPosition += sliceSize +//// totalLeft -= sliceSize +//// if (currentLeft == sliceSize) containerIndex += 1 +//// } +//// +//// // Using toNonEmptyBuffer instead of directly moving to next here so that +//// // other checks can be performed there. 
+//// toNonEmptyBuffer() +//// // force cleanup - this is fine since we are not using the buffers directly +//// // which are actively needed (the returned value is on containers which can +//// // recreate) +//// releasePendingContainers() +//// // free current container if acquired. +//// if (currentContainerIndex < containers.length) { +//// containers(currentContainerIndex).release() +//// } +//// assert (currentContainerIndex == containerIndex) +//// +//// val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) +//// retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) +//// retval +//// } +//// +//// // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers +//// // This is to be used only for writes : and ensures that writes are done into the appropriate +//// // underlying bytebuffers. +//// def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { +//// assert(writable) +//// assert(size >= 0) +//// +//// createSlice(size) +//// } +//// +//// // get a buffer which is of the specified size and contains data from the underlying buffers +//// // Note, the actual data might be spread across the underlying buffers. +//// // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! +//// private def readFully(size: Int): ByteBuffer = { +//// assert (readable) +//// +//// if (remaining() < size) { +//// // throw exception +//// throw new BufferUnderflowException() +//// } +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER +//// +//// // Expected to be handled elsewhere. +//// assert (! localReadWritePossible(size)) +//// +//// val localBuff = { +//// val buff = fetchBufferOfSize(size) +//// // assert(buff.remaining() <= size) +//// // if (buff.remaining() == size) return buff +//// assert(buff.remaining() < size) +//// ByteBuffer.allocate(size).put(buff) +//// } +//// +//// // assert (localBuff.hasRemaining) +//// +//// while (localBuff.hasRemaining) { +//// val buff = fetchBufferOfSize(localBuff.remaining()) +//// localBuff.put(buff) +//// } +//// +//// localBuff.flip() +//// localBuff +//// } +//// +//// +//// +//// def put(b: Byte) { +//// assert (writable) +//// if (remaining() < 1) { +//// // logInfo("put byte. remaining = " + remaining() + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// assert (currentRemaining() > 0) +//// +//// fetchCurrentBuffer().put(b) +//// globalPosition += 1 +//// // Check to need to bump the index ? +//// toNonEmptyBuffer() +//// } +//// +//// +//// def put(buffer: ByteBuffer) { +//// assert (writable) +//// if (remaining() < buffer.remaining()) { +//// throw new BufferOverflowException +//// } +//// +//// val bufferRemaining = buffer.remaining() +//// if (localReadWritePossible(bufferRemaining)) { +//// +//// assert (currentRemaining() >= bufferRemaining) +//// +//// fetchCurrentBuffer().put(buffer) +//// +//// globalPosition += bufferRemaining +//// toNonEmptyBuffer() +//// return +//// } +//// +//// while (buffer.hasRemaining) { +//// val currentBufferRemaining = currentRemaining() +//// val bufferRemaining = buffer.remaining() +//// +//// if (currentBufferRemaining >= bufferRemaining) { +//// fetchCurrentBuffer().put(buffer) +//// globalPosition += bufferRemaining +//// } else { +//// // Split across buffers. 
+//// val currentBuffer = fetchCurrentBuffer() +//// assert (currentBuffer.remaining() >= currentBufferRemaining) +//// val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), +//// currentBufferRemaining) +//// assert (sliced.remaining() == currentBufferRemaining) +//// currentBuffer.put(sliced) +//// // move buffer pos +//// buffer.position(buffer.position() + currentBufferRemaining) +//// +//// globalPosition += currentBufferRemaining +//// } +//// toNonEmptyBuffer() +//// } +//// +//// assert (! hasRemaining() || currentRemaining() > 0) +//// } +//// +//// def put(other: LargeByteBuffer) { +//// assert (writable) +//// if (this.remaining() < other.remaining()) { +//// throw new BufferOverflowException +//// } +//// +//// while (other.hasRemaining()) { +//// val buffer = other.fetchBufferOfSize(other.currentRemaining()) +//// this.put(buffer) +//// } +//// } +//// +//// +//// def duplicate(): LargeByteBuffer = { +//// val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) +//// // We do a duplicate as part of construction - so avoid double duplicate. +//// // containersCopy ++= containers.map(_.duplicate()) +//// containersCopy ++= containers +//// val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) +//// +//// // set limit and position (in that order) ... +//// retval.limit(this.limit()) +//// retval.position(this.position()) +//// +//// // Now release our containers - if any had been acquired +//// releasePendingContainers() +//// +//// retval +//// } +//// +//// +//// /** +//// * 'read' a LargeByteBuffer of size specified and return that. +//// * Position will be incremented by size +//// * +//// * The name might be slightly confusing : rename ? +//// * +//// * @param size Amount of data to be read from this buffer and returned +//// * @return +//// */ +//// def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { +//// if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException +//// if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException +//// +//// +//// assert (readable) +//// assert (size >= 0) +//// +//// releasePendingContainers() +//// +//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +//// +//// createSlice(size) +//// } +//// +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def readFrom(channel: ReadableByteChannel): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) { +//// // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// var totalBytesRead = 0L +//// +//// while (hasRemaining()) { +//// // read what we can ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = currentRemaining() +//// val bytesRead = channel.read(buffer) +//// +//// if (bytesRead > 0) { +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. 
return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// // Cleanup last buffer ? +//// toNonEmptyBuffer() +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def readFrom(inStrm: InputStream): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// // if (! hasRemaining()) throw new BufferOverflowException +//// if (! hasRemaining()) return 0 +//// +//// var totalBytesRead = 0L +//// +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // read what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +//// // see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val max = math.min(buff.length, bufferRemaining) +//// val bytesRead = inStrm.read(buff, 0, max) +//// +//// if (bytesRead > 0) { +//// buffer.put(buff, 0, bytesRead) +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// // buffer.position(buffer.position + bytesRead) +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce +//// // code for performance reasons. +//// def readFrom(inStrm: DataInput): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// // if (! hasRemaining()) throw new BufferOverflowException +//// if (! hasRemaining()) return 0 +//// +//// var totalBytesRead = 0L +//// +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // read what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +//// // see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val max = math.min(buff.length, bufferRemaining) +//// inStrm.readFully(buff, 0, max) +//// val bytesRead = max +//// +//// if (bytesRead > 0) { +//// buffer.put(buff, 0, bytesRead) +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// // buffer.position(buffer.position() + bytesRead) +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. 
return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// // Note: tries to do it efficiently without needing to load everything into memory +//// // (particularly for diskbacked buffers, etc). +//// def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { +//// +//// assert (readable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// var totalBytesWritten = 0L +//// +//// while (hasRemaining()) { +//// // Write what we can ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// assert (bufferRemaining > 0) +//// val bytesWritten = channel.write(buffer) +//// +//// if (bytesWritten > 0) { +//// totalBytesWritten += bytesWritten +//// // bump position too .. +//// globalPosition += bytesWritten +//// if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() +//// assert (! hasRemaining() || currentRemaining() > 0) +//// } +//// else if (0 == bytesWritten) { +//// return totalBytesWritten +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// assert (! hasRemaining()) +//// if (cleanup) { +//// free() +//// } +//// totalBytesWritten +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { +//// +//// assert (readable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// var totalBytesWritten = 0L +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // write what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from bytearray to buff and from +//// // buff to outputstream. see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val size = math.min(bufferRemaining, buff.length) +//// buffer.get(buff, 0, size) +//// outStrm.write(buff, 0, size) +//// +//// totalBytesWritten += size +//// // bump position too .. +//// globalPosition += size +//// +//// if (size >= bufferRemaining) toNonEmptyBuffer() +//// } +//// +//// toNonEmptyBuffer() +//// if (cleanup) { +//// free() +//// } +//// totalBytesWritten +//// } +//// +//// def asInputStream(): InputStream = { +//// new InputStream() { +//// override def read(): Int = { +//// if (! hasRemaining()) return -1 +//// get() +//// } +//// +//// override def read(arr: Array[Byte], off: Int, len: Int): Int = { +//// if (! hasRemaining()) return -1 +//// +//// get(arr, off, len) +//// } +//// +//// override def available(): Int = { +//// // current remaining is what can be read without blocking +//// // anything higher might need disk access/buffer swapping. +//// /* +//// val left = remaining() +//// math.min(left, Int.MaxValue).asInstanceOf[Int] +//// */ +//// currentRemaining() +//// } +//// } +//// } +//// +//// def getCleaner() = cleaner +//// +//// /** +//// * @param cleaner The previous cleaner, so that the caller can chain them if required. 
+//// * @return +//// */ +//// private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { +//// overrideCleaner(cleaner, allowOverride = true) +//// } +//// +//// private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { +//// if (! this.allowCleanerOverride) { +//// // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free +//// return this.cleaner +//// } +//// +//// this.allowCleanerOverride = allowOverride +//// assert (null != cleaner) +//// val prev = this.cleaner +//// this.cleaner = cleaner +//// // logInfo("Overriding " + prev + " with " + this.cleaner) +//// prev +//// } +//// +//// private def doReleaseAll() { +//// for (container <- containers) { +//// container.release() +//// } +//// } +//// +//// def free(invokeCleaner: Boolean = true) { +//// // logInfo("Free on " + this + ", cleaner = " + cleaner) +//// // always invoking release +//// doReleaseAll() +//// +//// if (invokeCleaner) cleaner.clean(this) +//// } +//// +//// private def doDispose(needRelease: Boolean) { +//// +//// if (disposeLocationThrowable ne null) { +//// logError("Already free'ed earlier at : ", disposeLocationThrowable) +//// logError("Current at ", new Throwable) +//// throw new IllegalStateException("Already freed.") +//// } +//// disposeLocationThrowable = new Throwable() +//// +//// // Forcefully cleanup all +//// if (needRelease) doReleaseAll() +//// +//// // Free in a different loop, in case different containers refer to same resource +//// // to release (like file) +//// for (container <- containers) { +//// container.free() +//// } +//// +//// needReleaseIndices.clear() +//// +//// // We should not use this buffer anymore : set the values such that f +//// // we dont ... +//// globalPosition = 0 +//// globalLimit = 0 +//// globalCapacity = 0 +//// } +//// +//// // copy data over ... MUST be used only for cases where array is known to be +//// // small to begin with. slightly risky method due to that assumption +//// def toByteArray(): Array[Byte] = { +//// val positionBackup = position() +//// val size = remaining() +//// if (size > Int.MaxValue) { +//// throw new IllegalStateException( +//// "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") +//// } +//// +//// val retval = new Array[Byte](size.asInstanceOf[Int]) +//// val readSize = get(retval, 0, retval.length) +//// assert (readSize == retval.length, +//// "readSize = " + readSize + ", retval.length = " + retval.length) +//// +//// position(positionBackup) +//// +//// retval +//// } +//// +//// // copy data over ... MUST be used only for cases where array is known to be +//// // small to begin with. slightly risky method due to that assumption +//// def toByteBuffer(): ByteBuffer = { +//// ByteBuffer.wrap(toByteArray()) +//// } +//// +//// def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { +//// val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) +//// val currentPosition = position() +//// retval.put(this) +//// position(currentPosition) +//// retval.clear() +//// retval +//// } +//// +//// +//// +//// // This is ONLY used for testing : that too as part of development of this and associated classes +//// // remove before contributing to spark. 
+//// def hexDump(): String = { +//// if (remaining() * 64 > Int.MaxValue) { +//// throw new UnsupportedOperationException("buffer too large " + remaining()) +//// } +//// +//// val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) +//// +//// var perLine = 0 +//// var first = true +//// for (b <- toByteArray()) { +//// perLine += 1 +//// if (perLine % 8 == 0) { +//// sb.append('\n') +//// first = true +//// } +//// if (! first) sb.append(' ') +//// first = false +//// sb.append(java.lang.Integer.toHexString(b & 0xff)) +//// } +//// sb.append('\n') +//// sb.toString() +//// } +//// +//// override def toString: String = { +//// val sb: StringBuffer = new StringBuffer +//// sb.append(getClass.getName) +//// sb.append(' ') +//// sb.append(System.identityHashCode(this)) +//// sb.append("@[pos=") +//// sb.append(position()) +//// sb.append(" lim=") +//// sb.append(limit()) +//// sb.append(" cap=") +//// sb.append(capacity()) +//// sb.append("]") +//// sb.toString +//// } +//// +//// +//// +//// override def finalize(): Unit = { +//// var marked = false +//// if (containers ne null) { +//// if (containers.exists(container => container.isAcquired && container.requireRelease())) { +//// marked = true +//// logError("BUG: buffer was not released - and now going out of scope. " + +//// "Potential resource leak. Allocated at ", allocateLocationThrowable) +//// containers.foreach(_.release()) +//// } +//// if (containers.exists(container => !container.isFreed && container.requireFree())) { +//// if (!marked) { +//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", +//// allocateLocationThrowable) +//// } +//// else { +//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak") +//// } +//// containers.foreach(_.free()) +//// } +//// } +//// super.finalize() +//// } +////} +//// +//// +////object LargeByteBuffer extends Logging { +//// +//// private val noopDisposeFunction = new BufferCleaner() { +//// protected def doClean(buffer: LargeByteBuffer) { +//// buffer.free(invokeCleaner = false) +//// } +//// } +//// +//// val enableExpensiveAssert = false +//// private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) +//// val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( +//// new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) +//// // Do not allow anyone else to override cleaner +//// EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) +//// +//// // 8K sufficient ? 
+//// private val TEMP_ARRAY_SIZE = 8192 +//// +//// /** +//// * Create a LargeByteBuffer of specified size which is split across +//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory +//// * ByteBuffer +//// * +//// */ +//// def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { +//// if (0 == totalSize) { +//// return EMPTY_BUFFER +//// } +//// +//// assert (totalSize > 0) +//// +//// val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) +//// val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) +//// +//// assert (lastBlockSize > 0) +//// +//// val bufferArray = { +//// val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// for (index <- 0 until numBlocks - 1) { +//// val buff = ByteBuffer.allocate(blockSize) +//// // buff.clear() +//// arr += new HeapByteBufferContainer(buff, true) +//// } +//// arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) +//// assert (arr.length == numBlocks) +//// arr +//// } +//// +//// new LargeByteBuffer(bufferArray, false, false) +//// } +//// +//// /** +//// * Create a LargeByteBuffer of specified size which is split across +//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk +//// * +//// */ +//// private def allocateDiskBuffer(totalSize: Long, +//// blockManager: BlockManager): LargeByteBuffer = { +//// if (0 == totalSize) { +//// return EMPTY_BUFFER +//// } +//// +//// assert (totalSize > 0) +//// +//// // Create a file of the specified size. +//// val file = blockManager.diskBlockManager.createTempBlock()._2 +//// val raf = new RandomAccessFile(file, "rw") +//// try { +//// raf.setLength(totalSize) +//// } finally { +//// raf.close() +//// } +//// +//// readWriteDiskSegment(new FileSegment(file, 0, totalSize), +//// ephemeralDiskBacked = true, blockManager.ioConf) +//// } +//// +//// // The returned buffer takes up ownership of the underlying buffers +//// // (including dispos'ing that when done) +//// def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { +//// val nonEmpty = buffers.filter(_.hasRemaining) +//// +//// // cleanup the empty buffers +//// buffers.filter(! _.hasRemaining).foreach(b => BlockManager.dispose(b)) +//// +//// +//// if (nonEmpty.isEmpty) { +//// return EMPTY_BUFFER +//// } +//// +//// // slice so that offsets match our requirement +//// new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => +//// new HeapByteBufferContainer(b.slice(), true)), false, false) +//// } +//// +//// def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { +//// // only non empty arrays +//// val arrays = byteArrays.filter(_.length > 0) +//// if (0 == arrays.length) return EMPTY_BUFFER +//// +//// new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => +//// new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) +//// } +//// +//// def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { +//// +//// if (inputBuffers.isEmpty) return EMPTY_BUFFER +//// +//// if (! inputBuffers.exists(_.hasRemaining())) { +//// if (canDispose) inputBuffers.map(_.free()) +//// return EMPTY_BUFFER +//// } +//// +//// // release all temp resources acquired +//// inputBuffers.foreach(buff => buff.releasePendingContainers()) +//// // free current container if acquired. 
+//// inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { +//// buff.containers(buff.currentContainerIndex).release() +//// }) +//// // inputBuffers.foreach(b => b.doReleaseAll()) +//// +//// +//// // Dispose of any empty buffers +//// if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) +//// +//// // Find all containers we need. +//// val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) +//// +//// val containers = buffers.flatMap(_.containers) +//// assert (! containers.isEmpty) +//// // The in order containers of "buffers" seq constitute the required return value +//// val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, +//// // if you cant dispose, then we dont own the buffers : in which case, need duplicate +//// ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) +//// +//// if (canDispose) { +//// // override dispose of all other buffers. +//// val disposeFunctions = inputBuffers.map { +//// buffer => { +//// (buffer, buffer.overrideCleaner(noopDisposeFunction)) +//// } +//// } +//// +//// val cleaner = retval.getCleaner() +//// val newCleaner = new BufferCleaner { +//// protected def doClean(buffer: LargeByteBuffer) { +//// +//// assert (retval == buffer) +//// // default cleaner. +//// cleaner.clean(retval) +//// // not required, since we are within clean anyway. +//// // retval.free(invokeCleaner = false) +//// +//// // retval.doDispose(needRelease = true) +//// +//// // This might actually call dispose twice on some (initially) empty buffers, +//// // which is fine since we now guard against that. +//// disposeFunctions.foreach(v => v._2.clean(v._1)) +//// // Call the free method too : so that buffers are marked free ... +//// disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) +//// } +//// } +//// +//// val prev = retval.overrideCleaner(newCleaner) +//// assert (prev == cleaner) +//// } +//// +//// retval +//// } +//// +//// private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { +//// if (arr == null) { +//// throw new NullPointerException +//// } else if (offset < 0 || size < 0 || offset + size > arr.length) { +//// throw new IndexOutOfBoundsException +//// } +//// } +//// +//// def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { +//// if (size <= blockManager.ioConf.maxInMemSize) { +//// LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) +//// } else { +//// LargeByteBuffer.allocateDiskBuffer(size, blockManager) +//// } +//// } +//// +//// def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, +//// ephemeralDiskBacked: Boolean): LargeByteBuffer = { +//// // Split the block into multiple of BlockStore.maxBlockSize +//// val segmentSize = segment.length +//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +//// +//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// +//// for (index <- 0 until numBlocks - 1) { +//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +//// segment.offset + index * blockSize, blockSize), ioConf) +//// } +//// +//// // Last block +//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) +//// +//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +//// } +//// 
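A minimal sketch, not part of the patch: the chunking arithmetic used by allocateMemoryBuffer above, and again by LargeByteBufferHelper.allocate later in this series, splits a logical size that may exceed 2GB into Int-sized ByteBuffers, with a smaller final chunk for the remainder. ChunkingSketch and allocateChunks are illustrative names only.

    import java.nio.ByteBuffer
    import scala.collection.mutable.ArrayBuffer

    object ChunkingSketch {
      // Split `totalSize` bytes into heap ByteBuffers of at most `maxChunk` bytes each.
      def allocateChunks(totalSize: Long, maxChunk: Int): Array[ByteBuffer] = {
        require(totalSize >= 0 && maxChunk > 0)
        val chunks = new ArrayBuffer[ByteBuffer]
        var remaining = totalSize
        while (remaining > 0) {
          val next = math.min(remaining, maxChunk.toLong).toInt // last chunk gets the remainder
          chunks += ByteBuffer.allocate(next)
          remaining -= next
        }
        chunks.toArray
      }
    }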
+//// def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, +//// ioConf: IOConfig): LargeByteBuffer = { +//// +//// // Split the block into multiple of BlockStore.maxBlockSize +//// val segmentSize = segment.length +//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +//// +//// logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + +//// ", lastBlockSize = " + lastBlockSize) +//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// +//// for (index <- 0 until numBlocks - 1) { +//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +//// segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) +//// } +//// +//// // Last block +//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) +//// +//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +//// } +////} diff --git a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala index dcbda5a8515dd..644544cf869df 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala @@ -24,7 +24,7 @@ import scala.concurrent.{Promise, Await, Future} import scala.concurrent.duration.Duration import org.apache.spark.Logging -import org.apache.spark.network.buffer.{NioManagedBuffer, ManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBufferHelper, NioManagedBuffer, ManagedBuffer} import org.apache.spark.network.shuffle.{ShuffleClient, BlockFetchingListener} import org.apache.spark.storage.{BlockManagerId, BlockId, StorageLevel} @@ -92,9 +92,10 @@ abstract class BlockTransferService extends ShuffleClient with Closeable with Lo result.failure(exception) } override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = { - val ret = ByteBuffer.allocate(data.size.toInt) + val ret = LargeByteBufferHelper.allocate(data.size) ret.put(data.nioByteBuffer()) - ret.flip() + //XXX do we need ret.flip()?? + ret.position(0l) result.success(new NioManagedBuffer(ret)) } }) diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala index b089da8596e2b..86df34920a666 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConversions._ import org.apache.spark.Logging import org.apache.spark.network.BlockDataManager -import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBufferHelper, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager} import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock} @@ -63,7 +63,7 @@ class NettyBlockRpcServer( // StorageLevel is serialized as bytes using our JavaSerializer. 
val level: StorageLevel = serializer.newInstance().deserialize(ByteBuffer.wrap(uploadBlock.metadata)) - val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData)) + val data = new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(uploadBlock.blockData)) blockManager.putBlockData(BlockId(uploadBlock.blockId), data, level) responseContext.onSuccess(new Array[Byte](0)) } diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 3f0950dae1f24..9824c7c38c188 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -115,13 +115,10 @@ class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManage // Convert or copy nio buffer into array in order to serialize it. val nioBuffer = blockData.nioByteBuffer() - val array = if (nioBuffer.hasArray) { - nioBuffer.array() - } else { - val data = new Array[Byte](nioBuffer.remaining()) - nioBuffer.get(data) - data - } + //TODO key change -- multiple uploads here + // this stub is not even efficient when the buffer actually is small + val array = new Array[Byte](nioBuffer.remaining().toInt) + nioBuffer.get(array, 0, nioBuffer.remaining().toInt) client.sendRpc(new UploadBlock(appId, execId, blockId.toString, levelBytes, array).toByteArray, new RpcResponseCallback { diff --git a/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala index b2aec160635c7..d0ba9d8948594 100644 --- a/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala @@ -20,7 +20,7 @@ package org.apache.spark.network.nio import java.nio.ByteBuffer import org.apache.spark.network._ -import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBuffer, LargeByteBufferHelper, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.shuffle.BlockFetchingListener import org.apache.spark.storage.{BlockId, StorageLevel} import org.apache.spark.util.Utils @@ -116,7 +116,7 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa val blockId = blockMessage.getId val networkSize = blockMessage.getData.limit() listener.onBlockFetchSuccess( - blockId.toString, new NioManagedBuffer(blockMessage.getData)) + blockId.toString, new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(blockMessage.getData))) } } } @@ -143,7 +143,7 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa level: StorageLevel) : Future[Unit] = { checkInit() - val msg = PutBlock(blockId, blockData.nioByteBuffer(), level) + val msg = PutBlock(blockId, blockData.nioByteBuffer().firstByteBuffer(), level) val blockMessageArray = new BlockMessageArray(BlockMessage.fromPutBlock(msg)) val remoteCmId = new ConnectionManagerId(hostName, port) val reply = cm.sendMessageReliably(remoteCmId, blockMessageArray.toBufferMessage) @@ -192,7 +192,7 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa if (buffer == null) { return None } - Some(BlockMessage.fromGotBlock(GotBlock(msg.id, buffer))) + Some(BlockMessage.fromGotBlock(GotBlock(msg.id, buffer.firstByteBuffer()))) case _ => None } @@ 
-201,12 +201,12 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa private def putBlock(blockId: BlockId, bytes: ByteBuffer, level: StorageLevel) { val startTimeMs = System.currentTimeMillis() logDebug("PutBlock " + blockId + " started from " + startTimeMs + " with data: " + bytes) - blockDataManager.putBlockData(blockId, new NioManagedBuffer(bytes), level) + blockDataManager.putBlockData(blockId, new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(bytes)), level) logDebug("PutBlock " + blockId + " used " + Utils.getUsedTimeMs(startTimeMs) + " with data size: " + bytes.limit) } - private def getBlock(blockId: BlockId): ByteBuffer = { + private def getBlock(blockId: BlockId): LargeByteBuffer = { val startTimeMs = System.currentTimeMillis() logDebug("GetBlock " + blockId + " started from " + startTimeMs) val buffer = blockDataManager.getBlockData(blockId) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala index 9428273561cd8..1ea26ee8f9946 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala @@ -20,7 +20,7 @@ package org.apache.spark.scheduler import java.nio.ByteBuffer import java.util.concurrent.RejectedExecutionException -import org.apache.spark.io.WrappedLargeByteBuffer +import org.apache.spark.network.buffer.WrappedLargeByteBuffer import scala.language.existentials import scala.util.control.NonFatal @@ -76,7 +76,7 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul } //TODO either change serializer interface, or ... val deserializedResult = serializer.get().deserialize[DirectTaskResult[_]]( - serializedTaskResult.get.asInstanceOf[WrappedLargeByteBuffer].underlying(0)) + serializedTaskResult.get.firstByteBuffer()) sparkEnv.blockManager.master.removeBlock(blockId) (deserializedResult, size) } diff --git a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala index 016964fc274d8..4c0bc9ccec06e 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala @@ -18,17 +18,14 @@ package org.apache.spark.shuffle import java.io.File -import java.nio.ByteBuffer import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicInteger -import org.apache.spark.io.LargeByteBuffer - import scala.collection.JavaConversions._ import org.apache.spark.{Logging, SparkConf, SparkEnv} import org.apache.spark.executor.ShuffleWriteMetrics -import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBuffer, FileSegmentManagedBuffer, ManagedBuffer} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.FileShuffleBlockManager.ShuffleFileGroup @@ -176,7 +173,7 @@ class FileShuffleBlockManager(conf: SparkConf) override def getBytes(blockId: ShuffleBlockId): Option[LargeByteBuffer] = { //TODO val segment = getBlockData(blockId) - Some(LargeByteBuffer.asLargeByteBuffer(segment.nioByteBuffer())) + Some(segment.nioByteBuffer()) } override def getBlockData(blockId: ShuffleBlockId): ManagedBuffer = { diff --git 
a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala index 76c2e4180c838..4dfdf9987a5fe 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala @@ -18,13 +18,11 @@ package org.apache.spark.shuffle import java.io._ -import java.nio.ByteBuffer import com.google.common.io.ByteStreams -import org.apache.spark.io.LargeByteBuffer import org.apache.spark.{SparkConf, SparkEnv} -import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBuffer, FileSegmentManagedBuffer, ManagedBuffer} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.storage._ @@ -99,7 +97,7 @@ class IndexShuffleBlockManager(conf: SparkConf) extends ShuffleBlockManager { } override def getBytes(blockId: ShuffleBlockId): Option[LargeByteBuffer] = { - Some(LargeByteBuffer.asLargeByteBuffer(getBlockData(blockId).nioByteBuffer())) + Some(getBlockData(blockId).nioByteBuffer()) } override def getBlockData(blockId: ShuffleBlockId): ManagedBuffer = { diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala index fa737729b8758..96dde7f53b84b 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala @@ -17,8 +17,7 @@ package org.apache.spark.shuffle -import org.apache.spark.io.LargeByteBuffer -import org.apache.spark.network.buffer.ManagedBuffer +import org.apache.spark.network.buffer.{LargeByteBuffer, ManagedBuffer} import org.apache.spark.storage.ShuffleBlockId private[spark] diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 479fcf35283ad..be63f9cb03d29 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -17,7 +17,7 @@ package org.apache.spark.storage -import java.io.{BufferedOutputStream, ByteArrayOutputStream, File, InputStream, OutputStream} +import java.io.{BufferedOutputStream, File, InputStream, OutputStream} import java.nio.{ByteBuffer, MappedByteBuffer} import scala.collection.mutable.{ArrayBuffer, HashMap} @@ -31,9 +31,9 @@ import sun.nio.ch.DirectBuffer import org.apache.spark._ import org.apache.spark.executor._ -import org.apache.spark.io.{WrappedLargeByteBuffer, ChainedLargeByteBuffer, LargeByteBuffer, CompressionCodec} +import org.apache.spark.io.CompressionCodec import org.apache.spark.network._ -import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBufferHelper, LargeByteBuffer, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.ExternalShuffleClient import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo @@ -307,7 +307,7 @@ private[spark] class BlockManager( shuffleManager.shuffleBlockManager.getBlockData(blockId.asInstanceOf[ShuffleBlockId]) } else { val blockBytesOpt = doGetLocal(blockId, asBlockResult = false) - .asInstanceOf[Option[ByteBuffer]] + .asInstanceOf[Option[LargeByteBuffer]] if (blockBytesOpt.isDefined) { val buffer = blockBytesOpt.get new 
NioManagedBuffer(buffer) @@ -321,7 +321,7 @@ private[spark] class BlockManager( * Put the block locally, using the given storage level. */ override def putBlockData(blockId: BlockId, data: ManagedBuffer, level: StorageLevel): Unit = { - putBytes(blockId, LargeByteBuffer.asLargeByteBuffer(data.nioByteBuffer()), level) + putBytes(blockId, data.nioByteBuffer(), level) } /** @@ -538,10 +538,10 @@ private[spark] class BlockManager( /* We'll store the bytes in memory if the block's storage level includes * "memory serialized", or if it should be cached as objects in memory * but we only requested its serialized bytes. */ - val copyForMemory = LargeByteBuffer.allocateOnHeap(bytes.limit, largeByteBufferChunkSize) + val copyForMemory = LargeByteBufferHelper.allocate(bytes.limit) copyForMemory.put(bytes) memoryStore.putBytes(blockId, copyForMemory, level) - bytes.rewind() + bytes.position(0l) } if (!asBlockResult) { return Some(bytes) @@ -595,8 +595,8 @@ private[spark] class BlockManager( for (loc <- locations) { logDebug(s"Getting remote block $blockId from $loc") //TODO the fetch will always be one byte buffer till we fix SPARK-5928 - val data: LargeByteBuffer = LargeByteBuffer.asLargeByteBuffer(blockTransferService.fetchBlockSync( - loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer()) + val data: LargeByteBuffer = blockTransferService.fetchBlockSync( + loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer() if (data != null) { if (asBlockResult) { @@ -791,7 +791,7 @@ private[spark] class BlockManager( case ArrayValues(array) => blockStore.putArray(blockId, array, putLevel, returnValues) case ByteBufferValues(bytes) => - bytes.rewind() + bytes.position(0l) blockStore.putBytes(blockId, bytes, putLevel) } size = result.size @@ -942,7 +942,7 @@ private[spark] class BlockManager( case Some(peer) => try { val onePeerStartTime = System.currentTimeMillis - data.rewind() + data.position(0l) logTrace(s"Trying to replicate $blockId of ${data.limit()} bytes to $peer") //TODO //ACK! here we're stuck -- we can't replicate a large block until we figure out @@ -1201,7 +1201,7 @@ private[spark] class BlockManager( blockId: BlockId, bytes: LargeByteBuffer, serializer: Serializer = defaultSerializer): Iterator[Any] = { - bytes.rewind() + bytes.position(0); val stream = wrapForCompression(blockId, new LargeByteBufferInputStream(bytes, true)) serializer.newInstance().deserializeStream(stream).asIterator } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockStore.scala b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala index 38989f0c07681..e1b48bf11bcc7 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala @@ -17,12 +17,8 @@ package org.apache.spark.storage -import java.nio.ByteBuffer - -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.Logging -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.LargeByteBuffer /** * Abstract class to store blocks. 
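The hunks in this region repeat one calling convention: code that used to call ByteBuffer.rewind() now resets the chunked buffer with position(0l), and stores first work on a duplicate() so the caller's offsets stay untouched (see the MemoryStore.putBytes hunk below). A minimal sketch of that convention, assuming only the two calls it needs; SimpleLargeBuffer and putBytesSketch are stand-ins, not the patch's API:

    // Illustrative stand-in for the LargeByteBuffer operations used by this sketch.
    trait SimpleLargeBuffer {
      def duplicate(): SimpleLargeBuffer
      def position(newPosition: Long): Unit
    }

    // Work on a duplicate so the caller's position is untouched, then reset the logical
    // offset explicitly before handing the bytes to the store.
    def putBytesSketch(bytes: SimpleLargeBuffer)(write: SimpleLargeBuffer => Unit): Unit = {
      val copy = bytes.duplicate()
      copy.position(0L)
      write(copy)
    }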
diff --git a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala index 180b237a1a1f2..5ef9929feaabf 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala @@ -22,7 +22,7 @@ import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode import org.apache.spark.Logging -import org.apache.spark.io.{WrappedLargeByteBuffer, LargeByteBuffer} +import org.apache.spark.network.buffer.{LargeByteBufferHelper, LargeByteBuffer} import org.apache.spark.serializer.Serializer import org.apache.spark.util.Utils @@ -118,9 +118,9 @@ private[spark] class DiskStore(blockManager: BlockManager, diskManager: DiskBloc } } buf.flip() - Some(LargeByteBuffer.asLargeByteBuffer(buf)) + Some(LargeByteBufferHelper.asLargeByteBuffer(buf)) } else { - Some(LargeByteBuffer.mapFile(channel, MapMode.READ_ONLY, offset, length)) + Some(LargeByteBufferHelper.mapFile(channel, MapMode.READ_ONLY, offset, length)) } } finally { channel.close() diff --git a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala index 8fccc0f3e78d2..074919fdeb706 100644 --- a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala @@ -19,7 +19,7 @@ package org.apache.spark.storage import java.util.LinkedHashMap -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.LargeByteBuffer import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -81,7 +81,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) override def putBytes(blockId: BlockId, _bytes: LargeByteBuffer, level: StorageLevel): PutResult = { // Work on a duplicate - since the original input might be used elsewhere. val bytes = _bytes.duplicate() - bytes.rewind() + bytes.position(0l); if (level.deserialized) { val values = blockManager.dataDeserialize(blockId, bytes) putIterator(blockId, values, level, returnValues = true) diff --git a/core/src/main/scala/org/apache/spark/storage/PutResult.scala b/core/src/main/scala/org/apache/spark/storage/PutResult.scala index 2e00934bde243..aa9176791b319 100644 --- a/core/src/main/scala/org/apache/spark/storage/PutResult.scala +++ b/core/src/main/scala/org/apache/spark/storage/PutResult.scala @@ -17,7 +17,7 @@ package org.apache.spark.storage -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.LargeByteBuffer /** * Result of adding a block into a BlockStore. This case class contains a few things: diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala index 9f964ed456d5e..62cfd9e65eaec 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala @@ -21,7 +21,7 @@ import java.io.IOException import java.nio.ByteBuffer import com.google.common.io.ByteStreams -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.{LargeByteBufferHelper, LargeByteBuffer} import tachyon.client.{ReadType, WriteType} import org.apache.spark.Logging @@ -70,7 +70,7 @@ private[spark] class TachyonStore( // So that we do not modify the input offsets ! 
// duplicate does not copy buffer, so inexpensive val byteBuffer = bytes.duplicate() - byteBuffer.rewind() + byteBuffer.position(0l) logDebug(s"Attempting to put block $blockId into Tachyon") val startTime = System.currentTimeMillis val file = tachyonManager.getFile(blockId) @@ -114,7 +114,7 @@ private[spark] class TachyonStore( //TODO val bs = new Array[Byte](size.asInstanceOf[Int]) ByteStreams.readFully(is, bs) - Some(LargeByteBuffer.asLargeByteBuffer(ByteBuffer.wrap(bs))) + Some(LargeByteBufferHelper.asLargeByteBuffer(bs)) } catch { case ioe: IOException => logWarning(s"Failed to fetch the block $blockId from Tachyon", ioe) diff --git a/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala b/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala index 26f2d7848bb29..98e41906b8251 100644 --- a/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala @@ -18,9 +18,8 @@ package org.apache.spark.util import java.io.InputStream -import java.nio.ByteBuffer -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.LargeByteBuffer import org.apache.spark.storage.BlockManager /** diff --git a/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala b/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala index 2fe904b10c53c..6c61d52ecca75 100644 --- a/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala @@ -19,7 +19,8 @@ package org.apache.spark.util import java.io.OutputStream -import org.apache.spark.io.{ChainedLargeByteBuffer, LargeByteBuffer} +import org.apache.spark.io.ChainedLargeByteBuffer +import org.apache.spark.network.buffer.LargeByteBuffer import org.apache.spark.util.collection.ChainedBuffer private[spark] diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala index 94bfa67451892..6c01db61168b3 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala @@ -25,7 +25,7 @@ import scala.concurrent.{Await, Promise} import scala.util.{Failure, Success, Try} import org.apache.commons.io.IOUtils -import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBufferHelper, LargeByteBuffer, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.shuffle.BlockFetchingListener import org.apache.spark.network.{BlockDataManager, BlockTransferService} import org.apache.spark.storage.{BlockId, ShuffleBlockId} @@ -100,7 +100,7 @@ class NettyBlockTransferSecuritySuite extends FunSuite with MockitoSugar with Sh val blockManager = mock[BlockDataManager] val blockId = ShuffleBlockId(0, 1, 2) val blockString = "Hello, world!" 
- val blockBuffer = new NioManagedBuffer(ByteBuffer.wrap(blockString.getBytes)) + val blockBuffer = new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(blockString.getBytes)) when(blockManager.getBlockData(blockId)).thenReturn(blockBuffer) val securityManager0 = new SecurityManager(conf0) diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala new file mode 100644 index 0000000000000..b242ff593f04a --- /dev/null +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.network.netty + +import java.util.concurrent.TimeUnit + +import org.apache.spark.network.BlockDataManager +import org.apache.spark.network.buffer.{ManagedBuffer, LargeByteBufferHelper, NioManagedBuffer} +import org.apache.spark.network.shuffle.BlockFetchingListener +import org.apache.spark.storage.ShuffleBlockId +import org.apache.spark.{SecurityManager, SparkConf} +import org.mockito.Mockito._ +import org.scalatest.mock.MockitoSugar +import org.scalatest.{Matchers, FunSuite} + +import scala.concurrent.duration.FiniteDuration +import scala.concurrent.{Await, Promise} + +class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar { + + val conf = new SparkConf() + .set("spark.app.id", "app-id") + val securityManager = new SecurityManager(conf) + + + + test("simple fetch") { + + val blockManager = mock[BlockDataManager] + val blockId = ShuffleBlockId(0, 1, 2) + val blockString = "Hello, world!" 
+ val blockBuffer = new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(blockString.getBytes)) + when(blockManager.getBlockData(blockId)).thenReturn(blockBuffer) + + val from = new NettyBlockTransferService(conf, securityManager, numCores = 1) + from.init(blockManager) + val to = new NettyBlockTransferService(conf, securityManager, numCores = 1) + to.init(blockManager) + + try { + val promise = Promise[ManagedBuffer]() + + to.fetchBlocks(from.hostName, from.port, "1", Array(blockId.toString), + new BlockFetchingListener { + override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = { + promise.failure(exception) + } + + override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = { + promise.success(data.retain()) + } + }) + + Await.ready(promise.future, FiniteDuration(1000, TimeUnit.MILLISECONDS)) + } finally { + from.close() + to.close() + } + + + } + + + + +} diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java index 75c426f379238..213013e14b7c6 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java @@ -1,19 +1,19 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ package org.apache.spark.network.buffer; @@ -34,8 +34,8 @@ import org.apache.spark.network.util.TransportConf; /** - * A {@link ManagedBuffer} backed by a segment in a file. - */ +* A {@link ManagedBuffer} backed by a segment in a file. 
+*/ public final class FileSegmentManagedBuffer extends ManagedBuffer { private final TransportConf conf; private final File file; diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java index c56fdebb887a1..4997dcecc3370 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java @@ -17,6 +17,7 @@ package org.apache.spark.network.buffer; import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.channels.WritableByteChannel; public interface LargeByteBuffer { @@ -35,6 +36,8 @@ public interface LargeByteBuffer { public void put(LargeByteBuffer bytes); + public long remaining(); + //TODO checks on limit semantics /** @@ -53,4 +56,7 @@ public interface LargeByteBuffer { public long writeTo(WritableByteChannel channel) throws IOException; + //TODO this should be deleted -- just to help me get going + public ByteBuffer firstByteBuffer(); + } diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java index 2585b65b1f969..5fe01d87e36e9 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java @@ -29,6 +29,23 @@ public static LargeByteBuffer asLargeByteBuffer(ByteBuffer buffer) { return new WrappedLargeByteBuffer(new ByteBuffer[]{buffer}); } + public static LargeByteBuffer asLargeByteBuffer(byte[] bytes) { + return new WrappedLargeByteBuffer(new ByteBuffer[]{ByteBuffer.wrap(bytes)}); + } + + public static LargeByteBuffer allocate(long size) { + ArrayList chunks = new ArrayList(); + long remaining = size; + while (remaining > 0) { + int nextSize = (int)Math.min(remaining, DEFAULT_MAX_CHUNK); + ByteBuffer next = ByteBuffer.allocate(nextSize); + remaining -= nextSize; + chunks.add(next); + } + return new WrappedLargeByteBuffer(chunks.toArray(new ByteBuffer[chunks.size()])); + } + + public static LargeByteBuffer mapFile( FileChannel channel, FileChannel.MapMode mode, diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java index d4746a76e7d13..2febfe7f761c5 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java @@ -29,9 +29,9 @@ * A {@link ManagedBuffer} backed by {@link ByteBuffer}. 
*/ public final class NioManagedBuffer extends ManagedBuffer { - private final ByteBuffer buf; + private final LargeByteBuffer buf; - public NioManagedBuffer(ByteBuffer buf) { + public NioManagedBuffer(LargeByteBuffer buf) { this.buf = buf; } @@ -42,12 +42,14 @@ public long size() { @Override public LargeByteBuffer nioByteBuffer() throws IOException { - return LargeByteBufferHelper.asLargeByteBuffer(buf.duplicate()); + return buf.duplicate(); } @Override public InputStream createInputStream() throws IOException { - return new ByteBufInputStream(Unpooled.wrappedBuffer(buf)); + //TODO + ByteBuffer aBuf = ((WrappedLargeByteBuffer) buf).underlying[0]; + return new ByteBufInputStream(Unpooled.wrappedBuffer(aBuf)); } @Override @@ -62,7 +64,9 @@ public ManagedBuffer release() { @Override public Object convertToNetty() throws IOException { - return Unpooled.wrappedBuffer(buf); + //TODO + ByteBuffer aBuf = ((WrappedLargeByteBuffer) buf).underlying[0]; + return Unpooled.wrappedBuffer(aBuf); } @Override diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java index bbe73a87e9105..2ecb569b3a23c 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java @@ -22,7 +22,7 @@ public class WrappedLargeByteBuffer implements LargeByteBuffer { - private final ByteBuffer[] underlying; + final ByteBuffer[] underlying; private final Long totalCapacity; private final long[] chunkOffsets; @@ -91,6 +91,11 @@ public void position(long newPosition) { _pos = newPosition; } + @Override + public long remaining() { + return limit - _pos; + } + @Override public WrappedLargeByteBuffer duplicate() { ByteBuffer[] duplicates = new ByteBuffer[underlying.length]; @@ -132,4 +137,9 @@ public long writeTo(WritableByteChannel channel) throws IOException { } return written; } + + @Override + public ByteBuffer firstByteBuffer() { + return underlying[0]; + } } diff --git a/network/common/src/main/scala/org/apache/spark/network/buffer/LargeByteBuffer.scala b/network/common/src/main/scala/org/apache/spark/network/buffer/LargeByteBuffer.scala new file mode 100644 index 0000000000000..eb7520d3c6325 --- /dev/null +++ b/network/common/src/main/scala/org/apache/spark/network/buffer/LargeByteBuffer.scala @@ -0,0 +1,1760 @@ +///* +// * Licensed to the Apache Software Foundation (ASF) under one or more +// * contributor license agreements. See the NOTICE file distributed with +// * this work for additional information regarding copyright ownership. +// * The ASF licenses this file to You under the Apache License, Version 2.0 +// * (the "License"); you may not use this file except in compliance with +// * the License. You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. 
+// */ +// +//package org.apache.spark.network.buffer +// +//import java.nio.ByteBuffer +//import java.nio.channels.FileChannel.MapMode +//import java.nio.channels.{FileChannel, WritableByteChannel} +// +//import scala.collection.mutable.ArrayBuffer +// +// +// +//trait LargeByteBuffer { +//// def position(): Long +//// +//// def limit(): Long +// +// def capacity(): Long +// +// def get(): Byte //needed for ByteBufferInputStream +// +// def get(dst: Array[Byte], offset: Int, length: Int): Unit // for ByteBufferInputStream +// +// def position(position: Long): Unit //for ByteBufferInputStream +// +// def position(): Long //for ByteBufferInputStream +// +// /** doesn't copy data, just copies references & offsets */ +// def duplicate(): LargeByteBuffer +// +// def put(bytes: LargeByteBuffer): Unit +// +// //also need whatever is necessary for ByteArrayOutputStream for BlockManager#dataSerialize +// +// +// //TODO checks on limit semantics +// +// /** +// * Sets this buffer's limit. If the position is larger than the new limit then it is set to the +// * new limit. If the mark is defined and larger than the new limit then it is discarded. +// */ +// def limit(newLimit: Long): Unit +// +// /** +// * return this buffer's limit +// * @return +// */ +// def limit(): Long +// +// +// //an alternative to having this method would be having a foreachBuffer(f: Buffer => T) +// def writeTo(channel: WritableByteChannel): Long +// +//// +//// def skip(skipBy: Long): Unit +//// +//// def position(newPosition: Long): Unit +//// +//// /** +//// * Clears this buffer. The position is set to zero, the limit is set to +//// * the capacity, and the mark is discarded. +//// * +//// *
Invoke this method before using a sequence of channel-read or +//// * put operations to fill this buffer. +//// * +//// *
This method does not actually erase the data in the buffer, but it +//// * is named as if it did because it will most often be used in situations +//// * in which that might as well be the case.
+//// */ +//// def clear(): Unit +//// +//// /** +//// * Flips this buffer. The limit is set to the current position and then +//// * the position is set to zero. If the mark is defined then it is +//// * discarded. +//// * +//// *
After a sequence of channel-read or put operations, invoke +//// * this method to prepare for a sequence of channel-write or relative +//// * get operations. +//// */ +//// def flip(): Unit +// +// /** +// * Rewinds this buffer. The position is set to zero and the mark is +// * discarded. +// * +// *
Invoke this method before a sequence of channel-write or get +// * operations, assuming that the limit has already been set +// * appropriately. +// */ +// def rewind(): Unit +// +// /** +// * Returns the number of elements between the current position and the +// * limit.
+// * +// * @return The number of elements remaining in this buffer +// */ +// def remaining(): Long +//} +//// +////class ChainedLargeByteBuffer(private[network] val underlying: ChainedBuffer) extends LargeByteBuffer { +//// +//// def capacity = underlying.capacity +//// +//// var _pos = 0l +//// +//// def get(dst: Array[Byte],offset: Int,length: Int): Unit = { +//// underlying.read(_pos, dst, offset, length) +//// _pos += length +//// } +//// +//// def get(): Byte = { +//// val b = underlying.read(_pos) +//// _pos += 1 +//// b +//// } +//// +//// def put(bytes: LargeByteBuffer): Unit = { +//// ??? +//// } +//// +//// def position: Long = _pos +//// def position(position: Long): Unit = { +//// _pos = position +//// } +//// def remaining(): Long = { +//// underlying.size - position +//// } +//// +//// def duplicate(): ChainedLargeByteBuffer = { +//// new ChainedLargeByteBuffer(underlying) +//// } +//// +//// def rewind(): Unit = { +//// _pos = 0 +//// } +//// +//// def limit(): Long = { +//// capacity +//// } +//// +//// def limit(newLimit: Long): Unit = { +//// ??? +//// } +//// +//// def writeTo(channel:WritableByteChannel): Long = { +//// var written = 0l +//// underlying.chunks.foreach{bytes => +//// //TODO test this +//// val buffer = ByteBuffer.wrap(bytes) +//// while (buffer.hasRemaining) +//// channel.write(buffer) +//// written += bytes.length +//// } +//// written +//// } +////} +// +//class WrappedLargeByteBuffer(private[spark] val underlying: Array[ByteBuffer]) extends LargeByteBuffer { +// +// val (totalCapacity, chunkOffsets) = { +// var sum = 0l +// val offsets = new Array[Long](underlying.size) +// (0 until underlying.size).foreach{idx => +// offsets(idx) = sum +// sum += underlying(idx).capacity() +// } +// (sum, offsets) +// } +// +// private var _pos = 0l +// private var currentBufferIdx = 0 +// private var currentBuffer = underlying(0) +// private var _limit = totalCapacity +// +// def capacity = totalCapacity +// +// def get(dst: Array[Byte], offset: Int, length: Int): Unit = { +// var moved = 0 +// while (moved < length) { +// val toRead = math.min(length - moved, currentBuffer.remaining()) +// currentBuffer.get(dst, offset, toRead) +// moved += toRead +// updateCurrentBuffer() +// } +// } +// +// def get(): Byte = { +// val r = currentBuffer.get() +// _pos += 1 +// updateCurrentBuffer() +// r +// } +// +// private def updateCurrentBuffer(): Unit = { +// //TODO fix end condition +// while(!currentBuffer.hasRemaining()) { +// currentBufferIdx += 1 +// currentBuffer = underlying(currentBufferIdx) +// } +// } +// +// def put(bytes: LargeByteBuffer): Unit = { +// ??? +// } +// +// def position: Long = _pos +// def position(position: Long): Unit = { +// //XXX check range? +// _pos = position +// } +// def remaining(): Long = { +// totalCapacity - _pos +// } +// +// def duplicate(): WrappedLargeByteBuffer = { +// new WrappedLargeByteBuffer(underlying.map{_.duplicate()}) +// } +// +// def rewind(): Unit = { +// _pos = 0 +// underlying.foreach{_.rewind()} +// } +// +// def limit(): Long = { +// totalCapacity +// } +// +// def limit(newLimit: Long) = { +// //XXX check range? set limits in sub buffers? +// _limit = newLimit +// } +// +// def writeTo(channel: WritableByteChannel): Long = { +// var written = 0l +// underlying.foreach{buffer => +// //TODO test this +// //XXX do we care about respecting the limit here? 
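+// // Editor's note, not in the original: one way to respect limit() here would be to cap
+// // the total at limit() - position() and stop once that many bytes have been written,
+// // instead of draining every underlying chunk regardless of the limit.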
+// written += buffer.remaining() +// while (buffer.hasRemaining) +// channel.write(buffer) +// } +// written +// } +// +//} +// +//object LargeByteBuffer { +// +// def asLargeByteBuffer(byteBuffer: ByteBuffer): LargeByteBuffer = { +// new WrappedLargeByteBuffer(Array(byteBuffer)) +// } +// +// def asLargeByteBuffer(bytes: Array[Byte]): LargeByteBuffer = { +// new WrappedLargeByteBuffer(Array(ByteBuffer.wrap(bytes))) +// } +// +//// +//// def allocateOnHeap(size: Long, maxChunk: Int): LargeByteBuffer = { +//// val buffer = ChainedBuffer.withInitialSize(maxChunk, size) +//// new ChainedLargeByteBuffer(buffer) +//// } +// +// def mapFile( +// channel: FileChannel, +// mode: MapMode, +// offset: Long, +// length: Long, +// maxChunk: Int = Integer.MAX_VALUE - 1e6.toInt +// ): LargeByteBuffer = { +// val offsets = new ArrayBuffer[Long]() +// var curOffset = offset +// val end = offset + length +// while (curOffset < end) { +// offsets += curOffset +// val length = math.min(end - curOffset, maxChunk) +// curOffset += length +// } +// offsets += end +// val chunks = new Array[ByteBuffer](offsets.size - 1) +// (0 until offsets.size - 1).foreach{idx => +// chunks(idx) = channel.map(mode, offsets(idx), offsets(idx + 1) - offsets(idx)) +// } +// new WrappedLargeByteBuffer(chunks) +// } +//} +// +// +//// +/////** +//// * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G +//// * which ByteBuffers are limited to. +//// * Externally, it exposes all the api which java.nio.ByteBuffer exposes. +//// * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. +//// * Not all the data might be loaded into memory (like disk or tachyon data) - so actual +//// * memory footprint - heap and vm could be much lower than capacity. +//// * +//// * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this +//// * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! +//// * +//// * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this +//// * will require the file to be kept open (repeatedly opening/closing file is not good +//// * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is +//// * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) +//// * +//// * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is +//// * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some +//// * cases (when we duplicate/slice them). Currently spark does not need this, but might in future +//// * so relook at it later. +//// */ +////// We should make this constructor private: but for now, +////// leaving it public since TachyonStore needs it +////class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], +//// private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { +//// +//// // TODO: TEMP code: to flush out potential resource leaks. 
REMOVE ME +//// private val allocateLocationThrowable: Throwable = { +//// if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { +//// new Throwable("blockId = " + BlockManager.getLookupBlockId) +//// } else { +//// null +//// } +//// } +//// private var disposeLocationThrowable: Throwable = null +//// +//// @volatile private var allowCleanerOverride = true +//// @volatile private var cleaner: BufferCleaner = new BufferCleaner { +//// override def doClean(buffer: LargeByteBuffer) = { +//// assert (LargeByteBuffer.this == buffer) +//// doDispose(needRelease = false) +//// } +//// } +//// +//// // should not be empty +//// assert (null != inputContainers && ! inputContainers.isEmpty) +//// // should not have any null's +//// assert (inputContainers.find(_ == null).isEmpty) +//// +//// // println("Num containers = " + inputContainers.size) +//// +//// // Position, limit and capacity relevant over the engire LargeByteBuffer +//// @volatile private var globalPosition = 0L +//// @volatile private var globalLimit = 0L +//// @volatile private var currentContainerIndex = 0 +//// +//// // The buffers in which the actual data is held. +//// private var containers: Array[ByteBufferContainer] = null +//// +//// // aggregate capacities of the individual buffers. +//// // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be +//// // sum of capacity of 0th and 1st block buffer +//// private var bufferPositionStart: Array[Long] = null +//// +//// // Contains the indices of a containers which requires release before subsequent invocation of +//// // read/write should be serviced. This is required since current read/write might have moved the +//// // position but since we are returning bytebuffers which depend on the validity of the existing +//// // bytebuffer, we cant release them yet. +//// private var needReleaseIndices = new HashSet[Int]() +//// +//// private val readable = ! inputContainers.exists(! _.isReadable) +//// private val writable = ! inputContainers.exists(! 
_.isWritable) +//// +//// +//// // initialize +//// @volatile private var globalCapacity = { +//// +//// // Ensure that there are no empty buffers : messes up with our code : unless it +//// // is a single buffer (for empty buffer for marker case) +//// assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) +//// +//// containers = { +//// if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray +//// } +//// containers.foreach(_.validate()) +//// +//// def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { +//// val buff = new ArrayBuffer[Long](arr.length + 1) +//// buff += 0L +//// +//// buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) +//// assert (buff.length == arr.length + 1) +//// bufferPositionStart = buff.toArray +//// } +//// +//// initializeBufferPositionStart(containers) +//// +//// // remove references from inputBuffers +//// inputContainers.clear() +//// +//// globalLimit = bufferPositionStart(containers.length) +//// globalPosition = 0L +//// currentContainerIndex = 0 +//// +//// assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) +//// +//// globalLimit +//// } +//// +//// final def position(): Long = globalPosition +//// +//// final def limit(): Long = globalLimit +//// +//// final def capacity(): Long = globalCapacity +//// +//// final def limit(newLimit: Long) { +//// if ((newLimit > capacity()) || (newLimit < 0)) { +//// throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) +//// } +//// +//// globalLimit = newLimit +//// if (position() > newLimit) position(newLimit) +//// } +//// +//// def skip(skipBy: Long) = position(position() + skipBy) +//// +//// private def releasePendingContainers() { +//// if (! needReleaseIndices.isEmpty) { +//// val iter = needReleaseIndices.iterator +//// while (iter.hasNext) { +//// val index = iter.next() +//// assert (index >= 0 && index < containers.length) +//// // It is possible to move from one container to next before the previous +//// // container was acquired. For example, get forcing move to next container +//// // since current was exhausted immediatelly followed by a position() +//// // so the container we moved to was never acquired. +//// +//// // assert (containers(index).isAcquired) +//// // will this always be satisfied ? +//// // assert (index != currentContainerIndex) +//// if (containers(index).isAcquired) containers(index).release() +//// } +//// needReleaseIndices.clear() +//// } +//// } +//// +//// private def toNewContainer(newIndex: Int) { +//// if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { +//// +//// assert (currentContainerIndex >= 0) +//// needReleaseIndices += currentContainerIndex +//// } +//// currentContainerIndex = newIndex +//// } +//// +//// // expensive method, sigh ... optimize it later ? +//// final def position(newPosition: Long) { +//// +//// if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() +//// +//// if (currentContainerIndex < bufferPositionStart.length - 1 && +//// newPosition >= bufferPositionStart(currentContainerIndex) && +//// newPosition < bufferPositionStart(currentContainerIndex + 1)) { +//// // Same buffer - easy method ... +//// globalPosition = newPosition +//// // Changed position - free previously returned buffers. 
+//// releasePendingContainers() +//// return +//// } +//// +//// // Find appropriate currentContainerIndex +//// // Since bufferPositionStart is sorted, can be replaced with binary search if required. +//// // For now, not in the perf critical path since buffers size is very low typically. +//// var index = 0 +//// val cLen = containers.length +//// while (index < cLen) { +//// if (newPosition >= bufferPositionStart(index) && +//// newPosition < bufferPositionStart(index + 1)) { +//// globalPosition = newPosition +//// toNewContainer(index) +//// // Changed position - free earlier and previously returned buffers. +//// releasePendingContainers() +//// return +//// } +//// index += 1 +//// } +//// +//// if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { +//// // boundary. +//// globalPosition = newPosition +//// toNewContainer(cLen) +//// // Changed position - free earlier and previously returned buffers. +//// releasePendingContainers() +//// return +//// } +//// +//// assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) +//// } +//// +//// +//// /** +//// * Clears this buffer. The position is set to zero, the limit is set to +//// * the capacity, and the mark is discarded. +//// * +//// *
Invoke this method before using a sequence of channel-read or +//// * put operations to fill this buffer. +//// * +//// *
This method does not actually erase the data in the buffer, but it +//// * is named as if it did because it will most often be used in situations +//// * in which that might as well be the case.
+//// */ +//// final def clear() { +//// // if (0 == globalCapacity) return +//// +//// needReleaseIndices += 0 +//// globalPosition = 0L +//// toNewContainer(0) +//// globalLimit = globalCapacity +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Flips this buffer. The limit is set to the current position and then +//// * the position is set to zero. If the mark is defined then it is +//// * discarded. +//// * +//// *
After a sequence of channel-read or put operations, invoke +//// * this method to prepare for a sequence of channel-write or relative +//// * get operations. +//// */ +//// final def flip() { +//// needReleaseIndices += 0 +//// globalLimit = globalPosition +//// globalPosition = 0L +//// toNewContainer(0) +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Rewinds this buffer. The position is set to zero and the mark is +//// * discarded. +//// * +//// *
Invoke this method before a sequence of channel-write or get +//// * operations, assuming that the limit has already been set +//// * appropriately. +//// */ +//// final def rewind() { +//// needReleaseIndices += 0 +//// globalPosition = 0L +//// toNewContainer(0) +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Returns the number of elements between the current position and the +//// * limit.
+//// * +//// * @return The number of elements remaining in this buffer +//// */ +//// final def remaining(): Long = { +//// globalLimit - globalPosition +//// } +//// +//// /** +//// * Tells whether there are any elements between the current position and +//// * the limit.
+//// * +//// * @return true if, and only if, there is at least one element +//// * remaining in this buffer +//// */ +//// final def hasRemaining() = { +//// globalPosition < globalLimit +//// } +//// +//// // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) +//// +//// // number of bytes remaining in currently active underlying buffer +//// private def currentRemaining(): Int = { +//// if (hasRemaining()) { +//// // validate currentContainerIndex is valid +//// assert (globalPosition >= bufferPositionStart(currentContainerIndex) && +//// globalPosition < bufferPositionStart(currentContainerIndex + 1), +//// "globalPosition = " + globalPosition + +//// ", currentContainerIndex = " + currentContainerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// +//// currentRemaining0(currentContainerIndex) +//// } else 0 +//// } +//// +//// // Without any validation : required when we are bumping the index (when validation will fail) ... +//// private def currentRemaining0(which: Int): Int = { +//// // currentBuffer().remaining() +//// math.max(0, math.min(bufferPositionStart(which + 1), +//// globalLimit) - globalPosition).asInstanceOf[Int] +//// } +//// +//// // Set the approppriate position/limit for the current underlying buffer to mirror our +//// // the LargeByteBuffer's state. +//// private def fetchCurrentBuffer(): ByteBuffer = { +//// releasePendingContainers() +//// +//// assert (currentContainerIndex < containers.length) +//// +//// val container = containers(currentContainerIndex) +//// if (! container.isAcquired) { +//// container.acquire() +//// } +//// +//// assert (container.isAcquired) +//// if (LargeByteBuffer.enableExpensiveAssert) { +//// assert (! containers.exists( b => (b ne container) && b.isAcquired)) +//// } +//// +//// assert (currentContainerIndex < bufferPositionStart.length && +//// globalPosition < bufferPositionStart(currentContainerIndex + 1), +//// "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + +//// bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) +//// +//// val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). +//// asInstanceOf[Int] +//// +//// val buffer = container.getByteBuffer +//// buffer.position(buffPosition) +//// val diff = buffer.capacity - buffPosition +//// val left = remaining() +//// if (diff <= left) { +//// buffer.limit(buffer.capacity()) +//// } else { +//// // Can happen if limit() was called. +//// buffer.limit(buffPosition + left.asInstanceOf[Int]) +//// } +//// +//// buffer +//// } +//// +//// // To be used ONLY to test in suites. +//// private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { +//// if ("1" != System.getProperty("SPARK_TESTING")) { +//// throw new IllegalStateException("This method is to be used ONLY within spark test suites") +//// } +//// +//// fetchCurrentBuffer() +//// } +//// +//// // Expects that the invoker has ensured that this can be safely invoked. +//// // That is, it wont be invoked when the loop wont terminate. +//// private def toNonEmptyBuffer() { +//// +//// if (! hasRemaining()) { +//// var newIndex = currentContainerIndex +//// // Ensure we are in the right block or not. 
+//// while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { +//// newIndex += 1 +//// } +//// toNewContainer(newIndex) +//// // Do not do this - since we might not yet have consumed the buffer which caused EOF right now +//// /* +//// // Add last one also, and release it too - since we are at the end of the buffer with nothing +//// // more pending. +//// if (newIndex >= 0 && currentContainerIndex < containers.length) { +//// needReleaseIndices += newIndex +//// } +//// */ +//// assert (currentContainerIndex >= 0) +//// // releasePendingContainers() +//// return +//// } +//// +//// var index = currentContainerIndex +//// while (0 == currentRemaining0(index) && index < containers.length) { +//// index += 1 +//// } +//// assert (currentContainerIndex < containers.length) +//// toNewContainer(index) +//// assert (0 != currentRemaining()) +//// } +//// +//// private def assertPreconditions(containerIndex: Int) { +//// assert (globalPosition >= bufferPositionStart(containerIndex), +//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// assert (globalPosition < bufferPositionStart(containerIndex + 1), +//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// +//// assert (globalLimit <= globalCapacity) +//// assert (containerIndex < containers.length) +//// } +//// +//// +//// /** +//// * Attempts to return a ByteBuffer of the requested size. +//// * It is possible to return a buffer of size smaller than requested +//// * even though hasRemaining == true +//// * +//// * On return, position would have been moved 'ahead' by the size of the buffer returned : +//// * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer +//// * +//// * +//// * This is used to primarily retrieve content of this buffer to expose via ByteBuffer +//// * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the +//// * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer +//// * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying +//// * container is a disk backed container, and we make subsequent calls to get(), the returned +//// * ByteBuffer can be dispose'ed off +//// * +//// * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
+//// * @return +//// */ +//// +//// private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { +//// fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) +//// } +//// +//// private def fetchBufferOfSizeImpl(maxChunkSize: Int, +//// canReleaseContainers: Boolean): ByteBuffer = { +//// if (canReleaseContainers) releasePendingContainers() +//// assert (maxChunkSize > 0) +//// +//// // not checking for degenerate case of maxChunkSize == 0 +//// if (globalPosition >= globalLimit) { +//// // throw exception +//// throw new BufferUnderflowException() +//// } +//// +//// // Check preconditions : disable these later, since they might be expensive to +//// // evaluate for every IO op +//// assertPreconditions(currentContainerIndex) +//// +//// val currentBufferRemaining = currentRemaining() +//// +//// assert (currentBufferRemaining > 0) +//// +//// val size = math.min(currentBufferRemaining, maxChunkSize) +//// +//// val newBuffer = if (currentBufferRemaining > maxChunkSize) { +//// val currentBuffer = fetchCurrentBuffer() +//// val buff = ByteBufferContainer.createSlice(currentBuffer, +//// currentBuffer.position(), maxChunkSize) +//// assert (buff.remaining() == maxChunkSize) +//// buff +//// } else { +//// val currentBuffer = fetchCurrentBuffer() +//// val buff = currentBuffer.slice() +//// assert (buff.remaining() == currentBufferRemaining) +//// buff +//// } +//// +//// assert (size == newBuffer.remaining()) +//// assert (0 == newBuffer.position()) +//// assert (size == newBuffer.limit()) +//// assert (newBuffer.capacity() == newBuffer.limit()) +//// +//// globalPosition += newBuffer.remaining +//// toNonEmptyBuffer() +//// +//// newBuffer +//// } +//// +//// // Can we service the read/write from the currently active (underlying) bytebuffer or not. +//// // For almost all cases, this will return true allowing us to optimize away the more expensive +//// // computations. +//// private def localReadWritePossible(size: Int) = +//// size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) +//// +//// +//// def getLong(): Long = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 8) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(8)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 8) +//// val retval = buff.getLong +//// globalPosition += 8 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// val buff = readFully(8) +//// buff.getLong +//// } +//// +//// def getInt(): Int = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 4) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(4)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 4) +//// val retval = buff.getInt +//// globalPosition += 4 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// val buff = readFully(4) +//// buff.getInt +//// } +//// +//// def getChar(): Char = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 2) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(2)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 2) +//// val retval = buff.getChar +//// globalPosition += 2 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// // if slice is becoming too expensive, revisit this ... 
+//// val buff = readFully(2) +//// buff.getChar +//// } +//// +//// def get(): Byte = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// // If we have remaining bytes, previous invocations MUST have ensured that we are at +//// // a buffer which has data to be read. +//// assert (localReadWritePossible(1)) +//// +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) +//// val retval = buff.get() +//// globalPosition += 1 +//// toNonEmptyBuffer() +//// +//// retval +//// } +//// +//// def get(arr: Array[Byte], offset: Int, size: Int): Int = { +//// assert (readable) +//// releasePendingContainers() +//// +//// LargeByteBuffer.checkOffsets(arr, offset, size) +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return 0 +//// +//// if (! hasRemaining()) return -1 +//// +//// if (localReadWritePossible(size)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= size) +//// buff.get(arr, offset, size) +//// globalPosition += size +//// toNonEmptyBuffer() +//// return size +//// } +//// +//// var remainingSize = math.min(size, remaining()).asInstanceOf[Int] +//// var currentOffset = offset +//// +//// while (remainingSize > 0) { +//// val buff = fetchBufferOfSize(remainingSize) +//// val toCopy = math.min(buff.remaining(), remainingSize) +//// +//// buff.get(arr, currentOffset, toCopy) +//// currentOffset += toCopy +//// remainingSize -= toCopy +//// } +//// +//// currentOffset - offset +//// } +//// +//// +//// private def createSlice(size: Long): LargeByteBuffer = { +//// +//// releasePendingContainers() +//// +//// if (remaining() < size) { +//// // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +//// +//// val arr = new ArrayBuffer[ByteBufferContainer](2) +//// var totalLeft = size +//// +//// // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) +//// +//// var containerIndex = currentContainerIndex +//// while (totalLeft > 0 && hasRemaining()) { +//// assertPreconditions(containerIndex) +//// val container = containers(containerIndex) +//// val currentLeft = currentRemaining0(containerIndex) +//// +//// assert (globalPosition + currentLeft <= globalLimit) +//// assert (globalPosition >= bufferPositionStart(containerIndex) && +//// (globalPosition < bufferPositionStart(containerIndex + 1))) +//// +//// val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] +//// val sliceSize = math.min(totalLeft, currentLeft) +//// assert (from >= 0) +//// assert (sliceSize > 0 && sliceSize <= Int.MaxValue) +//// +//// val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) +//// arr += slice +//// +//// globalPosition += sliceSize +//// totalLeft -= sliceSize +//// if (currentLeft == sliceSize) containerIndex += 1 +//// } +//// +//// // Using toNonEmptyBuffer instead of directly moving to next here so that +//// // other checks can be performed there. 
+//// toNonEmptyBuffer() +//// // force cleanup - this is fine since we are not using the buffers directly +//// // which are actively needed (the returned value is on containers which can +//// // recreate) +//// releasePendingContainers() +//// // free current container if acquired. +//// if (currentContainerIndex < containers.length) { +//// containers(currentContainerIndex).release() +//// } +//// assert (currentContainerIndex == containerIndex) +//// +//// val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) +//// retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) +//// retval +//// } +//// +//// // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers +//// // This is to be used only for writes : and ensures that writes are done into the appropriate +//// // underlying bytebuffers. +//// def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { +//// assert(writable) +//// assert(size >= 0) +//// +//// createSlice(size) +//// } +//// +//// // get a buffer which is of the specified size and contains data from the underlying buffers +//// // Note, the actual data might be spread across the underlying buffers. +//// // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! +//// private def readFully(size: Int): ByteBuffer = { +//// assert (readable) +//// +//// if (remaining() < size) { +//// // throw exception +//// throw new BufferUnderflowException() +//// } +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER +//// +//// // Expected to be handled elsewhere. +//// assert (! localReadWritePossible(size)) +//// +//// val localBuff = { +//// val buff = fetchBufferOfSize(size) +//// // assert(buff.remaining() <= size) +//// // if (buff.remaining() == size) return buff +//// assert(buff.remaining() < size) +//// ByteBuffer.allocate(size).put(buff) +//// } +//// +//// // assert (localBuff.hasRemaining) +//// +//// while (localBuff.hasRemaining) { +//// val buff = fetchBufferOfSize(localBuff.remaining()) +//// localBuff.put(buff) +//// } +//// +//// localBuff.flip() +//// localBuff +//// } +//// +//// +//// +//// def put(b: Byte) { +//// assert (writable) +//// if (remaining() < 1) { +//// // logInfo("put byte. remaining = " + remaining() + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// assert (currentRemaining() > 0) +//// +//// fetchCurrentBuffer().put(b) +//// globalPosition += 1 +//// // Check to need to bump the index ? +//// toNonEmptyBuffer() +//// } +//// +//// +//// def put(buffer: ByteBuffer) { +//// assert (writable) +//// if (remaining() < buffer.remaining()) { +//// throw new BufferOverflowException +//// } +//// +//// val bufferRemaining = buffer.remaining() +//// if (localReadWritePossible(bufferRemaining)) { +//// +//// assert (currentRemaining() >= bufferRemaining) +//// +//// fetchCurrentBuffer().put(buffer) +//// +//// globalPosition += bufferRemaining +//// toNonEmptyBuffer() +//// return +//// } +//// +//// while (buffer.hasRemaining) { +//// val currentBufferRemaining = currentRemaining() +//// val bufferRemaining = buffer.remaining() +//// +//// if (currentBufferRemaining >= bufferRemaining) { +//// fetchCurrentBuffer().put(buffer) +//// globalPosition += bufferRemaining +//// } else { +//// // Split across buffers. 
+//// val currentBuffer = fetchCurrentBuffer() +//// assert (currentBuffer.remaining() >= currentBufferRemaining) +//// val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), +//// currentBufferRemaining) +//// assert (sliced.remaining() == currentBufferRemaining) +//// currentBuffer.put(sliced) +//// // move buffer pos +//// buffer.position(buffer.position() + currentBufferRemaining) +//// +//// globalPosition += currentBufferRemaining +//// } +//// toNonEmptyBuffer() +//// } +//// +//// assert (! hasRemaining() || currentRemaining() > 0) +//// } +//// +//// def put(other: LargeByteBuffer) { +//// assert (writable) +//// if (this.remaining() < other.remaining()) { +//// throw new BufferOverflowException +//// } +//// +//// while (other.hasRemaining()) { +//// val buffer = other.fetchBufferOfSize(other.currentRemaining()) +//// this.put(buffer) +//// } +//// } +//// +//// +//// def duplicate(): LargeByteBuffer = { +//// val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) +//// // We do a duplicate as part of construction - so avoid double duplicate. +//// // containersCopy ++= containers.map(_.duplicate()) +//// containersCopy ++= containers +//// val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) +//// +//// // set limit and position (in that order) ... +//// retval.limit(this.limit()) +//// retval.position(this.position()) +//// +//// // Now release our containers - if any had been acquired +//// releasePendingContainers() +//// +//// retval +//// } +//// +//// +//// /** +//// * 'read' a LargeByteBuffer of size specified and return that. +//// * Position will be incremented by size +//// * +//// * The name might be slightly confusing : rename ? +//// * +//// * @param size Amount of data to be read from this buffer and returned +//// * @return +//// */ +//// def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { +//// if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException +//// if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException +//// +//// +//// assert (readable) +//// assert (size >= 0) +//// +//// releasePendingContainers() +//// +//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +//// +//// createSlice(size) +//// } +//// +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def readFrom(channel: ReadableByteChannel): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) { +//// // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// var totalBytesRead = 0L +//// +//// while (hasRemaining()) { +//// // read what we can ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = currentRemaining() +//// val bytesRead = channel.read(buffer) +//// +//// if (bytesRead > 0) { +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. 
return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// // Cleanup last buffer ? +//// toNonEmptyBuffer() +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def readFrom(inStrm: InputStream): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// // if (! hasRemaining()) throw new BufferOverflowException +//// if (! hasRemaining()) return 0 +//// +//// var totalBytesRead = 0L +//// +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // read what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +//// // see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val max = math.min(buff.length, bufferRemaining) +//// val bytesRead = inStrm.read(buff, 0, max) +//// +//// if (bytesRead > 0) { +//// buffer.put(buff, 0, bytesRead) +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// // buffer.position(buffer.position + bytesRead) +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce +//// // code for performance reasons. +//// def readFrom(inStrm: DataInput): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// // if (! hasRemaining()) throw new BufferOverflowException +//// if (! hasRemaining()) return 0 +//// +//// var totalBytesRead = 0L +//// +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // read what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +//// // see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val max = math.min(buff.length, bufferRemaining) +//// inStrm.readFully(buff, 0, max) +//// val bytesRead = max +//// +//// if (bytesRead > 0) { +//// buffer.put(buff, 0, bytesRead) +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// // buffer.position(buffer.position() + bytesRead) +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. 
return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// // Note: tries to do it efficiently without needing to load everything into memory +//// // (particularly for diskbacked buffers, etc). +//// def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { +//// +//// assert (readable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// var totalBytesWritten = 0L +//// +//// while (hasRemaining()) { +//// // Write what we can ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// assert (bufferRemaining > 0) +//// val bytesWritten = channel.write(buffer) +//// +//// if (bytesWritten > 0) { +//// totalBytesWritten += bytesWritten +//// // bump position too .. +//// globalPosition += bytesWritten +//// if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() +//// assert (! hasRemaining() || currentRemaining() > 0) +//// } +//// else if (0 == bytesWritten) { +//// return totalBytesWritten +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// assert (! hasRemaining()) +//// if (cleanup) { +//// free() +//// } +//// totalBytesWritten +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { +//// +//// assert (readable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// var totalBytesWritten = 0L +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // write what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from bytearray to buff and from +//// // buff to outputstream. see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val size = math.min(bufferRemaining, buff.length) +//// buffer.get(buff, 0, size) +//// outStrm.write(buff, 0, size) +//// +//// totalBytesWritten += size +//// // bump position too .. +//// globalPosition += size +//// +//// if (size >= bufferRemaining) toNonEmptyBuffer() +//// } +//// +//// toNonEmptyBuffer() +//// if (cleanup) { +//// free() +//// } +//// totalBytesWritten +//// } +//// +//// def asInputStream(): InputStream = { +//// new InputStream() { +//// override def read(): Int = { +//// if (! hasRemaining()) return -1 +//// get() +//// } +//// +//// override def read(arr: Array[Byte], off: Int, len: Int): Int = { +//// if (! hasRemaining()) return -1 +//// +//// get(arr, off, len) +//// } +//// +//// override def available(): Int = { +//// // current remaining is what can be read without blocking +//// // anything higher might need disk access/buffer swapping. +//// /* +//// val left = remaining() +//// math.min(left, Int.MaxValue).asInstanceOf[Int] +//// */ +//// currentRemaining() +//// } +//// } +//// } +//// +//// def getCleaner() = cleaner +//// +//// /** +//// * @param cleaner The previous cleaner, so that the caller can chain them if required. 
+//// * @return +//// */ +//// private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { +//// overrideCleaner(cleaner, allowOverride = true) +//// } +//// +//// private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { +//// if (! this.allowCleanerOverride) { +//// // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free +//// return this.cleaner +//// } +//// +//// this.allowCleanerOverride = allowOverride +//// assert (null != cleaner) +//// val prev = this.cleaner +//// this.cleaner = cleaner +//// // logInfo("Overriding " + prev + " with " + this.cleaner) +//// prev +//// } +//// +//// private def doReleaseAll() { +//// for (container <- containers) { +//// container.release() +//// } +//// } +//// +//// def free(invokeCleaner: Boolean = true) { +//// // logInfo("Free on " + this + ", cleaner = " + cleaner) +//// // always invoking release +//// doReleaseAll() +//// +//// if (invokeCleaner) cleaner.clean(this) +//// } +//// +//// private def doDispose(needRelease: Boolean) { +//// +//// if (disposeLocationThrowable ne null) { +//// logError("Already free'ed earlier at : ", disposeLocationThrowable) +//// logError("Current at ", new Throwable) +//// throw new IllegalStateException("Already freed.") +//// } +//// disposeLocationThrowable = new Throwable() +//// +//// // Forcefully cleanup all +//// if (needRelease) doReleaseAll() +//// +//// // Free in a different loop, in case different containers refer to same resource +//// // to release (like file) +//// for (container <- containers) { +//// container.free() +//// } +//// +//// needReleaseIndices.clear() +//// +//// // We should not use this buffer anymore : set the values such that f +//// // we dont ... +//// globalPosition = 0 +//// globalLimit = 0 +//// globalCapacity = 0 +//// } +//// +//// // copy data over ... MUST be used only for cases where array is known to be +//// // small to begin with. slightly risky method due to that assumption +//// def toByteArray(): Array[Byte] = { +//// val positionBackup = position() +//// val size = remaining() +//// if (size > Int.MaxValue) { +//// throw new IllegalStateException( +//// "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") +//// } +//// +//// val retval = new Array[Byte](size.asInstanceOf[Int]) +//// val readSize = get(retval, 0, retval.length) +//// assert (readSize == retval.length, +//// "readSize = " + readSize + ", retval.length = " + retval.length) +//// +//// position(positionBackup) +//// +//// retval +//// } +//// +//// // copy data over ... MUST be used only for cases where array is known to be +//// // small to begin with. slightly risky method due to that assumption +//// def toByteBuffer(): ByteBuffer = { +//// ByteBuffer.wrap(toByteArray()) +//// } +//// +//// def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { +//// val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) +//// val currentPosition = position() +//// retval.put(this) +//// position(currentPosition) +//// retval.clear() +//// retval +//// } +//// +//// +//// +//// // This is ONLY used for testing : that too as part of development of this and associated classes +//// // remove before contributing to spark. 
+//// def hexDump(): String = { +//// if (remaining() * 64 > Int.MaxValue) { +//// throw new UnsupportedOperationException("buffer too large " + remaining()) +//// } +//// +//// val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) +//// +//// var perLine = 0 +//// var first = true +//// for (b <- toByteArray()) { +//// perLine += 1 +//// if (perLine % 8 == 0) { +//// sb.append('\n') +//// first = true +//// } +//// if (! first) sb.append(' ') +//// first = false +//// sb.append(java.lang.Integer.toHexString(b & 0xff)) +//// } +//// sb.append('\n') +//// sb.toString() +//// } +//// +//// override def toString: String = { +//// val sb: StringBuffer = new StringBuffer +//// sb.append(getClass.getName) +//// sb.append(' ') +//// sb.append(System.identityHashCode(this)) +//// sb.append("@[pos=") +//// sb.append(position()) +//// sb.append(" lim=") +//// sb.append(limit()) +//// sb.append(" cap=") +//// sb.append(capacity()) +//// sb.append("]") +//// sb.toString +//// } +//// +//// +//// +//// override def finalize(): Unit = { +//// var marked = false +//// if (containers ne null) { +//// if (containers.exists(container => container.isAcquired && container.requireRelease())) { +//// marked = true +//// logError("BUG: buffer was not released - and now going out of scope. " + +//// "Potential resource leak. Allocated at ", allocateLocationThrowable) +//// containers.foreach(_.release()) +//// } +//// if (containers.exists(container => !container.isFreed && container.requireFree())) { +//// if (!marked) { +//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", +//// allocateLocationThrowable) +//// } +//// else { +//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak") +//// } +//// containers.foreach(_.free()) +//// } +//// } +//// super.finalize() +//// } +////} +//// +//// +////object LargeByteBuffer extends Logging { +//// +//// private val noopDisposeFunction = new BufferCleaner() { +//// protected def doClean(buffer: LargeByteBuffer) { +//// buffer.free(invokeCleaner = false) +//// } +//// } +//// +//// val enableExpensiveAssert = false +//// private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) +//// val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( +//// new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) +//// // Do not allow anyone else to override cleaner +//// EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) +//// +//// // 8K sufficient ? 
+//// private val TEMP_ARRAY_SIZE = 8192 +//// +//// /** +//// * Create a LargeByteBuffer of specified size which is split across +//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory +//// * ByteBuffer +//// * +//// */ +//// def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { +//// if (0 == totalSize) { +//// return EMPTY_BUFFER +//// } +//// +//// assert (totalSize > 0) +//// +//// val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) +//// val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) +//// +//// assert (lastBlockSize > 0) +//// +//// val bufferArray = { +//// val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// for (index <- 0 until numBlocks - 1) { +//// val buff = ByteBuffer.allocate(blockSize) +//// // buff.clear() +//// arr += new HeapByteBufferContainer(buff, true) +//// } +//// arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) +//// assert (arr.length == numBlocks) +//// arr +//// } +//// +//// new LargeByteBuffer(bufferArray, false, false) +//// } +//// +//// /** +//// * Create a LargeByteBuffer of specified size which is split across +//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk +//// * +//// */ +//// private def allocateDiskBuffer(totalSize: Long, +//// blockManager: BlockManager): LargeByteBuffer = { +//// if (0 == totalSize) { +//// return EMPTY_BUFFER +//// } +//// +//// assert (totalSize > 0) +//// +//// // Create a file of the specified size. +//// val file = blockManager.diskBlockManager.createTempBlock()._2 +//// val raf = new RandomAccessFile(file, "rw") +//// try { +//// raf.setLength(totalSize) +//// } finally { +//// raf.close() +//// } +//// +//// readWriteDiskSegment(new FileSegment(file, 0, totalSize), +//// ephemeralDiskBacked = true, blockManager.ioConf) +//// } +//// +//// // The returned buffer takes up ownership of the underlying buffers +//// // (including dispos'ing that when done) +//// def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { +//// val nonEmpty = buffers.filter(_.hasRemaining) +//// +//// // cleanup the empty buffers +//// buffers.filter(! _.hasRemaining).foreach(b => BlockManager.dispose(b)) +//// +//// +//// if (nonEmpty.isEmpty) { +//// return EMPTY_BUFFER +//// } +//// +//// // slice so that offsets match our requirement +//// new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => +//// new HeapByteBufferContainer(b.slice(), true)), false, false) +//// } +//// +//// def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { +//// // only non empty arrays +//// val arrays = byteArrays.filter(_.length > 0) +//// if (0 == arrays.length) return EMPTY_BUFFER +//// +//// new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => +//// new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) +//// } +//// +//// def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { +//// +//// if (inputBuffers.isEmpty) return EMPTY_BUFFER +//// +//// if (! inputBuffers.exists(_.hasRemaining())) { +//// if (canDispose) inputBuffers.map(_.free()) +//// return EMPTY_BUFFER +//// } +//// +//// // release all temp resources acquired +//// inputBuffers.foreach(buff => buff.releasePendingContainers()) +//// // free current container if acquired. 
+//// inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { +//// buff.containers(buff.currentContainerIndex).release() +//// }) +//// // inputBuffers.foreach(b => b.doReleaseAll()) +//// +//// +//// // Dispose of any empty buffers +//// if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) +//// +//// // Find all containers we need. +//// val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) +//// +//// val containers = buffers.flatMap(_.containers) +//// assert (! containers.isEmpty) +//// // The in order containers of "buffers" seq constitute the required return value +//// val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, +//// // if you cant dispose, then we dont own the buffers : in which case, need duplicate +//// ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) +//// +//// if (canDispose) { +//// // override dispose of all other buffers. +//// val disposeFunctions = inputBuffers.map { +//// buffer => { +//// (buffer, buffer.overrideCleaner(noopDisposeFunction)) +//// } +//// } +//// +//// val cleaner = retval.getCleaner() +//// val newCleaner = new BufferCleaner { +//// protected def doClean(buffer: LargeByteBuffer) { +//// +//// assert (retval == buffer) +//// // default cleaner. +//// cleaner.clean(retval) +//// // not required, since we are within clean anyway. +//// // retval.free(invokeCleaner = false) +//// +//// // retval.doDispose(needRelease = true) +//// +//// // This might actually call dispose twice on some (initially) empty buffers, +//// // which is fine since we now guard against that. +//// disposeFunctions.foreach(v => v._2.clean(v._1)) +//// // Call the free method too : so that buffers are marked free ... +//// disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) +//// } +//// } +//// +//// val prev = retval.overrideCleaner(newCleaner) +//// assert (prev == cleaner) +//// } +//// +//// retval +//// } +//// +//// private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { +//// if (arr == null) { +//// throw new NullPointerException +//// } else if (offset < 0 || size < 0 || offset + size > arr.length) { +//// throw new IndexOutOfBoundsException +//// } +//// } +//// +//// def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { +//// if (size <= blockManager.ioConf.maxInMemSize) { +//// LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) +//// } else { +//// LargeByteBuffer.allocateDiskBuffer(size, blockManager) +//// } +//// } +//// +//// def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, +//// ephemeralDiskBacked: Boolean): LargeByteBuffer = { +//// // Split the block into multiple of BlockStore.maxBlockSize +//// val segmentSize = segment.length +//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +//// +//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// +//// for (index <- 0 until numBlocks - 1) { +//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +//// segment.offset + index * blockSize, blockSize), ioConf) +//// } +//// +//// // Last block +//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) +//// +//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +//// } +//// 
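// (Illustrative sketch, not part of the patch.) readFromDiskSegment above and
// readWriteDiskSegment below both split a FileSegment into fixed-size containers plus a
// shorter tail. IOConfig.numBlocks/lastBlockSize are not shown in this series, so the
// helpers here only assume the usual ceiling-division convention; the names are hypothetical.
object ChunkingSketch {
  def numBlocks(totalSize: Long, blockSize: Long): Int =
    ((totalSize + blockSize - 1) / blockSize).toInt                   // ceil(totalSize / blockSize)

  def lastBlockSize(totalSize: Long, blockSize: Long): Long =
    totalSize - (numBlocks(totalSize, blockSize) - 1L) * blockSize    // size of the tail chunk, for totalSize > 0

  // Example: a 5 GiB segment with 1 GiB blocks -> 5 blocks, tail = 1 GiB;
  //          a 4.5 GiB segment with 1 GiB blocks -> 5 blocks, tail = 0.5 GiB.
}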
+//// def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, +//// ioConf: IOConfig): LargeByteBuffer = { +//// +//// // Split the block into multiple of BlockStore.maxBlockSize +//// val segmentSize = segment.length +//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +//// +//// logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + +//// ", lastBlockSize = " + lastBlockSize) +//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// +//// for (index <- 0 until numBlocks - 1) { +//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +//// segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) +//// } +//// +//// // Last block +//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) +//// +//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +//// } +////} From 01cafbf15026fdcbfd58566335802082493a491c Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 25 Feb 2015 15:53:22 -0600 Subject: [PATCH 07/97] tests compile too --- core/src/test/scala/org/apache/spark/DistributedSuite.scala | 3 +-- .../scala/org/apache/spark/storage/BlockManagerSuite.scala | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala index b183373de5019..97ea3578aa8ba 100644 --- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark -import org.apache.spark.io.LargeByteBuffer import org.scalatest.FunSuite import org.scalatest.concurrent.Timeouts._ import org.scalatest.Matchers @@ -196,7 +195,7 @@ class DistributedSuite extends FunSuite with Matchers with LocalSparkContext { blockManager.master.getLocations(blockId).foreach { cmId => val bytes = blockTransfer.fetchBlockSync(cmId.host, cmId.port, cmId.executorId, blockId.toString) - val deserialized = blockManager.dataDeserialize(blockId, LargeByteBuffer.asLargeByteBuffer(bytes.nioByteBuffer())) + val deserialized = blockManager.dataDeserialize(blockId, bytes.nioByteBuffer()) .asInstanceOf[Iterator[Int]].toList assert(deserialized === (1 to 100).toList) } diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index f692c0ffe967f..05107fca1a5d1 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -21,7 +21,7 @@ import java.nio.{ByteBuffer, MappedByteBuffer} import java.util.Arrays import java.util.concurrent.TimeUnit -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.{LargeByteBufferHelper, LargeByteBuffer} import scala.collection.mutable.ArrayBuffer import scala.concurrent.Await @@ -811,7 +811,7 @@ class BlockManagerSuite extends FunSuite with Matchers with BeforeAndAfterEach var counter = 0.toByte def incr = {counter = (counter + 1).toByte; counter;} val bytes = Array.fill[Byte](1000)(incr) - val byteBuffer = LargeByteBuffer.asLargeByteBuffer(bytes) + val byteBuffer = 
LargeByteBufferHelper.asLargeByteBuffer(bytes) val blockId = BlockId("rdd_1_2") From ce391a0dbbba3d169d4013d2e387b7808065b3f8 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 25 Feb 2015 16:00:00 -0600 Subject: [PATCH 08/97] failing test case (though its crappy) --- .../network/netty/NettyBlockTransferSuite.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala index b242ff593f04a..2133b3286ff36 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.network.netty import java.util.concurrent.TimeUnit +import org.apache.commons.io.IOUtils import org.apache.spark.network.BlockDataManager import org.apache.spark.network.buffer.{ManagedBuffer, LargeByteBufferHelper, NioManagedBuffer} import org.apache.spark.network.shuffle.BlockFetchingListener @@ -42,14 +43,16 @@ class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar { val blockManager = mock[BlockDataManager] val blockId = ShuffleBlockId(0, 1, 2) - val blockString = "Hello, world!" - val blockBuffer = new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(blockString.getBytes)) + val buf = LargeByteBufferHelper.allocate(Integer.MAX_VALUE.toLong + 100l) + val blockBuffer = new NioManagedBuffer(buf) when(blockManager.getBlockData(blockId)).thenReturn(blockBuffer) val from = new NettyBlockTransferService(conf, securityManager, numCores = 1) from.init(blockManager) + println("from: " + from.hostName + ":" + from.port) val to = new NettyBlockTransferService(conf, securityManager, numCores = 1) to.init(blockManager) + println("to: " + to.hostName + ":" + to.port) try { val promise = Promise[ManagedBuffer]() @@ -65,7 +68,10 @@ class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar { } }) - Await.ready(promise.future, FiniteDuration(1000, TimeUnit.MILLISECONDS)) + Await.ready(promise.future, FiniteDuration(100, TimeUnit.SECONDS)) + val v = promise.future.value.get.get +// IOUtils.toString(v.createInputStream()) should equal(blockString) + println(v.nioByteBuffer().limit()) } finally { from.close() to.close() From 29f0a8a10c685ea2742d239a748bc6c5d7798380 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 27 Feb 2015 13:19:12 -0600 Subject: [PATCH 09/97] fix use of LargeByteBuffer in some tests, create UploadPartialBlock --- .../buffer/WrappedLargeByteBuffer.java | 4 +- .../network/buffer/LargeByteBuffer.scala | 1760 ----------------- .../network/ChunkFetchIntegrationSuite.java | 15 +- .../spark/network/TestManagedBuffer.java | 5 +- .../protocol/BlockTransferMessage.java | 3 +- .../shuffle/protocol/UploadPartialBlock.java | 128 ++ .../shuffle/BlockTransferMessagesSuite.java | 2 + .../ExternalShuffleBlockHandlerSuite.java | 5 +- .../ExternalShuffleIntegrationSuite.java | 13 +- .../shuffle/OneForOneBlockFetcherSuite.java | 13 +- .../shuffle/RetryingBlockFetcherSuite.java | 7 +- 11 files changed, 166 insertions(+), 1789 deletions(-) delete mode 100644 network/common/src/main/scala/org/apache/spark/network/buffer/LargeByteBuffer.scala create mode 100644 network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadPartialBlock.java diff --git 
a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java index 2ecb569b3a23c..e948fa67581d0 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java @@ -71,9 +71,9 @@ public byte get() { private void updateCurrentBuffer() { //TODO fix end condition - while(!currentBuffer.hasRemaining()) { + while(currentBuffer != null && !currentBuffer.hasRemaining()) { currentBufferIdx += 1; - currentBuffer = underlying[currentBufferIdx]; + currentBuffer = currentBufferIdx < underlying.length ? underlying[currentBufferIdx] : null; } } diff --git a/network/common/src/main/scala/org/apache/spark/network/buffer/LargeByteBuffer.scala b/network/common/src/main/scala/org/apache/spark/network/buffer/LargeByteBuffer.scala deleted file mode 100644 index eb7520d3c6325..0000000000000 --- a/network/common/src/main/scala/org/apache/spark/network/buffer/LargeByteBuffer.scala +++ /dev/null @@ -1,1760 +0,0 @@ -///* -// * Licensed to the Apache Software Foundation (ASF) under one or more -// * contributor license agreements. See the NOTICE file distributed with -// * this work for additional information regarding copyright ownership. -// * The ASF licenses this file to You under the Apache License, Version 2.0 -// * (the "License"); you may not use this file except in compliance with -// * the License. You may obtain a copy of the License at -// * -// * http://www.apache.org/licenses/LICENSE-2.0 -// * -// * Unless required by applicable law or agreed to in writing, software -// * distributed under the License is distributed on an "AS IS" BASIS, -// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// * See the License for the specific language governing permissions and -// * limitations under the License. -// */ -// -//package org.apache.spark.network.buffer -// -//import java.nio.ByteBuffer -//import java.nio.channels.FileChannel.MapMode -//import java.nio.channels.{FileChannel, WritableByteChannel} -// -//import scala.collection.mutable.ArrayBuffer -// -// -// -//trait LargeByteBuffer { -//// def position(): Long -//// -//// def limit(): Long -// -// def capacity(): Long -// -// def get(): Byte //needed for ByteBufferInputStream -// -// def get(dst: Array[Byte], offset: Int, length: Int): Unit // for ByteBufferInputStream -// -// def position(position: Long): Unit //for ByteBufferInputStream -// -// def position(): Long //for ByteBufferInputStream -// -// /** doesn't copy data, just copies references & offsets */ -// def duplicate(): LargeByteBuffer -// -// def put(bytes: LargeByteBuffer): Unit -// -// //also need whatever is necessary for ByteArrayOutputStream for BlockManager#dataSerialize -// -// -// //TODO checks on limit semantics -// -// /** -// * Sets this buffer's limit. If the position is larger than the new limit then it is set to the -// * new limit. If the mark is defined and larger than the new limit then it is discarded. 
-// */ -// def limit(newLimit: Long): Unit -// -// /** -// * return this buffer's limit -// * @return -// */ -// def limit(): Long -// -// -// //an alternative to having this method would be having a foreachBuffer(f: Buffer => T) -// def writeTo(channel: WritableByteChannel): Long -// -//// -//// def skip(skipBy: Long): Unit -//// -//// def position(newPosition: Long): Unit -//// -//// /** -//// * Clears this buffer. The position is set to zero, the limit is set to -//// * the capacity, and the mark is discarded. -//// * -//// *
Invoke this method before using a sequence of channel-read or -//// * put operations to fill this buffer. -//// * -//// *
This method does not actually erase the data in the buffer, but it -//// * is named as if it did because it will most often be used in situations -//// * in which that might as well be the case.
-//// */ -//// def clear(): Unit -//// -//// /** -//// * Flips this buffer. The limit is set to the current position and then -//// * the position is set to zero. If the mark is defined then it is -//// * discarded. -//// * -//// *
After a sequence of channel-read or put operations, invoke -//// * this method to prepare for a sequence of channel-write or relative -//// * get operations. -//// */ -//// def flip(): Unit -// -// /** -// * Rewinds this buffer. The position is set to zero and the mark is -// * discarded. -// * -// *
Invoke this method before a sequence of channel-write or get -// * operations, assuming that the limit has already been set -// * appropriately. -// */ -// def rewind(): Unit -// -// /** -// * Returns the number of elements between the current position and the -// * limit.
-// * -// * @return The number of elements remaining in this buffer -// */ -// def remaining(): Long -//} -//// -////class ChainedLargeByteBuffer(private[network] val underlying: ChainedBuffer) extends LargeByteBuffer { -//// -//// def capacity = underlying.capacity -//// -//// var _pos = 0l -//// -//// def get(dst: Array[Byte],offset: Int,length: Int): Unit = { -//// underlying.read(_pos, dst, offset, length) -//// _pos += length -//// } -//// -//// def get(): Byte = { -//// val b = underlying.read(_pos) -//// _pos += 1 -//// b -//// } -//// -//// def put(bytes: LargeByteBuffer): Unit = { -//// ??? -//// } -//// -//// def position: Long = _pos -//// def position(position: Long): Unit = { -//// _pos = position -//// } -//// def remaining(): Long = { -//// underlying.size - position -//// } -//// -//// def duplicate(): ChainedLargeByteBuffer = { -//// new ChainedLargeByteBuffer(underlying) -//// } -//// -//// def rewind(): Unit = { -//// _pos = 0 -//// } -//// -//// def limit(): Long = { -//// capacity -//// } -//// -//// def limit(newLimit: Long): Unit = { -//// ??? -//// } -//// -//// def writeTo(channel:WritableByteChannel): Long = { -//// var written = 0l -//// underlying.chunks.foreach{bytes => -//// //TODO test this -//// val buffer = ByteBuffer.wrap(bytes) -//// while (buffer.hasRemaining) -//// channel.write(buffer) -//// written += bytes.length -//// } -//// written -//// } -////} -// -//class WrappedLargeByteBuffer(private[spark] val underlying: Array[ByteBuffer]) extends LargeByteBuffer { -// -// val (totalCapacity, chunkOffsets) = { -// var sum = 0l -// val offsets = new Array[Long](underlying.size) -// (0 until underlying.size).foreach{idx => -// offsets(idx) = sum -// sum += underlying(idx).capacity() -// } -// (sum, offsets) -// } -// -// private var _pos = 0l -// private var currentBufferIdx = 0 -// private var currentBuffer = underlying(0) -// private var _limit = totalCapacity -// -// def capacity = totalCapacity -// -// def get(dst: Array[Byte], offset: Int, length: Int): Unit = { -// var moved = 0 -// while (moved < length) { -// val toRead = math.min(length - moved, currentBuffer.remaining()) -// currentBuffer.get(dst, offset, toRead) -// moved += toRead -// updateCurrentBuffer() -// } -// } -// -// def get(): Byte = { -// val r = currentBuffer.get() -// _pos += 1 -// updateCurrentBuffer() -// r -// } -// -// private def updateCurrentBuffer(): Unit = { -// //TODO fix end condition -// while(!currentBuffer.hasRemaining()) { -// currentBufferIdx += 1 -// currentBuffer = underlying(currentBufferIdx) -// } -// } -// -// def put(bytes: LargeByteBuffer): Unit = { -// ??? -// } -// -// def position: Long = _pos -// def position(position: Long): Unit = { -// //XXX check range? -// _pos = position -// } -// def remaining(): Long = { -// totalCapacity - _pos -// } -// -// def duplicate(): WrappedLargeByteBuffer = { -// new WrappedLargeByteBuffer(underlying.map{_.duplicate()}) -// } -// -// def rewind(): Unit = { -// _pos = 0 -// underlying.foreach{_.rewind()} -// } -// -// def limit(): Long = { -// totalCapacity -// } -// -// def limit(newLimit: Long) = { -// //XXX check range? set limits in sub buffers? -// _limit = newLimit -// } -// -// def writeTo(channel: WritableByteChannel): Long = { -// var written = 0l -// underlying.foreach{buffer => -// //TODO test this -// //XXX do we care about respecting the limit here? 
-// written += buffer.remaining() -// while (buffer.hasRemaining) -// channel.write(buffer) -// } -// written -// } -// -//} -// -//object LargeByteBuffer { -// -// def asLargeByteBuffer(byteBuffer: ByteBuffer): LargeByteBuffer = { -// new WrappedLargeByteBuffer(Array(byteBuffer)) -// } -// -// def asLargeByteBuffer(bytes: Array[Byte]): LargeByteBuffer = { -// new WrappedLargeByteBuffer(Array(ByteBuffer.wrap(bytes))) -// } -// -//// -//// def allocateOnHeap(size: Long, maxChunk: Int): LargeByteBuffer = { -//// val buffer = ChainedBuffer.withInitialSize(maxChunk, size) -//// new ChainedLargeByteBuffer(buffer) -//// } -// -// def mapFile( -// channel: FileChannel, -// mode: MapMode, -// offset: Long, -// length: Long, -// maxChunk: Int = Integer.MAX_VALUE - 1e6.toInt -// ): LargeByteBuffer = { -// val offsets = new ArrayBuffer[Long]() -// var curOffset = offset -// val end = offset + length -// while (curOffset < end) { -// offsets += curOffset -// val length = math.min(end - curOffset, maxChunk) -// curOffset += length -// } -// offsets += end -// val chunks = new Array[ByteBuffer](offsets.size - 1) -// (0 until offsets.size - 1).foreach{idx => -// chunks(idx) = channel.map(mode, offsets(idx), offsets(idx + 1) - offsets(idx)) -// } -// new WrappedLargeByteBuffer(chunks) -// } -//} -// -// -//// -/////** -//// * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G -//// * which ByteBuffers are limited to. -//// * Externally, it exposes all the api which java.nio.ByteBuffer exposes. -//// * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. -//// * Not all the data might be loaded into memory (like disk or tachyon data) - so actual -//// * memory footprint - heap and vm could be much lower than capacity. -//// * -//// * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this -//// * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! -//// * -//// * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this -//// * will require the file to be kept open (repeatedly opening/closing file is not good -//// * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is -//// * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) -//// * -//// * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is -//// * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some -//// * cases (when we duplicate/slice them). Currently spark does not need this, but might in future -//// * so relook at it later. -//// */ -////// We should make this constructor private: but for now, -////// leaving it public since TachyonStore needs it -////class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], -//// private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { -//// -//// // TODO: TEMP code: to flush out potential resource leaks. 
REMOVE ME -//// private val allocateLocationThrowable: Throwable = { -//// if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { -//// new Throwable("blockId = " + BlockManager.getLookupBlockId) -//// } else { -//// null -//// } -//// } -//// private var disposeLocationThrowable: Throwable = null -//// -//// @volatile private var allowCleanerOverride = true -//// @volatile private var cleaner: BufferCleaner = new BufferCleaner { -//// override def doClean(buffer: LargeByteBuffer) = { -//// assert (LargeByteBuffer.this == buffer) -//// doDispose(needRelease = false) -//// } -//// } -//// -//// // should not be empty -//// assert (null != inputContainers && ! inputContainers.isEmpty) -//// // should not have any null's -//// assert (inputContainers.find(_ == null).isEmpty) -//// -//// // println("Num containers = " + inputContainers.size) -//// -//// // Position, limit and capacity relevant over the engire LargeByteBuffer -//// @volatile private var globalPosition = 0L -//// @volatile private var globalLimit = 0L -//// @volatile private var currentContainerIndex = 0 -//// -//// // The buffers in which the actual data is held. -//// private var containers: Array[ByteBufferContainer] = null -//// -//// // aggregate capacities of the individual buffers. -//// // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be -//// // sum of capacity of 0th and 1st block buffer -//// private var bufferPositionStart: Array[Long] = null -//// -//// // Contains the indices of a containers which requires release before subsequent invocation of -//// // read/write should be serviced. This is required since current read/write might have moved the -//// // position but since we are returning bytebuffers which depend on the validity of the existing -//// // bytebuffer, we cant release them yet. -//// private var needReleaseIndices = new HashSet[Int]() -//// -//// private val readable = ! inputContainers.exists(! _.isReadable) -//// private val writable = ! inputContainers.exists(! 
_.isWritable) -//// -//// -//// // initialize -//// @volatile private var globalCapacity = { -//// -//// // Ensure that there are no empty buffers : messes up with our code : unless it -//// // is a single buffer (for empty buffer for marker case) -//// assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) -//// -//// containers = { -//// if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray -//// } -//// containers.foreach(_.validate()) -//// -//// def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { -//// val buff = new ArrayBuffer[Long](arr.length + 1) -//// buff += 0L -//// -//// buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) -//// assert (buff.length == arr.length + 1) -//// bufferPositionStart = buff.toArray -//// } -//// -//// initializeBufferPositionStart(containers) -//// -//// // remove references from inputBuffers -//// inputContainers.clear() -//// -//// globalLimit = bufferPositionStart(containers.length) -//// globalPosition = 0L -//// currentContainerIndex = 0 -//// -//// assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) -//// -//// globalLimit -//// } -//// -//// final def position(): Long = globalPosition -//// -//// final def limit(): Long = globalLimit -//// -//// final def capacity(): Long = globalCapacity -//// -//// final def limit(newLimit: Long) { -//// if ((newLimit > capacity()) || (newLimit < 0)) { -//// throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) -//// } -//// -//// globalLimit = newLimit -//// if (position() > newLimit) position(newLimit) -//// } -//// -//// def skip(skipBy: Long) = position(position() + skipBy) -//// -//// private def releasePendingContainers() { -//// if (! needReleaseIndices.isEmpty) { -//// val iter = needReleaseIndices.iterator -//// while (iter.hasNext) { -//// val index = iter.next() -//// assert (index >= 0 && index < containers.length) -//// // It is possible to move from one container to next before the previous -//// // container was acquired. For example, get forcing move to next container -//// // since current was exhausted immediatelly followed by a position() -//// // so the container we moved to was never acquired. -//// -//// // assert (containers(index).isAcquired) -//// // will this always be satisfied ? -//// // assert (index != currentContainerIndex) -//// if (containers(index).isAcquired) containers(index).release() -//// } -//// needReleaseIndices.clear() -//// } -//// } -//// -//// private def toNewContainer(newIndex: Int) { -//// if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { -//// -//// assert (currentContainerIndex >= 0) -//// needReleaseIndices += currentContainerIndex -//// } -//// currentContainerIndex = newIndex -//// } -//// -//// // expensive method, sigh ... optimize it later ? -//// final def position(newPosition: Long) { -//// -//// if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() -//// -//// if (currentContainerIndex < bufferPositionStart.length - 1 && -//// newPosition >= bufferPositionStart(currentContainerIndex) && -//// newPosition < bufferPositionStart(currentContainerIndex + 1)) { -//// // Same buffer - easy method ... -//// globalPosition = newPosition -//// // Changed position - free previously returned buffers. 
-//// releasePendingContainers() -//// return -//// } -//// -//// // Find appropriate currentContainerIndex -//// // Since bufferPositionStart is sorted, can be replaced with binary search if required. -//// // For now, not in the perf critical path since buffers size is very low typically. -//// var index = 0 -//// val cLen = containers.length -//// while (index < cLen) { -//// if (newPosition >= bufferPositionStart(index) && -//// newPosition < bufferPositionStart(index + 1)) { -//// globalPosition = newPosition -//// toNewContainer(index) -//// // Changed position - free earlier and previously returned buffers. -//// releasePendingContainers() -//// return -//// } -//// index += 1 -//// } -//// -//// if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { -//// // boundary. -//// globalPosition = newPosition -//// toNewContainer(cLen) -//// // Changed position - free earlier and previously returned buffers. -//// releasePendingContainers() -//// return -//// } -//// -//// assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + -//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) -//// } -//// -//// -//// /** -//// * Clears this buffer. The position is set to zero, the limit is set to -//// * the capacity, and the mark is discarded. -//// * -//// *
Invoke this method before using a sequence of channel-read or -//// * put operations to fill this buffer. -//// * -//// *
This method does not actually erase the data in the buffer, but it -//// * is named as if it did because it will most often be used in situations -//// * in which that might as well be the case.
-//// */ -//// final def clear() { -//// // if (0 == globalCapacity) return -//// -//// needReleaseIndices += 0 -//// globalPosition = 0L -//// toNewContainer(0) -//// globalLimit = globalCapacity -//// -//// // Now free all pending containers -//// releasePendingContainers() -//// } -//// -//// /** -//// * Flips this buffer. The limit is set to the current position and then -//// * the position is set to zero. If the mark is defined then it is -//// * discarded. -//// * -//// *
After a sequence of channel-read or put operations, invoke -//// * this method to prepare for a sequence of channel-write or relative -//// * get operations. -//// */ -//// final def flip() { -//// needReleaseIndices += 0 -//// globalLimit = globalPosition -//// globalPosition = 0L -//// toNewContainer(0) -//// -//// // Now free all pending containers -//// releasePendingContainers() -//// } -//// -//// /** -//// * Rewinds this buffer. The position is set to zero and the mark is -//// * discarded. -//// * -//// *
Invoke this method before a sequence of channel-write or get -//// * operations, assuming that the limit has already been set -//// * appropriately. -//// */ -//// final def rewind() { -//// needReleaseIndices += 0 -//// globalPosition = 0L -//// toNewContainer(0) -//// -//// // Now free all pending containers -//// releasePendingContainers() -//// } -//// -//// /** -//// * Returns the number of elements between the current position and the -//// * limit.
-//// * -//// * @return The number of elements remaining in this buffer -//// */ -//// final def remaining(): Long = { -//// globalLimit - globalPosition -//// } -//// -//// /** -//// * Tells whether there are any elements between the current position and -//// * the limit.
-//// * -//// * @return true if, and only if, there is at least one element -//// * remaining in this buffer -//// */ -//// final def hasRemaining() = { -//// globalPosition < globalLimit -//// } -//// -//// // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) -//// -//// // number of bytes remaining in currently active underlying buffer -//// private def currentRemaining(): Int = { -//// if (hasRemaining()) { -//// // validate currentContainerIndex is valid -//// assert (globalPosition >= bufferPositionStart(currentContainerIndex) && -//// globalPosition < bufferPositionStart(currentContainerIndex + 1), -//// "globalPosition = " + globalPosition + -//// ", currentContainerIndex = " + currentContainerIndex + -//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -//// -//// currentRemaining0(currentContainerIndex) -//// } else 0 -//// } -//// -//// // Without any validation : required when we are bumping the index (when validation will fail) ... -//// private def currentRemaining0(which: Int): Int = { -//// // currentBuffer().remaining() -//// math.max(0, math.min(bufferPositionStart(which + 1), -//// globalLimit) - globalPosition).asInstanceOf[Int] -//// } -//// -//// // Set the approppriate position/limit for the current underlying buffer to mirror our -//// // the LargeByteBuffer's state. -//// private def fetchCurrentBuffer(): ByteBuffer = { -//// releasePendingContainers() -//// -//// assert (currentContainerIndex < containers.length) -//// -//// val container = containers(currentContainerIndex) -//// if (! container.isAcquired) { -//// container.acquire() -//// } -//// -//// assert (container.isAcquired) -//// if (LargeByteBuffer.enableExpensiveAssert) { -//// assert (! containers.exists( b => (b ne container) && b.isAcquired)) -//// } -//// -//// assert (currentContainerIndex < bufferPositionStart.length && -//// globalPosition < bufferPositionStart(currentContainerIndex + 1), -//// "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + -//// bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) -//// -//// val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). -//// asInstanceOf[Int] -//// -//// val buffer = container.getByteBuffer -//// buffer.position(buffPosition) -//// val diff = buffer.capacity - buffPosition -//// val left = remaining() -//// if (diff <= left) { -//// buffer.limit(buffer.capacity()) -//// } else { -//// // Can happen if limit() was called. -//// buffer.limit(buffPosition + left.asInstanceOf[Int]) -//// } -//// -//// buffer -//// } -//// -//// // To be used ONLY to test in suites. -//// private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { -//// if ("1" != System.getProperty("SPARK_TESTING")) { -//// throw new IllegalStateException("This method is to be used ONLY within spark test suites") -//// } -//// -//// fetchCurrentBuffer() -//// } -//// -//// // Expects that the invoker has ensured that this can be safely invoked. -//// // That is, it wont be invoked when the loop wont terminate. -//// private def toNonEmptyBuffer() { -//// -//// if (! hasRemaining()) { -//// var newIndex = currentContainerIndex -//// // Ensure we are in the right block or not. 
-//// while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { -//// newIndex += 1 -//// } -//// toNewContainer(newIndex) -//// // Do not do this - since we might not yet have consumed the buffer which caused EOF right now -//// /* -//// // Add last one also, and release it too - since we are at the end of the buffer with nothing -//// // more pending. -//// if (newIndex >= 0 && currentContainerIndex < containers.length) { -//// needReleaseIndices += newIndex -//// } -//// */ -//// assert (currentContainerIndex >= 0) -//// // releasePendingContainers() -//// return -//// } -//// -//// var index = currentContainerIndex -//// while (0 == currentRemaining0(index) && index < containers.length) { -//// index += 1 -//// } -//// assert (currentContainerIndex < containers.length) -//// toNewContainer(index) -//// assert (0 != currentRemaining()) -//// } -//// -//// private def assertPreconditions(containerIndex: Int) { -//// assert (globalPosition >= bufferPositionStart(containerIndex), -//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + -//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -//// assert (globalPosition < bufferPositionStart(containerIndex + 1), -//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + -//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -//// -//// assert (globalLimit <= globalCapacity) -//// assert (containerIndex < containers.length) -//// } -//// -//// -//// /** -//// * Attempts to return a ByteBuffer of the requested size. -//// * It is possible to return a buffer of size smaller than requested -//// * even though hasRemaining == true -//// * -//// * On return, position would have been moved 'ahead' by the size of the buffer returned : -//// * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer -//// * -//// * -//// * This is used to primarily retrieve content of this buffer to expose via ByteBuffer -//// * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the -//// * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer -//// * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying -//// * container is a disk backed container, and we make subsequent calls to get(), the returned -//// * ByteBuffer can be dispose'ed off -//// * -//// * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
-//// * @return -//// */ -//// -//// private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { -//// fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) -//// } -//// -//// private def fetchBufferOfSizeImpl(maxChunkSize: Int, -//// canReleaseContainers: Boolean): ByteBuffer = { -//// if (canReleaseContainers) releasePendingContainers() -//// assert (maxChunkSize > 0) -//// -//// // not checking for degenerate case of maxChunkSize == 0 -//// if (globalPosition >= globalLimit) { -//// // throw exception -//// throw new BufferUnderflowException() -//// } -//// -//// // Check preconditions : disable these later, since they might be expensive to -//// // evaluate for every IO op -//// assertPreconditions(currentContainerIndex) -//// -//// val currentBufferRemaining = currentRemaining() -//// -//// assert (currentBufferRemaining > 0) -//// -//// val size = math.min(currentBufferRemaining, maxChunkSize) -//// -//// val newBuffer = if (currentBufferRemaining > maxChunkSize) { -//// val currentBuffer = fetchCurrentBuffer() -//// val buff = ByteBufferContainer.createSlice(currentBuffer, -//// currentBuffer.position(), maxChunkSize) -//// assert (buff.remaining() == maxChunkSize) -//// buff -//// } else { -//// val currentBuffer = fetchCurrentBuffer() -//// val buff = currentBuffer.slice() -//// assert (buff.remaining() == currentBufferRemaining) -//// buff -//// } -//// -//// assert (size == newBuffer.remaining()) -//// assert (0 == newBuffer.position()) -//// assert (size == newBuffer.limit()) -//// assert (newBuffer.capacity() == newBuffer.limit()) -//// -//// globalPosition += newBuffer.remaining -//// toNonEmptyBuffer() -//// -//// newBuffer -//// } -//// -//// // Can we service the read/write from the currently active (underlying) bytebuffer or not. -//// // For almost all cases, this will return true allowing us to optimize away the more expensive -//// // computations. -//// private def localReadWritePossible(size: Int) = -//// size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) -//// -//// -//// def getLong(): Long = { -//// assert (readable) -//// releasePendingContainers() -//// -//// if (remaining() < 8) throw new BufferUnderflowException -//// -//// if (localReadWritePossible(8)) { -//// val buff = fetchCurrentBuffer() -//// assert (buff.remaining() >= 8) -//// val retval = buff.getLong -//// globalPosition += 8 -//// toNonEmptyBuffer() -//// return retval -//// } -//// -//// val buff = readFully(8) -//// buff.getLong -//// } -//// -//// def getInt(): Int = { -//// assert (readable) -//// releasePendingContainers() -//// -//// if (remaining() < 4) throw new BufferUnderflowException -//// -//// if (localReadWritePossible(4)) { -//// val buff = fetchCurrentBuffer() -//// assert (buff.remaining() >= 4) -//// val retval = buff.getInt -//// globalPosition += 4 -//// toNonEmptyBuffer() -//// return retval -//// } -//// -//// val buff = readFully(4) -//// buff.getInt -//// } -//// -//// def getChar(): Char = { -//// assert (readable) -//// releasePendingContainers() -//// -//// if (remaining() < 2) throw new BufferUnderflowException -//// -//// if (localReadWritePossible(2)) { -//// val buff = fetchCurrentBuffer() -//// assert (buff.remaining() >= 2) -//// val retval = buff.getChar -//// globalPosition += 2 -//// toNonEmptyBuffer() -//// return retval -//// } -//// -//// // if slice is becoming too expensive, revisit this ... 
-//// val buff = readFully(2) -//// buff.getChar -//// } -//// -//// def get(): Byte = { -//// assert (readable) -//// releasePendingContainers() -//// -//// if (! hasRemaining()) throw new BufferUnderflowException -//// -//// // If we have remaining bytes, previous invocations MUST have ensured that we are at -//// // a buffer which has data to be read. -//// assert (localReadWritePossible(1)) -//// -//// val buff = fetchCurrentBuffer() -//// assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) -//// val retval = buff.get() -//// globalPosition += 1 -//// toNonEmptyBuffer() -//// -//// retval -//// } -//// -//// def get(arr: Array[Byte], offset: Int, size: Int): Int = { -//// assert (readable) -//// releasePendingContainers() -//// -//// LargeByteBuffer.checkOffsets(arr, offset, size) -//// -//// // kyro depends on this it seems ? -//// // assert (size > 0) -//// if (0 == size) return 0 -//// -//// if (! hasRemaining()) return -1 -//// -//// if (localReadWritePossible(size)) { -//// val buff = fetchCurrentBuffer() -//// assert (buff.remaining() >= size) -//// buff.get(arr, offset, size) -//// globalPosition += size -//// toNonEmptyBuffer() -//// return size -//// } -//// -//// var remainingSize = math.min(size, remaining()).asInstanceOf[Int] -//// var currentOffset = offset -//// -//// while (remainingSize > 0) { -//// val buff = fetchBufferOfSize(remainingSize) -//// val toCopy = math.min(buff.remaining(), remainingSize) -//// -//// buff.get(arr, currentOffset, toCopy) -//// currentOffset += toCopy -//// remainingSize -= toCopy -//// } -//// -//// currentOffset - offset -//// } -//// -//// -//// private def createSlice(size: Long): LargeByteBuffer = { -//// -//// releasePendingContainers() -//// -//// if (remaining() < size) { -//// // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) -//// throw new BufferOverflowException -//// } -//// -//// // kyro depends on this it seems ? -//// // assert (size > 0) -//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER -//// -//// val arr = new ArrayBuffer[ByteBufferContainer](2) -//// var totalLeft = size -//// -//// // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) -//// -//// var containerIndex = currentContainerIndex -//// while (totalLeft > 0 && hasRemaining()) { -//// assertPreconditions(containerIndex) -//// val container = containers(containerIndex) -//// val currentLeft = currentRemaining0(containerIndex) -//// -//// assert (globalPosition + currentLeft <= globalLimit) -//// assert (globalPosition >= bufferPositionStart(containerIndex) && -//// (globalPosition < bufferPositionStart(containerIndex + 1))) -//// -//// val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] -//// val sliceSize = math.min(totalLeft, currentLeft) -//// assert (from >= 0) -//// assert (sliceSize > 0 && sliceSize <= Int.MaxValue) -//// -//// val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) -//// arr += slice -//// -//// globalPosition += sliceSize -//// totalLeft -= sliceSize -//// if (currentLeft == sliceSize) containerIndex += 1 -//// } -//// -//// // Using toNonEmptyBuffer instead of directly moving to next here so that -//// // other checks can be performed there. 
-//// toNonEmptyBuffer() -//// // force cleanup - this is fine since we are not using the buffers directly -//// // which are actively needed (the returned value is on containers which can -//// // recreate) -//// releasePendingContainers() -//// // free current container if acquired. -//// if (currentContainerIndex < containers.length) { -//// containers(currentContainerIndex).release() -//// } -//// assert (currentContainerIndex == containerIndex) -//// -//// val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) -//// retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) -//// retval -//// } -//// -//// // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers -//// // This is to be used only for writes : and ensures that writes are done into the appropriate -//// // underlying bytebuffers. -//// def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { -//// assert(writable) -//// assert(size >= 0) -//// -//// createSlice(size) -//// } -//// -//// // get a buffer which is of the specified size and contains data from the underlying buffers -//// // Note, the actual data might be spread across the underlying buffers. -//// // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! -//// private def readFully(size: Int): ByteBuffer = { -//// assert (readable) -//// -//// if (remaining() < size) { -//// // throw exception -//// throw new BufferUnderflowException() -//// } -//// -//// // kyro depends on this it seems ? -//// // assert (size > 0) -//// if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER -//// -//// // Expected to be handled elsewhere. -//// assert (! localReadWritePossible(size)) -//// -//// val localBuff = { -//// val buff = fetchBufferOfSize(size) -//// // assert(buff.remaining() <= size) -//// // if (buff.remaining() == size) return buff -//// assert(buff.remaining() < size) -//// ByteBuffer.allocate(size).put(buff) -//// } -//// -//// // assert (localBuff.hasRemaining) -//// -//// while (localBuff.hasRemaining) { -//// val buff = fetchBufferOfSize(localBuff.remaining()) -//// localBuff.put(buff) -//// } -//// -//// localBuff.flip() -//// localBuff -//// } -//// -//// -//// -//// def put(b: Byte) { -//// assert (writable) -//// if (remaining() < 1) { -//// // logInfo("put byte. remaining = " + remaining() + ", this = " + this) -//// throw new BufferOverflowException -//// } -//// -//// assert (currentRemaining() > 0) -//// -//// fetchCurrentBuffer().put(b) -//// globalPosition += 1 -//// // Check to need to bump the index ? -//// toNonEmptyBuffer() -//// } -//// -//// -//// def put(buffer: ByteBuffer) { -//// assert (writable) -//// if (remaining() < buffer.remaining()) { -//// throw new BufferOverflowException -//// } -//// -//// val bufferRemaining = buffer.remaining() -//// if (localReadWritePossible(bufferRemaining)) { -//// -//// assert (currentRemaining() >= bufferRemaining) -//// -//// fetchCurrentBuffer().put(buffer) -//// -//// globalPosition += bufferRemaining -//// toNonEmptyBuffer() -//// return -//// } -//// -//// while (buffer.hasRemaining) { -//// val currentBufferRemaining = currentRemaining() -//// val bufferRemaining = buffer.remaining() -//// -//// if (currentBufferRemaining >= bufferRemaining) { -//// fetchCurrentBuffer().put(buffer) -//// globalPosition += bufferRemaining -//// } else { -//// // Split across buffers. 
-//// val currentBuffer = fetchCurrentBuffer() -//// assert (currentBuffer.remaining() >= currentBufferRemaining) -//// val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), -//// currentBufferRemaining) -//// assert (sliced.remaining() == currentBufferRemaining) -//// currentBuffer.put(sliced) -//// // move buffer pos -//// buffer.position(buffer.position() + currentBufferRemaining) -//// -//// globalPosition += currentBufferRemaining -//// } -//// toNonEmptyBuffer() -//// } -//// -//// assert (! hasRemaining() || currentRemaining() > 0) -//// } -//// -//// def put(other: LargeByteBuffer) { -//// assert (writable) -//// if (this.remaining() < other.remaining()) { -//// throw new BufferOverflowException -//// } -//// -//// while (other.hasRemaining()) { -//// val buffer = other.fetchBufferOfSize(other.currentRemaining()) -//// this.put(buffer) -//// } -//// } -//// -//// -//// def duplicate(): LargeByteBuffer = { -//// val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) -//// // We do a duplicate as part of construction - so avoid double duplicate. -//// // containersCopy ++= containers.map(_.duplicate()) -//// containersCopy ++= containers -//// val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) -//// -//// // set limit and position (in that order) ... -//// retval.limit(this.limit()) -//// retval.position(this.position()) -//// -//// // Now release our containers - if any had been acquired -//// releasePendingContainers() -//// -//// retval -//// } -//// -//// -//// /** -//// * 'read' a LargeByteBuffer of size specified and return that. -//// * Position will be incremented by size -//// * -//// * The name might be slightly confusing : rename ? -//// * -//// * @param size Amount of data to be read from this buffer and returned -//// * @return -//// */ -//// def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { -//// if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException -//// if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException -//// -//// -//// assert (readable) -//// assert (size >= 0) -//// -//// releasePendingContainers() -//// -//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER -//// -//// createSlice(size) -//// } -//// -//// -//// // This is essentially a workaround to exposing underlying buffers -//// def readFrom(channel: ReadableByteChannel): Long = { -//// -//// assert (writable) -//// releasePendingContainers() -//// -//// // this also allows us to avoid nasty corner cases in the loop. -//// if (! hasRemaining()) { -//// // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) -//// throw new BufferOverflowException -//// } -//// -//// var totalBytesRead = 0L -//// -//// while (hasRemaining()) { -//// // read what we can ... -//// val buffer = fetchCurrentBuffer() -//// val bufferRemaining = currentRemaining() -//// val bytesRead = channel.read(buffer) -//// -//// if (bytesRead > 0) { -//// totalBytesRead += bytesRead -//// // bump position too .. -//// globalPosition += bytesRead -//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -//// } -//// else if (-1 == bytesRead) { -//// // if we had already read some data in the loop, return that. -//// if (totalBytesRead > 0) return totalBytesRead -//// return -1 -//// } // nothing available to read, retry later. 
return -//// else if (0 == bytesRead) { -//// return totalBytesRead -//// } -//// -//// // toNonEmptyBuffer() -//// } -//// -//// // Cleanup last buffer ? -//// toNonEmptyBuffer() -//// totalBytesRead -//// } -//// -//// // This is essentially a workaround to exposing underlying buffers -//// def readFrom(inStrm: InputStream): Long = { -//// -//// assert (writable) -//// releasePendingContainers() -//// -//// // this also allows us to avoid nasty corner cases in the loop. -//// // if (! hasRemaining()) throw new BufferOverflowException -//// if (! hasRemaining()) return 0 -//// -//// var totalBytesRead = 0L -//// -//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -//// -//// while (hasRemaining()) { -//// // read what we can ... note, since there is no gaurantee that underlying buffer might -//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. -//// // see if we can optimize this later ... -//// val buffer = fetchCurrentBuffer() -//// val bufferRemaining = buffer.remaining() -//// val max = math.min(buff.length, bufferRemaining) -//// val bytesRead = inStrm.read(buff, 0, max) -//// -//// if (bytesRead > 0) { -//// buffer.put(buff, 0, bytesRead) -//// totalBytesRead += bytesRead -//// // bump position too .. -//// globalPosition += bytesRead -//// // buffer.position(buffer.position + bytesRead) -//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -//// } -//// else if (-1 == bytesRead) { -//// // if we had already read some data in the loop, return that. -//// if (totalBytesRead > 0) return totalBytesRead -//// return -1 -//// } // nothing available to read, retry later. return -//// else if (0 == bytesRead) { -//// return totalBytesRead -//// } -//// -//// // toNonEmptyBuffer() -//// } -//// -//// totalBytesRead -//// } -//// -//// // This is essentially a workaround to exposing underlying buffers -//// // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce -//// // code for performance reasons. -//// def readFrom(inStrm: DataInput): Long = { -//// -//// assert (writable) -//// releasePendingContainers() -//// -//// // this also allows us to avoid nasty corner cases in the loop. -//// // if (! hasRemaining()) throw new BufferOverflowException -//// if (! hasRemaining()) return 0 -//// -//// var totalBytesRead = 0L -//// -//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -//// -//// while (hasRemaining()) { -//// // read what we can ... note, since there is no gaurantee that underlying buffer might -//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. -//// // see if we can optimize this later ... -//// val buffer = fetchCurrentBuffer() -//// val bufferRemaining = buffer.remaining() -//// val max = math.min(buff.length, bufferRemaining) -//// inStrm.readFully(buff, 0, max) -//// val bytesRead = max -//// -//// if (bytesRead > 0) { -//// buffer.put(buff, 0, bytesRead) -//// totalBytesRead += bytesRead -//// // bump position too .. -//// globalPosition += bytesRead -//// // buffer.position(buffer.position() + bytesRead) -//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -//// } -//// else if (-1 == bytesRead) { -//// // if we had already read some data in the loop, return that. -//// if (totalBytesRead > 0) return totalBytesRead -//// return -1 -//// } // nothing available to read, retry later. 
return -//// else if (0 == bytesRead) { -//// return totalBytesRead -//// } -//// -//// // toNonEmptyBuffer() -//// } -//// -//// totalBytesRead -//// } -//// -//// // This is essentially a workaround to exposing underlying buffers -//// // Note: tries to do it efficiently without needing to load everything into memory -//// // (particularly for diskbacked buffers, etc). -//// def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { -//// -//// assert (readable) -//// releasePendingContainers() -//// -//// // this also allows us to avoid nasty corner cases in the loop. -//// if (! hasRemaining()) throw new BufferUnderflowException -//// -//// var totalBytesWritten = 0L -//// -//// while (hasRemaining()) { -//// // Write what we can ... -//// val buffer = fetchCurrentBuffer() -//// val bufferRemaining = buffer.remaining() -//// assert (bufferRemaining > 0) -//// val bytesWritten = channel.write(buffer) -//// -//// if (bytesWritten > 0) { -//// totalBytesWritten += bytesWritten -//// // bump position too .. -//// globalPosition += bytesWritten -//// if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() -//// assert (! hasRemaining() || currentRemaining() > 0) -//// } -//// else if (0 == bytesWritten) { -//// return totalBytesWritten -//// } -//// -//// // toNonEmptyBuffer() -//// } -//// -//// assert (! hasRemaining()) -//// if (cleanup) { -//// free() -//// } -//// totalBytesWritten -//// } -//// -//// // This is essentially a workaround to exposing underlying buffers -//// def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { -//// -//// assert (readable) -//// releasePendingContainers() -//// -//// // this also allows us to avoid nasty corner cases in the loop. -//// if (! hasRemaining()) throw new BufferUnderflowException -//// -//// var totalBytesWritten = 0L -//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -//// -//// while (hasRemaining()) { -//// // write what we can ... note, since there is no gaurantee that underlying buffer might -//// // expose array() method, we do double copy - from bytearray to buff and from -//// // buff to outputstream. see if we can optimize this later ... -//// val buffer = fetchCurrentBuffer() -//// val bufferRemaining = buffer.remaining() -//// val size = math.min(bufferRemaining, buff.length) -//// buffer.get(buff, 0, size) -//// outStrm.write(buff, 0, size) -//// -//// totalBytesWritten += size -//// // bump position too .. -//// globalPosition += size -//// -//// if (size >= bufferRemaining) toNonEmptyBuffer() -//// } -//// -//// toNonEmptyBuffer() -//// if (cleanup) { -//// free() -//// } -//// totalBytesWritten -//// } -//// -//// def asInputStream(): InputStream = { -//// new InputStream() { -//// override def read(): Int = { -//// if (! hasRemaining()) return -1 -//// get() -//// } -//// -//// override def read(arr: Array[Byte], off: Int, len: Int): Int = { -//// if (! hasRemaining()) return -1 -//// -//// get(arr, off, len) -//// } -//// -//// override def available(): Int = { -//// // current remaining is what can be read without blocking -//// // anything higher might need disk access/buffer swapping. -//// /* -//// val left = remaining() -//// math.min(left, Int.MaxValue).asInstanceOf[Int] -//// */ -//// currentRemaining() -//// } -//// } -//// } -//// -//// def getCleaner() = cleaner -//// -//// /** -//// * @param cleaner The previous cleaner, so that the caller can chain them if required. 
-//// * @return -//// */ -//// private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { -//// overrideCleaner(cleaner, allowOverride = true) -//// } -//// -//// private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { -//// if (! this.allowCleanerOverride) { -//// // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free -//// return this.cleaner -//// } -//// -//// this.allowCleanerOverride = allowOverride -//// assert (null != cleaner) -//// val prev = this.cleaner -//// this.cleaner = cleaner -//// // logInfo("Overriding " + prev + " with " + this.cleaner) -//// prev -//// } -//// -//// private def doReleaseAll() { -//// for (container <- containers) { -//// container.release() -//// } -//// } -//// -//// def free(invokeCleaner: Boolean = true) { -//// // logInfo("Free on " + this + ", cleaner = " + cleaner) -//// // always invoking release -//// doReleaseAll() -//// -//// if (invokeCleaner) cleaner.clean(this) -//// } -//// -//// private def doDispose(needRelease: Boolean) { -//// -//// if (disposeLocationThrowable ne null) { -//// logError("Already free'ed earlier at : ", disposeLocationThrowable) -//// logError("Current at ", new Throwable) -//// throw new IllegalStateException("Already freed.") -//// } -//// disposeLocationThrowable = new Throwable() -//// -//// // Forcefully cleanup all -//// if (needRelease) doReleaseAll() -//// -//// // Free in a different loop, in case different containers refer to same resource -//// // to release (like file) -//// for (container <- containers) { -//// container.free() -//// } -//// -//// needReleaseIndices.clear() -//// -//// // We should not use this buffer anymore : set the values such that f -//// // we dont ... -//// globalPosition = 0 -//// globalLimit = 0 -//// globalCapacity = 0 -//// } -//// -//// // copy data over ... MUST be used only for cases where array is known to be -//// // small to begin with. slightly risky method due to that assumption -//// def toByteArray(): Array[Byte] = { -//// val positionBackup = position() -//// val size = remaining() -//// if (size > Int.MaxValue) { -//// throw new IllegalStateException( -//// "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") -//// } -//// -//// val retval = new Array[Byte](size.asInstanceOf[Int]) -//// val readSize = get(retval, 0, retval.length) -//// assert (readSize == retval.length, -//// "readSize = " + readSize + ", retval.length = " + retval.length) -//// -//// position(positionBackup) -//// -//// retval -//// } -//// -//// // copy data over ... MUST be used only for cases where array is known to be -//// // small to begin with. slightly risky method due to that assumption -//// def toByteBuffer(): ByteBuffer = { -//// ByteBuffer.wrap(toByteArray()) -//// } -//// -//// def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { -//// val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) -//// val currentPosition = position() -//// retval.put(this) -//// position(currentPosition) -//// retval.clear() -//// retval -//// } -//// -//// -//// -//// // This is ONLY used for testing : that too as part of development of this and associated classes -//// // remove before contributing to spark. 
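The toByteArray conversion above is only safe while the data still fits in a single array. A minimal sketch of that guard, assuming plain java.nio chunks rather than the container classes used here (names are illustrative only):

import java.nio.ByteBuffer

object ToArraySketch {
  // Copy the remaining bytes of all chunks into one array, refusing anything over Int.MaxValue bytes.
  def toByteArray(chunks: Seq[ByteBuffer]): Array[Byte] = {
    val total = chunks.map(_.remaining().toLong).sum
    require(total <= Int.MaxValue, s"cannot copy $total bytes into a single byte array")
    val out = new Array[Byte](total.toInt)
    var offset = 0
    chunks.foreach { chunk =>
      val n = chunk.remaining()
      chunk.duplicate().get(out, offset, n) // duplicate() so the chunk's own position is untouched
      offset += n
    }
    out
  }
}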
-//// def hexDump(): String = { -//// if (remaining() * 64 > Int.MaxValue) { -//// throw new UnsupportedOperationException("buffer too large " + remaining()) -//// } -//// -//// val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) -//// -//// var perLine = 0 -//// var first = true -//// for (b <- toByteArray()) { -//// perLine += 1 -//// if (perLine % 8 == 0) { -//// sb.append('\n') -//// first = true -//// } -//// if (! first) sb.append(' ') -//// first = false -//// sb.append(java.lang.Integer.toHexString(b & 0xff)) -//// } -//// sb.append('\n') -//// sb.toString() -//// } -//// -//// override def toString: String = { -//// val sb: StringBuffer = new StringBuffer -//// sb.append(getClass.getName) -//// sb.append(' ') -//// sb.append(System.identityHashCode(this)) -//// sb.append("@[pos=") -//// sb.append(position()) -//// sb.append(" lim=") -//// sb.append(limit()) -//// sb.append(" cap=") -//// sb.append(capacity()) -//// sb.append("]") -//// sb.toString -//// } -//// -//// -//// -//// override def finalize(): Unit = { -//// var marked = false -//// if (containers ne null) { -//// if (containers.exists(container => container.isAcquired && container.requireRelease())) { -//// marked = true -//// logError("BUG: buffer was not released - and now going out of scope. " + -//// "Potential resource leak. Allocated at ", allocateLocationThrowable) -//// containers.foreach(_.release()) -//// } -//// if (containers.exists(container => !container.isFreed && container.requireFree())) { -//// if (!marked) { -//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", -//// allocateLocationThrowable) -//// } -//// else { -//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak") -//// } -//// containers.foreach(_.free()) -//// } -//// } -//// super.finalize() -//// } -////} -//// -//// -////object LargeByteBuffer extends Logging { -//// -//// private val noopDisposeFunction = new BufferCleaner() { -//// protected def doClean(buffer: LargeByteBuffer) { -//// buffer.free(invokeCleaner = false) -//// } -//// } -//// -//// val enableExpensiveAssert = false -//// private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) -//// val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( -//// new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) -//// // Do not allow anyone else to override cleaner -//// EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) -//// -//// // 8K sufficient ? 
-//// private val TEMP_ARRAY_SIZE = 8192 -//// -//// /** -//// * Create a LargeByteBuffer of specified size which is split across -//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory -//// * ByteBuffer -//// * -//// */ -//// def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { -//// if (0 == totalSize) { -//// return EMPTY_BUFFER -//// } -//// -//// assert (totalSize > 0) -//// -//// val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) -//// val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) -//// val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) -//// -//// assert (lastBlockSize > 0) -//// -//// val bufferArray = { -//// val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) -//// for (index <- 0 until numBlocks - 1) { -//// val buff = ByteBuffer.allocate(blockSize) -//// // buff.clear() -//// arr += new HeapByteBufferContainer(buff, true) -//// } -//// arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) -//// assert (arr.length == numBlocks) -//// arr -//// } -//// -//// new LargeByteBuffer(bufferArray, false, false) -//// } -//// -//// /** -//// * Create a LargeByteBuffer of specified size which is split across -//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk -//// * -//// */ -//// private def allocateDiskBuffer(totalSize: Long, -//// blockManager: BlockManager): LargeByteBuffer = { -//// if (0 == totalSize) { -//// return EMPTY_BUFFER -//// } -//// -//// assert (totalSize > 0) -//// -//// // Create a file of the specified size. -//// val file = blockManager.diskBlockManager.createTempBlock()._2 -//// val raf = new RandomAccessFile(file, "rw") -//// try { -//// raf.setLength(totalSize) -//// } finally { -//// raf.close() -//// } -//// -//// readWriteDiskSegment(new FileSegment(file, 0, totalSize), -//// ephemeralDiskBacked = true, blockManager.ioConf) -//// } -//// -//// // The returned buffer takes up ownership of the underlying buffers -//// // (including dispos'ing that when done) -//// def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { -//// val nonEmpty = buffers.filter(_.hasRemaining) -//// -//// // cleanup the empty buffers -//// buffers.filter(! _.hasRemaining).foreach(b => BlockManager.dispose(b)) -//// -//// -//// if (nonEmpty.isEmpty) { -//// return EMPTY_BUFFER -//// } -//// -//// // slice so that offsets match our requirement -//// new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => -//// new HeapByteBufferContainer(b.slice(), true)), false, false) -//// } -//// -//// def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { -//// // only non empty arrays -//// val arrays = byteArrays.filter(_.length > 0) -//// if (0 == arrays.length) return EMPTY_BUFFER -//// -//// new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => -//// new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) -//// } -//// -//// def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { -//// -//// if (inputBuffers.isEmpty) return EMPTY_BUFFER -//// -//// if (! inputBuffers.exists(_.hasRemaining())) { -//// if (canDispose) inputBuffers.map(_.free()) -//// return EMPTY_BUFFER -//// } -//// -//// // release all temp resources acquired -//// inputBuffers.foreach(buff => buff.releasePendingContainers()) -//// // free current container if acquired. 
-//// inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { -//// buff.containers(buff.currentContainerIndex).release() -//// }) -//// // inputBuffers.foreach(b => b.doReleaseAll()) -//// -//// -//// // Dispose of any empty buffers -//// if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) -//// -//// // Find all containers we need. -//// val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) -//// -//// val containers = buffers.flatMap(_.containers) -//// assert (! containers.isEmpty) -//// // The in order containers of "buffers" seq constitute the required return value -//// val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, -//// // if you cant dispose, then we dont own the buffers : in which case, need duplicate -//// ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) -//// -//// if (canDispose) { -//// // override dispose of all other buffers. -//// val disposeFunctions = inputBuffers.map { -//// buffer => { -//// (buffer, buffer.overrideCleaner(noopDisposeFunction)) -//// } -//// } -//// -//// val cleaner = retval.getCleaner() -//// val newCleaner = new BufferCleaner { -//// protected def doClean(buffer: LargeByteBuffer) { -//// -//// assert (retval == buffer) -//// // default cleaner. -//// cleaner.clean(retval) -//// // not required, since we are within clean anyway. -//// // retval.free(invokeCleaner = false) -//// -//// // retval.doDispose(needRelease = true) -//// -//// // This might actually call dispose twice on some (initially) empty buffers, -//// // which is fine since we now guard against that. -//// disposeFunctions.foreach(v => v._2.clean(v._1)) -//// // Call the free method too : so that buffers are marked free ... -//// disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) -//// } -//// } -//// -//// val prev = retval.overrideCleaner(newCleaner) -//// assert (prev == cleaner) -//// } -//// -//// retval -//// } -//// -//// private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { -//// if (arr == null) { -//// throw new NullPointerException -//// } else if (offset < 0 || size < 0 || offset + size > arr.length) { -//// throw new IndexOutOfBoundsException -//// } -//// } -//// -//// def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { -//// if (size <= blockManager.ioConf.maxInMemSize) { -//// LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) -//// } else { -//// LargeByteBuffer.allocateDiskBuffer(size, blockManager) -//// } -//// } -//// -//// def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, -//// ephemeralDiskBacked: Boolean): LargeByteBuffer = { -//// // Split the block into multiple of BlockStore.maxBlockSize -//// val segmentSize = segment.length -//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] -//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) -//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) -//// -//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) -//// -//// for (index <- 0 until numBlocks - 1) { -//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, -//// segment.offset + index * blockSize, blockSize), ioConf) -//// } -//// -//// // Last block -//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, -//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) -//// -//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) -//// } -//// 
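readFromDiskSegment above and readWriteDiskSegment below both split a FileSegment into full-size pieces plus a smaller tail. The arithmetic behind numBlocks and lastBlockSize can be sketched on its own (an illustrative helper, not the actual IOConfig implementation):

object ChunkMathSketch {
  // Sizes obtained by splitting totalSize bytes into chunks of at most maxChunk bytes each.
  def chunkSizes(totalSize: Long, maxChunk: Int): Seq[Long] = {
    require(totalSize >= 0 && maxChunk > 0)
    val fullChunks = (totalSize / maxChunk).toInt
    val tail = totalSize % maxChunk
    Seq.fill(fullChunks)(maxChunk.toLong) ++ (if (tail > 0) Seq(tail) else Seq.empty[Long])
  }
}

For example, chunkSizes(5L << 30, Int.MaxValue - 512) yields two chunks of 2147483135 bytes and a tail of 1073742850 bytes.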
-//// def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, -//// ioConf: IOConfig): LargeByteBuffer = { -//// -//// // Split the block into multiple of BlockStore.maxBlockSize -//// val segmentSize = segment.length -//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] -//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) -//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) -//// -//// logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + -//// ", lastBlockSize = " + lastBlockSize) -//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) -//// -//// for (index <- 0 until numBlocks - 1) { -//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, -//// segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) -//// } -//// -//// // Last block -//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, -//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) -//// -//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) -//// } -////} diff --git a/network/common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java b/network/common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java index dfb7740344ed0..fde2b78d10a5d 100644 --- a/network/common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java +++ b/network/common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java @@ -37,9 +37,7 @@ import static org.junit.Assert.*; -import org.apache.spark.network.buffer.FileSegmentManagedBuffer; -import org.apache.spark.network.buffer.ManagedBuffer; -import org.apache.spark.network.buffer.NioManagedBuffer; +import org.apache.spark.network.buffer.*; import org.apache.spark.network.client.ChunkReceivedCallback; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.TransportClient; @@ -73,7 +71,8 @@ public static void setUp() throws Exception { buf.put((byte) i); } buf.flip(); - bufferChunk = new NioManagedBuffer(buf); + final LargeByteBuffer lBuf = LargeByteBufferHelper.asLargeByteBuffer(buf); + bufferChunk = new NioManagedBuffer(lBuf); testFile = File.createTempFile("shuffle-test-file", "txt"); testFile.deleteOnExit(); @@ -91,7 +90,7 @@ public static void setUp() throws Exception { public ManagedBuffer getChunk(long streamId, int chunkIndex) { assertEquals(STREAM_ID, streamId); if (chunkIndex == BUFFER_CHUNK_INDEX) { - return new NioManagedBuffer(buf); + return new NioManagedBuffer(lBuf); } else if (chunkIndex == FILE_CHUNK_INDEX) { return new FileSegmentManagedBuffer(conf, testFile, 10, testFile.length() - 25); } else { @@ -222,10 +221,10 @@ private void assertBufferListsEqual(List list0, List list0, List list throws Exception { assertEquals(list0.size(), list1.size()); for (int i = 0; i < list0.size(); i ++) { - assertBuffersEqual(list0.get(i), new NioManagedBuffer(ByteBuffer.wrap(list1.get(i)))); + assertBuffersEqual(list0.get(i), new NioManagedBuffer( + LargeByteBufferHelper.asLargeByteBuffer(list1.get(i)))); } } private void assertBuffersEqual(ManagedBuffer buffer0, ManagedBuffer buffer1) throws Exception { - ByteBuffer nio0 = buffer0.nioByteBuffer(); - ByteBuffer nio1 = buffer1.nioByteBuffer(); + LargeByteBuffer nio0 = buffer0.nioByteBuffer(); + LargeByteBuffer nio1 = buffer1.nioByteBuffer(); - int len = nio0.remaining(); + long len = nio0.remaining(); 
assertEquals(nio0.remaining(), nio1.remaining()); - for (int i = 0; i < len; i ++) { + for (long i = 0; i < len; i ++) { assertEquals(nio0.get(), nio1.get()); } } diff --git a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java index 842741e3d354f..853d6c195bb85 100644 --- a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java +++ b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java @@ -24,6 +24,7 @@ import com.google.common.collect.Maps; import io.netty.buffer.Unpooled; +import org.apache.spark.network.buffer.LargeByteBufferHelper; import org.junit.Test; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -48,7 +49,7 @@ public class OneForOneBlockFetcherSuite { @Test public void testFetchOne() { LinkedHashMap blocks = Maps.newLinkedHashMap(); - blocks.put("shuffle_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); + blocks.put("shuffle_0_0_0", new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(new byte[0]))); BlockFetchingListener listener = fetchBlocks(blocks); @@ -58,8 +59,8 @@ public void testFetchOne() { @Test public void testFetchThree() { LinkedHashMap blocks = Maps.newLinkedHashMap(); - blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); - blocks.put("b1", new NioManagedBuffer(ByteBuffer.wrap(new byte[23]))); + blocks.put("b0", new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(new byte[12]))); + blocks.put("b1", new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(new byte[23]))); blocks.put("b2", new NettyManagedBuffer(Unpooled.wrappedBuffer(new byte[23]))); BlockFetchingListener listener = fetchBlocks(blocks); @@ -72,7 +73,7 @@ public void testFetchThree() { @Test public void testFailure() { LinkedHashMap blocks = Maps.newLinkedHashMap(); - blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); + blocks.put("b0", new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(new byte[12]))); blocks.put("b1", null); blocks.put("b2", null); @@ -87,9 +88,9 @@ public void testFailure() { @Test public void testFailureAndSuccess() { LinkedHashMap blocks = Maps.newLinkedHashMap(); - blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); + blocks.put("b0", new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(new byte[12]))); blocks.put("b1", null); - blocks.put("b2", new NioManagedBuffer(ByteBuffer.wrap(new byte[21]))); + blocks.put("b2", new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(new byte[21]))); BlockFetchingListener listener = fetchBlocks(blocks); diff --git a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockFetcherSuite.java b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockFetcherSuite.java index 1ad0d72ae5ec5..d2056254fbbf6 100644 --- a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockFetcherSuite.java +++ b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockFetcherSuite.java @@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Sets; +import org.apache.spark.network.buffer.LargeByteBufferHelper; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -47,9 +48,9 @@ */ public class RetryingBlockFetcherSuite { - ManagedBuffer block0 = new 
NioManagedBuffer(ByteBuffer.wrap(new byte[13])); - ManagedBuffer block1 = new NioManagedBuffer(ByteBuffer.wrap(new byte[7])); - ManagedBuffer block2 = new NioManagedBuffer(ByteBuffer.wrap(new byte[19])); + ManagedBuffer block0 = new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(new byte[13])); + ManagedBuffer block1 = new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(new byte[7])); + ManagedBuffer block2 = new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(new byte[19])); @Before public void beforeEach() { From dcb46697d59fa77ac643e438b346eb28972d9e8f Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 27 Feb 2015 14:59:04 -0600 Subject: [PATCH 10/97] add real test case for uploading large blocks (failing now) --- .../netty/NettyBlockTransferSuite.scala | 79 ++++++++++++++++--- 1 file changed, 66 insertions(+), 13 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala index 2133b3286ff36..710bf2822fa9b 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala @@ -16,14 +16,17 @@ */ package org.apache.spark.network.netty +import java.nio.ByteBuffer import java.util.concurrent.TimeUnit import org.apache.commons.io.IOUtils import org.apache.spark.network.BlockDataManager -import org.apache.spark.network.buffer.{ManagedBuffer, LargeByteBufferHelper, NioManagedBuffer} +import org.apache.spark.network.buffer._ import org.apache.spark.network.shuffle.BlockFetchingListener -import org.apache.spark.storage.ShuffleBlockId -import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.storage.{BlockId, StorageLevel, RDDBlockId, ShuffleBlockId} +import org.apache.spark.{Logging, SecurityManager, SparkConf} +import org.mockito.ArgumentCaptor +import org.mockito.{Matchers => MockitoMatchers} import org.mockito.Mockito._ import org.scalatest.mock.MockitoSugar import org.scalatest.{Matchers, FunSuite} @@ -31,28 +34,23 @@ import org.scalatest.{Matchers, FunSuite} import scala.concurrent.duration.FiniteDuration import scala.concurrent.{Await, Promise} -class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar { +class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar with Logging { val conf = new SparkConf() .set("spark.app.id", "app-id") val securityManager = new SecurityManager(conf) - - test("simple fetch") { - + def fetchBlock(buf: LargeByteBuffer): ManagedBuffer = { val blockManager = mock[BlockDataManager] val blockId = ShuffleBlockId(0, 1, 2) - val buf = LargeByteBufferHelper.allocate(Integer.MAX_VALUE.toLong + 100l) val blockBuffer = new NioManagedBuffer(buf) when(blockManager.getBlockData(blockId)).thenReturn(blockBuffer) val from = new NettyBlockTransferService(conf, securityManager, numCores = 1) from.init(blockManager) - println("from: " + from.hostName + ":" + from.port) val to = new NettyBlockTransferService(conf, securityManager, numCores = 1) to.init(blockManager) - println("to: " + to.hostName + ":" + to.port) try { val promise = Promise[ManagedBuffer]() @@ -69,17 +67,72 @@ class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar { }) Await.ready(promise.future, FiniteDuration(100, TimeUnit.SECONDS)) - val v = promise.future.value.get.get -// IOUtils.toString(v.createInputStream()) should equal(blockString) - 
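When the fetched block is too large to materialize as a String the way the commented-out IOUtils.toString check would, the suite could instead stream the result and assert on its length. A hedged sketch of such a helper (hypothetical, not part of this suite as written):

import java.io.InputStream

// Count the bytes delivered by a stream without buffering them all in memory.
def countBytes(in: InputStream): Long = {
  val buf = new Array[Byte](8192)
  var total = 0L
  var read = in.read(buf)
  while (read != -1) {
    total += read
    read = in.read(buf)
  }
  total
}

Something like countBytes(v.createInputStream()) could then be compared against the size originally passed to LargeByteBufferHelper.allocate.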
println(v.nioByteBuffer().limit()) + promise.future.value.get.get } finally { from.close() to.close() } + } + + ignore("simple fetch") { + val blockString = "Hello, world!" + val blockBuffer = LargeByteBufferHelper.asLargeByteBuffer(blockString.getBytes) + val fetched = fetchBlock(blockBuffer) + + IOUtils.toString(fetched.createInputStream()) should equal(blockString) + } + + + def uploadBlock(buf: LargeByteBuffer) { + + val fromBlockManager = mock[BlockDataManager] + val toBlockManager = mock[BlockDataManager] + val blockId = RDDBlockId(0, 1) + val blockBuffer = new NioManagedBuffer(buf) + val level = StorageLevel.DISK_ONLY //doesn't matter + + val from = new NettyBlockTransferService(conf, securityManager, numCores = 1) + from.init(fromBlockManager) + val to = new NettyBlockTransferService(conf, securityManager, numCores = 1) + logTrace("to block manager = " + toBlockManager) + to.init(toBlockManager) + + from.uploadBlock(to.hostName, to.port, "exec-1", blockId, blockBuffer, level) + //TODO how to get rid of this wait?? + Thread.sleep(1000) + val bufferCaptor = ArgumentCaptor.forClass(classOf[ManagedBuffer]) + verify(toBlockManager).putBlockData(MockitoMatchers.eq(blockId), bufferCaptor.capture(), + MockitoMatchers.eq(level)) + val putBuffer = bufferCaptor.getValue() + } + + test("simple upload") { + val buf = LargeByteBufferHelper.asLargeByteBuffer(Array[Byte](0,1,2,3)) + uploadBlock(buf) + } + + + test("giant upload") { + val parts = (0 until 2).map{_ => ByteBuffer.allocate(Integer.MAX_VALUE - 100)}.toArray + val buf = new WrappedLargeByteBuffer(parts) + uploadBlock(buf) + } + + + def equivalentBuffers(exp: ManagedBuffer, act: ManagedBuffer): Unit = { + equivalentBuffers(exp.nioByteBuffer(), act.nioByteBuffer()) } + def equivalentBuffers(exp: LargeByteBuffer, act: LargeByteBuffer): Unit = { + assert(exp.capacity() === act.capacity()) + assert(exp.remaining() === act.remaining()) + while (exp.remaining() > 0) { + assert(exp.get() === act.get()) + } + + } From 660f5e362439d79d8dfd000a805be0ad5181106c Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 27 Feb 2015 20:57:32 -0600 Subject: [PATCH 11/97] flesh out NettyBlockTransfer#uploadBlock --- .../spark/io/ChainedLargeByteBuffer.scala | 6 ++ .../network/netty/NettyBlockRpcServer.scala | 56 +++++++++++++-- .../netty/NettyBlockTransferService.scala | 72 +++++++++++++------ .../apache/spark/storage/BlockManager.scala | 4 +- .../netty/NettyBlockTransferSuite.scala | 36 ++++++---- .../rdd/LargePartitionCachingSuite.scala | 4 ++ .../spark/network/buffer/LargeByteBuffer.java | 4 ++ .../buffer/WrappedLargeByteBuffer.java | 8 +++ 8 files changed, 149 insertions(+), 41 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/io/ChainedLargeByteBuffer.scala b/core/src/main/scala/org/apache/spark/io/ChainedLargeByteBuffer.scala index eea1114ec35ca..a5af9618fde4a 100644 --- a/core/src/main/scala/org/apache/spark/io/ChainedLargeByteBuffer.scala +++ b/core/src/main/scala/org/apache/spark/io/ChainedLargeByteBuffer.scala @@ -19,6 +19,8 @@ package org.apache.spark.io import java.nio.ByteBuffer import java.nio.channels.WritableByteChannel +import scala.collection.JavaConverters._ + import org.apache.spark.network.buffer.LargeByteBuffer import org.apache.spark.util.collection.ChainedBuffer @@ -82,4 +84,8 @@ class ChainedLargeByteBuffer(private[io] val underlying: ChainedBuffer) extends override def firstByteBuffer(): ByteBuffer = { ByteBuffer.wrap(underlying.chunks(0)) } + + override def nioBuffers(): java.util.List[ByteBuffer] = { + 
underlying.chunks.map{bytes => ByteBuffer.wrap(bytes)}.asJava + } } diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala index 86df34920a666..0c67459382502 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala @@ -18,15 +18,16 @@ package org.apache.spark.network.netty import java.nio.ByteBuffer +import java.util.concurrent.ConcurrentHashMap -import scala.collection.JavaConversions._ +import scala.collection.JavaConverters._ import org.apache.spark.Logging import org.apache.spark.network.BlockDataManager -import org.apache.spark.network.buffer.{LargeByteBufferHelper, ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.buffer.{WrappedLargeByteBuffer, LargeByteBufferHelper, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager} -import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock} +import org.apache.spark.network.shuffle.protocol._ import org.apache.spark.serializer.Serializer import org.apache.spark.storage.{BlockId, StorageLevel} @@ -44,6 +45,8 @@ class NettyBlockRpcServer( private val streamManager = new OneForOneStreamManager() + private val openRequests = new ConcurrentHashMap[String,PartialBlockUploadHandler]() + override def receive( client: TransportClient, messageBytes: Array[Byte], @@ -55,7 +58,7 @@ class NettyBlockRpcServer( case openBlocks: OpenBlocks => val blocks: Seq[ManagedBuffer] = openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData) - val streamId = streamManager.registerStream(blocks.iterator) + val streamId = streamManager.registerStream(blocks.iterator.asJava) logTrace(s"Registered streamId $streamId with ${blocks.size} buffers") responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteArray) @@ -64,8 +67,53 @@ class NettyBlockRpcServer( val level: StorageLevel = serializer.newInstance().deserialize(ByteBuffer.wrap(uploadBlock.metadata)) val data = new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(uploadBlock.blockData)) + logTrace("putting block into our block manager: " + blockManager) blockManager.putBlockData(BlockId(uploadBlock.blockId), data, level) responseContext.onSuccess(new Array[Byte](0)) + + case uploadPartialBock: UploadPartialBlock => + logTrace("received upload partial block: " + uploadPartialBock) + val storageLevel: StorageLevel = + serializer.newInstance().deserialize(ByteBuffer.wrap(uploadPartialBock.metadata)) + logTrace("open requests = " + openRequests) + openRequests.putIfAbsent(uploadPartialBock.blockId, + new PartialBlockUploadHandler(uploadPartialBock.blockId, storageLevel, + uploadPartialBock.nTotalBlockChunks)) + val handler = openRequests.get(uploadPartialBock.blockId) + handler.addPartialBlock(uploadPartialBock, storageLevel) + responseContext.onSuccess(new Array[Byte](0)) + } + } + + + private class PartialBlockUploadHandler( + val blockId: String, + val storageLevel: StorageLevel, + val nTotalBlockChunks: Int + ) { + val chunks = new Array[Array[Byte]](nTotalBlockChunks) + var nMissing = nTotalBlockChunks + + def addPartialBlock(partial: UploadPartialBlock, storageLevel: StorageLevel): Unit = { + if (partial.nTotalBlockChunks != nTotalBlockChunks) { + 
throw new IllegalArgumentException(s"received incompatible UploadPartialBlock: expecting " + + s"$nTotalBlockChunks total chunks, but new msg has ${partial.nTotalBlockChunks}") + } + if (storageLevel != this.storageLevel) { + throw new IllegalArgumentException(s"received incompatible UploadPartialBlock: expecting " + + s"${this.storageLevel}, but new message has $storageLevel") + } + logTrace("received partial msg") + chunks(partial.blockChunkIndex) = partial.blockData + nMissing -= 1 + logTrace("nmissing = " + nMissing) + if (nMissing == 0) { + //we've got all the blocks -- now we can insert into the block manager + logTrace("received all partial blocks for " + blockId) + val data = new NioManagedBuffer(new WrappedLargeByteBuffer(chunks.map{ByteBuffer.wrap})) + blockManager.putBlockData(BlockId(blockId), data, storageLevel) + openRequests.remove(blockId) + } } } diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 9824c7c38c188..3deecf3242d4a 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -18,6 +18,7 @@ package org.apache.spark.network.netty import scala.collection.JavaConversions._ +import scala.collection.JavaConverters._ import scala.concurrent.{Future, Promise} import org.apache.spark.{SecurityManager, SparkConf} @@ -27,11 +28,13 @@ import org.apache.spark.network.client.{TransportClientBootstrap, RpcResponseCal import org.apache.spark.network.sasl.{SaslRpcHandler, SaslClientBootstrap} import org.apache.spark.network.server._ import org.apache.spark.network.shuffle.{RetryingBlockFetcher, BlockFetchingListener, OneForOneBlockFetcher} -import org.apache.spark.network.shuffle.protocol.UploadBlock +import org.apache.spark.network.shuffle.protocol.{UploadPartialBlock, UploadBlock} import org.apache.spark.serializer.JavaSerializer import org.apache.spark.storage.{BlockId, StorageLevel} import org.apache.spark.util.Utils +import scala.util.{Failure, Success} + /** * A BlockTransferService that uses Netty to fetch a set of blocks at at time. */ @@ -106,7 +109,6 @@ class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManage blockId: BlockId, blockData: ManagedBuffer, level: StorageLevel): Future[Unit] = { - val result = Promise[Unit]() val client = clientFactory.createClient(hostname, port) // StorageLevel is serialized as bytes using our JavaSerializer. Everything else is encoded @@ -114,27 +116,57 @@ class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManage val levelBytes = serializer.newInstance().serialize(level).array() // Convert or copy nio buffer into array in order to serialize it. 
- val nioBuffer = blockData.nioByteBuffer() - //TODO key change -- multiple uploads here - // this stub is not even efficient when the buffer actually is small - val array = new Array[Byte](nioBuffer.remaining().toInt) - nioBuffer.get(array, 0, nioBuffer.remaining().toInt) - - client.sendRpc(new UploadBlock(appId, execId, blockId.toString, levelBytes, array).toByteArray, - new RpcResponseCallback { - override def onSuccess(response: Array[Byte]): Unit = { - logTrace(s"Successfully uploaded block $blockId") - result.success() - } - override def onFailure(e: Throwable): Unit = { - logError(s"Error while uploading block $blockId", e) - result.failure(e) - } - }) + val largeByteBuffer = blockData.nioByteBuffer() + val bufferParts = largeByteBuffer.nioBuffers().asScala + val chunkOffsets: Seq[Long] = bufferParts.scanLeft(0l){case(offset, buf) => offset + buf.limit()} - result.future + performSequentially(bufferParts.zipWithIndex){case (buf,idx) => + val partialBlockArray = if (buf.hasArray) { + buf.array() + } else { + val arr = new Array[Byte](buf.limit()) + buf.get(arr) + arr + } + //Note: one major shortcoming of this is that it expects the incoming LargeByteBuffer to + // already be reasonably chunked -- in particular, the chunks cannot get too close to 2GB + // or else we'll still run into problems b/c there is some more overhead in the transfer + val msg = new UploadPartialBlock(appId, execId, blockId.toString, bufferParts.size, idx, + chunkOffsets(idx), levelBytes, partialBlockArray) + + val result = Promise[Unit]() + client.sendRpc(msg.toByteArray, + new RpcResponseCallback { + override def onSuccess(response: Array[Byte]): Unit = { + logTrace(s"Successfully uploaded partial block $blockId, part $idx (out of ${bufferParts.size})") + result.success() + } + + override def onFailure(e: Throwable): Unit = { + logError(s"Error while uploading partial block $blockId, part $idx (out of ${bufferParts.size})", e) + result.failure(e) + } + }) + result.future + } } + //thanks to our old friend @ryanlecompte: https://gist.github.com/squito/242f82ad6345e3f85a5b + private def performSequentially[A](items: Seq[A])(f: A => Future[Unit]): Future[Unit] = { + import scala.concurrent.ExecutionContext.Implicits.global + items.headOption match { + case Some(nextItem) => + val fut = f(nextItem) + fut.flatMap { _ => + performSequentially(items.tail)(f) + } + case None => + // nothing left to process + Future.successful(()) + } + } + + override def close(): Unit = { server.close() clientFactory.close() diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index be63f9cb03d29..92e9512146b35 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -947,8 +947,8 @@ private[spark] class BlockManager( //TODO //ACK! 
here we're stuck -- we can't replicate a large block until we figure out // how to deal w/ shuffling more than 2 gb -// blockTransferService.uploadBlockSync( -// peer.host, peer.port, peer.executorId, blockId, new NioManagedBuffer(data), tLevel) + blockTransferService.uploadBlockSync( + peer.host, peer.port, peer.executorId, blockId, new NioManagedBuffer(data), tLevel) logTrace(s"Replicated $blockId of ${data.limit()} bytes to $peer in %s ms" .format(System.currentTimeMillis - onePeerStartTime)) peersReplicatedTo += peer diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala index 710bf2822fa9b..a484221aaa731 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala @@ -31,7 +31,7 @@ import org.mockito.Mockito._ import org.scalatest.mock.MockitoSugar import org.scalatest.{Matchers, FunSuite} -import scala.concurrent.duration.FiniteDuration +import scala.concurrent.duration.{Duration, FiniteDuration} import scala.concurrent.{Await, Promise} class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar with Logging { @@ -84,39 +84,49 @@ class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar w } - def uploadBlock(buf: LargeByteBuffer) { + def uploadBlock(buf: LargeByteBuffer, rddId: Int, timeout: Long) { val fromBlockManager = mock[BlockDataManager] val toBlockManager = mock[BlockDataManager] - val blockId = RDDBlockId(0, 1) + val blockId = RDDBlockId(rddId, rddId + 1) val blockBuffer = new NioManagedBuffer(buf) val level = StorageLevel.DISK_ONLY //doesn't matter val from = new NettyBlockTransferService(conf, securityManager, numCores = 1) from.init(fromBlockManager) val to = new NettyBlockTransferService(conf, securityManager, numCores = 1) - logTrace("to block manager = " + toBlockManager) to.init(toBlockManager) - from.uploadBlock(to.hostName, to.port, "exec-1", blockId, blockBuffer, level) - //TODO how to get rid of this wait?? 
- Thread.sleep(1000) + val uploadFuture = from.uploadBlock(to.hostName, to.port, "exec-1", blockId, blockBuffer, level) + Await.result(uploadFuture, Duration.apply(timeout, TimeUnit.MILLISECONDS)) val bufferCaptor = ArgumentCaptor.forClass(classOf[ManagedBuffer]) verify(toBlockManager).putBlockData(MockitoMatchers.eq(blockId), bufferCaptor.capture(), MockitoMatchers.eq(level)) val putBuffer = bufferCaptor.getValue() + logTrace("begin checking buffer equivalence") + equivalentBuffers(blockBuffer, putBuffer) + logTrace("finished checking buffer equivalence") } - test("simple upload") { + test("small one-part upload") { val buf = LargeByteBufferHelper.asLargeByteBuffer(Array[Byte](0,1,2,3)) - uploadBlock(buf) + uploadBlock(buf, 0, 100) } + test("small multi-part upload") { + val parts = (0 until 5).map{idx => + val arr = Array.tabulate[Byte](100){subIdx => (idx + subIdx).toByte} + ByteBuffer.wrap(arr) + }.toArray + val buf = new WrappedLargeByteBuffer(parts) + uploadBlock(buf, 1, 500) + } test("giant upload") { - val parts = (0 until 2).map{_ => ByteBuffer.allocate(Integer.MAX_VALUE - 100)}.toArray + //actually pretty close to max size due to overhead from the rest of the msg + val parts = (0 until 2).map{_ => ByteBuffer.allocate(Integer.MAX_VALUE - 200)}.toArray val buf = new WrappedLargeByteBuffer(parts) - uploadBlock(buf) + uploadBlock(buf, 2, 15 * 60 * 1000) } @@ -131,9 +141,5 @@ class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar w while (exp.remaining() > 0) { assert(exp.get() === act.get()) } - } - - - } diff --git a/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala index 21c6e5fe3ab50..1233f16978d32 100644 --- a/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala @@ -33,4 +33,8 @@ class LargePartitionCachingSuite extends FunSuite with SharedSparkContext { test("disk cache large partitions") { largePartitionRdd.persist(StorageLevel.DISK_ONLY).count() } + + test("disk cache large partitions with replications") { + pending + } } diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java index 4997dcecc3370..d5a3c2cb476eb 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.WritableByteChannel; +import java.util.List; public interface LargeByteBuffer { public long capacity(); @@ -59,4 +60,7 @@ public interface LargeByteBuffer { //TODO this should be deleted -- just to help me get going public ByteBuffer firstByteBuffer(); + //List b/c we need to know the size. 
Could also use Iterator w/ separate numBuffers method + public List nioBuffers(); + } diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java index e948fa67581d0..34adcc98fa146 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java @@ -19,6 +19,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.WritableByteChannel; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; public class WrappedLargeByteBuffer implements LargeByteBuffer { @@ -142,4 +145,9 @@ public long writeTo(WritableByteChannel channel) throws IOException { public ByteBuffer firstByteBuffer() { return underlying[0]; } + + @Override + public List nioBuffers() { + return Arrays.asList(underlying); + } } From 4c228a07173e06f8da449db17b878d220e14dea0 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 27 Feb 2015 21:14:13 -0600 Subject: [PATCH 12/97] minor cleanup --- .../network/netty/NettyBlockRpcServer.scala | 1 - .../netty/NettyBlockTransferService.scala | 73 ++++++++----------- .../netty/NettyBlockTransferSuite.scala | 7 +- 3 files changed, 35 insertions(+), 46 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala index 0c67459382502..822934a323a08 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala @@ -75,7 +75,6 @@ class NettyBlockRpcServer( logTrace("received upload partial block: " + uploadPartialBock) val storageLevel: StorageLevel = serializer.newInstance().deserialize(ByteBuffer.wrap(uploadPartialBock.metadata)) - logTrace("open requests = " + openRequests) openRequests.putIfAbsent(uploadPartialBock.blockId, new PartialBlockUploadHandler(uploadPartialBock.blockId, storageLevel, uploadPartialBock.nTotalBlockChunks)) diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 3deecf3242d4a..8162b640b770b 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -115,58 +115,45 @@ class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManage // using our binary protocol. val levelBytes = serializer.newInstance().serialize(level).array() - // Convert or copy nio buffer into array in order to serialize it. 
val largeByteBuffer = blockData.nioByteBuffer() val bufferParts = largeByteBuffer.nioBuffers().asScala val chunkOffsets: Seq[Long] = bufferParts.scanLeft(0l){case(offset, buf) => offset + buf.limit()} - performSequentially(bufferParts.zipWithIndex){case (buf,idx) => - val partialBlockArray = if (buf.hasArray) { - buf.array() - } else { - val arr = new Array[Byte](buf.limit()) - buf.get(arr) - arr - } - //Note: one major shortcoming of this is that it expects the incoming LargeByteBuffer to - // already be reasonably chunked -- in particular, the chunks cannot get too close to 2GB - // or else we'll still run into problems b/c there is some more overhead in the transfer - val msg = new UploadPartialBlock(appId, execId, blockId.toString, bufferParts.size, idx, - chunkOffsets(idx), levelBytes, partialBlockArray) - - val result = Promise[Unit]() - client.sendRpc(msg.toByteArray, - new RpcResponseCallback { - override def onSuccess(response: Array[Byte]): Unit = { - logTrace(s"Successfully uploaded partial block $blockId, part $idx (out of ${bufferParts.size})") - result.success() - } - - override def onFailure(e: Throwable): Unit = { - logError(s"Error while uploading partial block $blockId, part $idx (out of ${bufferParts.size})", e) - result.failure(e) - } - }) - result.future - } - } - - //thanks to our old friend @ryanlecompte: https://gist.github.com/squito/242f82ad6345e3f85a5b - private def performSequentially[A](items: Seq[A])(f: A => Future[Unit]): Future[Unit] = { import scala.concurrent.ExecutionContext.Implicits.global - items.headOption match { - case Some(nextItem) => - val fut = f(nextItem) - fut.flatMap { _ => - performSequentially(items.tail)(f) + bufferParts.zipWithIndex.foldLeft(Future.successful(())){case (prevFuture,(buf,idx)) => + prevFuture.flatMap{_ => + // Convert or copy nio buffer into array in order to serialize it. 
+ val partialBlockArray = if (buf.hasArray) { + buf.array() + } else { + val arr = new Array[Byte](buf.limit()) + buf.get(arr) + arr } - case None => - // nothing left to process - Future.successful(()) + //Note: one major shortcoming of this is that it expects the incoming LargeByteBuffer to + // already be reasonably chunked -- in particular, the chunks cannot get too close to 2GB + // or else we'll still run into problems b/c there is some more overhead in the transfer + val msg = new UploadPartialBlock(appId, execId, blockId.toString, bufferParts.size, idx, + chunkOffsets(idx), levelBytes, partialBlockArray) + + val result = Promise[Unit]() + client.sendRpc(msg.toByteArray, + new RpcResponseCallback { + override def onSuccess(response: Array[Byte]): Unit = { + logTrace(s"Successfully uploaded partial block $blockId, part $idx (out of ${bufferParts.size})") + result.success() + } + + override def onFailure(e: Throwable): Unit = { + logError(s"Error while uploading partial block $blockId, part $idx (out of ${bufferParts.size})", e) + result.failure(e) + } + }) + result.future + } } } - override def close(): Unit = { server.close() clientFactory.close() diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala index a484221aaa731..b02b9d396c95c 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala @@ -123,12 +123,15 @@ class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar w } test("giant upload") { - //actually pretty close to max size due to overhead from the rest of the msg + // pretty close to max size due to overhead from the rest of the msg val parts = (0 until 2).map{_ => ByteBuffer.allocate(Integer.MAX_VALUE - 200)}.toArray val buf = new WrappedLargeByteBuffer(parts) - uploadBlock(buf, 2, 15 * 60 * 1000) + uploadBlock(buf, 2, 20 * 60 * 1000) // yup, takes this long ... 
} + test("cleanup partial uploads") { + pending + } def equivalentBuffers(exp: ManagedBuffer, act: ManagedBuffer): Unit = { From cf7c3a7067aaa61732782995984f17fa94a6cff7 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Fri, 27 Feb 2015 22:45:50 -0600 Subject: [PATCH 13/97] cleanup abandonded block uploads --- .../network/netty/NettyBlockRpcServer.scala | 53 +++++++++++++++++-- .../netty/NettyBlockTransferService.scala | 1 + .../netty/NettyBlockTransferSuite.scala | 25 +++++---- .../spark/network/TransportContext.java | 2 + .../spark/network/server/RpcHandler.java | 2 + 5 files changed, 69 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala index 822934a323a08..9552b71e84de6 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala @@ -18,11 +18,13 @@ package org.apache.spark.network.netty import java.nio.ByteBuffer -import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.{TimeUnit, Executors, ConcurrentHashMap} + +import org.apache.spark.util.Utils import scala.collection.JavaConverters._ -import org.apache.spark.Logging +import org.apache.spark.{SparkException, Logging} import org.apache.spark.network.BlockDataManager import org.apache.spark.network.buffer.{WrappedLargeByteBuffer, LargeByteBufferHelper, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} @@ -45,7 +47,22 @@ class NettyBlockRpcServer( private val streamManager = new OneForOneStreamManager() - private val openRequests = new ConcurrentHashMap[String,PartialBlockUploadHandler]() + private val openRequests = new ConcurrentHashMap[String, PartialBlockUploadHandler]() + // TODO from configuration. Might need to be really big ... + private val cleanupTime = 30 * 60 * 1000 + + //ideally, this should be empty, and it will contain a very small amount of data for abandoned + // requests -- so hopefully its OK to hold on to this forever + private val abandonedRequests = new ConcurrentHashMap[String,Object]() + + val cleaner = Executors.newSingleThreadScheduledExecutor( + Utils.namedThreadFactory("NettyBlockRPCServer cleanup")).scheduleWithFixedDelay( + new Runnable { + def run() { + dropAbandonedPartialUploads() + } + }, cleanupTime / 10, cleanupTime / 10, TimeUnit.MILLISECONDS + ) override def receive( client: TransportClient, @@ -75,6 +92,11 @@ class NettyBlockRpcServer( logTrace("received upload partial block: " + uploadPartialBock) val storageLevel: StorageLevel = serializer.newInstance().deserialize(ByteBuffer.wrap(uploadPartialBock.metadata)) + if (abandonedRequests.containsKey(uploadPartialBock.blockId)) { + val msg = s"Too much time passed between the msgs for this block -- the other msgs have" + + " already been dropped. 
Try increasing the timeout specified in XXX" + throw new SparkException(msg) + } openRequests.putIfAbsent(uploadPartialBock.blockId, new PartialBlockUploadHandler(uploadPartialBock.blockId, storageLevel, uploadPartialBock.nTotalBlockChunks)) @@ -92,8 +114,12 @@ class NettyBlockRpcServer( ) { val chunks = new Array[Array[Byte]](nTotalBlockChunks) var nMissing = nTotalBlockChunks + var lastUpdated = System.currentTimeMillis() - def addPartialBlock(partial: UploadPartialBlock, storageLevel: StorageLevel): Unit = { + def addPartialBlock( + partial: UploadPartialBlock, + storageLevel: StorageLevel + ): Unit = synchronized { if (partial.nTotalBlockChunks != nTotalBlockChunks) { throw new IllegalArgumentException(s"received incompatible UploadPartialBlock: expecting " + s"$nTotalBlockChunks total chunks, but new msg has ${partial.nTotalBlockChunks}") @@ -102,6 +128,7 @@ class NettyBlockRpcServer( throw new IllegalArgumentException(s"received incompatible UploadPartialBlock: expecting " + s"${this.storageLevel}, but new message has $storageLevel") } + lastUpdated = System.currentTimeMillis() logTrace("received partial msg") chunks(partial.blockChunkIndex) = partial.blockData nMissing -= 1 @@ -116,5 +143,23 @@ class NettyBlockRpcServer( } } + private def dropAbandonedPartialUploads(): Unit = { + logTrace("checking for abandoned uploads among: " + openRequests.keys().asScala.mkString(",")) + val itr = openRequests.entrySet.iterator + while (itr.hasNext()) { + val entry = itr.next() + if (System.currentTimeMillis() - entry.getValue().lastUpdated > cleanupTime) { + logWarning(s"never received all parts for block ${entry.getKey}; dropping this block") + abandonedRequests.putIfAbsent(entry.getKey, new Object()) + itr.remove() + } else { + logTrace(entry.getKey() + " OK") + } + } + } + + override def getStreamManager(): StreamManager = streamManager + + override def close(): Unit = {cleaner.cancel(false)} } diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 8162b640b770b..976a338eeba7f 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -157,5 +157,6 @@ class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManage override def close(): Unit = { server.close() clientFactory.close() + transportContext.close() } } diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala index b02b9d396c95c..ba6e8a3079d10 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala @@ -97,15 +97,21 @@ class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar w val to = new NettyBlockTransferService(conf, securityManager, numCores = 1) to.init(toBlockManager) - val uploadFuture = from.uploadBlock(to.hostName, to.port, "exec-1", blockId, blockBuffer, level) - Await.result(uploadFuture, Duration.apply(timeout, TimeUnit.MILLISECONDS)) - val bufferCaptor = ArgumentCaptor.forClass(classOf[ManagedBuffer]) - verify(toBlockManager).putBlockData(MockitoMatchers.eq(blockId), bufferCaptor.capture(), - MockitoMatchers.eq(level)) - val putBuffer = bufferCaptor.getValue() - logTrace("begin checking 
buffer equivalence") - equivalentBuffers(blockBuffer, putBuffer) - logTrace("finished checking buffer equivalence") + try { + val uploadFuture = from.uploadBlock(to.hostName, to.port, "exec-1", blockId, blockBuffer, level) + Await.result(uploadFuture, Duration.apply(timeout, TimeUnit.MILLISECONDS)) + val bufferCaptor = ArgumentCaptor.forClass(classOf[ManagedBuffer]) + verify(toBlockManager).putBlockData(MockitoMatchers.eq(blockId), bufferCaptor.capture(), + MockitoMatchers.eq(level)) + val putBuffer = bufferCaptor.getValue() + logTrace("begin checking buffer equivalence") + equivalentBuffers(blockBuffer, putBuffer) + logTrace("finished checking buffer equivalence") + } finally { + from.close() + to.close() + } + } test("small one-part upload") { @@ -133,7 +139,6 @@ class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar w pending } - def equivalentBuffers(exp: ManagedBuffer, act: ManagedBuffer): Unit = { equivalentBuffers(exp.nioByteBuffer(), act.nioByteBuffer()) } diff --git a/network/common/src/main/java/org/apache/spark/network/TransportContext.java b/network/common/src/main/java/org/apache/spark/network/TransportContext.java index 5bc6e5a2418a9..2fa361edb44f6 100644 --- a/network/common/src/main/java/org/apache/spark/network/TransportContext.java +++ b/network/common/src/main/java/org/apache/spark/network/TransportContext.java @@ -131,4 +131,6 @@ private TransportChannelHandler createChannelHandler(Channel channel) { } public TransportConf getConf() { return conf; } + + public void close() { rpcHandler.close(); } } diff --git a/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java b/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java index 2ba92a40f8b0a..f8972072a2551 100644 --- a/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java +++ b/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java @@ -52,4 +52,6 @@ public abstract void receive( * No further requests will come from this client. */ public void connectionTerminated(TransportClient client) { } + + public void close() { } } From fe90fd682d71ce4c156dde9e6a016e7923e65aad Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Mon, 2 Mar 2015 09:46:19 -0600 Subject: [PATCH 14/97] crank up memory for tests --- pom.xml | 2 +- project/SparkBuild.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index bb355bf735bee..b5c8f823f62de 100644 --- a/pom.xml +++ b/pom.xml @@ -1199,7 +1199,7 @@ ${project.build.directory}/surefire-reports . 
SparkTestSuite.txt - -ea -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize} + -ea -Xmx16g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize} updateCurrentBufferIfNeeded + comment --- .../spark/network/buffer/WrappedLargeByteBuffer.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java index 7c2af1d0be452..93c5c7ac2a37d 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java @@ -106,7 +106,7 @@ public void get(byte[] dest, int offset, int length) { int toRead = Math.min(length - moved, currentBuffer.remaining()); currentBuffer.get(dest, offset + moved, toRead); moved += toRead; - updateCurrentBuffer(); + updateCurrentBufferIfNeeded(); } _pos += moved; } @@ -147,11 +147,15 @@ public byte get() { } byte r = currentBuffer.get(); _pos += 1; - updateCurrentBuffer(); + updateCurrentBufferIfNeeded(); return r; } - private void updateCurrentBuffer() { + /** + * If we've read to the end of the current buffer, move on to the next one. Safe to call + * even if we haven't moved to the next buffer + */ + private void updateCurrentBufferIfNeeded() { while (currentBuffer != null && !currentBuffer.hasRemaining()) { currentBufferIdx += 1; currentBuffer = currentBufferIdx < underlying.length ? underlying[currentBufferIdx] : null; From b77bbe28e109db72775a592e99572c039b377517 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Mon, 1 Jun 2015 12:09:45 -0500 Subject: [PATCH 77/97] style --- .../buffer/LargeByteBufferInputStreamSuite.scala | 10 ++++++---- .../buffer/LargeByteBufferOutputStreamSuite.scala | 6 ++++-- .../util/io/ByteArrayChunkOutputStreamSuite.scala | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala b/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala index 78e06c0381c78..1fa57ee7e77d9 100644 --- a/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala @@ -16,16 +16,18 @@ */ package org.apache.spark.network.buffer -import java.io.{FileInputStream, FileOutputStream, OutputStream, File} +import java.io.{File, FileInputStream, FileOutputStream, OutputStream} import java.nio.channels.FileChannel.MapMode import org.junit.Assert._ -import org.scalatest.{FunSuite, Matchers} +import org.scalatest.Matchers -class LargeByteBufferInputStreamSuite extends FunSuite with Matchers { +import org.apache.spark.SparkFunSuite + +class LargeByteBufferInputStreamSuite extends SparkFunSuite with Matchers { test("read from large mapped file") { - val testFile = File.createTempFile("large-buffer-input-stream-test",".bin") + val testFile = File.createTempFile("large-buffer-input-stream-test", ".bin") try { val out: OutputStream = new FileOutputStream(testFile) diff --git a/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferOutputStreamSuite.scala b/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferOutputStreamSuite.scala index a55ef03480436..72c98b7feacab 100644 --- 
a/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferOutputStreamSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferOutputStreamSuite.scala @@ -18,9 +18,11 @@ package org.apache.spark.network.buffer import scala.util.Random -import org.scalatest.{FunSuite, Matchers} +import org.scalatest.Matchers -class LargeByteBufferOutputStreamSuite extends FunSuite with Matchers { +import org.apache.spark.SparkFunSuite + +class LargeByteBufferOutputStreamSuite extends SparkFunSuite with Matchers { test("merged buffers for < 2GB") { val out = new LargeByteBufferOutputStream(10) diff --git a/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala b/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala index a177365af9689..eaea83689588a 100644 --- a/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala @@ -119,7 +119,7 @@ class ByteArrayChunkOutputStreamSuite extends SparkFunSuite { } { withClue(s"start = $start; end = $end") { try { - assert(o.slice(start,end).toSeq === ref.slice(start,end)) + assert(o.slice(start, end).toSeq === ref.slice(start, end)) } catch { case ex => fail(ex) } From 6c2a115b970d31400cd77dc8f0236555e382a1af Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Mon, 1 Jun 2015 23:46:30 -0500 Subject: [PATCH 78/97] add tests that buffers position is independent from underyling bytebufs; some cleanup --- .../buffer/WrappedLargeByteBuffer.java | 7 ++--- .../buffer/WrappedLargeByteBufferSuite.java | 27 ++++++++++++++++++- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java index 93c5c7ac2a37d..8c803fae86d12 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java @@ -211,11 +211,8 @@ public long remaining() { @Override public WrappedLargeByteBuffer duplicate() { - ByteBuffer[] duplicates = new ByteBuffer[underlying.length]; - for (int i = 0; i < underlying.length; i++) { - duplicates[i] = underlying[i].duplicate(); - } - WrappedLargeByteBuffer dup = new WrappedLargeByteBuffer(duplicates, subBufferSize); + // the constructor will duplicate the underlying buffers for us + WrappedLargeByteBuffer dup = new WrappedLargeByteBuffer(underlying, subBufferSize); dup.skip(position()); return dup; } diff --git a/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java b/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java index 5de3f68fe903b..2943705c40ecd 100644 --- a/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java +++ b/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java @@ -216,6 +216,13 @@ public void duplicate() { assertEquals(500 - initialPosition, dup.remaining()); assertConsistent(buf); assertConsistent(dup); + + // check positions of both buffers are independent + buf.skip(20); + assertEquals(initialPosition + 20, buf.position()); + assertEquals(initialPosition, dup.position()); + assertConsistent(buf); + assertConsistent(dup); } } @@ -224,6 +231,19 @@ public void 
testRequireAtLeastOneBuffer() { new WrappedLargeByteBuffer( new ByteBuffer[0]); } + @Test + public void positionIndependentOfInitialBuffers() { + ByteBuffer[] byteBufs = testDataBuf().underlying; + byteBufs[0].position(50); + for (int initialPosition: new int[]{0,20, 400}) { + WrappedLargeByteBuffer buf = new WrappedLargeByteBuffer(byteBufs, 50); + assertEquals(0L, buf.position()); + assertEquals(50, byteBufs[0].position()); + buf.skip(initialPosition); + assertEquals(initialPosition, buf.position()); + assertEquals(50, byteBufs[0].position()); + } + } private void assertConsistent(WrappedLargeByteBuffer buffer) { long pos = buffer.position(); @@ -245,7 +265,12 @@ private void assertConsistent(WrappedLargeByteBuffer buffer) { } } - private void assertSubArrayEquals(byte[] exp, int expOffset, byte[] act, int actOffset, int length) { + private void assertSubArrayEquals( + byte[] exp, + int expOffset, + byte[] act, + int actOffset, + int length) { byte[] expCopy = new byte[length]; byte[] actCopy = new byte[length]; System.arraycopy(exp, expOffset, expCopy, 0, length); From a9616e40e416f67f9a65f2fcf46e20c0510f7513 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Tue, 2 Jun 2015 14:38:36 -0500 Subject: [PATCH 79/97] better variable name; more chunks in tests --- .../apache/spark/util/io/ByteArrayChunkOutputStream.scala | 6 +++--- .../spark/util/io/ByteArrayChunkOutputStreamSuite.scala | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala b/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala index 907d2cbb7537e..43f2b30c9067e 100644 --- a/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala @@ -112,11 +112,11 @@ class ByteArrayChunkOutputStream(chunkSize: Int) extends OutputStream { var foundStart = false val result = new Array[Byte](length) while (!foundStart) { - val nextSize = chunkStart + chunks(chunkIdx).size - if (nextSize > start) { + val nextChunkStart = chunkStart + chunks(chunkIdx).size + if (nextChunkStart > start) { foundStart = true } else { - chunkStart = nextSize + chunkStart = nextChunkStart chunkIdx += 1 } } diff --git a/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala b/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala index eaea83689588a..e77e17855d40a 100644 --- a/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala @@ -110,7 +110,7 @@ class ByteArrayChunkOutputStreamSuite extends SparkFunSuite { test("slice") { val ref = new Array[Byte](30) Random.nextBytes(ref) - val o = new ByteArrayChunkOutputStream(10) + val o = new ByteArrayChunkOutputStream(5) o.write(ref) for { From 0250ac557ba5b5e0fba3f0c2487437c4f5b42c34 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Tue, 2 Jun 2015 14:51:25 -0500 Subject: [PATCH 80/97] private, @VisibleForTesting --- .../network/buffer/LargeByteBufferInputStream.java | 4 ++-- .../buffer/LargeByteBufferOutputStream.java | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java index a313951990e81..af4e0c471dd47 100644 --- 
a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java +++ b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java @@ -26,8 +26,8 @@ */ public class LargeByteBufferInputStream extends InputStream { - LargeByteBuffer buffer; - final boolean dispose; + private LargeByteBuffer buffer; + private final boolean dispose; public LargeByteBufferInputStream(LargeByteBuffer buffer, boolean dispose) { this.buffer = buffer; diff --git a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java index 739533992ecac..e6b8dc582cc47 100644 --- a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java +++ b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java @@ -20,15 +20,14 @@ import java.io.OutputStream; import java.nio.ByteBuffer; +import com.google.common.annotations.VisibleForTesting; import org.apache.spark.util.io.ByteArrayChunkOutputStream; public class LargeByteBufferOutputStream extends OutputStream { - private final int chunkSize; - final ByteArrayChunkOutputStream output; + private final ByteArrayChunkOutputStream output; public LargeByteBufferOutputStream(int chunkSize) { - this.chunkSize = chunkSize; output = new ByteArrayChunkOutputStream(chunkSize); } @@ -44,10 +43,11 @@ public LargeByteBuffer largeBuffer() { return largeBuffer(LargeByteBufferHelper.MAX_CHUNK_SIZE); } -/** - * exposed for testing. You don't really ever want to call this method -- the returned - * buffer will not implement {{asByteBuffer}} correctly. - */ + /** + * exposed for testing. You don't really ever want to call this method -- the returned + * buffer will not implement {{asByteBuffer}} correctly. 
+ */ + @VisibleForTesting LargeByteBuffer largeBuffer(int maxChunk) { long totalSize = output.size(); int chunksNeeded = (int) ((totalSize + maxChunk - 1) / maxChunk); From b3b6363f49e4d581e6caf75738d1d5a8f4569763 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Tue, 2 Jun 2015 14:51:42 -0500 Subject: [PATCH 81/97] fix newlines --- .../apache/spark/network/buffer/LargeByteBufferInputStream.java | 1 - .../spark/network/buffer/LargeByteBufferOutputStream.java | 2 +- .../org/apache/spark/util/io/ByteArrayChunkOutputStream.scala | 2 -- .../spark/network/buffer/LargeByteBufferInputStreamSuite.scala | 2 -- .../apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala | 1 - .../java/org/apache/spark/network/buffer/LargeByteBuffer.java | 1 - .../org/apache/spark/network/buffer/LargeByteBufferHelper.java | 2 -- 7 files changed, 1 insertion(+), 10 deletions(-) diff --git a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java index af4e0c471dd47..e57a7e0ad5ab6 100644 --- a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java +++ b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java @@ -38,7 +38,6 @@ public LargeByteBufferInputStream(LargeByteBuffer buffer) { this(buffer, false); } - public int read() { if (buffer == null || buffer.remaining() == 0) { return -1; diff --git a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java index e6b8dc582cc47..b03de5fe810c3 100644 --- a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java +++ b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java @@ -66,4 +66,4 @@ LargeByteBuffer largeBuffer(int maxChunk) { public void close() throws IOException { output.close(); } -} \ No newline at end of file +} diff --git a/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala b/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala index 43f2b30c9067e..8101047618f2b 100644 --- a/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala @@ -21,7 +21,6 @@ import java.io.OutputStream import scala.collection.mutable.ArrayBuffer - /** * An OutputStream that writes to fixed-size chunks of byte arrays. 
* @@ -135,5 +134,4 @@ class ByteArrayChunkOutputStream(chunkSize: Int) extends OutputStream { result } - } diff --git a/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala b/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala index 1fa57ee7e77d9..a20a3d48837e4 100644 --- a/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala @@ -88,8 +88,6 @@ class LargeByteBufferInputStreamSuite extends SparkFunSuite with Matchers { (0 until 200).foreach{idx => arr(idx) should be (idx.toByte) } - } - } diff --git a/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala b/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala index e77e17855d40a..38bc24528f3a7 100644 --- a/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/io/ByteArrayChunkOutputStreamSuite.scala @@ -21,7 +21,6 @@ import scala.util.Random import org.apache.spark.SparkFunSuite - class ByteArrayChunkOutputStreamSuite extends SparkFunSuite { test("empty output") { diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java index fe03ec0a7e64c..84f8e0e60877b 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java @@ -91,7 +91,6 @@ public interface LargeByteBuffer { */ public LargeByteBuffer duplicate(); - public long remaining(); /** diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java index cd85f3365df57..4941ed6559ea9 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java @@ -67,7 +67,6 @@ static LargeByteBuffer allocate(long size, int maxChunk) { return new WrappedLargeByteBuffer(chunks, maxChunk); } - public static LargeByteBuffer mapFile( FileChannel channel, FileChannel.MapMode mode, @@ -86,5 +85,4 @@ public static LargeByteBuffer mapFile( return new WrappedLargeByteBuffer(chunks); } - } From 112c49e7074dc3a53f7425f6b93fbff7608f6a23 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Tue, 2 Jun 2015 15:13:39 -0500 Subject: [PATCH 82/97] style --- .../network/buffer/LargeByteBufferInputStreamSuite.scala | 4 ++-- .../apache/spark/network/buffer/WrappedLargeByteBuffer.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala b/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala index a20a3d48837e4..d8e48db32f78c 100644 --- a/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferInputStreamSuite.scala @@ -74,7 +74,7 @@ class LargeByteBufferInputStreamSuite extends SparkFunSuite with Matchers { test("io stream roundtrip") { val out = new LargeByteBufferOutputStream(128) - (0 until 200).foreach{idx => out.write(idx)} + (0 until 200).foreach { 
idx => out.write(idx) } out.close() val lb = out.largeBuffer(128) @@ -85,7 +85,7 @@ class LargeByteBufferInputStreamSuite extends SparkFunSuite with Matchers { val arr = new Array[Byte](500) val nRead = rawIn.read(arr, 0, 500) nRead should be (200) - (0 until 200).foreach{idx => + (0 until 200).foreach { idx => arr(idx) should be (idx.toByte) } } diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java index 8c803fae86d12..ba1ff3d4d3467 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java @@ -84,7 +84,7 @@ public WrappedLargeByteBuffer(ByteBuffer[] underlying) { ByteBuffer b = underlying[i].duplicate(); b.position(0); this.underlying[i] = b; - if (i != underlying.length -1 && b.capacity() != subBufferSize) { + if (i != underlying.length - 1 && b.capacity() != subBufferSize) { throw new IllegalArgumentException("All buffers, except for the final one, must have " + "size = " + subBufferSize); } From 8ec2c5c25ff7439c6ecccd4967adb0c03b616ccb Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Tue, 2 Jun 2015 16:10:34 -0500 Subject: [PATCH 83/97] comment explaining check on subBufferSize --- .../apache/spark/network/buffer/WrappedLargeByteBuffer.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java index ba1ff3d4d3467..dbbbc0d0d7a9d 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java @@ -85,6 +85,11 @@ public WrappedLargeByteBuffer(ByteBuffer[] underlying) { b.position(0); this.underlying[i] = b; if (i != underlying.length - 1 && b.capacity() != subBufferSize) { + // this is to make sure that asByteBuffer() is implemented correctly. We need the first + // subBuffer to be LargeByteBufferHelper.MAX_CHUNK_SIZE. We don't *have* to check all the + // subBuffers, but I figure its makes it more consistent this way. (Also, this check + // really only serves a purpose when using the public constructor -- subBufferSize is a + // a parameter just to allow small tests.) 
throw new IllegalArgumentException("All buffers, except for the final one, must have " + "size = " + subBufferSize); } From d0605a18b3f022df9d139a27e65f9b887924536a Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 3 Jun 2015 14:28:51 -0500 Subject: [PATCH 84/97] get() return this; another version of get() which just takes dest array --- .../spark/network/buffer/LargeByteBuffer.java | 21 ++++++++++--- .../buffer/WrappedLargeByteBuffer.java | 9 +++++- .../buffer/WrappedLargeByteBufferSuite.java | 31 +++++++++++++++++-- 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java index 84f8e0e60877b..beeb007e2197e 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java @@ -47,15 +47,28 @@ public interface LargeByteBuffer { public byte get(); + + /** + * Bulk copy data from this buffer into the given array. First checks there is sufficient + * data in this buffer; if not, throws a {@link java.nio.BufferUnderflowException}. Behaves + * in the exact same way as get(dst, 0, dst.length) + * + * @param dst the destination array + * @return this buffer + */ + public LargeByteBuffer get(byte[] dst); + /** * Bulk copy data from this buffer into the given array. First checks there is sufficient * data in this buffer; if not, throws a {@link java.nio.BufferUnderflowException}. * - * @param dst - * @param offset - * @param length + * @param dst the destination array + * @param offset the offset within the destination array to write to + * @param length how many bytes to write + * @return this buffer */ - public void get(byte[] dst, int offset, int length); + public LargeByteBuffer get(byte[] dst, int offset, int length); + public LargeByteBuffer rewind(); diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java index dbbbc0d0d7a9d..58a621249386f 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java @@ -101,8 +101,14 @@ public WrappedLargeByteBuffer(ByteBuffer[] underlying) { size = sum; } + @Override - public void get(byte[] dest, int offset, int length) { + public WrappedLargeByteBuffer get(byte[] dest) { + return get(dest, 0, dest.length); + } + + @Override + public WrappedLargeByteBuffer get(byte[] dest, int offset, int length) { if (length > remaining()) { throw new BufferUnderflowException(); } @@ -114,6 +120,7 @@ public void get(byte[] dest, int offset, int length) { updateCurrentBufferIfNeeded(); } _pos += moved; + return this; } @Override diff --git a/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java b/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java index 2943705c40ecd..3cbd2d8710304 100644 --- a/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java +++ b/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java @@ -138,6 +138,16 @@ public void skipAndGet() { assertConsistent(b); b.skip(position); assertConsistent(b); + + int copy2Length = Math.min(20, 500 - 
position); + byte[] copy2 = new byte[copy2Length]; + b.rewind(); + b.skip(position); + b.get(copy2); + assertSubArrayEquals(data, position, copy2, 0, copy2Length); + + b.rewind(); + b.skip(position); } } @@ -146,10 +156,18 @@ public void get() { WrappedLargeByteBuffer b = testDataBuf(); byte[] into = new byte[500]; for (int[] offsetAndLength: new int[][]{{0, 200}, {10,10}, {300, 20}, {30, 100}}) { + int offset = offsetAndLength[0]; + int length = offsetAndLength[1]; + b.rewind(); + b.get(into, offset, length); + assertConsistent(b); + assertSubArrayEquals(data, 0, into, offset, length); + + byte[] into2 = new byte[length]; b.rewind(); - b.get(into, offsetAndLength[0], offsetAndLength[1]); + b.get(into2); assertConsistent(b); - assertSubArrayEquals(data, 0, into, offsetAndLength[0], offsetAndLength[1]); + assertSubArrayEquals(data, 0, into2, 0, length); } try { @@ -159,6 +177,15 @@ public void get() { fail("expected exception"); } catch (BufferUnderflowException bue) { } + + try { + b.rewind(); + b.skip(1); + b.get(into); + fail("expected exception"); + } catch (BufferUnderflowException bue) { + } + b.rewind(); b.skip(495); assertEquals(data[495], b.get()); From b6620d0a5516519b073b49e6f191ff929c2a5152 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 3 Jun 2015 14:29:02 -0500 Subject: [PATCH 85/97] docs on LargeBBOutputStream --- .../buffer/LargeByteBufferOutputStream.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java index b03de5fe810c3..e8c8577f361cc 100644 --- a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java +++ b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java @@ -23,10 +23,22 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.spark.util.io.ByteArrayChunkOutputStream; +/** + * An OutputStream that will write all data to memory. It supports writing over 2GB + * and the resulting data can be retrieved as a + * {@link org.apache.spark.network.buffer.LargeByteBuffer} + */ public class LargeByteBufferOutputStream extends OutputStream { private final ByteArrayChunkOutputStream output; + /** + * Create a new LargeByteBufferOutputStream which writes to byte arrays of the given size. Note + * that chunkSize has no effect on the LargeByteBuffer returned by + * {@link #largeBuffer()}. + * + * @param chunkSize size of the byte arrays used by this output stream, in bytes + */ public LargeByteBufferOutputStream(int chunkSize) { output = new ByteArrayChunkOutputStream(chunkSize); } @@ -39,6 +51,13 @@ public void write(byte[] bytes, int off, int len) { output.write(bytes, off, len); } + /** + * Get all of the data written to the stream so far as a LargeByteBuffer. This method can be + * called multiple times, and each returned buffer will be completely independent (the data + * is copied for each returned buffer). It does not close the stream. 
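
A minimal Scala sketch of the behaviour described in the javadoc above (the stream stays open, and each call to largeBuffer() returns an independent copy of everything written so far); illustrative only, mirroring the suites in this series:

import org.apache.spark.network.buffer.LargeByteBufferOutputStream

val out = new LargeByteBufferOutputStream(128)  // back the stream with 128-byte chunks
out.write(Array.fill[Byte](300)(1), 0, 300)

val first = out.largeBuffer()   // snapshot of the 300 bytes written so far
out.write(42)
val second = out.largeBuffer()  // independent snapshot, now 301 bytes

assert(first.size() == 300)
assert(second.size() == 301)
out.close()
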
+ * + * @return the data written to the stream as a LargeByteBuffer + */ public LargeByteBuffer largeBuffer() { return largeBuffer(LargeByteBufferHelper.MAX_CHUNK_SIZE); } From 54d09af36e66a0fb215a19bf6160d625cbabf8c8 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 3 Jun 2015 14:40:03 -0500 Subject: [PATCH 86/97] @Override --- .../spark/network/buffer/LargeByteBufferInputStream.java | 5 +++++ .../spark/network/buffer/LargeByteBufferOutputStream.java | 3 +++ 2 files changed, 8 insertions(+) diff --git a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java index e57a7e0ad5ab6..a4b1e2571af56 100644 --- a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java +++ b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferInputStream.java @@ -38,6 +38,7 @@ public LargeByteBufferInputStream(LargeByteBuffer buffer) { this(buffer, false); } + @Override public int read() { if (buffer == null || buffer.remaining() == 0) { return -1; @@ -46,10 +47,12 @@ public int read() { } } + @Override public int read(byte[] dest) { return read(dest, 0, dest.length); } + @Override public int read(byte[] dest, int offset, int length) { if (buffer == null || buffer.remaining() == 0) { return -1; @@ -60,6 +63,7 @@ public int read(byte[] dest, int offset, int length) { } } + @Override public long skip(long toSkip) { if (buffer != null) { return buffer.skip(toSkip); @@ -75,6 +79,7 @@ public long skip(long toSkip) { /** * Clean up the buffer, and potentially dispose of it */ + @Override public void close() { if (buffer != null) { if (dispose) { diff --git a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java index e8c8577f361cc..975de7b10f65c 100644 --- a/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java +++ b/core/src/main/java/org/apache/spark/network/buffer/LargeByteBufferOutputStream.java @@ -43,10 +43,12 @@ public LargeByteBufferOutputStream(int chunkSize) { output = new ByteArrayChunkOutputStream(chunkSize); } + @Override public void write(int b) { output.write(b); } + @Override public void write(byte[] bytes, int off, int len) { output.write(bytes, off, len); } @@ -82,6 +84,7 @@ LargeByteBuffer largeBuffer(int maxChunk) { return new WrappedLargeByteBuffer(chunks, maxChunk); } + @Override public void close() throws IOException { output.close(); } From 31244c80b14685ebc88fcc43d99f06c3fbfe7717 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 3 Jun 2015 16:45:03 -0500 Subject: [PATCH 87/97] fix comment --- .../org/apache/spark/util/io/ByteArrayChunkOutputStream.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala b/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala index 8101047618f2b..d48eb2f330321 100644 --- a/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/io/ByteArrayChunkOutputStream.scala @@ -97,7 +97,7 @@ class ByteArrayChunkOutputStream(chunkSize: Int) extends OutputStream { /** * Get a copy of the data between the two endpoints, start <= idx < until. Always returns - * an array of size (until - start). Throws an IllegalArgumentException if + * an array of size (until - start). 
Throws an IllegalArgumentException unless * 0 <= start <= until <= size */ def slice(start: Long, until: Long): Array[Byte] = { From 6cf204f57e976bef4e42431366dbc18f90f54bb5 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 10 Jun 2015 15:58:55 -0500 Subject: [PATCH 88/97] fix mistakes w/ merge --- .../org/apache/spark/storage/BlockManager.scala | 2 +- .../apache/spark/storage/ExternalBlockManager.scala | 6 ++++-- .../apache/spark/storage/ExternalBlockStore.scala | 9 +++++---- .../apache/spark/storage/TachyonBlockManager.scala | 12 ++++++++---- .../streaming/rdd/WriteAheadLogBackedBlockRDD.scala | 6 ++++-- .../streaming/receiver/ReceivedBlockHandler.scala | 2 +- .../spark/streaming/ReceivedBlockHandlerSuite.scala | 2 +- 7 files changed, 24 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 0891624d55df5..6721c72baa62f 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -1220,7 +1220,7 @@ private[spark] class BlockManager( bytes: LargeByteBuffer, serializer: Serializer = defaultSerializer): Iterator[Any] = { bytes.rewind() - dataDeserializeStream(blockId, LargeByteBufferInputStream(bytes, true), serializer) + dataDeserializeStream(blockId, new LargeByteBufferInputStream(bytes, true), serializer) } /** diff --git a/core/src/main/scala/org/apache/spark/storage/ExternalBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/ExternalBlockManager.scala index f39325a12d244..d71253539e914 100644 --- a/core/src/main/scala/org/apache/spark/storage/ExternalBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/ExternalBlockManager.scala @@ -19,6 +19,8 @@ package org.apache.spark.storage import java.nio.ByteBuffer +import org.apache.spark.network.buffer.LargeByteBuffer + /** * An abstract class that the concrete external block manager has to inherit. * The class has to have a no-argument constructor, and will be initialized by init, @@ -75,7 +77,7 @@ private[spark] abstract class ExternalBlockManager { * * @throws java.io.IOException if there is any file system failure in putting the block. */ - def putBytes(blockId: BlockId, bytes: ByteBuffer): Unit + def putBytes(blockId: BlockId, bytes: LargeByteBuffer): Unit def putValues(blockId: BlockId, values: Iterator[_]): Unit = { val bytes = blockManager.dataSerialize(blockId, values) @@ -89,7 +91,7 @@ private[spark] abstract class ExternalBlockManager { * * @throws java.io.IOException if there is any file system failure in getting the block. */ - def getBytes(blockId: BlockId): Option[ByteBuffer] + def getBytes(blockId: BlockId): Option[LargeByteBuffer] /** * Retrieve the block data. 
diff --git a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala index 291394ed34816..000c8788f9452 100644 --- a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala @@ -22,6 +22,7 @@ import java.nio.ByteBuffer import scala.util.control.NonFatal import org.apache.spark.Logging +import org.apache.spark.network.buffer.LargeByteBuffer import org.apache.spark.util.Utils @@ -47,7 +48,7 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: } } - override def putBytes(blockId: BlockId, bytes: ByteBuffer, level: StorageLevel): PutResult = { + override def putBytes(blockId: BlockId, bytes: LargeByteBuffer, level: StorageLevel): PutResult = { putIntoExternalBlockStore(blockId, bytes, returnValues = true) } @@ -100,7 +101,7 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: private def putIntoExternalBlockStore( blockId: BlockId, - bytes: ByteBuffer, + bytes: LargeByteBuffer, returnValues: Boolean): PutResult = { logTrace(s"Attempting to put block $blockId into ExternalBlockStore") // we should never hit here if externalBlockManager is None. Handle it anyway for safety. @@ -110,7 +111,7 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: val byteBuffer = bytes.duplicate() byteBuffer.rewind() externalBlockManager.get.putBytes(blockId, byteBuffer) - val size = bytes.limit() + val size = bytes.size() val data = if (returnValues) { Right(bytes) } else { @@ -152,7 +153,7 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId: } } - override def getBytes(blockId: BlockId): Option[ByteBuffer] = { + override def getBytes(blockId: BlockId): Option[LargeByteBuffer] = { try { externalBlockManager.flatMap(_.getBytes(blockId)) } catch { diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala index b53c86e89a273..68ea35f45db58 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala @@ -29,6 +29,7 @@ import com.google.common.io.ByteStreams import tachyon.client.{ReadType, WriteType, TachyonFS, TachyonFile} import tachyon.TachyonURI +import org.apache.spark.network.buffer.{BufferTooLargeException, LargeByteBufferHelper, LargeByteBuffer} import org.apache.spark.{SparkException, SparkConf, Logging} import org.apache.spark.executor.ExecutorExitCode import org.apache.spark.util.Utils @@ -94,12 +95,14 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log fileExists(file) } - override def putBytes(blockId: BlockId, bytes: ByteBuffer): Unit = { + override def putBytes(blockId: BlockId, bytes: LargeByteBuffer): Unit = { val file = getFile(blockId) val os = file.getOutStream(WriteType.TRY_CACHE) try { - os.write(bytes.array()) + os.write(bytes.asByteBuffer().array()) } catch { + case tooLarge: BufferTooLargeException => + throw new TachyonBlockSizeLimitException(tooLarge) case NonFatal(e) => logWarning(s"Failed to put bytes of block $blockId into Tachyon", e) os.cancel() @@ -122,7 +125,7 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log } } - override def getBytes(blockId: BlockId): Option[ByteBuffer] = { + override def getBytes(blockId: BlockId): 
Option[LargeByteBuffer] = { val file = getFile(blockId) if (file == null || file.getLocationHosts.size == 0) { return None @@ -130,9 +133,10 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log val is = file.getInStream(ReadType.CACHE) try { val size = file.length + //TODO get tachyon to support large blocks val bs = new Array[Byte](size.asInstanceOf[Int]) ByteStreams.readFully(is, bs) - Some(ByteBuffer.wrap(bs)) + Some(LargeByteBufferHelper.asLargeByteBuffer(bs)) } catch { case NonFatal(e) => logWarning(s"Failed to get bytes of block $blockId from Tachyon", e) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala b/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala index 584332fb4454a..c87304feba1a7 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala @@ -158,11 +158,13 @@ class WriteAheadLogBackedBlockRDD[T: ClassTag]( logInfo(s"Read partition data of $this from write ahead log, record handle " + partition.walRecordHandle) if (storeInBlockManager) { - blockManager.putBytes(blockId, dataRead, storageLevel) + blockManager.putBytes(blockId, LargeByteBufferHelper.asLargeByteBuffer(dataRead), + storageLevel) logDebug(s"Stored partition data of $this into block manager with level $storageLevel") dataRead.rewind() } - blockManager.dataDeserialize(blockId, dataRead).asInstanceOf[Iterator[T]] + blockManager.dataDeserialize(blockId, LargeByteBufferHelper.asLargeByteBuffer(dataRead)) + .asInstanceOf[Iterator[T]] } if (partition.isBlockIdValid) { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala index 14f43fcffe2fb..cf95721cf2ace 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala @@ -19,7 +19,7 @@ package org.apache.spark.streaming.receiver import scala.concurrent.{Await, ExecutionContext, Future} -import scala.concurrent.{Await, ExecutionContext, Future} +import scala.concurrent.duration._ import scala.language.{existentials, postfixOps} import org.apache.hadoop.conf.Configuration diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala index f7d8f734bbdca..37b988de403fa 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala @@ -142,7 +142,7 @@ class ReceivedBlockHandlerSuite val loggedData = walSegments.flatMap { walSegment => val fileSegment = walSegment.asInstanceOf[FileBasedWriteAheadLogSegment] val reader = new FileBasedWriteAheadLogRandomReader(fileSegment.path, hadoopConf) - val bytes = LargeByteBufferHelper.asLargeByteBuffer(reader.read(segment)) + val bytes = LargeByteBufferHelper.asLargeByteBuffer(reader.read(fileSegment)) reader.close() blockManager.dataDeserialize(generateBlockId(), bytes).toList } From 160a4585c52c25127fe751d4ec2d1fd486c895f4 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 10 Jun 2015 16:14:36 -0500 Subject: [PATCH 89/97] more merge fixes --- 
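
Note on the preceding hunks: they follow the bridging pattern used throughout this series. Code that still produces a single java.nio.ByteBuffer wraps it with LargeByteBufferHelper.asLargeByteBuffer(...), while code that must hand a LargeByteBuffer to a single-buffer consumer calls asByteBuffer() and converts BufferTooLargeException into a more specific size-limit error. A Scala sketch of the pattern, using only the buffer API shown in these patches (the helper names here are illustrative, not part of the change):

import java.nio.ByteBuffer
import org.apache.spark.network.buffer.{BufferTooLargeException, LargeByteBuffer, LargeByteBufferHelper}

// Wrapping: a legacy ByteBuffer (or byte array) becomes a LargeByteBuffer.
def toLarge(legacy: ByteBuffer): LargeByteBuffer =
  LargeByteBufferHelper.asLargeByteBuffer(legacy)

// Unwrapping: only works when the data fits in one chunk; otherwise surface
// a clearer error, as the Tachyon and replication code paths above do.
def toSingle(buf: LargeByteBuffer): ByteBuffer =
  try {
    buf.asByteBuffer()
  } catch {
    case e: BufferTooLargeException =>
      throw new UnsupportedOperationException(
        s"block of ${buf.size()} bytes does not fit in a single ByteBuffer", e)
  }
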
.../spark/broadcast/TorrentBroadcast.scala | 5 +- .../apache/spark/storage/BlockManager.scala | 4 +- .../util/LargeByteBufferInputStream.scala | 93 ------------------- .../util/LargeByteBufferOutputStream.scala | 70 -------------- .../spark/storage/BlockManagerSuite.scala | 9 +- .../LargeByteBufferInputStreamSuite.scala | 85 ----------------- .../LargeByteBufferOutputStreamSuite.scala | 67 ------------- 7 files changed, 9 insertions(+), 324 deletions(-) delete mode 100644 core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala delete mode 100644 core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala delete mode 100644 core/src/test/scala/org/apache/spark/util/LargeByteBufferInputStreamSuite.scala delete mode 100644 core/src/test/scala/org/apache/spark/util/LargeByteBufferOutputStreamSuite.scala diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index 3c5b778aac4e5..8a516709dd2b8 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -19,17 +19,16 @@ package org.apache.spark.broadcast import java.io._ -import org.apache.spark.network.buffer.{LargeByteBufferHelper, LargeByteBuffer} - import scala.collection.JavaConversions.asJavaEnumeration import scala.reflect.ClassTag import scala.util.Random import org.apache.spark.{Logging, SparkConf, SparkEnv, SparkException} import org.apache.spark.io.CompressionCodec +import org.apache.spark.network.buffer.{LargeByteBufferInputStream, LargeByteBufferHelper, LargeByteBuffer} import org.apache.spark.serializer.Serializer import org.apache.spark.storage.{BroadcastBlockId, StorageLevel} -import org.apache.spark.util.{LargeByteBufferInputStream, Utils} +import org.apache.spark.util.Utils import org.apache.spark.util.io.ByteArrayChunkOutputStream /** diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 6721c72baa62f..dd2585c12e10d 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -1206,7 +1206,7 @@ private[spark] class BlockManager( blockId: BlockId, values: Iterator[Any], serializer: Serializer = defaultSerializer): LargeByteBuffer = { - val byteStream = new LargeByteBufferOutputStream() + val byteStream = new LargeByteBufferOutputStream(65536) dataSerializeStream(blockId, byteStream, values, serializer) byteStream.largeBuffer } @@ -1311,7 +1311,7 @@ object BlockSizeLimitException { def sizeMsg(cause: BufferTooLargeException): String = { s"that was ${Utils.bytesToString(cause.actualSize)} (too " + s"large by ${Utils.bytesToString(cause.extra)} / " + - s"${cause.actualSize.toDouble / LargeByteBufferHelper.MAX_CHUNK}x)." + s"${cause.actualSize.toDouble / LargeByteBufferHelper.MAX_CHUNK_SIZE}x)." } def sizeMsgAndAdvice(cause: BufferTooLargeException): String = { diff --git a/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala b/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala deleted file mode 100644 index 69bc4902d0aac..0000000000000 --- a/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.util - -import java.io.InputStream - -import org.apache.spark.network.buffer.LargeByteBuffer -import org.apache.spark.storage.BlockManager - -/** - * Reads data from a LargeByteBuffer, and optionally cleans it up using buffer.dispose() - * at the end of the stream (e.g. to close a memory-mapped file). - */ -private[spark] -class LargeByteBufferInputStream(private var buffer: LargeByteBuffer, dispose: Boolean = false) - extends InputStream { - - override def read(): Int = { - if (buffer == null || buffer.remaining() == 0) { - cleanUp() - -1 - } else { - val r = buffer.get() & 0xFF - if (buffer.remaining() == 0) { - cleanUp() - } - r - } - } - - override def read(dest: Array[Byte]): Int = { - read(dest, 0, dest.length) - } - - override def read(dest: Array[Byte], offset: Int, length: Int): Int = { - if (buffer == null || buffer.remaining() == 0) { - cleanUp() - -1 - } else { - val amountToGet = math.min(buffer.remaining(), length).toInt - buffer.get(dest, offset, amountToGet) - // XXX I assume its not intentional that the stream is only disposed when you try to read - // *past* the end in ByteBufferInputStream, so we do a check here - if (buffer.remaining() == 0) { - cleanUp() - } - amountToGet - } - } - - override def skip(bytes: Long): Long = { - if (buffer != null) { - val skipped = buffer.skip(bytes) - if (buffer.remaining() == 0) { - cleanUp() - } - skipped - } else { - 0L - } - } - - // only for testing - private[util] var disposed = false - - /** - * Clean up the buffer, and potentially dispose of it - */ - private def cleanUp() { - if (buffer != null) { - if (dispose) { - buffer.dispose() - disposed = true - } - buffer = null - } - } -} diff --git a/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala b/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala deleted file mode 100644 index 04e685262f38c..0000000000000 --- a/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.util - -import java.io.OutputStream -import java.nio.ByteBuffer - -import org.apache.spark.network.buffer.{LargeByteBufferHelper, WrappedLargeByteBuffer, LargeByteBuffer} -import org.apache.spark.util.io.ByteArrayChunkOutputStream - -private[spark] -class LargeByteBufferOutputStream(chunkSize: Int = 65536) - extends OutputStream { - - private[util] val output = new ByteArrayChunkOutputStream(chunkSize) - - private var _pos = 0 - - override def write(b: Int): Unit = { - output.write(b) - } - - override def write(bytes: Array[Byte], offs: Int, len: Int): Unit = { - output.write(bytes, offs, len) - _pos += len - } - - def largeBuffer: LargeByteBuffer = { - largeBuffer(LargeByteBufferHelper.MAX_CHUNK) - } - - // exposed for testing - private[util] def largeBuffer(maxChunk: Int): LargeByteBuffer = { - // LargeByteBuffer is supposed to make a "best effort" to get all the data - // in one nio.ByteBuffer, so we want to try to merge the smaller chunks together - // as much as possible. This is necessary b/c there are a number of parts of spark that - // can only deal w/ one nio.ByteBuffer, and can't use a LargeByteBuffer yet. - val totalSize = output.size - val chunksNeeded = ((totalSize + maxChunk -1) / maxChunk).toInt - val chunks = new Array[Array[Byte]](chunksNeeded) - var remaining = totalSize - var pos = 0 - (0 until chunksNeeded).foreach{idx => - val nextSize = math.min(maxChunk, remaining).toInt - chunks(idx) = output.slice(pos, pos + nextSize) - pos += nextSize - remaining -= nextSize - } - new WrappedLargeByteBuffer(chunks.map{ByteBuffer.wrap}) - } - - override def close(): Unit = { - output.close() - } -} diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index df05063a99c1f..5e66e0d75a09d 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -830,10 +830,11 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE val notMapped = diskStoreNotMapped.getBytes(blockId).get.asInstanceOf[WrappedLargeByteBuffer] // Not possible to do isInstanceOf due to visibility of HeapByteBuffer - assert(notMapped.nioBuffers().get(0).getClass.getName.endsWith("HeapByteBuffer"), - "Expected HeapByteBuffer for un-mapped read") - assert(mapped.nioBuffers().get(0).isInstanceOf[MappedByteBuffer], - "Expected MappedByteBuffer for mapped read") + // TODO fix me +// assert(notMapped.nioBuffers().get(0).getClass.getName.endsWith("HeapByteBuffer"), +// "Expected HeapByteBuffer for un-mapped read") +// assert(mapped.nioBuffers().get(0).isInstanceOf[MappedByteBuffer], +// "Expected MappedByteBuffer for mapped read") def arrayFromByteBuffer(in: LargeByteBuffer): Array[Byte] = { val array = new Array[Byte](in.remaining().toInt) diff --git a/core/src/test/scala/org/apache/spark/util/LargeByteBufferInputStreamSuite.scala b/core/src/test/scala/org/apache/spark/util/LargeByteBufferInputStreamSuite.scala deleted file mode 100644 index 209fb0d343fd0..0000000000000 --- a/core/src/test/scala/org/apache/spark/util/LargeByteBufferInputStreamSuite.scala +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.util - -import java.io.{FileInputStream, FileOutputStream, OutputStream, File} -import java.nio.channels.FileChannel.MapMode - -import org.junit.Assert._ -import org.scalatest.{FunSuite, Matchers} - -import org.apache.spark.network.buffer.{LargeByteBufferHelper, WrappedLargeByteBuffer} - -class LargeByteBufferInputStreamSuite extends FunSuite with Matchers { - - test("read from large mapped file") { - val testFile = File.createTempFile("large-buffer-input-stream-test",".bin") - testFile.deleteOnExit() - - val out: OutputStream = new FileOutputStream(testFile) - val buffer: Array[Byte] = new Array[Byte](1 << 16) - val len: Long = 3L << 30 - assertTrue(len > Integer.MAX_VALUE) - (0 until buffer.length).foreach { idx => - buffer(idx) = idx.toByte - } - (0 until (len / buffer.length).toInt).foreach { idx => - out.write(buffer) - } - out.close - - val channel = new FileInputStream(testFile).getChannel - val buf = LargeByteBufferHelper.mapFile(channel, MapMode.READ_ONLY, 0, len) - val in = new LargeByteBufferInputStream(buf, dispose = true) - - val read = new Array[Byte](buffer.length) - (0 until (len / buffer.length).toInt).foreach { idx => - in.disposed should be (false) - in.read(read) should be (read.length) - (0 until buffer.length).foreach { arrIdx => - assertEquals(buffer(arrIdx), read(arrIdx)) - } - } - // XXX I assume its *not* intentional that the stream is only disposed when you try to read - // *past* the end in ByteBufferInputStream? - in.disposed should be (true) - in.read(read) should be (-1) - in.disposed should be (true) - } - - test("io stream roundtrip") { - - val out = new LargeByteBufferOutputStream(128) - (0 until 200).foreach{idx => out.write(idx)} - out.close() - - val lb = out.largeBuffer(128) - //just make sure that we test reading from multiple chunks - lb.asInstanceOf[WrappedLargeByteBuffer].underlying.size should be > 1 - - val rawIn = new LargeByteBufferInputStream(lb) - val arr = new Array[Byte](500) - val nRead = rawIn.read(arr, 0, 500) - nRead should be (200) - (0 until 200).foreach{idx => - arr(idx) should be (idx.toByte) - } - - } - - -} diff --git a/core/src/test/scala/org/apache/spark/util/LargeByteBufferOutputStreamSuite.scala b/core/src/test/scala/org/apache/spark/util/LargeByteBufferOutputStreamSuite.scala deleted file mode 100644 index b25dfc5bbaaac..0000000000000 --- a/core/src/test/scala/org/apache/spark/util/LargeByteBufferOutputStreamSuite.scala +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.util - -import scala.util.Random - -import org.scalatest.{FunSuite, Matchers} - -import org.apache.spark.network.buffer.WrappedLargeByteBuffer - -class LargeByteBufferOutputStreamSuite extends FunSuite with Matchers { - - test("merged buffers for < 2GB") { - val out = new LargeByteBufferOutputStream(10) - val bytes = new Array[Byte](100) - Random.nextBytes(bytes) - out.write(bytes) - - val buffer = out.largeBuffer - buffer.position() should be (0) - buffer.size() should be (100) - val nioBuffer = buffer.asByteBuffer() - nioBuffer.position() should be (0) - nioBuffer.capacity() should be (100) - nioBuffer.limit() should be (100) - - val read = new Array[Byte](100) - buffer.get(read, 0, 100) - read should be (bytes) - - buffer.rewind() - nioBuffer.get(read) - read should be (bytes) - } - - test("chunking") { - val out = new LargeByteBufferOutputStream(10) - val bytes = new Array[Byte](100) - Random.nextBytes(bytes) - out.write(bytes) - - (10 to 100 by 10).foreach{chunkSize => - val buffer = out.largeBuffer(chunkSize).asInstanceOf[WrappedLargeByteBuffer] - buffer.position() should be (0) - buffer.size() should be (100) - val read = new Array[Byte](100) - buffer.get(read, 0, 100) - read should be (bytes) - } - - } - -} From 040a4615063aef46ce214fab4bd8b0eea7003c89 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 19 Aug 2015 10:47:44 -0500 Subject: [PATCH 90/97] use random numbers for test --- .../buffer/LargeByteBufferHelperSuite.java | 34 ++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/network/common/src/test/java/org/apache/spark/network/buffer/LargeByteBufferHelperSuite.java b/network/common/src/test/java/org/apache/spark/network/buffer/LargeByteBufferHelperSuite.java index d46acf342a79b..9e636fc032928 100644 --- a/network/common/src/test/java/org/apache/spark/network/buffer/LargeByteBufferHelperSuite.java +++ b/network/common/src/test/java/org/apache/spark/network/buffer/LargeByteBufferHelperSuite.java @@ -18,6 +18,7 @@ import java.io.*; import java.nio.channels.FileChannel; +import java.util.Random; import org.junit.Test; @@ -32,11 +33,10 @@ public void testMapFile() throws IOException { testFile.deleteOnExit(); OutputStream out = new FileOutputStream(testFile); byte[] buffer = new byte[1 << 16]; + Random rng = new XORShiftRandom(0L); long len = ((long)buffer.length) + Integer.MAX_VALUE + 1; - for (int i = 0; i < buffer.length; i++) { - buffer[i] = (byte) i; - } for (int i = 0; i < len / buffer.length; i++) { + rng.nextBytes(buffer); out.write(buffer); } out.close(); @@ -59,11 +59,14 @@ public void testMapFile() throws IOException { LargeByteBuffer buf = LargeByteBufferHelper.mapFile(in, FileChannel.MapMode.READ_ONLY, 0, len); assertEquals(len, buf.size()); byte[] read = new byte[buffer.length]; + byte[] expected = new byte[buffer.length]; + Random rngExpected = new XORShiftRandom(0L); for (int i = 0; i < len / buffer.length; i++) { buf.get(read, 0, buffer.length); // assertArrayEquals() is really slow + rngExpected.nextBytes(expected); for (int j = 0; j < buffer.length; j++) { - if (read[j] != (byte) (j)) 
+ if (read[j] != expected[j]) fail("bad byte at (i,j) = (" + i + "," + j + ")"); } } @@ -81,4 +84,27 @@ public void testAllocate() { } assertEquals(5, buf.underlying[9].capacity()); } + + + private class XORShiftRandom extends Random { + + XORShiftRandom(long init) { + super(init); + seed = new Random(init).nextLong(); + } + + long seed; + + // we need to just override next - this will be called by nextInt, nextDouble, + // nextGaussian, nextLong, etc. + @Override + protected int next(int bits) { + long nextSeed = seed ^ (seed << 21); + nextSeed ^= (nextSeed >>> 35); + nextSeed ^= (nextSeed << 4); + seed = nextSeed; + return (int) (nextSeed & ((1L << bits) -1)); + } + } + } From 0fa150f7131afef9a445f9ad453856e94d3014ff Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 19 Aug 2015 11:13:35 -0500 Subject: [PATCH 91/97] bring up to date --- .../spark/scheduler/TaskResultGetter.scala | 2 - .../apache/spark/storage/BlockManager.scala | 232 +++++++++--------- .../spark/storage/DiskBlockObjectWriter.scala | 4 +- .../rdd/LargePartitionCachingSuite.scala | 2 +- .../buffer/FileSegmentManagedBuffer.java | 4 +- 5 files changed, 123 insertions(+), 121 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala index e59a8174d3bce..2c0b3f085ca76 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala @@ -20,8 +20,6 @@ package org.apache.spark.scheduler import java.nio.ByteBuffer import java.util.concurrent.RejectedExecutionException -import org.apache.spark.network.buffer.WrappedLargeByteBuffer - import scala.language.existentials import scala.util.control.NonFatal diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index fd3cfc5618368..dd2a198714bac 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -746,129 +746,133 @@ private[spark] class BlockManager( // The level we actually use to put the block val putLevel = effectiveStorageLevel.getOrElse(level) - // If we're storing bytes, then initiate the replication before storing them locally. - // This is faster as data is already serialized and ready to send. - val replicationFuture = data match { - case b: ByteBufferValues if putLevel.replication > 1 => - // Duplicate doesn't copy the bytes, but just creates a wrapper - val bufferView = try { - b.buffer.asByteBuffer() - } catch { - case ex: BufferTooLargeException => - throw new ReplicationBlockSizeLimitException(ex) - } - Future { - // This is a blocking action and should run in futureExecutionContext which is a cached - // thread pool - replicate(blockId, bufferView, putLevel) - }(futureExecutionContext) - case _ => null - } - - putBlockInfo.synchronized { - logTrace("Put for block %s took %s to get into synchronized block" - .format(blockId, Utils.getUsedTimeMs(startTimeMs))) - - var marked = false - try { - // returnValues - Whether to return the values put - // blockStore - The type of storage to put these values into - val (returnValues, blockStore: BlockStore) = { - if (putLevel.useMemory) { - // Put it in memory first, even if it also has useDisk set to true; - // We will drop it to disk later if the memory store can't hold it. 
- (true, memoryStore) - } else if (putLevel.useOffHeap) { - // Use external block store - (false, externalBlockStore) - } else if (putLevel.useDisk) { - // Don't get back the bytes from put unless we replicate them - (putLevel.replication > 1, diskStore) - } else { - assert(putLevel == StorageLevel.NONE) - throw new BlockException( - blockId, s"Attempted to put block $blockId without specifying storage level!") + try { + // If we're storing bytes, then initiate the replication before storing them locally. + // This is faster as data is already serialized and ready to send. + val replicationFuture = data match { + case b: ByteBufferValues if putLevel.replication > 1 => + // Duplicate doesn't copy the bytes, but just creates a wrapper + val bufferView = try { + b.buffer.asByteBuffer() + } catch { + case ex: BufferTooLargeException => + throw new ReplicationBlockSizeLimitException(ex) } - } - - // Actually put the values - val result = data match { - case IteratorValues(iterator) => - blockStore.putIterator(blockId, iterator, putLevel, returnValues) - case ArrayValues(array) => - blockStore.putArray(blockId, array, putLevel, returnValues) - case ByteBufferValues(bytes) => - bytes.rewind() - blockStore.putBytes(blockId, bytes, putLevel) - } - size = result.size - result.data match { - case Left (newIterator) if putLevel.useMemory => valuesAfterPut = newIterator - case Right (newBytes) => bytesAfterPut = newBytes - case _ => - } + Future { + // This is a blocking action and should run in futureExecutionContext which is a cached + // thread pool + replicate(blockId, bufferView, putLevel) + }(futureExecutionContext) + case _ => null + } - // Keep track of which blocks are dropped from memory - if (putLevel.useMemory) { - result.droppedBlocks.foreach { updatedBlocks += _ } - } + putBlockInfo.synchronized { + logTrace("Put for block %s took %s to get into synchronized block" + .format(blockId, Utils.getUsedTimeMs(startTimeMs))) + + var marked = false + try { + // returnValues - Whether to return the values put + // blockStore - The type of storage to put these values into + val (returnValues, blockStore: BlockStore) = { + if (putLevel.useMemory) { + // Put it in memory first, even if it also has useDisk set to true; + // We will drop it to disk later if the memory store can't hold it. + (true, memoryStore) + } else if (putLevel.useOffHeap) { + // Use external block store + (false, externalBlockStore) + } else if (putLevel.useDisk) { + // Don't get back the bytes from put unless we replicate them + (putLevel.replication > 1, diskStore) + } else { + assert(putLevel == StorageLevel.NONE) + throw new BlockException( + blockId, s"Attempted to put block $blockId without specifying storage level!") + } + } - val putBlockStatus = getCurrentBlockStatus(blockId, putBlockInfo) - if (putBlockStatus.storageLevel != StorageLevel.NONE) { - // Now that the block is in either the memory, externalBlockStore, or disk store, - // let other threads read it, and tell the master about it. 
- marked = true - putBlockInfo.markReady(size) - if (tellMaster) { - reportBlockStatus(blockId, putBlockInfo, putBlockStatus) + // Actually put the values + val result = data match { + case IteratorValues(iterator) => + blockStore.putIterator(blockId, iterator, putLevel, returnValues) + case ArrayValues(array) => + blockStore.putArray(blockId, array, putLevel, returnValues) + case ByteBufferValues(bytes) => + bytes.rewind() + blockStore.putBytes(blockId, bytes, putLevel) + } + size = result.size + result.data match { + case Left(newIterator) if putLevel.useMemory => valuesAfterPut = newIterator + case Right(newBytes) => bytesAfterPut = newBytes + case _ => } - updatedBlocks += ((blockId, putBlockStatus)) - } - } finally { - // If we failed in putting the block to memory/disk, notify other possible readers - // that it has failed, and then remove it from the block info map. - if (!marked) { - // Note that the remove must happen before markFailure otherwise another thread - // could've inserted a new BlockInfo before we remove it. - blockInfo.remove(blockId) - putBlockInfo.markFailure() - logWarning(s"Putting block $blockId failed") - } - } - } - logDebug("Put block %s locally took %s".format(blockId, Utils.getUsedTimeMs(startTimeMs))) - // Either we're storing bytes and we asynchronously started replication, or we're storing - // values and need to serialize and replicate them now: - if (putLevel.replication > 1) { - data match { - case ByteBufferValues(bytes) => - if (replicationFuture != null) { - Await.ready(replicationFuture, Duration.Inf) + // Keep track of which blocks are dropped from memory + if (putLevel.useMemory) { + result.droppedBlocks.foreach { + updatedBlocks += _ + } } - case _ => - val remoteStartTime = System.currentTimeMillis - // Serialize the block if not already done - if (bytesAfterPut == null) { - if (valuesAfterPut == null) { - throw new SparkException( - "Underlying put returned neither an Iterator nor bytes! This shouldn't happen.") + + val putBlockStatus = getCurrentBlockStatus(blockId, putBlockInfo) + if (putBlockStatus.storageLevel != StorageLevel.NONE) { + // Now that the block is in either the memory, externalBlockStore, or disk store, + // let other threads read it, and tell the master about it. + marked = true + putBlockInfo.markReady(size) + if (tellMaster) { + reportBlockStatus(blockId, putBlockInfo, putBlockStatus) } - bytesAfterPut = dataSerialize(blockId, valuesAfterPut) + updatedBlocks += ((blockId, putBlockStatus)) } - try { - replicate(blockId, bytesAfterPut.asByteBuffer(), putLevel) - } catch { - case ex: BufferTooLargeException => - throw new ReplicationBlockSizeLimitException(ex) + } finally { + // If we failed in putting the block to memory/disk, notify other possible readers + // that it has failed, and then remove it from the block info map. + if (!marked) { + // Note that the remove must happen before markFailure otherwise another thread + // could've inserted a new BlockInfo before we remove it. 
+ blockInfo.remove(blockId) + putBlockInfo.markFailure() + logWarning(s"Putting block $blockId failed") } - logDebug("Put block %s remotely took %s" - .format(blockId, Utils.getUsedTimeMs(remoteStartTime))) + } } - } + logDebug("Put block %s locally took %s".format(blockId, Utils.getUsedTimeMs(startTimeMs))) - if (bytesAfterPut != null) { - bytesAfterPut.dispose() + // Either we're storing bytes and we asynchronously started replication, or we're storing + // values and need to serialize and replicate them now: + if (putLevel.replication > 1) { + data match { + case ByteBufferValues(bytes) => + if (replicationFuture != null) { + Await.ready(replicationFuture, Duration.Inf) + } + case _ => + val remoteStartTime = System.currentTimeMillis + // Serialize the block if not already done + if (bytesAfterPut == null) { + if (valuesAfterPut == null) { + throw new SparkException( + "Underlying put returned neither an Iterator nor bytes! This shouldn't happen.") + } + bytesAfterPut = dataSerialize(blockId, valuesAfterPut) + } + try { + replicate(blockId, bytesAfterPut.asByteBuffer(), putLevel) + } catch { + case ex: BufferTooLargeException => + throw new ReplicationBlockSizeLimitException(ex) + } + logDebug("Put block %s remotely took %s" + .format(blockId, Utils.getUsedTimeMs(remoteStartTime))) + } + } + } finally { + if (bytesAfterPut != null) { + bytesAfterPut.dispose() + } } if (putLevel.replication > 1) { @@ -956,7 +960,7 @@ private[spark] class BlockManager( case Some(peer) => try { val onePeerStartTime = System.currentTimeMillis - data.position(0) + data.rewind() logTrace(s"Trying to replicate $blockId of ${data.limit()} bytes to $peer") blockTransferService.uploadBlockSync( peer.host, peer.port, peer.executorId, blockId, new NioManagedBuffer(data), tLevel) @@ -1339,7 +1343,7 @@ class TachyonBlockSizeLimitException(cause: BufferTooLargeException) class ShuffleBlockSizeLimitException(size: Long) extends SparkException("Spark cannot shuffle partitions that are greater than 2GB. " + "You tried to shuffle a block that was at least " + Utils.bytesToString(size) + ". " + - "You should try to increase the number of partitions of this shuffle, and / or increase the " + + "You should try to increase the number of partitions of this shuffle, and / or " + "figure out which stage created the partitions before the shuffle, and increase the number " + "of partitions for that stage. 
You may want to make both of these numbers easily " + "configurable parameters so you can continue to update as needed.") diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala index ad758a44dc9b7..d571ce5e21e74 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala @@ -110,7 +110,7 @@ private[spark] class DiskBlockObjectWriter( objOut.close() } - if (length > LargeByteBufferHelper.MAX_CHUNK) { + if (length > LargeByteBufferHelper.MAX_CHUNK_SIZE) { throw new ShuffleBlockSizeLimitException(length) } @@ -207,7 +207,7 @@ private[spark] class DiskBlockObjectWriter( if (numRecordsWritten % 32 == 0) { updateBytesWritten() - if (reportedPosition > LargeByteBufferHelper.MAX_CHUNK) { + if (reportedPosition > LargeByteBufferHelper.MAX_CHUNK_SIZE) { throw new ShuffleBlockSizeLimitException(reportedPosition) } } diff --git a/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala index baa28cceaab99..4859d0b4f47fc 100644 --- a/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala @@ -46,7 +46,7 @@ class LargePartitionCachingSuite extends FunSuite with SharedSparkContext with M .persist(StorageLevel.DISK_ONLY_2) myRDD.count() } - exc.getMessage() should include (classOf[ReplicationBlockSizeLimitException].getSimpleName) + exc.getCause() shouldBe a [ReplicationBlockSizeLimitException] } finally { clusterSc.stop() } diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java index 5e318d8ca8b78..2d534f12abd62 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java @@ -73,8 +73,8 @@ public ByteBuffer nioByteBuffer() throws IOException { buf.flip(); return buf; } else { - if (length > LargeByteBufferHelper.MAX_CHUNK) { - throw new BufferTooLargeException(length); + if (length > LargeByteBufferHelper.MAX_CHUNK_SIZE) { + throw new BufferTooLargeException(length, LargeByteBufferHelper.MAX_CHUNK_SIZE); } return channel.map(FileChannel.MapMode.READ_ONLY, offset, length); } From fb733c5059982767437ee4b2dc2104ee1eb76867 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 19 Aug 2015 11:19:19 -0500 Subject: [PATCH 92/97] fix check for underlying ByteBuffer class --- .../buffer/LargeByteBufferTestHelper.scala | 30 +++++++++++++++++++ .../spark/storage/BlockManagerSuite.scala | 11 ++++--- 2 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferTestHelper.scala diff --git a/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferTestHelper.scala b/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferTestHelper.scala new file mode 100644 index 0000000000000..a04bb41fae366 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/network/buffer/LargeByteBufferTestHelper.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.buffer + +import java.nio.ByteBuffer +import java.util.{List => JList} + +/** + * cheat to access package-protected members in test + */ +object LargeByteBufferTestHelper { + def nioBuffers(wbb: WrappedLargeByteBuffer): JList[ByteBuffer] = { + wbb.nioBuffers() + } +} diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 5b38d28f0722c..d589234df7106 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.storage import java.nio.{ByteBuffer, MappedByteBuffer} import java.util.Arrays -import org.apache.spark.network.buffer.{WrappedLargeByteBuffer, LargeByteBufferHelper, LargeByteBuffer} +import org.apache.spark.network.buffer.{LargeByteBufferTestHelper, WrappedLargeByteBuffer, LargeByteBufferHelper, LargeByteBuffer} import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration._ @@ -830,11 +830,10 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE val notMapped = diskStoreNotMapped.getBytes(blockId).get.asInstanceOf[WrappedLargeByteBuffer] // Not possible to do isInstanceOf due to visibility of HeapByteBuffer - // TODO fix me -// assert(notMapped.nioBuffers().get(0).getClass.getName.endsWith("HeapByteBuffer"), -// "Expected HeapByteBuffer for un-mapped read") -// assert(mapped.nioBuffers().get(0).isInstanceOf[MappedByteBuffer], -// "Expected MappedByteBuffer for mapped read") + assert(LargeByteBufferTestHelper.nioBuffers(mapped).get(0).getClass.getName + .endsWith("HeapByteBuffer"), "Expected HeapByteBuffer for un-mapped read") + assert(LargeByteBufferTestHelper.nioBuffers(notMapped).get(0).isInstanceOf[MappedByteBuffer], + "Expected MappedByteBuffer for mapped read") def arrayFromByteBuffer(in: LargeByteBuffer): Array[Byte] = { val array = new Array[Byte](in.remaining().toInt) From b921b0bd3e24f8a3e0ebc99d91b772299eed2851 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 19 Aug 2015 12:21:13 -0500 Subject: [PATCH 93/97] fix check, add test --- .../spark/storage/DiskBlockObjectWriter.scala | 3 ++- .../test/scala/org/apache/spark/ShuffleSuite.scala | 14 +++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala index d571ce5e21e74..61af603c351e1 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala @@ -97,7 +97,6 @@ private[spark] class DiskBlockObjectWriter( override def close() { if (initialized) { - 
val length = channel.position() Utils.tryWithSafeFinally { if (syncWrites) { // Force outstanding writes to disk and track how long it takes @@ -110,6 +109,8 @@ private[spark] class DiskBlockObjectWriter( objOut.close() } + finalPosition = file.length() + val length = finalPosition - initialPosition if (length > LargeByteBufferHelper.MAX_CHUNK_SIZE) { throw new ShuffleBlockSizeLimitException(length) } diff --git a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala index 7e5640bcba0d5..8ba16e0639692 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala @@ -283,6 +283,18 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkC rdd.count() } + test("shuffle total > 2GB ok if each block is small") { + sc = new SparkContext("local", "test", conf) + val rdd = sc.parallelize(1 to 1e6.toInt, 1).map{ i => + val n = 3e3.toInt + val arr = new Array[Byte](n) + //need to make sure the array doesn't compress to something small + scala.util.Random.nextBytes(arr) + (i, arr) + } + rdd.partitionBy(new HashPartitioner(100)).count() + } + test("shuffle blocks > 2GB fail with sane exception") { // note that this *could* succeed in local mode, b/c local shuffles actually don't // have a limit at 2GB. BUT, we make them fail in any case, b/c its better to have @@ -301,7 +313,7 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkC rdd.partitionBy(new org.apache.spark.HashPartitioner(2)).count() } - exc.getMessage should include (classOf[ShuffleBlockSizeLimitException].getSimpleName) + exc.getCause shouldBe a[ShuffleBlockSizeLimitException] } test("metrics for shuffle without aggregation") { From e0f5130b90c6c6d5a2327f7362a46866a5f97e74 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 19 Aug 2015 14:22:32 -0500 Subject: [PATCH 94/97] fix check --- .../org/apache/spark/storage/DiskBlockObjectWriter.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala index 61af603c351e1..ee35281c93ee7 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala @@ -208,8 +208,9 @@ private[spark] class DiskBlockObjectWriter( if (numRecordsWritten % 32 == 0) { updateBytesWritten() - if (reportedPosition > LargeByteBufferHelper.MAX_CHUNK_SIZE) { - throw new ShuffleBlockSizeLimitException(reportedPosition) + val length = reportedPosition - initialPosition + if (length > LargeByteBufferHelper.MAX_CHUNK_SIZE) { + throw new ShuffleBlockSizeLimitException(length) } } } From 331d517549fc9a3af804094b9e66a45452be35cd Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Wed, 19 Aug 2015 14:26:19 -0500 Subject: [PATCH 95/97] ignore the big tests for now --- core/src/test/scala/org/apache/spark/ShuffleSuite.scala | 4 ++-- .../org/apache/spark/rdd/LargePartitionCachingSuite.scala | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala index 8ba16e0639692..92aeeb7de467d 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala @@ -283,7 +283,7 @@ abstract class ShuffleSuite extends 
SparkFunSuite with Matchers with LocalSparkC
     rdd.count()
   }
 
-  test("shuffle total > 2GB ok if each block is small") {
+  ignore("shuffle total > 2GB ok if each block is small") {
     sc = new SparkContext("local", "test", conf)
     val rdd = sc.parallelize(1 to 1e6.toInt, 1).map{ i =>
       val n = 3e3.toInt
@@ -295,7 +295,7 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkC
     rdd.partitionBy(new HashPartitioner(100)).count()
   }
 
-  test("shuffle blocks > 2GB fail with sane exception") {
+  ignore("shuffle blocks > 2GB fail with sane exception") {
     // note that this *could* succeed in local mode, b/c local shuffles actually don't
     // have a limit at 2GB. BUT, we make them fail in any case, b/c its better to have
     // a consistent failure, and not have success depend on where tasks get scheduled
diff --git a/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala
index 4859d0b4f47fc..609336339deb8 100644
--- a/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala
@@ -29,13 +29,13 @@ class LargePartitionCachingSuite extends FunSuite with SharedSparkContext with M
     largePartitionRdd.persist(StorageLevel.MEMORY_ONLY_SER).count() should be (1e6.toInt)
   }
 
-  test("disk cache large partitions") {
+  ignore("disk cache large partitions") {
     largePartitionRdd.persist(StorageLevel.DISK_ONLY).count() should be (1e6.toInt)
   }
 
-  test("disk cache large partitions with replications") {
+  ignore("disk cache large partitions with replications") {
     val conf = new SparkConf()
-      .setMaster("local-cluster[2, 1, 512]")
+      .setMaster("local-cluster[2, 1, 1024]")
       .setAppName("test-cluster")
       .set("spark.task.maxFailures", "1")
       .set("spark.akka.frameSize", "1") // set to 1MB to detect direct serialization of data

From 06c8ffa6f576baad66b5897756eda990f114ade4 Mon Sep 17 00:00:00 2001
From: Imran Rashid
Date: Wed, 19 Aug 2015 15:54:10 -0500
Subject: [PATCH 96/97] style

---
 .../org/apache/spark/storage/ExternalBlockStore.scala  |  5 ++++-
 .../org/apache/spark/storage/TachyonBlockManager.scala |  2 +-
 .../src/test/scala/org/apache/spark/ShuffleSuite.scala |  4 ++--
 .../apache/spark/rdd/LargePartitionCachingSuite.scala  | 10 ++++++----
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala
index 6a54e51a0f169..c2271bccc8f4c 100644
--- a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala
@@ -48,7 +48,10 @@ private[spark] class ExternalBlockStore(blockManager: BlockManager, executorId:
     }
   }
 
-  override def putBytes(blockId: BlockId, bytes: LargeByteBuffer, level: StorageLevel): PutResult = {
+  override def putBytes(
+      blockId: BlockId,
+      bytes: LargeByteBuffer,
+      level: StorageLevel): PutResult = {
     putIntoExternalBlockStore(blockId, bytes, returnValues = true)
   }
 
diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala
index 7593dd41bf6b9..f806407c73ead 100644
--- a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala
@@ -137,7 +137,7 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log
       val is = file.getInStream(ReadType.CACHE)
       try {
         val size = file.length
-        //TODO get tachyon to support large blocks
+        // TODO get tachyon to support large blocks
         val bs = new Array[Byte](size.asInstanceOf[Int])
         ByteStreams.readFully(is, bs)
         Some(LargeByteBufferHelper.asLargeByteBuffer(bs))
diff --git a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala
index 92aeeb7de467d..e28818098b918 100644
--- a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala
+++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala
@@ -288,7 +288,7 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkC
     val rdd = sc.parallelize(1 to 1e6.toInt, 1).map{ i =>
       val n = 3e3.toInt
       val arr = new Array[Byte](n)
-      //need to make sure the array doesn't compress to something small
+      // need to make sure the array doesn't compress to something small
       scala.util.Random.nextBytes(arr)
       (i, arr)
     }
@@ -304,7 +304,7 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkC
     val rdd = sc.parallelize(1 to 1e6.toInt, 1).map{ i =>
       val n = 3e3.toInt
       val arr = new Array[Byte](n)
-      //need to make sure the array doesn't compress to something small
+      // need to make sure the array doesn't compress to something small
       scala.util.Random.nextBytes(arr)
       (2 * i, arr)
     }
diff --git a/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala
index 609336339deb8..dfcc90df32a18 100644
--- a/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/LargePartitionCachingSuite.scala
@@ -18,13 +18,15 @@ package org.apache.spark.rdd
 
 import org.apache.spark._
 import org.apache.spark.storage.{ReplicationBlockSizeLimitException, StorageLevel}
-import org.scalatest.{Matchers, FunSuite}
+import org.scalatest.Matchers
 
-class LargePartitionCachingSuite extends FunSuite with SharedSparkContext with Matchers {
+class LargePartitionCachingSuite extends SparkFunSuite with SharedSparkContext with Matchers {
 
-  def largePartitionRdd = sc.parallelize(1 to 1e6.toInt, 1).map{i => new Array[Byte](2.2e3.toInt)}
+  def largePartitionRdd: RDD[Array[Byte]] = {
+    sc.parallelize(1 to 1e6.toInt, 1).map{i => new Array[Byte](2.2e3.toInt)}
+  }
 
-  //just don't want to kill the test server
+  // just don't want to kill the test server
   ignore("memory serialized cache large partitions") {
     largePartitionRdd.persist(StorageLevel.MEMORY_ONLY_SER).count() should be (1e6.toInt)
   }

From 200afdc33f59508a04172cfefcc4a40a3bdf6169 Mon Sep 17 00:00:00 2001
From: Imran Rashid
Date: Wed, 19 Aug 2015 19:38:47 -0500
Subject: [PATCH 97/97] whoops, swap cases

---
 .../scala/org/apache/spark/storage/BlockManagerSuite.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
index d589234df7106..d1d6778180a4c 100644
--- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
@@ -830,9 +830,9 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE
     val notMapped = diskStoreNotMapped.getBytes(blockId).get.asInstanceOf[WrappedLargeByteBuffer]
 
     // Not possible to do isInstanceOf due to visibility of HeapByteBuffer
-    assert(LargeByteBufferTestHelper.nioBuffers(mapped).get(0).getClass.getName
+    assert(LargeByteBufferTestHelper.nioBuffers(notMapped).get(0).getClass.getName
       .endsWith("HeapByteBuffer"), "Expected HeapByteBuffer for un-mapped read")
-    assert(LargeByteBufferTestHelper.nioBuffers(notMapped).get(0).isInstanceOf[MappedByteBuffer],
+    assert(LargeByteBufferTestHelper.nioBuffers(mapped).get(0).isInstanceOf[MappedByteBuffer],
       "Expected MappedByteBuffer for mapped read")
 
     def arrayFromByteBuffer(in: LargeByteBuffer): Array[Byte] = {