From 5cdcd4246e586346a8e1ac2242dd795fdb1ae068 Mon Sep 17 00:00:00 2001
From: Imran Rashid
+ * Invoke this method before using a sequence of channel-read or
+ * put operations to fill this buffer.
+ *
+ * This method does not actually erase the data in the buffer, but it
+ * is named as if it did because it will most often be used in situations
+ * in which that might as well be the case.
+ */
+
+ /**
+ * Flips this buffer. The limit is set to the current position and then
+ * the position is set to zero. If the mark is defined then it is
+ * discarded.
+ *
+ * After a sequence of channel-read or put operations, invoke
+ * this method to prepare for a sequence of channel-write or relative
+ * get operations.
+ */
+ final def flip() {
+ needReleaseIndices += 0
+ globalLimit = globalPosition
+ globalPosition = 0L
+ toNewContainer(0)
+
+ // Now free all pending containers
+ releasePendingContainers()
+ }
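
Illustrative sketch only, not part of the patch: the fill-then-flip pattern this method supports. The allocateMemoryBuffer factory, the ioConf value, and the put/get signatures are taken from elsewhere in this diff; exact names may differ in the final API.

    import java.nio.ByteBuffer
    // assumed: ioConf is an IOConfig instance available from the surrounding code
    val buf = LargeByteBuffer.allocateMemoryBuffer(1024L, ioConf)
    buf.put(ByteBuffer.wrap(Array[Byte](1, 2, 3)))   // channel-read / put ops fill the buffer
    buf.flip()                                        // limit := position, position := 0
    val out = new Array[Byte](3)
    buf.get(out, 0, out.length)                       // drain with get / channel-write ops
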
+
+ /**
+ * Rewinds this buffer. The position is set to zero and the mark is
+ * discarded.
+ *
+ * Invoke this method before a sequence of channel-write or get
+ * operations, assuming that the limit has already been set
+ * appropriately.
+ */
+ final def rewind() {
+ needReleaseIndices += 0
+ globalPosition = 0L
+ toNewContainer(0)
+
+ // Now free all pending containers
+ releasePendingContainers()
+ }
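
Illustrative sketch only, not part of the patch: rewind() resets only the position, so a buffer that has already been drained can be re-read over the same [0, limit) range. This continues the hypothetical buf from the sketch above.

    buf.rewind()                                      // position := 0, limit unchanged
    val again = new Array[Byte](3)
    buf.get(again, 0, again.length)                   // re-reads the same bytes
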
+
+ /**
+ * Returns the number of elements between the current position and the
+ * limit.
- * Invoke this method before using a sequence of channel-read or
- * put operations to fill this buffer.
- *
- * This method does not actually erase the data in the buffer, but it
- * is named as if it did because it will most often be used in situations
- * in which that might as well be the case.
+ * Sets this buffer's limit. If the position is larger than the new limit then it is set to the
+ * new limit. If the mark is defined and larger than the new limit then it is discarded.
  */
- final def clear() {
-   // if (0 == globalCapacity) return
-
-   needReleaseIndices += 0
-   globalPosition = 0L
-   toNewContainer(0)
-   globalLimit = globalCapacity
-
-   // Now free all pending containers
-   releasePendingContainers()
- }
+ def limit(newLimit: Long): Unit

  /**
- * Flips this buffer. The limit is set to the current position and then
- * the position is set to zero. If the mark is defined then it is
- * discarded.
- *
- * After a sequence of channel-read or put operations, invoke
- * this method to prepare for a sequence of channel-write or relative
- * get operations.
+ * return this buffer's limit
+ * @return
  */
- final def flip() {
-   needReleaseIndices += 0
-   globalLimit = globalPosition
-   globalPosition = 0L
-   toNewContainer(0)
-
-   // Now free all pending containers
-   releasePendingContainers()
- }
+ def limit(): Long
+
+//
+// def skip(skipBy: Long): Unit
+//
+// def position(newPosition: Long): Unit
+//
+// /**
+// * Clears this buffer. The position is set to zero, the limit is set to
+// * the capacity, and the mark is discarded.
+// *
+// * Invoke this method before using a sequence of channel-read or
+// * put operations to fill this buffer.
+// *
+// * This method does not actually erase the data in the buffer, but it
+// * is named as if it did because it will most often be used in situations
+// * in which that might as well be the case.
+// */
+// def clear(): Unit
+//
+// /**
+// * Flips this buffer. The limit is set to the current position and then
+// * the position is set to zero. If the mark is defined then it is
+// * discarded.
+// *
+// * After a sequence of channel-read or put operations, invoke
+// * this method to prepare for a sequence of channel-write or relative
+// * get operations.
+// */
+// def flip(): Unit

  /**
  * Rewinds this buffer. The position is set to zero and the mark is
@@ -299,14 +104,7 @@ class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[By
  * operations, assuming that the limit has already been set
  * appropriately.
  */
- final def rewind() {
-   needReleaseIndices += 0
-   globalPosition = 0L
-   toNewContainer(0)
-
-   // Now free all pending containers
-   releasePendingContainers()
- }
+ def rewind(): Unit

  /**
  * Returns the number of elements between the current position and the
@@ -314,1192 +112,1555 @@ class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[By
  *
  * @return The number of elements remaining in this buffer
  */
- final def remaining(): Long = {
-   globalLimit - globalPosition
- }
-
- /**
- * Tells whether there are any elements between the current position and
- * the limit.
- * - * @return true if, and only if, there is at least one element - * remaining in this buffer - */ - final def hasRemaining() = { - globalPosition < globalLimit - } - - // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) - - // number of bytes remaining in currently active underlying buffer - private def currentRemaining(): Int = { - if (hasRemaining()) { - // validate currentContainerIndex is valid - assert (globalPosition >= bufferPositionStart(currentContainerIndex) && - globalPosition < bufferPositionStart(currentContainerIndex + 1), - "globalPosition = " + globalPosition + - ", currentContainerIndex = " + currentContainerIndex + - ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) - - currentRemaining0(currentContainerIndex) - } else 0 - } - - // Without any validation : required when we are bumping the index (when validation will fail) ... - private def currentRemaining0(which: Int): Int = { - // currentBuffer().remaining() - math.max(0, math.min(bufferPositionStart(which + 1), - globalLimit) - globalPosition).asInstanceOf[Int] - } - - // Set the approppriate position/limit for the current underlying buffer to mirror our - // the LargeByteBuffer's state. - private def fetchCurrentBuffer(): ByteBuffer = { - releasePendingContainers() - - assert (currentContainerIndex < containers.length) - - val container = containers(currentContainerIndex) - if (! container.isAcquired) { - container.acquire() - } - - assert (container.isAcquired) - if (LargeByteBuffer.enableExpensiveAssert) { - assert (! containers.exists( b => (b ne container) && b.isAcquired)) - } - - assert (currentContainerIndex < bufferPositionStart.length && - globalPosition < bufferPositionStart(currentContainerIndex + 1), - "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + - bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) - - val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). - asInstanceOf[Int] - - val buffer = container.getByteBuffer - buffer.position(buffPosition) - val diff = buffer.capacity - buffPosition - val left = remaining() - if (diff <= left) { - buffer.limit(buffer.capacity()) - } else { - // Can happen if limit() was called. - buffer.limit(buffPosition + left.asInstanceOf[Int]) - } - - buffer - } - - // To be used ONLY to test in suites. - private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { - if ("1" != System.getProperty("SPARK_TESTING")) { - throw new IllegalStateException("This method is to be used ONLY within spark test suites") - } - - fetchCurrentBuffer() - } - - // Expects that the invoker has ensured that this can be safely invoked. - // That is, it wont be invoked when the loop wont terminate. - private def toNonEmptyBuffer() { - - if (! hasRemaining()) { - var newIndex = currentContainerIndex - // Ensure we are in the right block or not. - while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { - newIndex += 1 - } - toNewContainer(newIndex) - // Do not do this - since we might not yet have consumed the buffer which caused EOF right now - /* - // Add last one also, and release it too - since we are at the end of the buffer with nothing - // more pending. 
- if (newIndex >= 0 && currentContainerIndex < containers.length) { - needReleaseIndices += newIndex - } - */ - assert (currentContainerIndex >= 0) - // releasePendingContainers() - return - } - - var index = currentContainerIndex - while (0 == currentRemaining0(index) && index < containers.length) { - index += 1 - } - assert (currentContainerIndex < containers.length) - toNewContainer(index) - assert (0 != currentRemaining()) - } - - private def assertPreconditions(containerIndex: Int) { - assert (globalPosition >= bufferPositionStart(containerIndex), - "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + - ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) - assert (globalPosition < bufferPositionStart(containerIndex + 1), - "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + - ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) - - assert (globalLimit <= globalCapacity) - assert (containerIndex < containers.length) - } - - - /** - * Attempts to return a ByteBuffer of the requested size. - * It is possible to return a buffer of size smaller than requested - * even though hasRemaining == true - * - * On return, position would have been moved 'ahead' by the size of the buffer returned : - * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer - * - * - * This is used to primarily retrieve content of this buffer to expose via ByteBuffer - * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the - * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer - * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying - * container is a disk backed container, and we make subsequent calls to get(), the returned - * ByteBuffer can be dispose'ed off - * - * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
- * @return - */ - - private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { - fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) - } - - private def fetchBufferOfSizeImpl(maxChunkSize: Int, - canReleaseContainers: Boolean): ByteBuffer = { - if (canReleaseContainers) releasePendingContainers() - assert (maxChunkSize > 0) - - // not checking for degenerate case of maxChunkSize == 0 - if (globalPosition >= globalLimit) { - // throw exception - throw new BufferUnderflowException() - } - - // Check preconditions : disable these later, since they might be expensive to - // evaluate for every IO op - assertPreconditions(currentContainerIndex) - - val currentBufferRemaining = currentRemaining() - - assert (currentBufferRemaining > 0) - - val size = math.min(currentBufferRemaining, maxChunkSize) - - val newBuffer = if (currentBufferRemaining > maxChunkSize) { - val currentBuffer = fetchCurrentBuffer() - val buff = ByteBufferContainer.createSlice(currentBuffer, - currentBuffer.position(), maxChunkSize) - assert (buff.remaining() == maxChunkSize) - buff - } else { - val currentBuffer = fetchCurrentBuffer() - val buff = currentBuffer.slice() - assert (buff.remaining() == currentBufferRemaining) - buff - } - - assert (size == newBuffer.remaining()) - assert (0 == newBuffer.position()) - assert (size == newBuffer.limit()) - assert (newBuffer.capacity() == newBuffer.limit()) - - globalPosition += newBuffer.remaining - toNonEmptyBuffer() - - newBuffer - } - - // Can we service the read/write from the currently active (underlying) bytebuffer or not. - // For almost all cases, this will return true allowing us to optimize away the more expensive - // computations. - private def localReadWritePossible(size: Int) = - size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) - - - def getLong(): Long = { - assert (readable) - releasePendingContainers() - - if (remaining() < 8) throw new BufferUnderflowException - - if (localReadWritePossible(8)) { - val buff = fetchCurrentBuffer() - assert (buff.remaining() >= 8) - val retval = buff.getLong - globalPosition += 8 - toNonEmptyBuffer() - return retval - } - - val buff = readFully(8) - buff.getLong - } - - def getInt(): Int = { - assert (readable) - releasePendingContainers() - - if (remaining() < 4) throw new BufferUnderflowException - - if (localReadWritePossible(4)) { - val buff = fetchCurrentBuffer() - assert (buff.remaining() >= 4) - val retval = buff.getInt - globalPosition += 4 - toNonEmptyBuffer() - return retval - } - - val buff = readFully(4) - buff.getInt - } + def remaining(): Long +} - def getChar(): Char = { - assert (readable) - releasePendingContainers() +class ChainedLargeByteBuffer(private[io] val underlying: ChainedBuffer) extends LargeByteBuffer { - if (remaining() < 2) throw new BufferUnderflowException + def capacity = underlying.capacity - if (localReadWritePossible(2)) { - val buff = fetchCurrentBuffer() - assert (buff.remaining() >= 2) - val retval = buff.getChar - globalPosition += 2 - toNonEmptyBuffer() - return retval - } + var _pos = 0l - // if slice is becoming too expensive, revisit this ... - val buff = readFully(2) - buff.getChar + def get(dst: Array[Byte],offset: Int,length: Int): Unit = { + underlying.read(_pos, dst, offset, length) + _pos += length } def get(): Byte = { - assert (readable) - releasePendingContainers() - - if (! 
hasRemaining()) throw new BufferUnderflowException - - // If we have remaining bytes, previous invocations MUST have ensured that we are at - // a buffer which has data to be read. - assert (localReadWritePossible(1)) - - val buff = fetchCurrentBuffer() - assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) - val retval = buff.get() - globalPosition += 1 - toNonEmptyBuffer() - - retval - } - - def get(arr: Array[Byte], offset: Int, size: Int): Int = { - assert (readable) - releasePendingContainers() - - LargeByteBuffer.checkOffsets(arr, offset, size) - - // kyro depends on this it seems ? - // assert (size > 0) - if (0 == size) return 0 - - if (! hasRemaining()) return -1 - - if (localReadWritePossible(size)) { - val buff = fetchCurrentBuffer() - assert (buff.remaining() >= size) - buff.get(arr, offset, size) - globalPosition += size - toNonEmptyBuffer() - return size - } - - var remainingSize = math.min(size, remaining()).asInstanceOf[Int] - var currentOffset = offset - - while (remainingSize > 0) { - val buff = fetchBufferOfSize(remainingSize) - val toCopy = math.min(buff.remaining(), remainingSize) - - buff.get(arr, currentOffset, toCopy) - currentOffset += toCopy - remainingSize -= toCopy - } - - currentOffset - offset - } - - - private def createSlice(size: Long): LargeByteBuffer = { - - releasePendingContainers() - - if (remaining() < size) { - // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) - throw new BufferOverflowException - } - - // kyro depends on this it seems ? - // assert (size > 0) - if (0 == size) return LargeByteBuffer.EMPTY_BUFFER - - val arr = new ArrayBuffer[ByteBufferContainer](2) - var totalLeft = size - - // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) - - var containerIndex = currentContainerIndex - while (totalLeft > 0 && hasRemaining()) { - assertPreconditions(containerIndex) - val container = containers(containerIndex) - val currentLeft = currentRemaining0(containerIndex) - - assert (globalPosition + currentLeft <= globalLimit) - assert (globalPosition >= bufferPositionStart(containerIndex) && - (globalPosition < bufferPositionStart(containerIndex + 1))) - - val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] - val sliceSize = math.min(totalLeft, currentLeft) - assert (from >= 0) - assert (sliceSize > 0 && sliceSize <= Int.MaxValue) - - val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) - arr += slice - - globalPosition += sliceSize - totalLeft -= sliceSize - if (currentLeft == sliceSize) containerIndex += 1 - } - - // Using toNonEmptyBuffer instead of directly moving to next here so that - // other checks can be performed there. - toNonEmptyBuffer() - // force cleanup - this is fine since we are not using the buffers directly - // which are actively needed (the returned value is on containers which can - // recreate) - releasePendingContainers() - // free current container if acquired. - if (currentContainerIndex < containers.length) { - containers(currentContainerIndex).release() - } - assert (currentContainerIndex == containerIndex) - - val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) - retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) - retval - } - - // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers - // This is to be used only for writes : and ensures that writes are done into the appropriate - // underlying bytebuffers. 
- def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { - assert(writable) - assert(size >= 0) - - createSlice(size) - } - - // get a buffer which is of the specified size and contains data from the underlying buffers - // Note, the actual data might be spread across the underlying buffers. - // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! - private def readFully(size: Int): ByteBuffer = { - assert (readable) - - if (remaining() < size) { - // throw exception - throw new BufferUnderflowException() - } - - // kyro depends on this it seems ? - // assert (size > 0) - if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER - - // Expected to be handled elsewhere. - assert (! localReadWritePossible(size)) - - val localBuff = { - val buff = fetchBufferOfSize(size) - // assert(buff.remaining() <= size) - // if (buff.remaining() == size) return buff - assert(buff.remaining() < size) - ByteBuffer.allocate(size).put(buff) - } - - // assert (localBuff.hasRemaining) - - while (localBuff.hasRemaining) { - val buff = fetchBufferOfSize(localBuff.remaining()) - localBuff.put(buff) - } - - localBuff.flip() - localBuff - } - - - - def put(b: Byte) { - assert (writable) - if (remaining() < 1) { - // logInfo("put byte. remaining = " + remaining() + ", this = " + this) - throw new BufferOverflowException - } - - assert (currentRemaining() > 0) - - fetchCurrentBuffer().put(b) - globalPosition += 1 - // Check to need to bump the index ? - toNonEmptyBuffer() - } - - - def put(buffer: ByteBuffer) { - assert (writable) - if (remaining() < buffer.remaining()) { - throw new BufferOverflowException - } - - val bufferRemaining = buffer.remaining() - if (localReadWritePossible(bufferRemaining)) { - - assert (currentRemaining() >= bufferRemaining) - - fetchCurrentBuffer().put(buffer) - - globalPosition += bufferRemaining - toNonEmptyBuffer() - return - } - - while (buffer.hasRemaining) { - val currentBufferRemaining = currentRemaining() - val bufferRemaining = buffer.remaining() - - if (currentBufferRemaining >= bufferRemaining) { - fetchCurrentBuffer().put(buffer) - globalPosition += bufferRemaining - } else { - // Split across buffers. - val currentBuffer = fetchCurrentBuffer() - assert (currentBuffer.remaining() >= currentBufferRemaining) - val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), - currentBufferRemaining) - assert (sliced.remaining() == currentBufferRemaining) - currentBuffer.put(sliced) - // move buffer pos - buffer.position(buffer.position() + currentBufferRemaining) - - globalPosition += currentBufferRemaining - } - toNonEmptyBuffer() - } - - assert (! hasRemaining() || currentRemaining() > 0) - } - - def put(other: LargeByteBuffer) { - assert (writable) - if (this.remaining() < other.remaining()) { - throw new BufferOverflowException - } - - while (other.hasRemaining()) { - val buffer = other.fetchBufferOfSize(other.currentRemaining()) - this.put(buffer) - } - } - - - def duplicate(): LargeByteBuffer = { - val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) - // We do a duplicate as part of construction - so avoid double duplicate. - // containersCopy ++= containers.map(_.duplicate()) - containersCopy ++= containers - val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) - - // set limit and position (in that order) ... 
- retval.limit(this.limit()) - retval.position(this.position()) - - // Now release our containers - if any had been acquired - releasePendingContainers() - - retval - } - - - /** - * 'read' a LargeByteBuffer of size specified and return that. - * Position will be incremented by size - * - * The name might be slightly confusing : rename ? - * - * @param size Amount of data to be read from this buffer and returned - * @return - */ - def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { - if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException - if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException - - - assert (readable) - assert (size >= 0) - - releasePendingContainers() - - if (0 == size) return LargeByteBuffer.EMPTY_BUFFER - - createSlice(size) - } - - - // This is essentially a workaround to exposing underlying buffers - def readFrom(channel: ReadableByteChannel): Long = { - - assert (writable) - releasePendingContainers() - - // this also allows us to avoid nasty corner cases in the loop. - if (! hasRemaining()) { - // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) - throw new BufferOverflowException - } - - var totalBytesRead = 0L - - while (hasRemaining()) { - // read what we can ... - val buffer = fetchCurrentBuffer() - val bufferRemaining = currentRemaining() - val bytesRead = channel.read(buffer) - - if (bytesRead > 0) { - totalBytesRead += bytesRead - // bump position too .. - globalPosition += bytesRead - if (bytesRead >= bufferRemaining) toNonEmptyBuffer() - } - else if (-1 == bytesRead) { - // if we had already read some data in the loop, return that. - if (totalBytesRead > 0) return totalBytesRead - return -1 - } // nothing available to read, retry later. return - else if (0 == bytesRead) { - return totalBytesRead - } - - // toNonEmptyBuffer() - } - - // Cleanup last buffer ? - toNonEmptyBuffer() - totalBytesRead - } - - // This is essentially a workaround to exposing underlying buffers - def readFrom(inStrm: InputStream): Long = { - - assert (writable) - releasePendingContainers() - - // this also allows us to avoid nasty corner cases in the loop. - // if (! hasRemaining()) throw new BufferOverflowException - if (! hasRemaining()) return 0 - - var totalBytesRead = 0L - - val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) - - while (hasRemaining()) { - // read what we can ... note, since there is no gaurantee that underlying buffer might - // expose array() method, we do double copy - from stream to buff and from buff to bytearray. - // see if we can optimize this later ... - val buffer = fetchCurrentBuffer() - val bufferRemaining = buffer.remaining() - val max = math.min(buff.length, bufferRemaining) - val bytesRead = inStrm.read(buff, 0, max) - - if (bytesRead > 0) { - buffer.put(buff, 0, bytesRead) - totalBytesRead += bytesRead - // bump position too .. - globalPosition += bytesRead - // buffer.position(buffer.position + bytesRead) - if (bytesRead >= bufferRemaining) toNonEmptyBuffer() - } - else if (-1 == bytesRead) { - // if we had already read some data in the loop, return that. - if (totalBytesRead > 0) return totalBytesRead - return -1 - } // nothing available to read, retry later. 
return - else if (0 == bytesRead) { - return totalBytesRead - } - - // toNonEmptyBuffer() - } - - totalBytesRead - } - - // This is essentially a workaround to exposing underlying buffers - // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce - // code for performance reasons. - def readFrom(inStrm: DataInput): Long = { - - assert (writable) - releasePendingContainers() - - // this also allows us to avoid nasty corner cases in the loop. - // if (! hasRemaining()) throw new BufferOverflowException - if (! hasRemaining()) return 0 - - var totalBytesRead = 0L - - val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) - - while (hasRemaining()) { - // read what we can ... note, since there is no gaurantee that underlying buffer might - // expose array() method, we do double copy - from stream to buff and from buff to bytearray. - // see if we can optimize this later ... - val buffer = fetchCurrentBuffer() - val bufferRemaining = buffer.remaining() - val max = math.min(buff.length, bufferRemaining) - inStrm.readFully(buff, 0, max) - val bytesRead = max - - if (bytesRead > 0) { - buffer.put(buff, 0, bytesRead) - totalBytesRead += bytesRead - // bump position too .. - globalPosition += bytesRead - // buffer.position(buffer.position() + bytesRead) - if (bytesRead >= bufferRemaining) toNonEmptyBuffer() - } - else if (-1 == bytesRead) { - // if we had already read some data in the loop, return that. - if (totalBytesRead > 0) return totalBytesRead - return -1 - } // nothing available to read, retry later. return - else if (0 == bytesRead) { - return totalBytesRead - } - - // toNonEmptyBuffer() - } - - totalBytesRead - } - - // This is essentially a workaround to exposing underlying buffers - // Note: tries to do it efficiently without needing to load everything into memory - // (particularly for diskbacked buffers, etc). - def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { - - assert (readable) - releasePendingContainers() - - // this also allows us to avoid nasty corner cases in the loop. - if (! hasRemaining()) throw new BufferUnderflowException - - var totalBytesWritten = 0L - - while (hasRemaining()) { - // Write what we can ... - val buffer = fetchCurrentBuffer() - val bufferRemaining = buffer.remaining() - assert (bufferRemaining > 0) - val bytesWritten = channel.write(buffer) - - if (bytesWritten > 0) { - totalBytesWritten += bytesWritten - // bump position too .. - globalPosition += bytesWritten - if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() - assert (! hasRemaining() || currentRemaining() > 0) - } - else if (0 == bytesWritten) { - return totalBytesWritten - } - - // toNonEmptyBuffer() - } - - assert (! hasRemaining()) - if (cleanup) { - free() - } - totalBytesWritten - } - - // This is essentially a workaround to exposing underlying buffers - def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { - - assert (readable) - releasePendingContainers() - - // this also allows us to avoid nasty corner cases in the loop. - if (! hasRemaining()) throw new BufferUnderflowException - - var totalBytesWritten = 0L - val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) - - while (hasRemaining()) { - // write what we can ... note, since there is no gaurantee that underlying buffer might - // expose array() method, we do double copy - from bytearray to buff and from - // buff to outputstream. see if we can optimize this later ... 
- val buffer = fetchCurrentBuffer() - val bufferRemaining = buffer.remaining() - val size = math.min(bufferRemaining, buff.length) - buffer.get(buff, 0, size) - outStrm.write(buff, 0, size) - - totalBytesWritten += size - // bump position too .. - globalPosition += size - - if (size >= bufferRemaining) toNonEmptyBuffer() - } - - toNonEmptyBuffer() - if (cleanup) { - free() - } - totalBytesWritten + val b = underlying.read(_pos) + _pos += 1 + b } - def asInputStream(): InputStream = { - new InputStream() { - override def read(): Int = { - if (! hasRemaining()) return -1 - get() - } - - override def read(arr: Array[Byte], off: Int, len: Int): Int = { - if (! hasRemaining()) return -1 - - get(arr, off, len) - } - - override def available(): Int = { - // current remaining is what can be read without blocking - // anything higher might need disk access/buffer swapping. - /* - val left = remaining() - math.min(left, Int.MaxValue).asInstanceOf[Int] - */ - currentRemaining() - } - } - } - - def getCleaner() = cleaner - - /** - * @param cleaner The previous cleaner, so that the caller can chain them if required. - * @return - */ - private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { - overrideCleaner(cleaner, allowOverride = true) - } - - private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { - if (! this.allowCleanerOverride) { - // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free - return this.cleaner - } - - this.allowCleanerOverride = allowOverride - assert (null != cleaner) - val prev = this.cleaner - this.cleaner = cleaner - // logInfo("Overriding " + prev + " with " + this.cleaner) - prev - } - - private def doReleaseAll() { - for (container <- containers) { - container.release() - } - } - - def free(invokeCleaner: Boolean = true) { - // logInfo("Free on " + this + ", cleaner = " + cleaner) - // always invoking release - doReleaseAll() - - if (invokeCleaner) cleaner.clean(this) - } - - private def doDispose(needRelease: Boolean) { - - if (disposeLocationThrowable ne null) { - logError("Already free'ed earlier at : ", disposeLocationThrowable) - logError("Current at ", new Throwable) - throw new IllegalStateException("Already freed.") - } - disposeLocationThrowable = new Throwable() - - // Forcefully cleanup all - if (needRelease) doReleaseAll() - - // Free in a different loop, in case different containers refer to same resource - // to release (like file) - for (container <- containers) { - container.free() - } - - needReleaseIndices.clear() - - // We should not use this buffer anymore : set the values such that f - // we dont ... - globalPosition = 0 - globalLimit = 0 - globalCapacity = 0 + def put(bytes: LargeByteBuffer): Unit = { + ??? } - // copy data over ... MUST be used only for cases where array is known to be - // small to begin with. slightly risky method due to that assumption - def toByteArray(): Array[Byte] = { - val positionBackup = position() - val size = remaining() - if (size > Int.MaxValue) { - throw new IllegalStateException( - "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") - } - - val retval = new Array[Byte](size.asInstanceOf[Int]) - val readSize = get(retval, 0, retval.length) - assert (readSize == retval.length, - "readSize = " + readSize + ", retval.length = " + retval.length) - - position(positionBackup) - - retval + def position: Long = _pos + def position(position: Long): Unit = { + _pos = position } - - // copy data over ... 
MUST be used only for cases where array is known to be - // small to begin with. slightly risky method due to that assumption - def toByteBuffer(): ByteBuffer = { - ByteBuffer.wrap(toByteArray()) + def remaining(): Long = { + underlying.size - position } - def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { - val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) - val currentPosition = position() - retval.put(this) - position(currentPosition) - retval.clear() - retval + def duplicate(): ChainedLargeByteBuffer = { + new ChainedLargeByteBuffer(underlying) } - - - // This is ONLY used for testing : that too as part of development of this and associated classes - // remove before contributing to spark. - def hexDump(): String = { - if (remaining() * 64 > Int.MaxValue) { - throw new UnsupportedOperationException("buffer too large " + remaining()) - } - - val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) - - var perLine = 0 - var first = true - for (b <- toByteArray()) { - perLine += 1 - if (perLine % 8 == 0) { - sb.append('\n') - first = true - } - if (! first) sb.append(' ') - first = false - sb.append(java.lang.Integer.toHexString(b & 0xff)) - } - sb.append('\n') - sb.toString() + def rewind(): Unit = { + _pos = 0 } - override def toString: String = { - val sb: StringBuffer = new StringBuffer - sb.append(getClass.getName) - sb.append(' ') - sb.append(System.identityHashCode(this)) - sb.append("@[pos=") - sb.append(position()) - sb.append(" lim=") - sb.append(limit()) - sb.append(" cap=") - sb.append(capacity()) - sb.append("]") - sb.toString + def limit(): Long = { + capacity } - - - override def finalize(): Unit = { - var marked = false - if (containers ne null) { - if (containers.exists(container => container.isAcquired && container.requireRelease())) { - marked = true - logError("BUG: buffer was not released - and now going out of scope. " + - "Potential resource leak. Allocated at ", allocateLocationThrowable) - containers.foreach(_.release()) - } - if (containers.exists(container => !container.isFreed && container.requireFree())) { - if (!marked) { - logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", - allocateLocationThrowable) - } - else { - logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak") - } - containers.foreach(_.free()) - } - } - super.finalize() + def limit(newLimit: Long): Unit = { + ??? } } +class WrappedLargeByteBuffer(private val underlying: ByteBuffer) extends LargeByteBuffer { + def capacity = underlying.capacity -object LargeByteBuffer extends Logging { - - private val noopDisposeFunction = new BufferCleaner() { - protected def doClean(buffer: LargeByteBuffer) { - buffer.free(invokeCleaner = false) - } + def get(dst: Array[Byte], offset: Int, length: Int): Unit = { + underlying.get(dst, offset, length) } - val enableExpensiveAssert = false - private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) - val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( - new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) - // Do not allow anyone else to override cleaner - EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) - - // 8K sufficient ? 
- private val TEMP_ARRAY_SIZE = 8192 - - /** - * Create a LargeByteBuffer of specified size which is split across - * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory - * ByteBuffer - * - */ - def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { - if (0 == totalSize) { - return EMPTY_BUFFER - } - - assert (totalSize > 0) - - val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) - val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) - val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) - - assert (lastBlockSize > 0) - - val bufferArray = { - val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) - for (index <- 0 until numBlocks - 1) { - val buff = ByteBuffer.allocate(blockSize) - // buff.clear() - arr += new HeapByteBufferContainer(buff, true) - } - arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) - assert (arr.length == numBlocks) - arr - } - - new LargeByteBuffer(bufferArray, false, false) + def get(): Byte = { + underlying.get() } - /** - * Create a LargeByteBuffer of specified size which is split across - * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk - * - */ - private def allocateDiskBuffer(totalSize: Long, - blockManager: BlockManager): LargeByteBuffer = { - if (0 == totalSize) { - return EMPTY_BUFFER - } - - assert (totalSize > 0) - - // Create a file of the specified size. - val file = blockManager.diskBlockManager.createTempBlock()._2 - val raf = new RandomAccessFile(file, "rw") - try { - raf.setLength(totalSize) - } finally { - raf.close() - } - - readWriteDiskSegment(new FileSegment(file, 0, totalSize), - ephemeralDiskBacked = true, blockManager.ioConf) + def position: Long = underlying.position + def position(position: Long): Unit = { + //XXX check range? + underlying.position(position.toInt) } - - // The returned buffer takes up ownership of the underlying buffers - // (including dispos'ing that when done) - def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { - val nonEmpty = buffers.filter(_.hasRemaining) - - // cleanup the empty buffers - buffers.filter(! _.hasRemaining).foreach(b => BlockManager.dispose(b)) - - - if (nonEmpty.isEmpty) { - return EMPTY_BUFFER - } - - // slice so that offsets match our requirement - new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => - new HeapByteBufferContainer(b.slice(), true)), false, false) + def remaining(): Long = { + underlying.remaining() } - def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { - // only non empty arrays - val arrays = byteArrays.filter(_.length > 0) - if (0 == arrays.length) return EMPTY_BUFFER - - new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => - new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) + def duplicate(): WrappedLargeByteBuffer = { + new WrappedLargeByteBuffer(underlying.duplicate()) } - def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { - - if (inputBuffers.isEmpty) return EMPTY_BUFFER - - if (! inputBuffers.exists(_.hasRemaining())) { - if (canDispose) inputBuffers.map(_.free()) - return EMPTY_BUFFER - } - - // release all temp resources acquired - inputBuffers.foreach(buff => buff.releasePendingContainers()) - // free current container if acquired. 
- inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { - buff.containers(buff.currentContainerIndex).release() - }) - // inputBuffers.foreach(b => b.doReleaseAll()) - - - // Dispose of any empty buffers - if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) - - // Find all containers we need. - val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) - - val containers = buffers.flatMap(_.containers) - assert (! containers.isEmpty) - // The in order containers of "buffers" seq constitute the required return value - val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, - // if you cant dispose, then we dont own the buffers : in which case, need duplicate - ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) - - if (canDispose) { - // override dispose of all other buffers. - val disposeFunctions = inputBuffers.map { - buffer => { - (buffer, buffer.overrideCleaner(noopDisposeFunction)) - } - } - - val cleaner = retval.getCleaner() - val newCleaner = new BufferCleaner { - protected def doClean(buffer: LargeByteBuffer) { - - assert (retval == buffer) - // default cleaner. - cleaner.clean(retval) - // not required, since we are within clean anyway. - // retval.free(invokeCleaner = false) - - // retval.doDispose(needRelease = true) - - // This might actually call dispose twice on some (initially) empty buffers, - // which is fine since we now guard against that. - disposeFunctions.foreach(v => v._2.clean(v._1)) - // Call the free method too : so that buffers are marked free ... - disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) - } - } - - val prev = retval.overrideCleaner(newCleaner) - assert (prev == cleaner) - } - - retval + def rewind(): Unit = { + underlying.duplicate() } - private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { - if (arr == null) { - throw new NullPointerException - } else if (offset < 0 || size < 0 || offset + size > arr.length) { - throw new IndexOutOfBoundsException - } + def limit(): Long = { + underlying.limit() } - def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { - if (size <= blockManager.ioConf.maxInMemSize) { - LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) - } else { - LargeByteBuffer.allocateDiskBuffer(size, blockManager) - } + def limit(newLimit: Long) = { + //XXX check range? 
+ underlying.limit(newLimit.toInt) } - def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, - ephemeralDiskBacked: Boolean): LargeByteBuffer = { - // Split the block into multiple of BlockStore.maxBlockSize - val segmentSize = segment.length - val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] - val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) - val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) - - val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) - - for (index <- 0 until numBlocks - 1) { - buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, - segment.offset + index * blockSize, blockSize), ioConf) - } - - // Last block - buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, - segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) +} - new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +object LargeByteBuffer { + def allocateOnHeap(size: Long, maxChunk: Int): LargeByteBuffer = { + val buffer = ChainedBuffer.withInitialSize(maxChunk, size) + new ChainedLargeByteBuffer(buffer) } +} - def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, - ioConf: IOConfig): LargeByteBuffer = { - - // Split the block into multiple of BlockStore.maxBlockSize - val segmentSize = segment.length - val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] - val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) - val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) - - logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + - ", lastBlockSize = " + lastBlockSize) - val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) - - for (index <- 0 until numBlocks - 1) { - buffers += new ReadWriteFileContainer(new FileSegment(segment.file, - segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) - } - - // Last block - buffers += new ReadWriteFileContainer(new FileSegment(segment.file, - segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) - new LargeByteBuffer(buffers, false, ephemeralDiskBacked) - } -} +// +///** +// * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G +// * which ByteBuffers are limited to. +// * Externally, it exposes all the api which java.nio.ByteBuffer exposes. +// * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. +// * Not all the data might be loaded into memory (like disk or tachyon data) - so actual +// * memory footprint - heap and vm could be much lower than capacity. +// * +// * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this +// * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! +// * +// * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this +// * will require the file to be kept open (repeatedly opening/closing file is not good +// * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is +// * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) +// * +// * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is +// * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some +// * cases (when we duplicate/slice them). 
Currently spark does not need this, but might in future +// * so relook at it later. +// */ +//// We should make this constructor private: but for now, +//// leaving it public since TachyonStore needs it +//class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], +// private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { +// +// // TODO: TEMP code: to flush out potential resource leaks. REMOVE ME +// private val allocateLocationThrowable: Throwable = { +// if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { +// new Throwable("blockId = " + BlockManager.getLookupBlockId) +// } else { +// null +// } +// } +// private var disposeLocationThrowable: Throwable = null +// +// @volatile private var allowCleanerOverride = true +// @volatile private var cleaner: BufferCleaner = new BufferCleaner { +// override def doClean(buffer: LargeByteBuffer) = { +// assert (LargeByteBuffer.this == buffer) +// doDispose(needRelease = false) +// } +// } +// +// // should not be empty +// assert (null != inputContainers && ! inputContainers.isEmpty) +// // should not have any null's +// assert (inputContainers.find(_ == null).isEmpty) +// +// // println("Num containers = " + inputContainers.size) +// +// // Position, limit and capacity relevant over the engire LargeByteBuffer +// @volatile private var globalPosition = 0L +// @volatile private var globalLimit = 0L +// @volatile private var currentContainerIndex = 0 +// +// // The buffers in which the actual data is held. +// private var containers: Array[ByteBufferContainer] = null +// +// // aggregate capacities of the individual buffers. +// // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be +// // sum of capacity of 0th and 1st block buffer +// private var bufferPositionStart: Array[Long] = null +// +// // Contains the indices of a containers which requires release before subsequent invocation of +// // read/write should be serviced. This is required since current read/write might have moved the +// // position but since we are returning bytebuffers which depend on the validity of the existing +// // bytebuffer, we cant release them yet. +// private var needReleaseIndices = new HashSet[Int]() +// +// private val readable = ! inputContainers.exists(! _.isReadable) +// private val writable = ! inputContainers.exists(! 
_.isWritable) +// +// +// // initialize +// @volatile private var globalCapacity = { +// +// // Ensure that there are no empty buffers : messes up with our code : unless it +// // is a single buffer (for empty buffer for marker case) +// assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) +// +// containers = { +// if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray +// } +// containers.foreach(_.validate()) +// +// def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { +// val buff = new ArrayBuffer[Long](arr.length + 1) +// buff += 0L +// +// buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) +// assert (buff.length == arr.length + 1) +// bufferPositionStart = buff.toArray +// } +// +// initializeBufferPositionStart(containers) +// +// // remove references from inputBuffers +// inputContainers.clear() +// +// globalLimit = bufferPositionStart(containers.length) +// globalPosition = 0L +// currentContainerIndex = 0 +// +// assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) +// +// globalLimit +// } +// +// final def position(): Long = globalPosition +// +// final def limit(): Long = globalLimit +// +// final def capacity(): Long = globalCapacity +// +// final def limit(newLimit: Long) { +// if ((newLimit > capacity()) || (newLimit < 0)) { +// throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) +// } +// +// globalLimit = newLimit +// if (position() > newLimit) position(newLimit) +// } +// +// def skip(skipBy: Long) = position(position() + skipBy) +// +// private def releasePendingContainers() { +// if (! needReleaseIndices.isEmpty) { +// val iter = needReleaseIndices.iterator +// while (iter.hasNext) { +// val index = iter.next() +// assert (index >= 0 && index < containers.length) +// // It is possible to move from one container to next before the previous +// // container was acquired. For example, get forcing move to next container +// // since current was exhausted immediatelly followed by a position() +// // so the container we moved to was never acquired. +// +// // assert (containers(index).isAcquired) +// // will this always be satisfied ? +// // assert (index != currentContainerIndex) +// if (containers(index).isAcquired) containers(index).release() +// } +// needReleaseIndices.clear() +// } +// } +// +// private def toNewContainer(newIndex: Int) { +// if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { +// +// assert (currentContainerIndex >= 0) +// needReleaseIndices += currentContainerIndex +// } +// currentContainerIndex = newIndex +// } +// +// // expensive method, sigh ... optimize it later ? +// final def position(newPosition: Long) { +// +// if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() +// +// if (currentContainerIndex < bufferPositionStart.length - 1 && +// newPosition >= bufferPositionStart(currentContainerIndex) && +// newPosition < bufferPositionStart(currentContainerIndex + 1)) { +// // Same buffer - easy method ... +// globalPosition = newPosition +// // Changed position - free previously returned buffers. +// releasePendingContainers() +// return +// } +// +// // Find appropriate currentContainerIndex +// // Since bufferPositionStart is sorted, can be replaced with binary search if required. +// // For now, not in the perf critical path since buffers size is very low typically. 
+// var index = 0 +// val cLen = containers.length +// while (index < cLen) { +// if (newPosition >= bufferPositionStart(index) && +// newPosition < bufferPositionStart(index + 1)) { +// globalPosition = newPosition +// toNewContainer(index) +// // Changed position - free earlier and previously returned buffers. +// releasePendingContainers() +// return +// } +// index += 1 +// } +// +// if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { +// // boundary. +// globalPosition = newPosition +// toNewContainer(cLen) +// // Changed position - free earlier and previously returned buffers. +// releasePendingContainers() +// return +// } +// +// assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + +// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) +// } +// +// +// /** +// * Clears this buffer. The position is set to zero, the limit is set to +// * the capacity, and the mark is discarded. +// * +// *Invoke this method before using a sequence of channel-read or +// * put operations to fill this buffer. +// * +// *
This method does not actually erase the data in the buffer, but it +// * is named as if it did because it will most often be used in situations +// * in which that might as well be the case.
+// */ +// final def clear() { +// // if (0 == globalCapacity) return +// +// needReleaseIndices += 0 +// globalPosition = 0L +// toNewContainer(0) +// globalLimit = globalCapacity +// +// // Now free all pending containers +// releasePendingContainers() +// } +// +// /** +// * Flips this buffer. The limit is set to the current position and then +// * the position is set to zero. If the mark is defined then it is +// * discarded. +// * +// *After a sequence of channel-read or put operations, invoke +// * this method to prepare for a sequence of channel-write or relative +// * get operations. +// */ +// final def flip() { +// needReleaseIndices += 0 +// globalLimit = globalPosition +// globalPosition = 0L +// toNewContainer(0) +// +// // Now free all pending containers +// releasePendingContainers() +// } +// +// /** +// * Rewinds this buffer. The position is set to zero and the mark is +// * discarded. +// * +// *
Invoke this method before a sequence of channel-write or get +// * operations, assuming that the limit has already been set +// * appropriately. +// */ +// final def rewind() { +// needReleaseIndices += 0 +// globalPosition = 0L +// toNewContainer(0) +// +// // Now free all pending containers +// releasePendingContainers() +// } +// +// /** +// * Returns the number of elements between the current position and the +// * limit.
+// * +// * @return The number of elements remaining in this buffer +// */ +// final def remaining(): Long = { +// globalLimit - globalPosition +// } +// +// /** +// * Tells whether there are any elements between the current position and +// * the limit. +// * +// * @return true if, and only if, there is at least one element +// * remaining in this buffer +// */ +// final def hasRemaining() = { +// globalPosition < globalLimit +// } +// +// // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) +// +// // number of bytes remaining in currently active underlying buffer +// private def currentRemaining(): Int = { +// if (hasRemaining()) { +// // validate currentContainerIndex is valid +// assert (globalPosition >= bufferPositionStart(currentContainerIndex) && +// globalPosition < bufferPositionStart(currentContainerIndex + 1), +// "globalPosition = " + globalPosition + +// ", currentContainerIndex = " + currentContainerIndex + +// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +// +// currentRemaining0(currentContainerIndex) +// } else 0 +// } +// +// // Without any validation : required when we are bumping the index (when validation will fail) ... +// private def currentRemaining0(which: Int): Int = { +// // currentBuffer().remaining() +// math.max(0, math.min(bufferPositionStart(which + 1), +// globalLimit) - globalPosition).asInstanceOf[Int] +// } +// +// // Set the approppriate position/limit for the current underlying buffer to mirror our +// // the LargeByteBuffer's state. +// private def fetchCurrentBuffer(): ByteBuffer = { +// releasePendingContainers() +// +// assert (currentContainerIndex < containers.length) +// +// val container = containers(currentContainerIndex) +// if (! container.isAcquired) { +// container.acquire() +// } +// +// assert (container.isAcquired) +// if (LargeByteBuffer.enableExpensiveAssert) { +// assert (! containers.exists( b => (b ne container) && b.isAcquired)) +// } +// +// assert (currentContainerIndex < bufferPositionStart.length && +// globalPosition < bufferPositionStart(currentContainerIndex + 1), +// "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + +// bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) +// +// val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). +// asInstanceOf[Int] +// +// val buffer = container.getByteBuffer +// buffer.position(buffPosition) +// val diff = buffer.capacity - buffPosition +// val left = remaining() +// if (diff <= left) { +// buffer.limit(buffer.capacity()) +// } else { +// // Can happen if limit() was called. +// buffer.limit(buffPosition + left.asInstanceOf[Int]) +// } +// +// buffer +// } +// +// // To be used ONLY to test in suites. +// private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { +// if ("1" != System.getProperty("SPARK_TESTING")) { +// throw new IllegalStateException("This method is to be used ONLY within spark test suites") +// } +// +// fetchCurrentBuffer() +// } +// +// // Expects that the invoker has ensured that this can be safely invoked. +// // That is, it wont be invoked when the loop wont terminate. +// private def toNonEmptyBuffer() { +// +// if (! hasRemaining()) { +// var newIndex = currentContainerIndex +// // Ensure we are in the right block or not. 
+// while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { +// newIndex += 1 +// } +// toNewContainer(newIndex) +// // Do not do this - since we might not yet have consumed the buffer which caused EOF right now +// /* +// // Add last one also, and release it too - since we are at the end of the buffer with nothing +// // more pending. +// if (newIndex >= 0 && currentContainerIndex < containers.length) { +// needReleaseIndices += newIndex +// } +// */ +// assert (currentContainerIndex >= 0) +// // releasePendingContainers() +// return +// } +// +// var index = currentContainerIndex +// while (0 == currentRemaining0(index) && index < containers.length) { +// index += 1 +// } +// assert (currentContainerIndex < containers.length) +// toNewContainer(index) +// assert (0 != currentRemaining()) +// } +// +// private def assertPreconditions(containerIndex: Int) { +// assert (globalPosition >= bufferPositionStart(containerIndex), +// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +// assert (globalPosition < bufferPositionStart(containerIndex + 1), +// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +// +// assert (globalLimit <= globalCapacity) +// assert (containerIndex < containers.length) +// } +// +// +// /** +// * Attempts to return a ByteBuffer of the requested size. +// * It is possible to return a buffer of size smaller than requested +// * even though hasRemaining == true +// * +// * On return, position would have been moved 'ahead' by the size of the buffer returned : +// * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer +// * +// * +// * This is used to primarily retrieve content of this buffer to expose via ByteBuffer +// * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the +// * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer +// * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying +// * container is a disk backed container, and we make subsequent calls to get(), the returned +// * ByteBuffer can be dispose'ed off +// * +// * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
+// * @return +// */ +// +// private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { +// fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) +// } +// +// private def fetchBufferOfSizeImpl(maxChunkSize: Int, +// canReleaseContainers: Boolean): ByteBuffer = { +// if (canReleaseContainers) releasePendingContainers() +// assert (maxChunkSize > 0) +// +// // not checking for degenerate case of maxChunkSize == 0 +// if (globalPosition >= globalLimit) { +// // throw exception +// throw new BufferUnderflowException() +// } +// +// // Check preconditions : disable these later, since they might be expensive to +// // evaluate for every IO op +// assertPreconditions(currentContainerIndex) +// +// val currentBufferRemaining = currentRemaining() +// +// assert (currentBufferRemaining > 0) +// +// val size = math.min(currentBufferRemaining, maxChunkSize) +// +// val newBuffer = if (currentBufferRemaining > maxChunkSize) { +// val currentBuffer = fetchCurrentBuffer() +// val buff = ByteBufferContainer.createSlice(currentBuffer, +// currentBuffer.position(), maxChunkSize) +// assert (buff.remaining() == maxChunkSize) +// buff +// } else { +// val currentBuffer = fetchCurrentBuffer() +// val buff = currentBuffer.slice() +// assert (buff.remaining() == currentBufferRemaining) +// buff +// } +// +// assert (size == newBuffer.remaining()) +// assert (0 == newBuffer.position()) +// assert (size == newBuffer.limit()) +// assert (newBuffer.capacity() == newBuffer.limit()) +// +// globalPosition += newBuffer.remaining +// toNonEmptyBuffer() +// +// newBuffer +// } +// +// // Can we service the read/write from the currently active (underlying) bytebuffer or not. +// // For almost all cases, this will return true allowing us to optimize away the more expensive +// // computations. +// private def localReadWritePossible(size: Int) = +// size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) +// +// +// def getLong(): Long = { +// assert (readable) +// releasePendingContainers() +// +// if (remaining() < 8) throw new BufferUnderflowException +// +// if (localReadWritePossible(8)) { +// val buff = fetchCurrentBuffer() +// assert (buff.remaining() >= 8) +// val retval = buff.getLong +// globalPosition += 8 +// toNonEmptyBuffer() +// return retval +// } +// +// val buff = readFully(8) +// buff.getLong +// } +// +// def getInt(): Int = { +// assert (readable) +// releasePendingContainers() +// +// if (remaining() < 4) throw new BufferUnderflowException +// +// if (localReadWritePossible(4)) { +// val buff = fetchCurrentBuffer() +// assert (buff.remaining() >= 4) +// val retval = buff.getInt +// globalPosition += 4 +// toNonEmptyBuffer() +// return retval +// } +// +// val buff = readFully(4) +// buff.getInt +// } +// +// def getChar(): Char = { +// assert (readable) +// releasePendingContainers() +// +// if (remaining() < 2) throw new BufferUnderflowException +// +// if (localReadWritePossible(2)) { +// val buff = fetchCurrentBuffer() +// assert (buff.remaining() >= 2) +// val retval = buff.getChar +// globalPosition += 2 +// toNonEmptyBuffer() +// return retval +// } +// +// // if slice is becoming too expensive, revisit this ... +// val buff = readFully(2) +// buff.getChar +// } +// +// def get(): Byte = { +// assert (readable) +// releasePendingContainers() +// +// if (! hasRemaining()) throw new BufferUnderflowException +// +// // If we have remaining bytes, previous invocations MUST have ensured that we are at +// // a buffer which has data to be read. 
+// assert (localReadWritePossible(1)) +// +// val buff = fetchCurrentBuffer() +// assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) +// val retval = buff.get() +// globalPosition += 1 +// toNonEmptyBuffer() +// +// retval +// } +// +// def get(arr: Array[Byte], offset: Int, size: Int): Int = { +// assert (readable) +// releasePendingContainers() +// +// LargeByteBuffer.checkOffsets(arr, offset, size) +// +// // kyro depends on this it seems ? +// // assert (size > 0) +// if (0 == size) return 0 +// +// if (! hasRemaining()) return -1 +// +// if (localReadWritePossible(size)) { +// val buff = fetchCurrentBuffer() +// assert (buff.remaining() >= size) +// buff.get(arr, offset, size) +// globalPosition += size +// toNonEmptyBuffer() +// return size +// } +// +// var remainingSize = math.min(size, remaining()).asInstanceOf[Int] +// var currentOffset = offset +// +// while (remainingSize > 0) { +// val buff = fetchBufferOfSize(remainingSize) +// val toCopy = math.min(buff.remaining(), remainingSize) +// +// buff.get(arr, currentOffset, toCopy) +// currentOffset += toCopy +// remainingSize -= toCopy +// } +// +// currentOffset - offset +// } +// +// +// private def createSlice(size: Long): LargeByteBuffer = { +// +// releasePendingContainers() +// +// if (remaining() < size) { +// // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) +// throw new BufferOverflowException +// } +// +// // kyro depends on this it seems ? +// // assert (size > 0) +// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +// +// val arr = new ArrayBuffer[ByteBufferContainer](2) +// var totalLeft = size +// +// // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) +// +// var containerIndex = currentContainerIndex +// while (totalLeft > 0 && hasRemaining()) { +// assertPreconditions(containerIndex) +// val container = containers(containerIndex) +// val currentLeft = currentRemaining0(containerIndex) +// +// assert (globalPosition + currentLeft <= globalLimit) +// assert (globalPosition >= bufferPositionStart(containerIndex) && +// (globalPosition < bufferPositionStart(containerIndex + 1))) +// +// val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] +// val sliceSize = math.min(totalLeft, currentLeft) +// assert (from >= 0) +// assert (sliceSize > 0 && sliceSize <= Int.MaxValue) +// +// val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) +// arr += slice +// +// globalPosition += sliceSize +// totalLeft -= sliceSize +// if (currentLeft == sliceSize) containerIndex += 1 +// } +// +// // Using toNonEmptyBuffer instead of directly moving to next here so that +// // other checks can be performed there. +// toNonEmptyBuffer() +// // force cleanup - this is fine since we are not using the buffers directly +// // which are actively needed (the returned value is on containers which can +// // recreate) +// releasePendingContainers() +// // free current container if acquired. 
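A slice that spans several containers is assembled piecewise, one (offset, length) sub-range per container, which is what the createSlice loop above does. A hedged standalone sketch of that arithmetic, with illustrative names, assuming size does not exceed the remaining bytes:

object SlicePiecesSketch {
  import scala.collection.mutable.ArrayBuffer

  // Returns (containerIndex, offsetInContainer, length) for each piece of the slice.
  def slicePieces(globalPosition: Long, size: Long,
      bufferPositionStart: Array[Long]): Seq[(Int, Int, Int)] = {
    val pieces = ArrayBuffer[(Int, Int, Int)]()
    var pos = globalPosition
    var left = size
    var idx = 0
    while (pos >= bufferPositionStart(idx + 1)) idx += 1 // find the starting container
    while (left > 0) {
      val from = (pos - bufferPositionStart(idx)).toInt
      val take = math.min(left, bufferPositionStart(idx + 1) - pos).toInt
      pieces += ((idx, from, take))
      pos += take
      left -= take
      idx += 1
    }
    pieces.toSeq
  }
}

// Example: slicePieces(3L, 6L, Array(0L, 4L, 8L, 10L)) yields (0, 3, 1), (1, 0, 4) and (2, 0, 1).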
+// if (currentContainerIndex < containers.length) { +// containers(currentContainerIndex).release() +// } +// assert (currentContainerIndex == containerIndex) +// +// val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) +// retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) +// retval +// } +// +// // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers +// // This is to be used only for writes : and ensures that writes are done into the appropriate +// // underlying bytebuffers. +// def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { +// assert(writable) +// assert(size >= 0) +// +// createSlice(size) +// } +// +// // get a buffer which is of the specified size and contains data from the underlying buffers +// // Note, the actual data might be spread across the underlying buffers. +// // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! +// private def readFully(size: Int): ByteBuffer = { +// assert (readable) +// +// if (remaining() < size) { +// // throw exception +// throw new BufferUnderflowException() +// } +// +// // kyro depends on this it seems ? +// // assert (size > 0) +// if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER +// +// // Expected to be handled elsewhere. +// assert (! localReadWritePossible(size)) +// +// val localBuff = { +// val buff = fetchBufferOfSize(size) +// // assert(buff.remaining() <= size) +// // if (buff.remaining() == size) return buff +// assert(buff.remaining() < size) +// ByteBuffer.allocate(size).put(buff) +// } +// +// // assert (localBuff.hasRemaining) +// +// while (localBuff.hasRemaining) { +// val buff = fetchBufferOfSize(localBuff.remaining()) +// localBuff.put(buff) +// } +// +// localBuff.flip() +// localBuff +// } +// +// +// +// def put(b: Byte) { +// assert (writable) +// if (remaining() < 1) { +// // logInfo("put byte. remaining = " + remaining() + ", this = " + this) +// throw new BufferOverflowException +// } +// +// assert (currentRemaining() > 0) +// +// fetchCurrentBuffer().put(b) +// globalPosition += 1 +// // Check to need to bump the index ? +// toNonEmptyBuffer() +// } +// +// +// def put(buffer: ByteBuffer) { +// assert (writable) +// if (remaining() < buffer.remaining()) { +// throw new BufferOverflowException +// } +// +// val bufferRemaining = buffer.remaining() +// if (localReadWritePossible(bufferRemaining)) { +// +// assert (currentRemaining() >= bufferRemaining) +// +// fetchCurrentBuffer().put(buffer) +// +// globalPosition += bufferRemaining +// toNonEmptyBuffer() +// return +// } +// +// while (buffer.hasRemaining) { +// val currentBufferRemaining = currentRemaining() +// val bufferRemaining = buffer.remaining() +// +// if (currentBufferRemaining >= bufferRemaining) { +// fetchCurrentBuffer().put(buffer) +// globalPosition += bufferRemaining +// } else { +// // Split across buffers. +// val currentBuffer = fetchCurrentBuffer() +// assert (currentBuffer.remaining() >= currentBufferRemaining) +// val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), +// currentBufferRemaining) +// assert (sliced.remaining() == currentBufferRemaining) +// currentBuffer.put(sliced) +// // move buffer pos +// buffer.position(buffer.position() + currentBufferRemaining) +// +// globalPosition += currentBufferRemaining +// } +// toNonEmptyBuffer() +// } +// +// assert (! 
hasRemaining() || currentRemaining() > 0) +// } +// +// def put(other: LargeByteBuffer) { +// assert (writable) +// if (this.remaining() < other.remaining()) { +// throw new BufferOverflowException +// } +// +// while (other.hasRemaining()) { +// val buffer = other.fetchBufferOfSize(other.currentRemaining()) +// this.put(buffer) +// } +// } +// +// +// def duplicate(): LargeByteBuffer = { +// val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) +// // We do a duplicate as part of construction - so avoid double duplicate. +// // containersCopy ++= containers.map(_.duplicate()) +// containersCopy ++= containers +// val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) +// +// // set limit and position (in that order) ... +// retval.limit(this.limit()) +// retval.position(this.position()) +// +// // Now release our containers - if any had been acquired +// releasePendingContainers() +// +// retval +// } +// +// +// /** +// * 'read' a LargeByteBuffer of size specified and return that. +// * Position will be incremented by size +// * +// * The name might be slightly confusing : rename ? +// * +// * @param size Amount of data to be read from this buffer and returned +// * @return +// */ +// def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { +// if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException +// if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException +// +// +// assert (readable) +// assert (size >= 0) +// +// releasePendingContainers() +// +// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +// +// createSlice(size) +// } +// +// +// // This is essentially a workaround to exposing underlying buffers +// def readFrom(channel: ReadableByteChannel): Long = { +// +// assert (writable) +// releasePendingContainers() +// +// // this also allows us to avoid nasty corner cases in the loop. +// if (! hasRemaining()) { +// // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) +// throw new BufferOverflowException +// } +// +// var totalBytesRead = 0L +// +// while (hasRemaining()) { +// // read what we can ... +// val buffer = fetchCurrentBuffer() +// val bufferRemaining = currentRemaining() +// val bytesRead = channel.read(buffer) +// +// if (bytesRead > 0) { +// totalBytesRead += bytesRead +// // bump position too .. +// globalPosition += bytesRead +// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +// } +// else if (-1 == bytesRead) { +// // if we had already read some data in the loop, return that. +// if (totalBytesRead > 0) return totalBytesRead +// return -1 +// } // nothing available to read, retry later. return +// else if (0 == bytesRead) { +// return totalBytesRead +// } +// +// // toNonEmptyBuffer() +// } +// +// // Cleanup last buffer ? +// toNonEmptyBuffer() +// totalBytesRead +// } +// +// // This is essentially a workaround to exposing underlying buffers +// def readFrom(inStrm: InputStream): Long = { +// +// assert (writable) +// releasePendingContainers() +// +// // this also allows us to avoid nasty corner cases in the loop. +// // if (! hasRemaining()) throw new BufferOverflowException +// if (! hasRemaining()) return 0 +// +// var totalBytesRead = 0L +// +// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +// +// while (hasRemaining()) { +// // read what we can ... 
note, since there is no gaurantee that underlying buffer might +// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +// // see if we can optimize this later ... +// val buffer = fetchCurrentBuffer() +// val bufferRemaining = buffer.remaining() +// val max = math.min(buff.length, bufferRemaining) +// val bytesRead = inStrm.read(buff, 0, max) +// +// if (bytesRead > 0) { +// buffer.put(buff, 0, bytesRead) +// totalBytesRead += bytesRead +// // bump position too .. +// globalPosition += bytesRead +// // buffer.position(buffer.position + bytesRead) +// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +// } +// else if (-1 == bytesRead) { +// // if we had already read some data in the loop, return that. +// if (totalBytesRead > 0) return totalBytesRead +// return -1 +// } // nothing available to read, retry later. return +// else if (0 == bytesRead) { +// return totalBytesRead +// } +// +// // toNonEmptyBuffer() +// } +// +// totalBytesRead +// } +// +// // This is essentially a workaround to exposing underlying buffers +// // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce +// // code for performance reasons. +// def readFrom(inStrm: DataInput): Long = { +// +// assert (writable) +// releasePendingContainers() +// +// // this also allows us to avoid nasty corner cases in the loop. +// // if (! hasRemaining()) throw new BufferOverflowException +// if (! hasRemaining()) return 0 +// +// var totalBytesRead = 0L +// +// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +// +// while (hasRemaining()) { +// // read what we can ... note, since there is no gaurantee that underlying buffer might +// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +// // see if we can optimize this later ... +// val buffer = fetchCurrentBuffer() +// val bufferRemaining = buffer.remaining() +// val max = math.min(buff.length, bufferRemaining) +// inStrm.readFully(buff, 0, max) +// val bytesRead = max +// +// if (bytesRead > 0) { +// buffer.put(buff, 0, bytesRead) +// totalBytesRead += bytesRead +// // bump position too .. +// globalPosition += bytesRead +// // buffer.position(buffer.position() + bytesRead) +// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +// } +// else if (-1 == bytesRead) { +// // if we had already read some data in the loop, return that. +// if (totalBytesRead > 0) return totalBytesRead +// return -1 +// } // nothing available to read, retry later. return +// else if (0 == bytesRead) { +// return totalBytesRead +// } +// +// // toNonEmptyBuffer() +// } +// +// totalBytesRead +// } +// +// // This is essentially a workaround to exposing underlying buffers +// // Note: tries to do it efficiently without needing to load everything into memory +// // (particularly for diskbacked buffers, etc). +// def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { +// +// assert (readable) +// releasePendingContainers() +// +// // this also allows us to avoid nasty corner cases in the loop. +// if (! hasRemaining()) throw new BufferUnderflowException +// +// var totalBytesWritten = 0L +// +// while (hasRemaining()) { +// // Write what we can ... +// val buffer = fetchCurrentBuffer() +// val bufferRemaining = buffer.remaining() +// assert (bufferRemaining > 0) +// val bytesWritten = channel.write(buffer) +// +// if (bytesWritten > 0) { +// totalBytesWritten += bytesWritten +// // bump position too .. 
+// globalPosition += bytesWritten +// if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() +// assert (! hasRemaining() || currentRemaining() > 0) +// } +// else if (0 == bytesWritten) { +// return totalBytesWritten +// } +// +// // toNonEmptyBuffer() +// } +// +// assert (! hasRemaining()) +// if (cleanup) { +// free() +// } +// totalBytesWritten +// } +// +// // This is essentially a workaround to exposing underlying buffers +// def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { +// +// assert (readable) +// releasePendingContainers() +// +// // this also allows us to avoid nasty corner cases in the loop. +// if (! hasRemaining()) throw new BufferUnderflowException +// +// var totalBytesWritten = 0L +// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +// +// while (hasRemaining()) { +// // write what we can ... note, since there is no gaurantee that underlying buffer might +// // expose array() method, we do double copy - from bytearray to buff and from +// // buff to outputstream. see if we can optimize this later ... +// val buffer = fetchCurrentBuffer() +// val bufferRemaining = buffer.remaining() +// val size = math.min(bufferRemaining, buff.length) +// buffer.get(buff, 0, size) +// outStrm.write(buff, 0, size) +// +// totalBytesWritten += size +// // bump position too .. +// globalPosition += size +// +// if (size >= bufferRemaining) toNonEmptyBuffer() +// } +// +// toNonEmptyBuffer() +// if (cleanup) { +// free() +// } +// totalBytesWritten +// } +// +// def asInputStream(): InputStream = { +// new InputStream() { +// override def read(): Int = { +// if (! hasRemaining()) return -1 +// get() +// } +// +// override def read(arr: Array[Byte], off: Int, len: Int): Int = { +// if (! hasRemaining()) return -1 +// +// get(arr, off, len) +// } +// +// override def available(): Int = { +// // current remaining is what can be read without blocking +// // anything higher might need disk access/buffer swapping. +// /* +// val left = remaining() +// math.min(left, Int.MaxValue).asInstanceOf[Int] +// */ +// currentRemaining() +// } +// } +// } +// +// def getCleaner() = cleaner +// +// /** +// * @param cleaner The previous cleaner, so that the caller can chain them if required. +// * @return +// */ +// private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { +// overrideCleaner(cleaner, allowOverride = true) +// } +// +// private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { +// if (! 
this.allowCleanerOverride) { +// // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free +// return this.cleaner +// } +// +// this.allowCleanerOverride = allowOverride +// assert (null != cleaner) +// val prev = this.cleaner +// this.cleaner = cleaner +// // logInfo("Overriding " + prev + " with " + this.cleaner) +// prev +// } +// +// private def doReleaseAll() { +// for (container <- containers) { +// container.release() +// } +// } +// +// def free(invokeCleaner: Boolean = true) { +// // logInfo("Free on " + this + ", cleaner = " + cleaner) +// // always invoking release +// doReleaseAll() +// +// if (invokeCleaner) cleaner.clean(this) +// } +// +// private def doDispose(needRelease: Boolean) { +// +// if (disposeLocationThrowable ne null) { +// logError("Already free'ed earlier at : ", disposeLocationThrowable) +// logError("Current at ", new Throwable) +// throw new IllegalStateException("Already freed.") +// } +// disposeLocationThrowable = new Throwable() +// +// // Forcefully cleanup all +// if (needRelease) doReleaseAll() +// +// // Free in a different loop, in case different containers refer to same resource +// // to release (like file) +// for (container <- containers) { +// container.free() +// } +// +// needReleaseIndices.clear() +// +// // We should not use this buffer anymore : set the values such that f +// // we dont ... +// globalPosition = 0 +// globalLimit = 0 +// globalCapacity = 0 +// } +// +// // copy data over ... MUST be used only for cases where array is known to be +// // small to begin with. slightly risky method due to that assumption +// def toByteArray(): Array[Byte] = { +// val positionBackup = position() +// val size = remaining() +// if (size > Int.MaxValue) { +// throw new IllegalStateException( +// "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") +// } +// +// val retval = new Array[Byte](size.asInstanceOf[Int]) +// val readSize = get(retval, 0, retval.length) +// assert (readSize == retval.length, +// "readSize = " + readSize + ", retval.length = " + retval.length) +// +// position(positionBackup) +// +// retval +// } +// +// // copy data over ... MUST be used only for cases where array is known to be +// // small to begin with. slightly risky method due to that assumption +// def toByteBuffer(): ByteBuffer = { +// ByteBuffer.wrap(toByteArray()) +// } +// +// def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { +// val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) +// val currentPosition = position() +// retval.put(this) +// position(currentPosition) +// retval.clear() +// retval +// } +// +// +// +// // This is ONLY used for testing : that too as part of development of this and associated classes +// // remove before contributing to spark. +// def hexDump(): String = { +// if (remaining() * 64 > Int.MaxValue) { +// throw new UnsupportedOperationException("buffer too large " + remaining()) +// } +// +// val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) +// +// var perLine = 0 +// var first = true +// for (b <- toByteArray()) { +// perLine += 1 +// if (perLine % 8 == 0) { +// sb.append('\n') +// first = true +// } +// if (! 
first) sb.append(' ') +// first = false +// sb.append(java.lang.Integer.toHexString(b & 0xff)) +// } +// sb.append('\n') +// sb.toString() +// } +// +// override def toString: String = { +// val sb: StringBuffer = new StringBuffer +// sb.append(getClass.getName) +// sb.append(' ') +// sb.append(System.identityHashCode(this)) +// sb.append("@[pos=") +// sb.append(position()) +// sb.append(" lim=") +// sb.append(limit()) +// sb.append(" cap=") +// sb.append(capacity()) +// sb.append("]") +// sb.toString +// } +// +// +// +// override def finalize(): Unit = { +// var marked = false +// if (containers ne null) { +// if (containers.exists(container => container.isAcquired && container.requireRelease())) { +// marked = true +// logError("BUG: buffer was not released - and now going out of scope. " + +// "Potential resource leak. Allocated at ", allocateLocationThrowable) +// containers.foreach(_.release()) +// } +// if (containers.exists(container => !container.isFreed && container.requireFree())) { +// if (!marked) { +// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", +// allocateLocationThrowable) +// } +// else { +// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak") +// } +// containers.foreach(_.free()) +// } +// } +// super.finalize() +// } +//} +// +// +//object LargeByteBuffer extends Logging { +// +// private val noopDisposeFunction = new BufferCleaner() { +// protected def doClean(buffer: LargeByteBuffer) { +// buffer.free(invokeCleaner = false) +// } +// } +// +// val enableExpensiveAssert = false +// private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) +// val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( +// new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) +// // Do not allow anyone else to override cleaner +// EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) +// +// // 8K sufficient ? +// private val TEMP_ARRAY_SIZE = 8192 +// +// /** +// * Create a LargeByteBuffer of specified size which is split across +// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory +// * ByteBuffer +// * +// */ +// def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { +// if (0 == totalSize) { +// return EMPTY_BUFFER +// } +// +// assert (totalSize > 0) +// +// val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) +// val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) +// val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) +// +// assert (lastBlockSize > 0) +// +// val bufferArray = { +// val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) +// for (index <- 0 until numBlocks - 1) { +// val buff = ByteBuffer.allocate(blockSize) +// // buff.clear() +// arr += new HeapByteBufferContainer(buff, true) +// } +// arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) +// assert (arr.length == numBlocks) +// arr +// } +// +// new LargeByteBuffer(bufferArray, false, false) +// } +// +// /** +// * Create a LargeByteBuffer of specified size which is split across +// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk +// * +// */ +// private def allocateDiskBuffer(totalSize: Long, +// blockManager: BlockManager): LargeByteBuffer = { +// if (0 == totalSize) { +// return EMPTY_BUFFER +// } +// +// assert (totalSize > 0) +// +// // Create a file of the specified size. 
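The memory and disk allocators here both split totalSize into fixed-size blocks plus a shorter (or equal) final block, delegating the arithmetic to IOConfig. The intended split is presumably plain ceiling division; the sketch below states that assumption explicitly, since IOConfig's numBlocks/lastBlockSize are not shown in this patch.

object BlockSplitSketch {
  // Assumed semantics only: number of blocks needed to hold totalSize bytes.
  def numBlocks(totalSize: Long, blockSize: Int): Int =
    ((totalSize + blockSize - 1) / blockSize).toInt

  // Assumed semantics only: size of the final block (a full block if totalSize divides evenly).
  def lastBlockSize(totalSize: Long, blockSize: Int): Int = {
    val rem = (totalSize % blockSize).toInt
    if (rem == 0) blockSize else rem
  }
}

// Example: totalSize = 10 and blockSize = 4 give numBlocks = 3 and lastBlockSize = 2,
// matching the allocation loops above (numBlocks - 1 full blocks, then the last block).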
+// val file = blockManager.diskBlockManager.createTempBlock()._2 +// val raf = new RandomAccessFile(file, "rw") +// try { +// raf.setLength(totalSize) +// } finally { +// raf.close() +// } +// +// readWriteDiskSegment(new FileSegment(file, 0, totalSize), +// ephemeralDiskBacked = true, blockManager.ioConf) +// } +// +// // The returned buffer takes up ownership of the underlying buffers +// // (including dispos'ing that when done) +// def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { +// val nonEmpty = buffers.filter(_.hasRemaining) +// +// // cleanup the empty buffers +// buffers.filter(! _.hasRemaining).foreach(b => BlockManager.dispose(b)) +// +// +// if (nonEmpty.isEmpty) { +// return EMPTY_BUFFER +// } +// +// // slice so that offsets match our requirement +// new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => +// new HeapByteBufferContainer(b.slice(), true)), false, false) +// } +// +// def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { +// // only non empty arrays +// val arrays = byteArrays.filter(_.length > 0) +// if (0 == arrays.length) return EMPTY_BUFFER +// +// new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => +// new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) +// } +// +// def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { +// +// if (inputBuffers.isEmpty) return EMPTY_BUFFER +// +// if (! inputBuffers.exists(_.hasRemaining())) { +// if (canDispose) inputBuffers.map(_.free()) +// return EMPTY_BUFFER +// } +// +// // release all temp resources acquired +// inputBuffers.foreach(buff => buff.releasePendingContainers()) +// // free current container if acquired. +// inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { +// buff.containers(buff.currentContainerIndex).release() +// }) +// // inputBuffers.foreach(b => b.doReleaseAll()) +// +// +// // Dispose of any empty buffers +// if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) +// +// // Find all containers we need. +// val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) +// +// val containers = buffers.flatMap(_.containers) +// assert (! containers.isEmpty) +// // The in order containers of "buffers" seq constitute the required return value +// val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, +// // if you cant dispose, then we dont own the buffers : in which case, need duplicate +// ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) +// +// if (canDispose) { +// // override dispose of all other buffers. +// val disposeFunctions = inputBuffers.map { +// buffer => { +// (buffer, buffer.overrideCleaner(noopDisposeFunction)) +// } +// } +// +// val cleaner = retval.getCleaner() +// val newCleaner = new BufferCleaner { +// protected def doClean(buffer: LargeByteBuffer) { +// +// assert (retval == buffer) +// // default cleaner. +// cleaner.clean(retval) +// // not required, since we are within clean anyway. +// // retval.free(invokeCleaner = false) +// +// // retval.doDispose(needRelease = true) +// +// // This might actually call dispose twice on some (initially) empty buffers, +// // which is fine since we now guard against that. +// disposeFunctions.foreach(v => v._2.clean(v._1)) +// // Call the free method too : so that buffers are marked free ... 
+// disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) +// } +// } +// +// val prev = retval.overrideCleaner(newCleaner) +// assert (prev == cleaner) +// } +// +// retval +// } +// +// private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { +// if (arr == null) { +// throw new NullPointerException +// } else if (offset < 0 || size < 0 || offset + size > arr.length) { +// throw new IndexOutOfBoundsException +// } +// } +// +// def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { +// if (size <= blockManager.ioConf.maxInMemSize) { +// LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) +// } else { +// LargeByteBuffer.allocateDiskBuffer(size, blockManager) +// } +// } +// +// def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, +// ephemeralDiskBacked: Boolean): LargeByteBuffer = { +// // Split the block into multiple of BlockStore.maxBlockSize +// val segmentSize = segment.length +// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +// +// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +// +// for (index <- 0 until numBlocks - 1) { +// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +// segment.offset + index * blockSize, blockSize), ioConf) +// } +// +// // Last block +// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) +// +// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +// } +// +// def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, +// ioConf: IOConfig): LargeByteBuffer = { +// +// // Split the block into multiple of BlockStore.maxBlockSize +// val segmentSize = segment.length +// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +// +// logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + +// ", lastBlockSize = " + lastBlockSize) +// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +// +// for (index <- 0 until numBlocks - 1) { +// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +// segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) +// } +// +// // Last block +// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) +// +// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +// } +//} diff --git a/core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala b/core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala index 0dd7e8e736ad6..6657c4f7efc52 100644 --- a/core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala +++ b/core/src/main/scala/org/apache/spark/io/WrappedByteArrayOutputStream.scala @@ -1,121 +1,121 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.io - -import java.io.OutputStream -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.Logging -import org.apache.spark.io.IOConfig.BufferType - -/** - * byte array backed streams (FastByteArrayOutputStream, ByteArrayOutputStream, etc) are limited to - * array length of 2 gig - since that is the array size limit. - * - * So we move from one to the next as soon as we hit the limit per stream. - * And once done, asBuffers or toByteArrays can be used to pull data as a sequence of bytebuffers - * or byte arrays. - * @param initialSize initial size for the byte array stream ... - */ -class WrappedByteArrayOutputStream(private val initialSize: Int, - ioConf: IOConfig) extends OutputStream with Logging { - - private val maxStreamSize = ioConf.getMaxBlockSize(BufferType.MEMORY) - - private val allStreams = new ArrayBuffer[SparkByteArrayOutputStream](4) - - private var current: SparkByteArrayOutputStream = null - private var currentWritten = 0 - - nextWriter() - - override def flush(): Unit = { - current.flush() - } - - override def write(b: Int): Unit = { - if (currentWritten >= maxStreamSize) { - nextWriter() - } - current.write(b) - currentWritten += 1 - } - - - override def write(b: Array[Byte], off: Int, len: Int): Unit = { - // invariant checks - from OutputStream.java - if (b == null) { - throw new NullPointerException - } else if ((off < 0) || (off > b.length) || (len < 0) || - ((off + len) > b.length) || ((off + len) < 0)) { - throw new IndexOutOfBoundsException - } else if (len == 0) { - return - } - - // Else, write to stream. - - // common case first - if (currentWritten + len < maxStreamSize) { - current.write(b, off, len) - currentWritten += len - return - } - - // We might need to split the write into two streams. - var startOff = off - var remaining = len - - while (remaining > 0) { - var toCurrent = math.min(remaining, maxStreamSize - currentWritten) - if (toCurrent > 0) { - current.write(b, startOff, toCurrent) - currentWritten += toCurrent - remaining -= toCurrent - startOff += toCurrent - } - - if (currentWritten >= maxStreamSize) { - // to next - nextWriter() - } - } - } - - def toLargeByteBuffer(): LargeByteBuffer = { - current.compact() - val seq = allStreams.filter(_.size > 0).map(_.toByteBuffer) - val retval = LargeByteBuffer.fromBuffers(seq:_*) - - retval - } - - private def nextWriter() { - if (null != current) { - current.flush() - current.compact() - current = null - } - - current = new SparkByteArrayOutputStream(initialSize, ioConf) - currentWritten = 0 - allStreams += current - } -} - - +///* +// * Licensed to the Apache Software Foundation (ASF) under one or more +// * contributor license agreements. See the NOTICE file distributed with +// * this work for additional information regarding copyright ownership. 
+// * The ASF licenses this file to You under the Apache License, Version 2.0 +// * (the "License"); you may not use this file except in compliance with +// * the License. You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// */ +// +//package org.apache.spark.io +// +//import java.io.OutputStream +//import scala.collection.mutable.ArrayBuffer +// +//import org.apache.spark.Logging +//import org.apache.spark.io.IOConfig.BufferType +// +///** +// * byte array backed streams (FastByteArrayOutputStream, ByteArrayOutputStream, etc) are limited to +// * array length of 2 gig - since that is the array size limit. +// * +// * So we move from one to the next as soon as we hit the limit per stream. +// * And once done, asBuffers or toByteArrays can be used to pull data as a sequence of bytebuffers +// * or byte arrays. +// * @param initialSize initial size for the byte array stream ... +// */ +//class WrappedByteArrayOutputStream(private val initialSize: Int, +// ioConf: IOConfig) extends OutputStream with Logging { +// +// private val maxStreamSize = ioConf.getMaxBlockSize(BufferType.MEMORY) +// +// private val allStreams = new ArrayBuffer[SparkByteArrayOutputStream](4) +// +// private var current: SparkByteArrayOutputStream = null +// private var currentWritten = 0 +// +// nextWriter() +// +// override def flush(): Unit = { +// current.flush() +// } +// +// override def write(b: Int): Unit = { +// if (currentWritten >= maxStreamSize) { +// nextWriter() +// } +// current.write(b) +// currentWritten += 1 +// } +// +// +// override def write(b: Array[Byte], off: Int, len: Int): Unit = { +// // invariant checks - from OutputStream.java +// if (b == null) { +// throw new NullPointerException +// } else if ((off < 0) || (off > b.length) || (len < 0) || +// ((off + len) > b.length) || ((off + len) < 0)) { +// throw new IndexOutOfBoundsException +// } else if (len == 0) { +// return +// } +// +// // Else, write to stream. +// +// // common case first +// if (currentWritten + len < maxStreamSize) { +// current.write(b, off, len) +// currentWritten += len +// return +// } +// +// // We might need to split the write into two streams. 
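// Worked illustration with assumed values: if maxStreamSize = 8, currentWritten = 5 and
// len = 10, the loop below writes math.min(10, 8 - 5) = 3 bytes to the current stream,
// rolls over via nextWriter(), then writes the remaining 7 bytes to the fresh stream
// (currentWritten ends at 7, below maxStreamSize, so no further roll-over occurs).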
+// var startOff = off +// var remaining = len +// +// while (remaining > 0) { +// var toCurrent = math.min(remaining, maxStreamSize - currentWritten) +// if (toCurrent > 0) { +// current.write(b, startOff, toCurrent) +// currentWritten += toCurrent +// remaining -= toCurrent +// startOff += toCurrent +// } +// +// if (currentWritten >= maxStreamSize) { +// // to next +// nextWriter() +// } +// } +// } +// +// def toLargeByteBuffer(): LargeByteBuffer = { +// current.compact() +// val seq = allStreams.filter(_.size > 0).map(_.toByteBuffer) +// val retval = LargeByteBuffer.fromBuffers(seq:_*) +// +// retval +// } +// +// private def nextWriter() { +// if (null != current) { +// current.flush() +// current.compact() +// current = null +// } +// +// current = new SparkByteArrayOutputStream(initialSize, ioConf) +// currentWritten = 0 +// allStreams += current +// } +//} +// +// diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 86dbd89f0ffb8..ad895ff338d54 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -31,7 +31,7 @@ import sun.nio.ch.DirectBuffer import org.apache.spark._ import org.apache.spark.executor._ -import org.apache.spark.io.CompressionCodec +import org.apache.spark.io.{WrappedLargeByteBuffer, ChainedLargeByteBuffer, LargeByteBuffer, CompressionCodec} import org.apache.spark.network._ import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.netty.SparkTransportConf @@ -43,7 +43,7 @@ import org.apache.spark.shuffle.hash.HashShuffleManager import org.apache.spark.util._ private[spark] sealed trait BlockValues -private[spark] case class ByteBufferValues(buffer: ByteBuffer) extends BlockValues +private[spark] case class ByteBufferValues(buffer: LargeByteBuffer) extends BlockValues private[spark] case class IteratorValues(iterator: Iterator[Any]) extends BlockValues private[spark] case class ArrayValues(buffer: Array[Any]) extends BlockValues @@ -78,6 +78,9 @@ private[spark] class BlockManager( val diskBlockManager = new DiskBlockManager(this, conf) + //XXX + val largeByteBufferChunkSize = 65536 + private val blockInfo = new TimeStampedHashMap[BlockId, BlockInfo] // Actual storage of where blocks are kept @@ -318,7 +321,7 @@ private[spark] class BlockManager( * Put the block locally, using the given storage level. */ override def putBlockData(blockId: BlockId, data: ManagedBuffer, level: StorageLevel): Unit = { - putBytes(blockId, data.nioByteBuffer(), level) + putBytes(blockId, new WrappedLargeByteBuffer(data.nioByteBuffer()), level) } /** @@ -513,7 +516,7 @@ private[spark] class BlockManager( // Look for block on disk, potentially storing it back in memory if required if (level.useDisk) { logDebug(s"Getting block $blockId from disk") - val bytes: ByteBuffer = diskStore.getBytes(blockId) match { + val bytes: LargeByteBuffer = diskStore.getBytes(blockId) match { case Some(b) => b case None => throw new BlockException( @@ -535,7 +538,7 @@ private[spark] class BlockManager( /* We'll store the bytes in memory if the block's storage level includes * "memory serialized", or if it should be cached as objects in memory * but we only requested its serialized bytes. 
*/ - val copyForMemory = ByteBuffer.allocate(bytes.limit) + val copyForMemory = LargeByteBuffer.allocateOnHeap(bytes.limit, largeByteBufferChunkSize) copyForMemory.put(bytes) memoryStore.putBytes(blockId, copyForMemory, level) bytes.rewind() @@ -591,8 +594,9 @@ private[spark] class BlockManager( val locations = Random.shuffle(master.getLocations(blockId)) for (loc <- locations) { logDebug(s"Getting remote block $blockId from $loc") - val data = blockTransferService.fetchBlockSync( - loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer() + //the fetch will always be one byte buffer till we fix SPARK-5928 + val data: LargeByteBuffer = new WrappedLargeByteBuffer(blockTransferService.fetchBlockSync( + loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer()) if (data != null) { if (asBlockResult) { @@ -674,7 +678,7 @@ private[spark] class BlockManager( */ def putBytes( blockId: BlockId, - bytes: ByteBuffer, + bytes: LargeByteBuffer, level: StorageLevel, tellMaster: Boolean = true, effectiveStorageLevel: Option[StorageLevel] = None): Seq[(BlockId, BlockStatus)] = { @@ -736,7 +740,7 @@ private[spark] class BlockManager( var valuesAfterPut: Iterator[Any] = null // Ditto for the bytes after the put - var bytesAfterPut: ByteBuffer = null + var bytesAfterPut: LargeByteBuffer = null // Size of the block in bytes var size = 0L @@ -884,7 +888,7 @@ private[spark] class BlockManager( * Replicate block to another node. Not that this is a blocking call that returns after * the block has been replicated. */ - private def replicate(blockId: BlockId, data: ByteBuffer, level: StorageLevel): Unit = { + private def replicate(blockId: BlockId, data: LargeByteBuffer, level: StorageLevel): Unit = { val maxReplicationFailures = conf.getInt("spark.storage.maxReplicationFailures", 1) val numPeersToReplicateTo = level.replication - 1 val peersForReplication = new ArrayBuffer[BlockManagerId] @@ -940,8 +944,11 @@ private[spark] class BlockManager( val onePeerStartTime = System.currentTimeMillis data.rewind() logTrace(s"Trying to replicate $blockId of ${data.limit()} bytes to $peer") - blockTransferService.uploadBlockSync( - peer.host, peer.port, peer.executorId, blockId, new NioManagedBuffer(data), tLevel) + //TODO + //ACK! 
here we're stuck -- we can't replicate a large block until we figure out + // how to deal w/ shuffling more than 2 gb +// blockTransferService.uploadBlockSync( +// peer.host, peer.port, peer.executorId, blockId, new NioManagedBuffer(data), tLevel) logTrace(s"Replicated $blockId of ${data.limit()} bytes to $peer in %s ms" .format(System.currentTimeMillis - onePeerStartTime)) peersReplicatedTo += peer @@ -1180,10 +1187,10 @@ private[spark] class BlockManager( def dataSerialize( blockId: BlockId, values: Iterator[Any], - serializer: Serializer = defaultSerializer): ByteBuffer = { - val byteStream = new ByteArrayOutputStream(4096) + serializer: Serializer = defaultSerializer): LargeByteBuffer = { + val byteStream = new LargeByteBufferOutputStream() dataSerializeStream(blockId, byteStream, values, serializer) - ByteBuffer.wrap(byteStream.toByteArray) + byteStream.largeBuffer } /** @@ -1192,10 +1199,10 @@ private[spark] class BlockManager( */ def dataDeserialize( blockId: BlockId, - bytes: ByteBuffer, + bytes: LargeByteBuffer, serializer: Serializer = defaultSerializer): Iterator[Any] = { bytes.rewind() - val stream = wrapForCompression(blockId, new ByteBufferInputStream(bytes, true)) + val stream = wrapForCompression(blockId, new LargeByteBufferInputStream(bytes, true)) serializer.newInstance().deserializeStream(stream).asIterator } @@ -1245,6 +1252,11 @@ private[spark] object BlockManager extends Logging { } } + def dispose(buffer: LargeByteBuffer): Unit = { + // TODO + ??? + } + def blockIdsToBlockManagers( blockIds: Array[BlockId], env: SparkEnv, diff --git a/core/src/main/scala/org/apache/spark/storage/BlockStore.scala b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala index b14b5e91d1794..38989f0c07681 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala @@ -17,6 +17,8 @@ package org.apache.spark.storage +import java.nio.ByteBuffer + import scala.collection.mutable.ArrayBuffer import org.apache.spark.Logging @@ -43,15 +45,15 @@ private[spark] abstract class BlockStore(val blockManager: BlockManager) extends * @return a PutResult that contains the size of the data, as well as the values put if * returnValues is true (if not, the result's data field can be null) */ - def putValues( + def putIterator( blockId: BlockId, values: Iterator[Any], level: StorageLevel, returnValues: Boolean): PutResult - def putValues( + def putArray( blockId: BlockId, - values: ArrayBuffer[Any], + values: Array[Any], level: StorageLevel, returnValues: Boolean): PutResult diff --git a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala index 61ef5ff168791..18293f3314a5f 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala @@ -22,6 +22,7 @@ import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode import org.apache.spark.Logging +import org.apache.spark.io.LargeByteBuffer import org.apache.spark.serializer.Serializer import org.apache.spark.util.Utils @@ -104,7 +105,7 @@ private[spark] class DiskStore(blockManager: BlockManager, diskManager: DiskBloc } } - private def getBytes(file: File, offset: Long, length: Long): Option[ByteBuffer] = { + private def getBytes(file: File, offset: Long, length: Long): Option[LargeByteBuffer] = { val channel = new RandomAccessFile(file, "r").getChannel try { @@ -128,12 +129,12 @@ private[spark] class 
DiskStore(blockManager: BlockManager, diskManager: DiskBloc } } - override def getBytes(blockId: BlockId): Option[ByteBuffer] = { + override def getBytes(blockId: BlockId): Option[LargeByteBuffer] = { val file = diskManager.getFile(blockId.name) getBytes(file, 0, file.length) } - def getBytes(segment: FileSegment): Option[ByteBuffer] = { + def getBytes(segment: FileSegment): Option[LargeByteBuffer] = { getBytes(segment.file, segment.offset, segment.length) } diff --git a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala index 71305a46bf570..90ced59104432 100644 --- a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala @@ -20,6 +20,8 @@ package org.apache.spark.storage import java.nio.ByteBuffer import java.util.LinkedHashMap +import org.apache.spark.io.LargeByteBuffer + import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -77,7 +79,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) } } - override def putBytes(blockId: BlockId, _bytes: ByteBuffer, level: StorageLevel): PutResult = { + override def putBytes(blockId: BlockId, _bytes: LargeByteBuffer, level: StorageLevel): PutResult = { // Work on a duplicate - since the original input might be used elsewhere. val bytes = _bytes.duplicate() bytes.rewind() diff --git a/core/src/main/scala/org/apache/spark/storage/PutResult.scala b/core/src/main/scala/org/apache/spark/storage/PutResult.scala index f0eac7594ecf6..2e00934bde243 100644 --- a/core/src/main/scala/org/apache/spark/storage/PutResult.scala +++ b/core/src/main/scala/org/apache/spark/storage/PutResult.scala @@ -17,7 +17,7 @@ package org.apache.spark.storage -import java.nio.ByteBuffer +import org.apache.spark.io.LargeByteBuffer /** * Result of adding a block into a BlockStore. This case class contains a few things: @@ -28,5 +28,5 @@ import java.nio.ByteBuffer */ private[spark] case class PutResult( size: Long, - data: Either[Iterator[_], ByteBuffer], + data: Either[Iterator[_], LargeByteBuffer], droppedBlocks: Seq[(BlockId, BlockStatus)] = Seq.empty) diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala index 233d1e2b7c616..fd7cb49ef9d50 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala @@ -21,6 +21,7 @@ import java.io.IOException import java.nio.ByteBuffer import com.google.common.io.ByteStreams +import org.apache.spark.io.LargeByteBuffer import tachyon.client.{ReadType, WriteType} import org.apache.spark.Logging @@ -64,7 +65,7 @@ private[spark] class TachyonStore( private def putIntoTachyonStore( blockId: BlockId, - bytes: ByteBuffer, + bytes: LargeByteBuffer, returnValues: Boolean): PutResult = { // So that we do not modify the input offsets ! 
// duplicate does not copy buffer, so inexpensive @@ -100,7 +101,7 @@ private[spark] class TachyonStore( getBytes(blockId).map(buffer => blockManager.dataDeserialize(blockId, buffer)) } - override def getBytes(blockId: BlockId): Option[ByteBuffer] = { + override def getBytes(blockId: BlockId): Option[LargeByteBuffer] = { val file = tachyonManager.getFile(blockId) if (file == null || file.getLocationHosts.size == 0) { return None diff --git a/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala b/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala new file mode 100644 index 0000000000000..26f2d7848bb29 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.io.InputStream +import java.nio.ByteBuffer + +import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.storage.BlockManager + +/** + * Reads data from a ByteBuffer, and optionally cleans it up using BlockManager.dispose() + * at the end of the stream (e.g. to close a memory-mapped file). + */ +private[spark] +class LargeByteBufferInputStream(private var buffer: LargeByteBuffer, dispose: Boolean = false) + extends InputStream { + + override def read(): Int = { + if (buffer == null || buffer.remaining() == 0) { + cleanUp() + -1 + } else { + buffer.get() & 0xFF + } + } + + override def read(dest: Array[Byte]): Int = { + read(dest, 0, dest.length) + } + + override def read(dest: Array[Byte], offset: Int, length: Int): Int = { + if (buffer == null || buffer.remaining() == 0) { + cleanUp() + -1 + } else { + val amountToGet = math.min(buffer.remaining(), length).toInt + buffer.get(dest, offset, amountToGet) + amountToGet + } + } + + override def skip(bytes: Long): Long = { + if (buffer != null) { + val amountToSkip = math.min(bytes, buffer.remaining).toInt + buffer.position(buffer.position + amountToSkip) + if (buffer.remaining() == 0) { + cleanUp() + } + amountToSkip + } else { + 0L + } + } + + /** + * Clean up the buffer, and potentially dispose of it using BlockManager.dispose(). + */ + private def cleanUp() { + if (buffer != null) { + if (dispose) { + BlockManager.dispose(buffer) + } + buffer = null + } + } +} diff --git a/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala b/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala new file mode 100644 index 0000000000000..246ebca999437 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.io.OutputStream + +import org.apache.spark.io.{ChainedLargeByteBuffer, LargeByteBuffer} +import org.apache.spark.util.collection.ChainedBuffer + +private[spark] +class LargeByteBufferOutputStream(chunkSize: Int = 65536) + extends OutputStream { + + val buffer = new ChainedBuffer(chunkSize) + + private var _pos = 0 + + override def write(b: Int): Unit = { + throw new UnsupportedOperationException() + } + + override def write(bytes: Array[Byte], offs: Int, len: Int): Unit = { + buffer.write(_pos, bytes, offs, len) + _pos += len + } + + def pos: Int = _pos + + def largeBuffer: LargeByteBuffer = new ChainedLargeByteBuffer(buffer) +} diff --git a/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala b/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala new file mode 100644 index 0000000000000..c39a2fd1f8a11 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util.collection + +import java.io.OutputStream + +import scala.collection.mutable.ArrayBuffer + +/** + * A logical byte buffer that wraps a list of byte arrays. All the byte arrays have equal size. The + * advantage of this over a standard ArrayBuffer is that it can grow without claiming large amounts + * of memory and needing to copy the full contents. + */ +private[spark] class ChainedBuffer private(val chunks: ArrayBuffer[Array[Byte]], chunkSize: Int) { + private val chunkSizeLog2 = (math.log(chunkSize) / math.log(2)).toInt + assert(math.pow(2, chunkSizeLog2).toInt == chunkSize) + private var _size: Long = _ + + /** + * Read bytes from this buffer into a byte array. + * + * @param pos Offset in the buffer to read from. + * @param bytes Byte array to read into. + * @param offs Offset in the byte array to read to. + * @param len Number of bytes to read. 
+ */ + def read(pos: Long, bytes: Array[Byte], offs: Int, len: Int): Unit = { + var chunkIndex = (pos >> chunkSizeLog2).toInt + var posInChunk = (pos - (chunkIndex << chunkSizeLog2)).toInt + var moved = 0 + while (moved < len) { + val toRead = math.min(len - moved, chunkSize - posInChunk) + System.arraycopy(chunks(chunkIndex), posInChunk, bytes, offs + moved, toRead) + moved += toRead + chunkIndex += 1 + posInChunk = 0 + } + } + + def read(pos:Long): Byte = { + val chunkIndex = (pos >> chunkSizeLog2).toInt + val posInChunk = (pos - (chunkIndex << chunkSizeLog2)).toInt + chunks(chunkIndex)(posInChunk) + } + + /** + * Write bytes from a byte array into this buffer. + * + * @param pos Offset in the buffer to write to. + * @param bytes Byte array to write from. + * @param offs Offset in the byte array to write from. + * @param len Number of bytes to write. + */ + def write(pos: Long, bytes: Array[Byte], offs: Int, len: Int): Unit = { + // Grow if needed + val endChunkIndex = ((pos + len - 1) >> chunkSizeLog2).toInt + while (endChunkIndex >= chunks.length) { + chunks += new Array[Byte](chunkSize) + } + + var chunkIndex = (pos >> chunkSizeLog2).toInt + var posInChunk = (pos - (chunkIndex << chunkSizeLog2)).toInt + var moved = 0 + while (moved < len) { + val toWrite = math.min(len - moved, chunkSize - posInChunk) + System.arraycopy(bytes, offs + moved, chunks(chunkIndex), posInChunk, toWrite) + moved += toWrite + chunkIndex += 1 + posInChunk = 0 + } + + _size = math.max(_size, pos + len) + } + + /** + * Total size of buffer that can be written to without allocating additional memory. + */ + def capacity: Int = chunks.size * chunkSize + + /** + * Size of the logical buffer. + */ + def size: Long = _size +} + +private[spark] object ChainedBuffer { + def withInitialSize(chunkSize: Int, minInitialSize: Long = 0): ChainedBuffer = { + val nChunks = (((minInitialSize - 1) / chunkSize).toInt) + 1 + val chunks = new ArrayBuffer[Array[Byte]](nChunks) + (0 until nChunks).foreach{idx => chunks(idx) = new Array[Byte](chunkSize)} + new ChainedBuffer(chunks, chunkSize) + } +} + +/** + * Output stream that writes to a ChainedBuffer. + */ +private[spark] class ChainedBufferOutputStream(chainedBuffer: ChainedBuffer) extends OutputStream { + private var _pos = 0 + + override def write(b: Int): Unit = { + throw new UnsupportedOperationException() + } + + override def write(bytes: Array[Byte], offs: Int, len: Int): Unit = { + chainedBuffer.write(_pos, bytes, offs, len) + _pos += len + } + + def pos: Int = _pos +} \ No newline at end of file diff --git a/core/src/test/scala/org/apache/spark/io/LargeByteBufferTest.scala b/core/src/test/scala/org/apache/spark/io/LargeByteBufferTest.scala new file mode 100644 index 0000000000000..06d3d223c3858 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/io/LargeByteBufferTest.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.io + +import java.io.{ObjectInputStream, ObjectOutputStream} + +import org.apache.spark.util.{LargeByteBufferInputStream, LargeByteBufferOutputStream} +import org.scalatest.{Matchers, FunSuite} + +class LargeByteBufferTest extends FunSuite with Matchers { + +// test("allocateOnHeap") { +// val bufs = LargeByteBuffer.allocateOnHeap(10, 3).asInstanceOf[ChainedLargeByteBuffer] +// bufs.underlying.foreach{buf => buf.capacity should be <= 3} +// bufs.underlying.map{_.capacity}.sum should be (10) +// } +// +// test("allocate large") { +// val size = Integer.MAX_VALUE.toLong + 10 +// val bufs = LargeByteBuffer.allocateOnHeap(size, 1e9.toInt).asInstanceOf[WrappedLargeByteBuffer] +// bufs.capacity should be (size) +// bufs.underlying.map{_.capacity.toLong}.sum should be (Integer.MAX_VALUE.toLong + 10) +// } + + + test("io stream roundtrip") { + + val rawOut = new LargeByteBufferOutputStream(128) + val objOut = new ObjectOutputStream(rawOut) + val someObject = (1 to 100).map{x => x -> scala.util.Random.nextInt(x)}.toMap + objOut.writeObject(someObject) + objOut.close() + + rawOut.largeBuffer.asInstanceOf[ChainedLargeByteBuffer].underlying.chunks.size should be > 1 + + val rawIn = new LargeByteBufferInputStream(rawOut.largeBuffer) + val objIn = new ObjectInputStream(rawIn) + val deser = objIn.readObject() + deser should be (someObject) + + } + +} diff --git a/core/src/test/scala/org/apache/spark/util/collection/ChainedBufferTest.scala b/core/src/test/scala/org/apache/spark/util/collection/ChainedBufferTest.scala new file mode 100644 index 0000000000000..e99d5ecc639c9 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/collection/ChainedBufferTest.scala @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
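The roundtrip test above is the clearest picture of how the two stream wrappers are meant to be used together. The following is a minimal standalone sketch of the same flow, not part of the patch itself; the small 128-byte chunk size is only there to force the buffer to chain, as in the test:

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.spark.util.{LargeByteBufferInputStream, LargeByteBufferOutputStream}

object StreamRoundTripSketch {
  def main(args: Array[String]): Unit = {
    // Serialize an object into the chained buffer through an ObjectOutputStream.
    val rawOut = new LargeByteBufferOutputStream(128)
    val objOut = new ObjectOutputStream(rawOut)
    val original = (1 to 100).map(i => i -> i * 2).toMap
    objOut.writeObject(original)
    objOut.close()

    // Read it back through a LargeByteBufferInputStream wrapping the same data.
    val rawIn = new LargeByteBufferInputStream(rawOut.largeBuffer)
    val objIn = new ObjectInputStream(rawIn)
    val roundTripped = objIn.readObject()
    assert(roundTripped == original)
  }
}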
+ */ + +package org.apache.spark.util.collection + +import java.nio.ByteBuffer + +import org.scalatest.FunSuite +import org.scalatest.Matchers._ + +class ChainedBufferSuite extends FunSuite { + test("write and read at start") { + // write from start of source array + val buffer = new ChainedBuffer(8) + buffer.capacity should be (0) + verifyWriteAndRead(buffer, 0, 0, 0, 4) + buffer.capacity should be (8) + + // write from middle of source array + verifyWriteAndRead(buffer, 0, 5, 0, 4) + buffer.capacity should be (8) + + // read to middle of target array + verifyWriteAndRead(buffer, 0, 0, 5, 4) + buffer.capacity should be (8) + + // write up to border + verifyWriteAndRead(buffer, 0, 0, 0, 8) + buffer.capacity should be (8) + + // expand into second buffer + verifyWriteAndRead(buffer, 0, 0, 0, 12) + buffer.capacity should be (16) + + // expand into multiple buffers + verifyWriteAndRead(buffer, 0, 0, 0, 28) + buffer.capacity should be (32) + } + + test("write and read at middle") { + // write from start of source array + val buffer = new ChainedBuffer(8) + verifyWriteAndRead(buffer, 3, 0, 0, 4) + buffer.capacity should be (8) + + // write from middle of source array + verifyWriteAndRead(buffer, 3, 5, 0, 4) + buffer.capacity should be (8) + + // read to middle of target array + verifyWriteAndRead(buffer, 3, 0, 5, 4) + buffer.capacity should be (8) + + // write up to border + verifyWriteAndRead(buffer, 3, 0, 0, 5) + buffer.capacity should be (8) + + // expand into second buffer + verifyWriteAndRead(buffer, 3, 0, 0, 12) + buffer.capacity should be (16) + + // expand into multiple buffers + verifyWriteAndRead(buffer, 3, 0, 0, 28) + buffer.capacity should be (32) + } + + test("write and read at later buffer") { + // write from start of source array + val buffer = new ChainedBuffer(8) + verifyWriteAndRead(buffer, 11, 0, 0, 4) + buffer.capacity should be (16) + + // write from middle of source array + verifyWriteAndRead(buffer, 11, 5, 0, 4) + buffer.capacity should be (16) + + // read to middle of target array + verifyWriteAndRead(buffer, 11, 0, 5, 4) + buffer.capacity should be (16) + + // write up to border + verifyWriteAndRead(buffer, 11, 0, 0, 5) + buffer.capacity should be (16) + + // expand into second buffer + verifyWriteAndRead(buffer, 11, 0, 0, 12) + buffer.capacity should be (24) + + // expand into multiple buffers + verifyWriteAndRead(buffer, 11, 0, 0, 28) + buffer.capacity should be (40) + } + + + // Used to make sure we're writing different bytes each time + var rangeStart = 0 + + /** + * @param buffer The buffer to write to and read from. + * @param offsetInBuffer The offset to write to in the buffer. + * @param offsetInSource The offset in the array that the bytes are written from. + * @param offsetInTarget The offset in the array to read the bytes into. 
+ * @param length The number of bytes to read and write + */ + def verifyWriteAndRead( + buffer: ChainedBuffer, + offsetInBuffer: Int, + offsetInSource: Int, + offsetInTarget: Int, + length: Int): Unit = { + val source = new Array[Byte](offsetInSource + length) + (rangeStart until rangeStart + length).map(_.toByte).copyToArray(source, offsetInSource) + buffer.write(offsetInBuffer, source, offsetInSource, length) + val target = new Array[Byte](offsetInTarget + length) + buffer.read(offsetInBuffer, target, offsetInTarget, length) + ByteBuffer.wrap(source, offsetInSource, length) should be + (ByteBuffer.wrap(target, offsetInTarget, length)) + + rangeStart += 100 + } +} \ No newline at end of file From a139e97fe1aeac279b9c47119745c0f45eb7d8c5 Mon Sep 17 00:00:00 2001 From: Imran RashidInvoke this method before using a sequence of channel-read or -// * put operations to fill this buffer. -// * -// *
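For reference, the chunk addressing that ChainedBuffer and the suite above rely on: because the chunk size is a power of two, a logical position splits into a chunk index and an offset within that chunk with a shift and a subtraction. A small worked sketch with illustrative values (the widening to Long is only a precaution in this sketch):

object ChunkAddressingSketch {
  def main(args: Array[String]): Unit = {
    val chunkSizeLog2 = 3                                                // chunkSize = 8 = 2^3
    val pos = 27L
    val chunkIndex = (pos >> chunkSizeLog2).toInt                        // 27 / 8  = 3
    val posInChunk = (pos - (chunkIndex.toLong << chunkSizeLog2)).toInt  // 27 - 24 = 3
    println(s"position $pos -> chunk $chunkIndex, offset $posInChunk")
  }
}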
This method does not actually erase the data in the buffer, but it -// * is named as if it did because it will most often be used in situations -// * in which that might as well be the case.
-// */ -// def clear(): Unit -// -// /** -// * Flips this buffer. The limit is set to the current position and then -// * the position is set to zero. If the mark is defined then it is -// * discarded. -// * -// *After a sequence of channel-read or put operations, invoke -// * this method to prepare for a sequence of channel-write or relative -// * get operations. -// */ -// def flip(): Unit - - /** - * Rewinds this buffer. The position is set to zero and the mark is - * discarded. - * - *
Invoke this method before a sequence of channel-write or get - * operations, assuming that the limit has already been set - * appropriately. - */ - def rewind(): Unit - - /** - * Returns the number of elements between the current position and the - * limit.
- * - * @return The number of elements remaining in this buffer - */ - def remaining(): Long -} - -class ChainedLargeByteBuffer(private[io] val underlying: ChainedBuffer) extends LargeByteBuffer { - - def capacity = underlying.capacity - - var _pos = 0l - - def get(dst: Array[Byte],offset: Int,length: Int): Unit = { - underlying.read(_pos, dst, offset, length) - _pos += length - } - - def get(): Byte = { - val b = underlying.read(_pos) - _pos += 1 - b - } - - def put(bytes: LargeByteBuffer): Unit = { - ??? - } - - def position: Long = _pos - def position(position: Long): Unit = { - _pos = position - } - def remaining(): Long = { - underlying.size - position - } - - def duplicate(): ChainedLargeByteBuffer = { - new ChainedLargeByteBuffer(underlying) - } - - def rewind(): Unit = { - _pos = 0 - } - - def limit(): Long = { - capacity - } - - def limit(newLimit: Long): Unit = { - ??? - } - - def writeTo(channel:WritableByteChannel): Long = { - var written = 0l - underlying.chunks.foreach{bytes => - //TODO test this - val buffer = ByteBuffer.wrap(bytes) - while (buffer.hasRemaining) - channel.write(buffer) - written += bytes.length - } - written - } -} - -class WrappedLargeByteBuffer(private[spark] val underlying: Array[ByteBuffer]) extends LargeByteBuffer { - - val (totalCapacity, chunkOffsets) = { - var sum = 0l - val offsets = new Array[Long](underlying.size) - (0 until underlying.size).foreach{idx => - offsets(idx) = sum - sum += underlying(idx).capacity() - } - (sum, offsets) - } - - private var _pos = 0l - private var currentBufferIdx = 0 - private var currentBuffer = underlying(0) - private var _limit = totalCapacity - - def capacity = totalCapacity - - def get(dst: Array[Byte], offset: Int, length: Int): Unit = { - var moved = 0 - while (moved < length) { - val toRead = math.min(length - moved, currentBuffer.remaining()) - currentBuffer.get(dst, offset, toRead) - moved += toRead - updateCurrentBuffer() - } - } - - def get(): Byte = { - val r = currentBuffer.get() - _pos += 1 - updateCurrentBuffer() - r - } - - private def updateCurrentBuffer(): Unit = { - //TODO fix end condition - while(!currentBuffer.hasRemaining()) { - currentBufferIdx += 1 - currentBuffer = underlying(currentBufferIdx) - } - } - - def put(bytes: LargeByteBuffer): Unit = { - ??? - } - - def position: Long = _pos - def position(position: Long): Unit = { - //XXX check range? - _pos = position - } - def remaining(): Long = { - totalCapacity - _pos - } - - def duplicate(): WrappedLargeByteBuffer = { - new WrappedLargeByteBuffer(underlying.map{_.duplicate()}) - } - - def rewind(): Unit = { - _pos = 0 - underlying.foreach{_.rewind()} - } - - def limit(): Long = { - totalCapacity - } - - def limit(newLimit: Long) = { - //XXX check range? set limits in sub buffers? - _limit = newLimit - } - - def writeTo(channel: WritableByteChannel): Long = { - var written = 0l - underlying.foreach{buffer => - //TODO test this - //XXX do we care about respecting the limit here? 
- written += buffer.remaining() - while (buffer.hasRemaining) - channel.write(buffer) - } - written - } - -} - -object LargeByteBuffer { - - def asLargeByteBuffer(byteBuffer: ByteBuffer): LargeByteBuffer = { - new WrappedLargeByteBuffer(Array(byteBuffer)) - } - - def asLargeByteBuffer(bytes: Array[Byte]): LargeByteBuffer = { - new WrappedLargeByteBuffer(Array(ByteBuffer.wrap(bytes))) - } - - - def allocateOnHeap(size: Long, maxChunk: Int): LargeByteBuffer = { - val buffer = ChainedBuffer.withInitialSize(maxChunk, size) - new ChainedLargeByteBuffer(buffer) - } - - def mapFile( - channel: FileChannel, - mode: MapMode, - offset: Long, - length: Long, - maxChunk: Int = Integer.MAX_VALUE - 1e6.toInt - ): LargeByteBuffer = { - val offsets = new ArrayBuffer[Long]() - var curOffset = offset - val end = offset + length - while (curOffset < end) { - offsets += curOffset - val length = math.min(end - curOffset, maxChunk) - curOffset += length - } - offsets += end - val chunks = new Array[ByteBuffer](offsets.size - 1) - (0 until offsets.size - 1).foreach{idx => - chunks(idx) = channel.map(mode, offsets(idx), offsets(idx + 1) - offsets(idx)) - } - new WrappedLargeByteBuffer(chunks) - } -} - - -// -///** -// * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G -// * which ByteBuffers are limited to. -// * Externally, it exposes all the api which java.nio.ByteBuffer exposes. -// * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. -// * Not all the data might be loaded into memory (like disk or tachyon data) - so actual -// * memory footprint - heap and vm could be much lower than capacity. +///* +// * Licensed to the Apache Software Foundation (ASF) under one or more +// * contributor license agreements. See the NOTICE file distributed with +// * this work for additional information regarding copyright ownership. +// * The ASF licenses this file to You under the Apache License, Version 2.0 +// * (the "License"); you may not use this file except in compliance with +// * the License. You may obtain a copy of the License at // * -// * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this -// * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! +// * http://www.apache.org/licenses/LICENSE-2.0 // * -// * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this -// * will require the file to be kept open (repeatedly opening/closing file is not good -// * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is -// * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) -// * -// * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is -// * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some -// * cases (when we duplicate/slice them). Currently spark does not need this, but might in future -// * so relook at it later. +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. 
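The mapFile helper in the hunk above keeps each mapping under the 2 GB ByteBuffer limit by pre-computing chunk boundaries before calling FileChannel.map. A standalone sketch of that boundary computation, assuming a hypothetical 5 GB region starting at offset 0:

import scala.collection.mutable.ArrayBuffer

object MapFileChunkSketch {
  def main(args: Array[String]): Unit = {
    val offset = 0L
    val length = 5L * 1024 * 1024 * 1024          // hypothetical 5 GB region
    val maxChunk = Integer.MAX_VALUE - 1e6.toInt  // keep each mapping under 2 GB
    val offsets = new ArrayBuffer[Long]()
    var cur = offset
    val end = offset + length
    while (cur < end) {
      offsets += cur
      cur += math.min(end - cur, maxChunk)
    }
    offsets += end
    // each adjacent pair (offsets(i), offsets(i + 1)) would become one
    // FileChannel.map(...) call, and the resulting ByteBuffers would be
    // wrapped together in a WrappedLargeByteBuffer
    println(offsets.mkString(", "))
  }
}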
// */ -//// We should make this constructor private: but for now, -//// leaving it public since TachyonStore needs it -//class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], -// private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { -// -// // TODO: TEMP code: to flush out potential resource leaks. REMOVE ME -// private val allocateLocationThrowable: Throwable = { -// if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { -// new Throwable("blockId = " + BlockManager.getLookupBlockId) -// } else { -// null -// } -// } -// private var disposeLocationThrowable: Throwable = null -// -// @volatile private var allowCleanerOverride = true -// @volatile private var cleaner: BufferCleaner = new BufferCleaner { -// override def doClean(buffer: LargeByteBuffer) = { -// assert (LargeByteBuffer.this == buffer) -// doDispose(needRelease = false) -// } -// } // -// // should not be empty -// assert (null != inputContainers && ! inputContainers.isEmpty) -// // should not have any null's -// assert (inputContainers.find(_ == null).isEmpty) +//package org.apache.spark.io // -// // println("Num containers = " + inputContainers.size) +//import java.io.{RandomAccessFile, DataInput, InputStream, OutputStream} +//import java.nio.channels.FileChannel.MapMode +//import java.nio.{ByteBuffer, BufferUnderflowException, BufferOverflowException} +//import java.nio.channels.{FileChannel, WritableByteChannel, ReadableByteChannel} // -// // Position, limit and capacity relevant over the engire LargeByteBuffer -// @volatile private var globalPosition = 0L -// @volatile private var globalLimit = 0L -// @volatile private var currentContainerIndex = 0 +//import org.apache.spark.util.collection.ChainedBuffer // -// // The buffers in which the actual data is held. -// private var containers: Array[ByteBufferContainer] = null +//import scala.collection.mutable.{ArrayBuffer, HashSet} // -// // aggregate capacities of the individual buffers. -// // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be -// // sum of capacity of 0th and 1st block buffer -// private var bufferPositionStart: Array[Long] = null // -// // Contains the indices of a containers which requires release before subsequent invocation of -// // read/write should be serviced. This is required since current read/write might have moved the -// // position but since we are returning bytebuffers which depend on the validity of the existing -// // bytebuffer, we cant release them yet. -// private var needReleaseIndices = new HashSet[Int]() // -// private val readable = ! inputContainers.exists(! _.isReadable) -// private val writable = ! inputContainers.exists(! 
_.isWritable) // +//trait LargeByteBuffer { +//// def position(): Long +//// +//// def limit(): Long // -// // initialize -// @volatile private var globalCapacity = { +// def capacity(): Long // -// // Ensure that there are no empty buffers : messes up with our code : unless it -// // is a single buffer (for empty buffer for marker case) -// assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) +// def get(): Byte //needed for ByteBufferInputStream // -// containers = { -// if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray -// } -// containers.foreach(_.validate()) +// def get(dst: Array[Byte], offset: Int, length: Int): Unit // for ByteBufferInputStream // -// def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { -// val buff = new ArrayBuffer[Long](arr.length + 1) -// buff += 0L +// def position(position: Long): Unit //for ByteBufferInputStream // -// buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) -// assert (buff.length == arr.length + 1) -// bufferPositionStart = buff.toArray -// } +// def position(): Long //for ByteBufferInputStream // -// initializeBufferPositionStart(containers) +// /** doesn't copy data, just copies references & offsets */ +// def duplicate(): LargeByteBuffer // -// // remove references from inputBuffers -// inputContainers.clear() +// def put(bytes: LargeByteBuffer): Unit // -// globalLimit = bufferPositionStart(containers.length) -// globalPosition = 0L -// currentContainerIndex = 0 +// //also need whatever is necessary for ByteArrayOutputStream for BlockManager#dataSerialize // -// assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) -// -// globalLimit -// } -// -// final def position(): Long = globalPosition -// -// final def limit(): Long = globalLimit -// -// final def capacity(): Long = globalCapacity -// -// final def limit(newLimit: Long) { -// if ((newLimit > capacity()) || (newLimit < 0)) { -// throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) -// } -// -// globalLimit = newLimit -// if (position() > newLimit) position(newLimit) -// } -// -// def skip(skipBy: Long) = position(position() + skipBy) -// -// private def releasePendingContainers() { -// if (! needReleaseIndices.isEmpty) { -// val iter = needReleaseIndices.iterator -// while (iter.hasNext) { -// val index = iter.next() -// assert (index >= 0 && index < containers.length) -// // It is possible to move from one container to next before the previous -// // container was acquired. For example, get forcing move to next container -// // since current was exhausted immediatelly followed by a position() -// // so the container we moved to was never acquired. -// -// // assert (containers(index).isAcquired) -// // will this always be satisfied ? -// // assert (index != currentContainerIndex) -// if (containers(index).isAcquired) containers(index).release() -// } -// needReleaseIndices.clear() -// } -// } -// -// private def toNewContainer(newIndex: Int) { -// if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { -// -// assert (currentContainerIndex >= 0) -// needReleaseIndices += currentContainerIndex -// } -// currentContainerIndex = newIndex -// } -// -// // expensive method, sigh ... optimize it later ? 
-// final def position(newPosition: Long) { -// -// if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() -// -// if (currentContainerIndex < bufferPositionStart.length - 1 && -// newPosition >= bufferPositionStart(currentContainerIndex) && -// newPosition < bufferPositionStart(currentContainerIndex + 1)) { -// // Same buffer - easy method ... -// globalPosition = newPosition -// // Changed position - free previously returned buffers. -// releasePendingContainers() -// return -// } -// -// // Find appropriate currentContainerIndex -// // Since bufferPositionStart is sorted, can be replaced with binary search if required. -// // For now, not in the perf critical path since buffers size is very low typically. -// var index = 0 -// val cLen = containers.length -// while (index < cLen) { -// if (newPosition >= bufferPositionStart(index) && -// newPosition < bufferPositionStart(index + 1)) { -// globalPosition = newPosition -// toNewContainer(index) -// // Changed position - free earlier and previously returned buffers. -// releasePendingContainers() -// return -// } -// index += 1 -// } -// -// if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { -// // boundary. -// globalPosition = newPosition -// toNewContainer(cLen) -// // Changed position - free earlier and previously returned buffers. -// releasePendingContainers() -// return -// } -// -// assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + -// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) -// } // +// //TODO checks on limit semantics // // /** -// * Clears this buffer. The position is set to zero, the limit is set to -// * the capacity, and the mark is discarded. -// * -// *Invoke this method before using a sequence of channel-read or -// * put operations to fill this buffer. -// * -// *
This method does not actually erase the data in the buffer, but it -// * is named as if it did because it will most often be used in situations -// * in which that might as well be the case.
+// * Sets this buffer's limit. If the position is larger than the new limit then it is set to the +// * new limit. If the mark is defined and larger than the new limit then it is discarded. // */ -// final def clear() { -// // if (0 == globalCapacity) return -// -// needReleaseIndices += 0 -// globalPosition = 0L -// toNewContainer(0) -// globalLimit = globalCapacity -// -// // Now free all pending containers -// releasePendingContainers() -// } +// def limit(newLimit: Long): Unit // // /** -// * Flips this buffer. The limit is set to the current position and then -// * the position is set to zero. If the mark is defined then it is -// * discarded. -// * -// *After a sequence of channel-read or put operations, invoke -// * this method to prepare for a sequence of channel-write or relative -// * get operations. +// * return this buffer's limit +// * @return // */ -// final def flip() { -// needReleaseIndices += 0 -// globalLimit = globalPosition -// globalPosition = 0L -// toNewContainer(0) +// def limit(): Long // -// // Now free all pending containers -// releasePendingContainers() -// } +// +// //an alternative to having this method would be having a foreachBuffer(f: Buffer => T) +// def writeTo(channel: WritableByteChannel): Long +// +//// +//// def skip(skipBy: Long): Unit +//// +//// def position(newPosition: Long): Unit +//// +//// /** +//// * Clears this buffer. The position is set to zero, the limit is set to +//// * the capacity, and the mark is discarded. +//// * +//// *
Invoke this method before using a sequence of channel-read or +//// * put operations to fill this buffer. +//// * +//// *
This method does not actually erase the data in the buffer, but it +//// * is named as if it did because it will most often be used in situations +//// * in which that might as well be the case.
+//// */ +//// def clear(): Unit +//// +//// /** +//// * Flips this buffer. The limit is set to the current position and then +//// * the position is set to zero. If the mark is defined then it is +//// * discarded. +//// * +//// *After a sequence of channel-read or put operations, invoke +//// * this method to prepare for a sequence of channel-write or relative +//// * get operations. +//// */ +//// def flip(): Unit // // /** // * Rewinds this buffer. The position is set to zero and the mark is @@ -558,14 +107,7 @@ object LargeByteBuffer { // * operations, assuming that the limit has already been set // * appropriately. // */ -// final def rewind() { -// needReleaseIndices += 0 -// globalPosition = 0L -// toNewContainer(0) -// -// // Now free all pending containers -// releasePendingContainers() -// } +// def rewind(): Unit // // /** // * Returns the number of elements between the current position and the @@ -573,1192 +115,1650 @@ object LargeByteBuffer { // * // * @return The number of elements remaining in this buffer // */ -// final def remaining(): Long = { -// globalLimit - globalPosition -// } -// -// /** -// * Tells whether there are any elements between the current position and -// * the limit.
-// * -// * @return true if, and only if, there is at least one element -// * remaining in this buffer -// */ -// final def hasRemaining() = { -// globalPosition < globalLimit -// } -// -// // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) -// -// // number of bytes remaining in currently active underlying buffer -// private def currentRemaining(): Int = { -// if (hasRemaining()) { -// // validate currentContainerIndex is valid -// assert (globalPosition >= bufferPositionStart(currentContainerIndex) && -// globalPosition < bufferPositionStart(currentContainerIndex + 1), -// "globalPosition = " + globalPosition + -// ", currentContainerIndex = " + currentContainerIndex + -// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -// -// currentRemaining0(currentContainerIndex) -// } else 0 -// } -// -// // Without any validation : required when we are bumping the index (when validation will fail) ... -// private def currentRemaining0(which: Int): Int = { -// // currentBuffer().remaining() -// math.max(0, math.min(bufferPositionStart(which + 1), -// globalLimit) - globalPosition).asInstanceOf[Int] -// } -// -// // Set the approppriate position/limit for the current underlying buffer to mirror our -// // the LargeByteBuffer's state. -// private def fetchCurrentBuffer(): ByteBuffer = { -// releasePendingContainers() -// -// assert (currentContainerIndex < containers.length) -// -// val container = containers(currentContainerIndex) -// if (! container.isAcquired) { -// container.acquire() -// } -// -// assert (container.isAcquired) -// if (LargeByteBuffer.enableExpensiveAssert) { -// assert (! containers.exists( b => (b ne container) && b.isAcquired)) -// } -// -// assert (currentContainerIndex < bufferPositionStart.length && -// globalPosition < bufferPositionStart(currentContainerIndex + 1), -// "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + -// bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) -// -// val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). -// asInstanceOf[Int] -// -// val buffer = container.getByteBuffer -// buffer.position(buffPosition) -// val diff = buffer.capacity - buffPosition -// val left = remaining() -// if (diff <= left) { -// buffer.limit(buffer.capacity()) -// } else { -// // Can happen if limit() was called. -// buffer.limit(buffPosition + left.asInstanceOf[Int]) -// } -// -// buffer -// } -// -// // To be used ONLY to test in suites. -// private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { -// if ("1" != System.getProperty("SPARK_TESTING")) { -// throw new IllegalStateException("This method is to be used ONLY within spark test suites") -// } -// -// fetchCurrentBuffer() -// } -// -// // Expects that the invoker has ensured that this can be safely invoked. -// // That is, it wont be invoked when the loop wont terminate. -// private def toNonEmptyBuffer() { -// -// if (! hasRemaining()) { -// var newIndex = currentContainerIndex -// // Ensure we are in the right block or not. -// while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { -// newIndex += 1 -// } -// toNewContainer(newIndex) -// // Do not do this - since we might not yet have consumed the buffer which caused EOF right now -// /* -// // Add last one also, and release it too - since we are at the end of the buffer with nothing -// // more pending. 
-// if (newIndex >= 0 && currentContainerIndex < containers.length) { -// needReleaseIndices += newIndex -// } -// */ -// assert (currentContainerIndex >= 0) -// // releasePendingContainers() -// return -// } -// -// var index = currentContainerIndex -// while (0 == currentRemaining0(index) && index < containers.length) { -// index += 1 -// } -// assert (currentContainerIndex < containers.length) -// toNewContainer(index) -// assert (0 != currentRemaining()) -// } -// -// private def assertPreconditions(containerIndex: Int) { -// assert (globalPosition >= bufferPositionStart(containerIndex), -// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + -// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -// assert (globalPosition < bufferPositionStart(containerIndex + 1), -// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + -// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -// -// assert (globalLimit <= globalCapacity) -// assert (containerIndex < containers.length) -// } -// -// -// /** -// * Attempts to return a ByteBuffer of the requested size. -// * It is possible to return a buffer of size smaller than requested -// * even though hasRemaining == true -// * -// * On return, position would have been moved 'ahead' by the size of the buffer returned : -// * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer -// * -// * -// * This is used to primarily retrieve content of this buffer to expose via ByteBuffer -// * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the -// * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer -// * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying -// * container is a disk backed container, and we make subsequent calls to get(), the returned -// * ByteBuffer can be dispose'ed off -// * -// * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
-// * @return -// */ -// -// private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { -// fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) -// } -// -// private def fetchBufferOfSizeImpl(maxChunkSize: Int, -// canReleaseContainers: Boolean): ByteBuffer = { -// if (canReleaseContainers) releasePendingContainers() -// assert (maxChunkSize > 0) -// -// // not checking for degenerate case of maxChunkSize == 0 -// if (globalPosition >= globalLimit) { -// // throw exception -// throw new BufferUnderflowException() -// } -// -// // Check preconditions : disable these later, since they might be expensive to -// // evaluate for every IO op -// assertPreconditions(currentContainerIndex) -// -// val currentBufferRemaining = currentRemaining() -// -// assert (currentBufferRemaining > 0) -// -// val size = math.min(currentBufferRemaining, maxChunkSize) -// -// val newBuffer = if (currentBufferRemaining > maxChunkSize) { -// val currentBuffer = fetchCurrentBuffer() -// val buff = ByteBufferContainer.createSlice(currentBuffer, -// currentBuffer.position(), maxChunkSize) -// assert (buff.remaining() == maxChunkSize) -// buff -// } else { -// val currentBuffer = fetchCurrentBuffer() -// val buff = currentBuffer.slice() -// assert (buff.remaining() == currentBufferRemaining) -// buff -// } -// -// assert (size == newBuffer.remaining()) -// assert (0 == newBuffer.position()) -// assert (size == newBuffer.limit()) -// assert (newBuffer.capacity() == newBuffer.limit()) -// -// globalPosition += newBuffer.remaining -// toNonEmptyBuffer() -// -// newBuffer -// } -// -// // Can we service the read/write from the currently active (underlying) bytebuffer or not. -// // For almost all cases, this will return true allowing us to optimize away the more expensive -// // computations. -// private def localReadWritePossible(size: Int) = -// size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) -// -// -// def getLong(): Long = { -// assert (readable) -// releasePendingContainers() -// -// if (remaining() < 8) throw new BufferUnderflowException -// -// if (localReadWritePossible(8)) { -// val buff = fetchCurrentBuffer() -// assert (buff.remaining() >= 8) -// val retval = buff.getLong -// globalPosition += 8 -// toNonEmptyBuffer() -// return retval -// } -// -// val buff = readFully(8) -// buff.getLong -// } -// -// def getInt(): Int = { -// assert (readable) -// releasePendingContainers() -// -// if (remaining() < 4) throw new BufferUnderflowException -// -// if (localReadWritePossible(4)) { -// val buff = fetchCurrentBuffer() -// assert (buff.remaining() >= 4) -// val retval = buff.getInt -// globalPosition += 4 -// toNonEmptyBuffer() -// return retval -// } -// -// val buff = readFully(4) -// buff.getInt -// } +// def remaining(): Long +//} // -// def getChar(): Char = { -// assert (readable) -// releasePendingContainers() +//class ChainedLargeByteBuffer(private[io] val underlying: ChainedBuffer) extends LargeByteBuffer { // -// if (remaining() < 2) throw new BufferUnderflowException +// def capacity = underlying.capacity // -// if (localReadWritePossible(2)) { -// val buff = fetchCurrentBuffer() -// assert (buff.remaining() >= 2) -// val retval = buff.getChar -// globalPosition += 2 -// toNonEmptyBuffer() -// return retval -// } +// var _pos = 0l // -// // if slice is becoming too expensive, revisit this ... 
-// val buff = readFully(2) -// buff.getChar +// def get(dst: Array[Byte],offset: Int,length: Int): Unit = { +// underlying.read(_pos, dst, offset, length) +// _pos += length // } // // def get(): Byte = { -// assert (readable) -// releasePendingContainers() -// -// if (! hasRemaining()) throw new BufferUnderflowException -// -// // If we have remaining bytes, previous invocations MUST have ensured that we are at -// // a buffer which has data to be read. -// assert (localReadWritePossible(1)) -// -// val buff = fetchCurrentBuffer() -// assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) -// val retval = buff.get() -// globalPosition += 1 -// toNonEmptyBuffer() -// -// retval -// } -// -// def get(arr: Array[Byte], offset: Int, size: Int): Int = { -// assert (readable) -// releasePendingContainers() -// -// LargeByteBuffer.checkOffsets(arr, offset, size) -// -// // kyro depends on this it seems ? -// // assert (size > 0) -// if (0 == size) return 0 -// -// if (! hasRemaining()) return -1 -// -// if (localReadWritePossible(size)) { -// val buff = fetchCurrentBuffer() -// assert (buff.remaining() >= size) -// buff.get(arr, offset, size) -// globalPosition += size -// toNonEmptyBuffer() -// return size -// } -// -// var remainingSize = math.min(size, remaining()).asInstanceOf[Int] -// var currentOffset = offset -// -// while (remainingSize > 0) { -// val buff = fetchBufferOfSize(remainingSize) -// val toCopy = math.min(buff.remaining(), remainingSize) -// -// buff.get(arr, currentOffset, toCopy) -// currentOffset += toCopy -// remainingSize -= toCopy -// } -// -// currentOffset - offset -// } -// -// -// private def createSlice(size: Long): LargeByteBuffer = { -// -// releasePendingContainers() -// -// if (remaining() < size) { -// // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) -// throw new BufferOverflowException -// } -// -// // kyro depends on this it seems ? -// // assert (size > 0) -// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER -// -// val arr = new ArrayBuffer[ByteBufferContainer](2) -// var totalLeft = size -// -// // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) -// -// var containerIndex = currentContainerIndex -// while (totalLeft > 0 && hasRemaining()) { -// assertPreconditions(containerIndex) -// val container = containers(containerIndex) -// val currentLeft = currentRemaining0(containerIndex) -// -// assert (globalPosition + currentLeft <= globalLimit) -// assert (globalPosition >= bufferPositionStart(containerIndex) && -// (globalPosition < bufferPositionStart(containerIndex + 1))) -// -// val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] -// val sliceSize = math.min(totalLeft, currentLeft) -// assert (from >= 0) -// assert (sliceSize > 0 && sliceSize <= Int.MaxValue) -// -// val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) -// arr += slice -// -// globalPosition += sliceSize -// totalLeft -= sliceSize -// if (currentLeft == sliceSize) containerIndex += 1 -// } -// -// // Using toNonEmptyBuffer instead of directly moving to next here so that -// // other checks can be performed there. -// toNonEmptyBuffer() -// // force cleanup - this is fine since we are not using the buffers directly -// // which are actively needed (the returned value is on containers which can -// // recreate) -// releasePendingContainers() -// // free current container if acquired. 
-// if (currentContainerIndex < containers.length) { -// containers(currentContainerIndex).release() -// } -// assert (currentContainerIndex == containerIndex) -// -// val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) -// retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) -// retval -// } -// -// // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers -// // This is to be used only for writes : and ensures that writes are done into the appropriate -// // underlying bytebuffers. -// def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { -// assert(writable) -// assert(size >= 0) -// -// createSlice(size) +// val b = underlying.read(_pos) +// _pos += 1 +// b // } // -// // get a buffer which is of the specified size and contains data from the underlying buffers -// // Note, the actual data might be spread across the underlying buffers. -// // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! -// private def readFully(size: Int): ByteBuffer = { -// assert (readable) -// -// if (remaining() < size) { -// // throw exception -// throw new BufferUnderflowException() -// } -// -// // kyro depends on this it seems ? -// // assert (size > 0) -// if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER -// -// // Expected to be handled elsewhere. -// assert (! localReadWritePossible(size)) -// -// val localBuff = { -// val buff = fetchBufferOfSize(size) -// // assert(buff.remaining() <= size) -// // if (buff.remaining() == size) return buff -// assert(buff.remaining() < size) -// ByteBuffer.allocate(size).put(buff) -// } -// -// // assert (localBuff.hasRemaining) -// -// while (localBuff.hasRemaining) { -// val buff = fetchBufferOfSize(localBuff.remaining()) -// localBuff.put(buff) -// } -// -// localBuff.flip() -// localBuff +// def put(bytes: LargeByteBuffer): Unit = { +// ??? // } // -// -// -// def put(b: Byte) { -// assert (writable) -// if (remaining() < 1) { -// // logInfo("put byte. remaining = " + remaining() + ", this = " + this) -// throw new BufferOverflowException -// } -// -// assert (currentRemaining() > 0) -// -// fetchCurrentBuffer().put(b) -// globalPosition += 1 -// // Check to need to bump the index ? -// toNonEmptyBuffer() +// def position: Long = _pos +// def position(position: Long): Unit = { +// _pos = position // } -// -// -// def put(buffer: ByteBuffer) { -// assert (writable) -// if (remaining() < buffer.remaining()) { -// throw new BufferOverflowException -// } -// -// val bufferRemaining = buffer.remaining() -// if (localReadWritePossible(bufferRemaining)) { -// -// assert (currentRemaining() >= bufferRemaining) -// -// fetchCurrentBuffer().put(buffer) -// -// globalPosition += bufferRemaining -// toNonEmptyBuffer() -// return -// } -// -// while (buffer.hasRemaining) { -// val currentBufferRemaining = currentRemaining() -// val bufferRemaining = buffer.remaining() -// -// if (currentBufferRemaining >= bufferRemaining) { -// fetchCurrentBuffer().put(buffer) -// globalPosition += bufferRemaining -// } else { -// // Split across buffers. 
-// val currentBuffer = fetchCurrentBuffer() -// assert (currentBuffer.remaining() >= currentBufferRemaining) -// val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), -// currentBufferRemaining) -// assert (sliced.remaining() == currentBufferRemaining) -// currentBuffer.put(sliced) -// // move buffer pos -// buffer.position(buffer.position() + currentBufferRemaining) -// -// globalPosition += currentBufferRemaining -// } -// toNonEmptyBuffer() -// } -// -// assert (! hasRemaining() || currentRemaining() > 0) +// def remaining(): Long = { +// underlying.size - position // } // -// def put(other: LargeByteBuffer) { -// assert (writable) -// if (this.remaining() < other.remaining()) { -// throw new BufferOverflowException -// } -// -// while (other.hasRemaining()) { -// val buffer = other.fetchBufferOfSize(other.currentRemaining()) -// this.put(buffer) -// } +// def duplicate(): ChainedLargeByteBuffer = { +// new ChainedLargeByteBuffer(underlying) // } // -// -// def duplicate(): LargeByteBuffer = { -// val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) -// // We do a duplicate as part of construction - so avoid double duplicate. -// // containersCopy ++= containers.map(_.duplicate()) -// containersCopy ++= containers -// val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) -// -// // set limit and position (in that order) ... -// retval.limit(this.limit()) -// retval.position(this.position()) -// -// // Now release our containers - if any had been acquired -// releasePendingContainers() -// -// retval +// def rewind(): Unit = { +// _pos = 0 // } // -// -// /** -// * 'read' a LargeByteBuffer of size specified and return that. -// * Position will be incremented by size -// * -// * The name might be slightly confusing : rename ? -// * -// * @param size Amount of data to be read from this buffer and returned -// * @return -// */ -// def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { -// if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException -// if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException -// -// -// assert (readable) -// assert (size >= 0) -// -// releasePendingContainers() -// -// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER -// -// createSlice(size) +// def limit(): Long = { +// capacity // } // -// -// // This is essentially a workaround to exposing underlying buffers -// def readFrom(channel: ReadableByteChannel): Long = { -// -// assert (writable) -// releasePendingContainers() -// -// // this also allows us to avoid nasty corner cases in the loop. -// if (! hasRemaining()) { -// // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) -// throw new BufferOverflowException -// } -// -// var totalBytesRead = 0L -// -// while (hasRemaining()) { -// // read what we can ... -// val buffer = fetchCurrentBuffer() -// val bufferRemaining = currentRemaining() -// val bytesRead = channel.read(buffer) -// -// if (bytesRead > 0) { -// totalBytesRead += bytesRead -// // bump position too .. -// globalPosition += bytesRead -// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -// } -// else if (-1 == bytesRead) { -// // if we had already read some data in the loop, return that. -// if (totalBytesRead > 0) return totalBytesRead -// return -1 -// } // nothing available to read, retry later. 
return -// else if (0 == bytesRead) { -// return totalBytesRead -// } -// -// // toNonEmptyBuffer() -// } -// -// // Cleanup last buffer ? -// toNonEmptyBuffer() -// totalBytesRead +// def limit(newLimit: Long): Unit = { +// ??? // } // -// // This is essentially a workaround to exposing underlying buffers -// def readFrom(inStrm: InputStream): Long = { -// -// assert (writable) -// releasePendingContainers() -// -// // this also allows us to avoid nasty corner cases in the loop. -// // if (! hasRemaining()) throw new BufferOverflowException -// if (! hasRemaining()) return 0 -// -// var totalBytesRead = 0L -// -// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -// -// while (hasRemaining()) { -// // read what we can ... note, since there is no gaurantee that underlying buffer might -// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. -// // see if we can optimize this later ... -// val buffer = fetchCurrentBuffer() -// val bufferRemaining = buffer.remaining() -// val max = math.min(buff.length, bufferRemaining) -// val bytesRead = inStrm.read(buff, 0, max) -// -// if (bytesRead > 0) { -// buffer.put(buff, 0, bytesRead) -// totalBytesRead += bytesRead -// // bump position too .. -// globalPosition += bytesRead -// // buffer.position(buffer.position + bytesRead) -// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -// } -// else if (-1 == bytesRead) { -// // if we had already read some data in the loop, return that. -// if (totalBytesRead > 0) return totalBytesRead -// return -1 -// } // nothing available to read, retry later. return -// else if (0 == bytesRead) { -// return totalBytesRead -// } -// -// // toNonEmptyBuffer() +// def writeTo(channel:WritableByteChannel): Long = { +// var written = 0l +// underlying.chunks.foreach{bytes => +// //TODO test this +// val buffer = ByteBuffer.wrap(bytes) +// while (buffer.hasRemaining) +// channel.write(buffer) +// written += bytes.length // } -// -// totalBytesRead +// written // } +//} // -// // This is essentially a workaround to exposing underlying buffers -// // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce -// // code for performance reasons. -// def readFrom(inStrm: DataInput): Long = { -// -// assert (writable) -// releasePendingContainers() -// -// // this also allows us to avoid nasty corner cases in the loop. -// // if (! hasRemaining()) throw new BufferOverflowException -// if (! hasRemaining()) return 0 -// -// var totalBytesRead = 0L -// -// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -// -// while (hasRemaining()) { -// // read what we can ... note, since there is no gaurantee that underlying buffer might -// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. -// // see if we can optimize this later ... -// val buffer = fetchCurrentBuffer() -// val bufferRemaining = buffer.remaining() -// val max = math.min(buff.length, bufferRemaining) -// inStrm.readFully(buff, 0, max) -// val bytesRead = max -// -// if (bytesRead > 0) { -// buffer.put(buff, 0, bytesRead) -// totalBytesRead += bytesRead -// // bump position too .. -// globalPosition += bytesRead -// // buffer.position(buffer.position() + bytesRead) -// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -// } -// else if (-1 == bytesRead) { -// // if we had already read some data in the loop, return that. -// if (totalBytesRead > 0) return totalBytesRead -// return -1 -// } // nothing available to read, retry later. 
return -// else if (0 == bytesRead) { -// return totalBytesRead -// } +//class WrappedLargeByteBuffer(private[spark] val underlying: Array[ByteBuffer]) extends LargeByteBuffer { // -// // toNonEmptyBuffer() +// val (totalCapacity, chunkOffsets) = { +// var sum = 0l +// val offsets = new Array[Long](underlying.size) +// (0 until underlying.size).foreach{idx => +// offsets(idx) = sum +// sum += underlying(idx).capacity() // } -// -// totalBytesRead +// (sum, offsets) // } // -// // This is essentially a workaround to exposing underlying buffers -// // Note: tries to do it efficiently without needing to load everything into memory -// // (particularly for diskbacked buffers, etc). -// def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { -// -// assert (readable) -// releasePendingContainers() -// -// // this also allows us to avoid nasty corner cases in the loop. -// if (! hasRemaining()) throw new BufferUnderflowException -// -// var totalBytesWritten = 0L -// -// while (hasRemaining()) { -// // Write what we can ... -// val buffer = fetchCurrentBuffer() -// val bufferRemaining = buffer.remaining() -// assert (bufferRemaining > 0) -// val bytesWritten = channel.write(buffer) -// -// if (bytesWritten > 0) { -// totalBytesWritten += bytesWritten -// // bump position too .. -// globalPosition += bytesWritten -// if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() -// assert (! hasRemaining() || currentRemaining() > 0) -// } -// else if (0 == bytesWritten) { -// return totalBytesWritten -// } +// private var _pos = 0l +// private var currentBufferIdx = 0 +// private var currentBuffer = underlying(0) +// private var _limit = totalCapacity // -// // toNonEmptyBuffer() -// } +// def capacity = totalCapacity // -// assert (! hasRemaining()) -// if (cleanup) { -// free() +// def get(dst: Array[Byte], offset: Int, length: Int): Unit = { +// var moved = 0 +// while (moved < length) { +// val toRead = math.min(length - moved, currentBuffer.remaining()) +// currentBuffer.get(dst, offset, toRead) +// moved += toRead +// updateCurrentBuffer() // } -// totalBytesWritten // } // -// // This is essentially a workaround to exposing underlying buffers -// def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { -// -// assert (readable) -// releasePendingContainers() -// -// // this also allows us to avoid nasty corner cases in the loop. -// if (! hasRemaining()) throw new BufferUnderflowException -// -// var totalBytesWritten = 0L -// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -// -// while (hasRemaining()) { -// // write what we can ... note, since there is no gaurantee that underlying buffer might -// // expose array() method, we do double copy - from bytearray to buff and from -// // buff to outputstream. see if we can optimize this later ... -// val buffer = fetchCurrentBuffer() -// val bufferRemaining = buffer.remaining() -// val size = math.min(bufferRemaining, buff.length) -// buffer.get(buff, 0, size) -// outStrm.write(buff, 0, size) -// -// totalBytesWritten += size -// // bump position too .. -// globalPosition += size -// -// if (size >= bufferRemaining) toNonEmptyBuffer() -// } -// -// toNonEmptyBuffer() -// if (cleanup) { -// free() -// } -// totalBytesWritten +// def get(): Byte = { +// val r = currentBuffer.get() +// _pos += 1 +// updateCurrentBuffer() +// r // } // -// def asInputStream(): InputStream = { -// new InputStream() { -// override def read(): Int = { -// if (! 
hasRemaining()) return -1 -// get() -// } -// -// override def read(arr: Array[Byte], off: Int, len: Int): Int = { -// if (! hasRemaining()) return -1 -// -// get(arr, off, len) -// } -// -// override def available(): Int = { -// // current remaining is what can be read without blocking -// // anything higher might need disk access/buffer swapping. -// /* -// val left = remaining() -// math.min(left, Int.MaxValue).asInstanceOf[Int] -// */ -// currentRemaining() -// } +// private def updateCurrentBuffer(): Unit = { +// //TODO fix end condition +// while(!currentBuffer.hasRemaining()) { +// currentBufferIdx += 1 +// currentBuffer = underlying(currentBufferIdx) // } // } // -// def getCleaner() = cleaner -// -// /** -// * @param cleaner The previous cleaner, so that the caller can chain them if required. -// * @return -// */ -// private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { -// overrideCleaner(cleaner, allowOverride = true) +// def put(bytes: LargeByteBuffer): Unit = { +// ??? // } // -// private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { -// if (! this.allowCleanerOverride) { -// // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free -// return this.cleaner -// } -// -// this.allowCleanerOverride = allowOverride -// assert (null != cleaner) -// val prev = this.cleaner -// this.cleaner = cleaner -// // logInfo("Overriding " + prev + " with " + this.cleaner) -// prev +// def position: Long = _pos +// def position(position: Long): Unit = { +// //XXX check range? +// _pos = position // } -// -// private def doReleaseAll() { -// for (container <- containers) { -// container.release() -// } +// def remaining(): Long = { +// totalCapacity - _pos // } // -// def free(invokeCleaner: Boolean = true) { -// // logInfo("Free on " + this + ", cleaner = " + cleaner) -// // always invoking release -// doReleaseAll() -// -// if (invokeCleaner) cleaner.clean(this) +// def duplicate(): WrappedLargeByteBuffer = { +// new WrappedLargeByteBuffer(underlying.map{_.duplicate()}) // } // -// private def doDispose(needRelease: Boolean) { -// -// if (disposeLocationThrowable ne null) { -// logError("Already free'ed earlier at : ", disposeLocationThrowable) -// logError("Current at ", new Throwable) -// throw new IllegalStateException("Already freed.") -// } -// disposeLocationThrowable = new Throwable() -// -// // Forcefully cleanup all -// if (needRelease) doReleaseAll() -// -// // Free in a different loop, in case different containers refer to same resource -// // to release (like file) -// for (container <- containers) { -// container.free() -// } -// -// needReleaseIndices.clear() -// -// // We should not use this buffer anymore : set the values such that f -// // we dont ... -// globalPosition = 0 -// globalLimit = 0 -// globalCapacity = 0 +// def rewind(): Unit = { +// _pos = 0 +// underlying.foreach{_.rewind()} // } // -// // copy data over ... MUST be used only for cases where array is known to be -// // small to begin with. 
slightly risky method due to that assumption -// def toByteArray(): Array[Byte] = { -// val positionBackup = position() -// val size = remaining() -// if (size > Int.MaxValue) { -// throw new IllegalStateException( -// "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") -// } -// -// val retval = new Array[Byte](size.asInstanceOf[Int]) -// val readSize = get(retval, 0, retval.length) -// assert (readSize == retval.length, -// "readSize = " + readSize + ", retval.length = " + retval.length) -// -// position(positionBackup) -// -// retval +// def limit(): Long = { +// totalCapacity // } // -// // copy data over ... MUST be used only for cases where array is known to be -// // small to begin with. slightly risky method due to that assumption -// def toByteBuffer(): ByteBuffer = { -// ByteBuffer.wrap(toByteArray()) +// def limit(newLimit: Long) = { +// //XXX check range? set limits in sub buffers? +// _limit = newLimit // } // -// def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { -// val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) -// val currentPosition = position() -// retval.put(this) -// position(currentPosition) -// retval.clear() -// retval -// } -// -// -// -// // This is ONLY used for testing : that too as part of development of this and associated classes -// // remove before contributing to spark. -// def hexDump(): String = { -// if (remaining() * 64 > Int.MaxValue) { -// throw new UnsupportedOperationException("buffer too large " + remaining()) -// } -// -// val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) -// -// var perLine = 0 -// var first = true -// for (b <- toByteArray()) { -// perLine += 1 -// if (perLine % 8 == 0) { -// sb.append('\n') -// first = true -// } -// if (! first) sb.append(' ') -// first = false -// sb.append(java.lang.Integer.toHexString(b & 0xff)) +// def writeTo(channel: WritableByteChannel): Long = { +// var written = 0l +// underlying.foreach{buffer => +// //TODO test this +// //XXX do we care about respecting the limit here? +// written += buffer.remaining() +// while (buffer.hasRemaining) +// channel.write(buffer) // } -// sb.append('\n') -// sb.toString() +// written // } // -// override def toString: String = { -// val sb: StringBuffer = new StringBuffer -// sb.append(getClass.getName) -// sb.append(' ') -// sb.append(System.identityHashCode(this)) -// sb.append("@[pos=") -// sb.append(position()) -// sb.append(" lim=") -// sb.append(limit()) -// sb.append(" cap=") -// sb.append(capacity()) -// sb.append("]") -// sb.toString -// } -// -// -// -// override def finalize(): Unit = { -// var marked = false -// if (containers ne null) { -// if (containers.exists(container => container.isAcquired && container.requireRelease())) { -// marked = true -// logError("BUG: buffer was not released - and now going out of scope. " + -// "Potential resource leak. Allocated at ", allocateLocationThrowable) -// containers.foreach(_.release()) -// } -// if (containers.exists(container => !container.isFreed && container.requireFree())) { -// if (!marked) { -// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", -// allocateLocationThrowable) -// } -// else { -// logError("BUG: buffer was not freed - and now going out of scope. 
Potential resource leak") -// } -// containers.foreach(_.free()) -// } -// } -// super.finalize() -// } //} // +//object LargeByteBuffer { // -//object LargeByteBuffer extends Logging { -// -// private val noopDisposeFunction = new BufferCleaner() { -// protected def doClean(buffer: LargeByteBuffer) { -// buffer.free(invokeCleaner = false) -// } -// } -// -// val enableExpensiveAssert = false -// private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) -// val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( -// new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) -// // Do not allow anyone else to override cleaner -// EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) -// -// // 8K sufficient ? -// private val TEMP_ARRAY_SIZE = 8192 -// -// /** -// * Create a LargeByteBuffer of specified size which is split across -// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory -// * ByteBuffer -// * -// */ -// def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { -// if (0 == totalSize) { -// return EMPTY_BUFFER -// } -// -// assert (totalSize > 0) -// -// val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) -// val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) -// val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) -// -// assert (lastBlockSize > 0) -// -// val bufferArray = { -// val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) -// for (index <- 0 until numBlocks - 1) { -// val buff = ByteBuffer.allocate(blockSize) -// // buff.clear() -// arr += new HeapByteBufferContainer(buff, true) -// } -// arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) -// assert (arr.length == numBlocks) -// arr -// } -// -// new LargeByteBuffer(bufferArray, false, false) -// } -// -// /** -// * Create a LargeByteBuffer of specified size which is split across -// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk -// * -// */ -// private def allocateDiskBuffer(totalSize: Long, -// blockManager: BlockManager): LargeByteBuffer = { -// if (0 == totalSize) { -// return EMPTY_BUFFER -// } -// -// assert (totalSize > 0) -// -// // Create a file of the specified size. -// val file = blockManager.diskBlockManager.createTempBlock()._2 -// val raf = new RandomAccessFile(file, "rw") -// try { -// raf.setLength(totalSize) -// } finally { -// raf.close() -// } -// -// readWriteDiskSegment(new FileSegment(file, 0, totalSize), -// ephemeralDiskBacked = true, blockManager.ioConf) -// } -// -// // The returned buffer takes up ownership of the underlying buffers -// // (including dispos'ing that when done) -// def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { -// val nonEmpty = buffers.filter(_.hasRemaining) -// -// // cleanup the empty buffers -// buffers.filter(! 
_.hasRemaining).foreach(b => BlockManager.dispose(b)) -// -// -// if (nonEmpty.isEmpty) { -// return EMPTY_BUFFER -// } -// -// // slice so that offsets match our requirement -// new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => -// new HeapByteBufferContainer(b.slice(), true)), false, false) +// def asLargeByteBuffer(byteBuffer: ByteBuffer): LargeByteBuffer = { +// new WrappedLargeByteBuffer(Array(byteBuffer)) // } // -// def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { -// // only non empty arrays -// val arrays = byteArrays.filter(_.length > 0) -// if (0 == arrays.length) return EMPTY_BUFFER -// -// new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => -// new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) -// } -// -// def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { -// -// if (inputBuffers.isEmpty) return EMPTY_BUFFER -// -// if (! inputBuffers.exists(_.hasRemaining())) { -// if (canDispose) inputBuffers.map(_.free()) -// return EMPTY_BUFFER -// } -// -// // release all temp resources acquired -// inputBuffers.foreach(buff => buff.releasePendingContainers()) -// // free current container if acquired. -// inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { -// buff.containers(buff.currentContainerIndex).release() -// }) -// // inputBuffers.foreach(b => b.doReleaseAll()) -// -// -// // Dispose of any empty buffers -// if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) -// -// // Find all containers we need. -// val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) -// -// val containers = buffers.flatMap(_.containers) -// assert (! containers.isEmpty) -// // The in order containers of "buffers" seq constitute the required return value -// val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, -// // if you cant dispose, then we dont own the buffers : in which case, need duplicate -// ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) -// -// if (canDispose) { -// // override dispose of all other buffers. -// val disposeFunctions = inputBuffers.map { -// buffer => { -// (buffer, buffer.overrideCleaner(noopDisposeFunction)) -// } -// } -// -// val cleaner = retval.getCleaner() -// val newCleaner = new BufferCleaner { -// protected def doClean(buffer: LargeByteBuffer) { -// -// assert (retval == buffer) -// // default cleaner. -// cleaner.clean(retval) -// // not required, since we are within clean anyway. -// // retval.free(invokeCleaner = false) -// -// // retval.doDispose(needRelease = true) -// -// // This might actually call dispose twice on some (initially) empty buffers, -// // which is fine since we now guard against that. -// disposeFunctions.foreach(v => v._2.clean(v._1)) -// // Call the free method too : so that buffers are marked free ... 
-// disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) -// } -// } -// -// val prev = retval.overrideCleaner(newCleaner) -// assert (prev == cleaner) -// } -// -// retval +// def asLargeByteBuffer(bytes: Array[Byte]): LargeByteBuffer = { +// new WrappedLargeByteBuffer(Array(ByteBuffer.wrap(bytes))) // } // -// private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { -// if (arr == null) { -// throw new NullPointerException -// } else if (offset < 0 || size < 0 || offset + size > arr.length) { -// throw new IndexOutOfBoundsException -// } -// } // -// def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { -// if (size <= blockManager.ioConf.maxInMemSize) { -// LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) -// } else { -// LargeByteBuffer.allocateDiskBuffer(size, blockManager) -// } +// def allocateOnHeap(size: Long, maxChunk: Int): LargeByteBuffer = { +// val buffer = ChainedBuffer.withInitialSize(maxChunk, size) +// new ChainedLargeByteBuffer(buffer) // } // -// def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, -// ephemeralDiskBacked: Boolean): LargeByteBuffer = { -// // Split the block into multiple of BlockStore.maxBlockSize -// val segmentSize = segment.length -// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] -// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) -// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) -// -// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) -// -// for (index <- 0 until numBlocks - 1) { -// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, -// segment.offset + index * blockSize, blockSize), ioConf) -// } -// -// // Last block -// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, -// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) -// -// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +// def mapFile( +// channel: FileChannel, +// mode: MapMode, +// offset: Long, +// length: Long, +// maxChunk: Int = Integer.MAX_VALUE - 1e6.toInt +// ): LargeByteBuffer = { +// val offsets = new ArrayBuffer[Long]() +// var curOffset = offset +// val end = offset + length +// while (curOffset < end) { +// offsets += curOffset +// val length = math.min(end - curOffset, maxChunk) +// curOffset += length +// } +// offsets += end +// val chunks = new Array[ByteBuffer](offsets.size - 1) +// (0 until offsets.size - 1).foreach{idx => +// chunks(idx) = channel.map(mode, offsets(idx), offsets(idx + 1) - offsets(idx)) +// } +// new WrappedLargeByteBuffer(chunks) // } +//} // -// def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, -// ioConf: IOConfig): LargeByteBuffer = { -// -// // Split the block into multiple of BlockStore.maxBlockSize -// val segmentSize = segment.length -// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] -// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) -// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) -// -// logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + -// ", lastBlockSize = " + lastBlockSize) -// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) -// -// for (index <- 0 until numBlocks - 1) { -// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, -// segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) -// } -// -// // Last block -// buffers += new ReadWriteFileContainer(new 
FileSegment(segment.file, -// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) // -// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) -// } -//} +//// +/////** +//// * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G +//// * which ByteBuffers are limited to. +//// * Externally, it exposes all the api which java.nio.ByteBuffer exposes. +//// * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. +//// * Not all the data might be loaded into memory (like disk or tachyon data) - so actual +//// * memory footprint - heap and vm could be much lower than capacity. +//// * +//// * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this +//// * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! +//// * +//// * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this +//// * will require the file to be kept open (repeatedly opening/closing file is not good +//// * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is +//// * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) +//// * +//// * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is +//// * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some +//// * cases (when we duplicate/slice them). Currently spark does not need this, but might in future +//// * so relook at it later. +//// */ +////// We should make this constructor private: but for now, +////// leaving it public since TachyonStore needs it +////class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], +//// private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { +//// +//// // TODO: TEMP code: to flush out potential resource leaks. REMOVE ME +//// private val allocateLocationThrowable: Throwable = { +//// if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { +//// new Throwable("blockId = " + BlockManager.getLookupBlockId) +//// } else { +//// null +//// } +//// } +//// private var disposeLocationThrowable: Throwable = null +//// +//// @volatile private var allowCleanerOverride = true +//// @volatile private var cleaner: BufferCleaner = new BufferCleaner { +//// override def doClean(buffer: LargeByteBuffer) = { +//// assert (LargeByteBuffer.this == buffer) +//// doDispose(needRelease = false) +//// } +//// } +//// +//// // should not be empty +//// assert (null != inputContainers && ! inputContainers.isEmpty) +//// // should not have any null's +//// assert (inputContainers.find(_ == null).isEmpty) +//// +//// // println("Num containers = " + inputContainers.size) +//// +//// // Position, limit and capacity relevant over the engire LargeByteBuffer +//// @volatile private var globalPosition = 0L +//// @volatile private var globalLimit = 0L +//// @volatile private var currentContainerIndex = 0 +//// +//// // The buffers in which the actual data is held. +//// private var containers: Array[ByteBufferContainer] = null +//// +//// // aggregate capacities of the individual buffers. 
+//// // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be +//// // sum of capacity of 0th and 1st block buffer +//// private var bufferPositionStart: Array[Long] = null +//// +//// // Contains the indices of a containers which requires release before subsequent invocation of +//// // read/write should be serviced. This is required since current read/write might have moved the +//// // position but since we are returning bytebuffers which depend on the validity of the existing +//// // bytebuffer, we cant release them yet. +//// private var needReleaseIndices = new HashSet[Int]() +//// +//// private val readable = ! inputContainers.exists(! _.isReadable) +//// private val writable = ! inputContainers.exists(! _.isWritable) +//// +//// +//// // initialize +//// @volatile private var globalCapacity = { +//// +//// // Ensure that there are no empty buffers : messes up with our code : unless it +//// // is a single buffer (for empty buffer for marker case) +//// assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) +//// +//// containers = { +//// if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray +//// } +//// containers.foreach(_.validate()) +//// +//// def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { +//// val buff = new ArrayBuffer[Long](arr.length + 1) +//// buff += 0L +//// +//// buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) +//// assert (buff.length == arr.length + 1) +//// bufferPositionStart = buff.toArray +//// } +//// +//// initializeBufferPositionStart(containers) +//// +//// // remove references from inputBuffers +//// inputContainers.clear() +//// +//// globalLimit = bufferPositionStart(containers.length) +//// globalPosition = 0L +//// currentContainerIndex = 0 +//// +//// assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) +//// +//// globalLimit +//// } +//// +//// final def position(): Long = globalPosition +//// +//// final def limit(): Long = globalLimit +//// +//// final def capacity(): Long = globalCapacity +//// +//// final def limit(newLimit: Long) { +//// if ((newLimit > capacity()) || (newLimit < 0)) { +//// throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) +//// } +//// +//// globalLimit = newLimit +//// if (position() > newLimit) position(newLimit) +//// } +//// +//// def skip(skipBy: Long) = position(position() + skipBy) +//// +//// private def releasePendingContainers() { +//// if (! needReleaseIndices.isEmpty) { +//// val iter = needReleaseIndices.iterator +//// while (iter.hasNext) { +//// val index = iter.next() +//// assert (index >= 0 && index < containers.length) +//// // It is possible to move from one container to next before the previous +//// // container was acquired. For example, get forcing move to next container +//// // since current was exhausted immediatelly followed by a position() +//// // so the container we moved to was never acquired. +//// +//// // assert (containers(index).isAcquired) +//// // will this always be satisfied ? 
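The bufferPositionStart bookkeeping above is a prefix sum over container capacities, and position(newPosition) amounts to finding the bracketing entry. A minimal standalone sketch of that lookup, with hypothetical names (scanLeft/lastIndexWhere are the only library calls used):

// Sketch: prefix-sum offsets and global-position -> (container index, local offset) lookup,
// mirroring how bufferPositionStart is built and searched above. Hypothetical names.
object PositionLookupSketch {
  val capacities = Array(8L, 8L, 4L)                        // capacities of three hypothetical chunks
  val starts: Array[Long] = capacities.scanLeft(0L)(_ + _)  // [0, 8, 16, 20]; last entry is total capacity

  // A backward linear scan is fine for a handful of chunks; binary search also works.
  def locate(globalPos: Long): (Int, Long) = {
    require(globalPos >= 0 && globalPos < starts.last, s"position $globalPos out of range")
    val idx = starts.lastIndexWhere(_ <= globalPos, starts.length - 2)
    (idx, globalPos - starts(idx))
  }

  def main(args: Array[String]): Unit = {
    println(locate(0L))   // (0,0)  first chunk, local offset 0
    println(locate(9L))   // (1,1)  second chunk, local offset 1
    println(locate(19L))  // (2,3)  last chunk, local offset 3
  }
}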
+//// // assert (index != currentContainerIndex) +//// if (containers(index).isAcquired) containers(index).release() +//// } +//// needReleaseIndices.clear() +//// } +//// } +//// +//// private def toNewContainer(newIndex: Int) { +//// if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { +//// +//// assert (currentContainerIndex >= 0) +//// needReleaseIndices += currentContainerIndex +//// } +//// currentContainerIndex = newIndex +//// } +//// +//// // expensive method, sigh ... optimize it later ? +//// final def position(newPosition: Long) { +//// +//// if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() +//// +//// if (currentContainerIndex < bufferPositionStart.length - 1 && +//// newPosition >= bufferPositionStart(currentContainerIndex) && +//// newPosition < bufferPositionStart(currentContainerIndex + 1)) { +//// // Same buffer - easy method ... +//// globalPosition = newPosition +//// // Changed position - free previously returned buffers. +//// releasePendingContainers() +//// return +//// } +//// +//// // Find appropriate currentContainerIndex +//// // Since bufferPositionStart is sorted, can be replaced with binary search if required. +//// // For now, not in the perf critical path since buffers size is very low typically. +//// var index = 0 +//// val cLen = containers.length +//// while (index < cLen) { +//// if (newPosition >= bufferPositionStart(index) && +//// newPosition < bufferPositionStart(index + 1)) { +//// globalPosition = newPosition +//// toNewContainer(index) +//// // Changed position - free earlier and previously returned buffers. +//// releasePendingContainers() +//// return +//// } +//// index += 1 +//// } +//// +//// if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { +//// // boundary. +//// globalPosition = newPosition +//// toNewContainer(cLen) +//// // Changed position - free earlier and previously returned buffers. +//// releasePendingContainers() +//// return +//// } +//// +//// assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) +//// } +//// +//// +//// /** +//// * Clears this buffer. The position is set to zero, the limit is set to +//// * the capacity, and the mark is discarded. +//// * +//// *Invoke this method before using a sequence of channel-read or +//// * put operations to fill this buffer. +//// * +//// *
This method does not actually erase the data in the buffer, but it +//// * is named as if it did because it will most often be used in situations +//// * in which that might as well be the case.
+//// */ +//// final def clear() { +//// // if (0 == globalCapacity) return +//// +//// needReleaseIndices += 0 +//// globalPosition = 0L +//// toNewContainer(0) +//// globalLimit = globalCapacity +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Flips this buffer. The limit is set to the current position and then +//// * the position is set to zero. If the mark is defined then it is +//// * discarded. +//// * +//// *After a sequence of channel-read or put operations, invoke +//// * this method to prepare for a sequence of channel-write or relative +//// * get operations. +//// */ +//// final def flip() { +//// needReleaseIndices += 0 +//// globalLimit = globalPosition +//// globalPosition = 0L +//// toNewContainer(0) +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Rewinds this buffer. The position is set to zero and the mark is +//// * discarded. +//// * +//// *
Invoke this method before a sequence of channel-write or get +//// * operations, assuming that the limit has already been set +//// * appropriately. +//// */ +//// final def rewind() { +//// needReleaseIndices += 0 +//// globalPosition = 0L +//// toNewContainer(0) +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Returns the number of elements between the current position and the +//// * limit.
+//// * +//// * @return The number of elements remaining in this buffer +//// */ +//// final def remaining(): Long = { +//// globalLimit - globalPosition +//// } +//// +//// /** +//// * Tells whether there are any elements between the current position and +//// * the limit. +//// * +//// * @return true if, and only if, there is at least one element +//// * remaining in this buffer +//// */ +//// final def hasRemaining() = { +//// globalPosition < globalLimit +//// } +//// +//// // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) +//// +//// // number of bytes remaining in currently active underlying buffer +//// private def currentRemaining(): Int = { +//// if (hasRemaining()) { +//// // validate currentContainerIndex is valid +//// assert (globalPosition >= bufferPositionStart(currentContainerIndex) && +//// globalPosition < bufferPositionStart(currentContainerIndex + 1), +//// "globalPosition = " + globalPosition + +//// ", currentContainerIndex = " + currentContainerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// +//// currentRemaining0(currentContainerIndex) +//// } else 0 +//// } +//// +//// // Without any validation : required when we are bumping the index (when validation will fail) ... +//// private def currentRemaining0(which: Int): Int = { +//// // currentBuffer().remaining() +//// math.max(0, math.min(bufferPositionStart(which + 1), +//// globalLimit) - globalPosition).asInstanceOf[Int] +//// } +//// +//// // Set the approppriate position/limit for the current underlying buffer to mirror our +//// // the LargeByteBuffer's state. +//// private def fetchCurrentBuffer(): ByteBuffer = { +//// releasePendingContainers() +//// +//// assert (currentContainerIndex < containers.length) +//// +//// val container = containers(currentContainerIndex) +//// if (! container.isAcquired) { +//// container.acquire() +//// } +//// +//// assert (container.isAcquired) +//// if (LargeByteBuffer.enableExpensiveAssert) { +//// assert (! containers.exists( b => (b ne container) && b.isAcquired)) +//// } +//// +//// assert (currentContainerIndex < bufferPositionStart.length && +//// globalPosition < bufferPositionStart(currentContainerIndex + 1), +//// "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + +//// bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) +//// +//// val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). +//// asInstanceOf[Int] +//// +//// val buffer = container.getByteBuffer +//// buffer.position(buffPosition) +//// val diff = buffer.capacity - buffPosition +//// val left = remaining() +//// if (diff <= left) { +//// buffer.limit(buffer.capacity()) +//// } else { +//// // Can happen if limit() was called. +//// buffer.limit(buffPosition + left.asInstanceOf[Int]) +//// } +//// +//// buffer +//// } +//// +//// // To be used ONLY to test in suites. +//// private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { +//// if ("1" != System.getProperty("SPARK_TESTING")) { +//// throw new IllegalStateException("This method is to be used ONLY within spark test suites") +//// } +//// +//// fetchCurrentBuffer() +//// } +//// +//// // Expects that the invoker has ensured that this can be safely invoked. +//// // That is, it wont be invoked when the loop wont terminate. +//// private def toNonEmptyBuffer() { +//// +//// if (! hasRemaining()) { +//// var newIndex = currentContainerIndex +//// // Ensure we are in the right block or not. 
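fetchCurrentBuffer() above works by mirroring the global cursor onto the active chunk: the chunk's position becomes globalPosition minus the chunk's start offset, and its limit is clamped when the global limit ends inside the chunk. A standalone sketch of that mapping on a plain java.nio.ByteBuffer, with a hypothetical helper name:

import java.nio.ByteBuffer

// Sketch: mirror a (globalPos, globalLimit) window onto one chunk that covers
// global bytes [chunkStart, chunkStart + chunk.capacity). Hypothetical names.
object MirrorSketch {
  def mirror(chunk: ByteBuffer, chunkStart: Long, globalPos: Long, globalLimit: Long): ByteBuffer = {
    val localPos = (globalPos - chunkStart).toInt   // where the global cursor falls inside this chunk
    val left = globalLimit - globalPos              // bytes readable overall
    chunk.position(localPos)
    if (chunk.capacity() - localPos <= left) chunk.limit(chunk.capacity()) // window runs past this chunk
    else chunk.limit(localPos + left.toInt)                                // window ends inside this chunk
    chunk
  }

  def main(args: Array[String]): Unit = {
    val chunk = ByteBuffer.allocate(8)              // covers global bytes [8, 16)
    val b = mirror(chunk, chunkStart = 8L, globalPos = 10L, globalLimit = 13L)
    println(s"pos=${b.position()} lim=${b.limit()}") // pos=2 lim=5
  }
}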
+//// while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { +//// newIndex += 1 +//// } +//// toNewContainer(newIndex) +//// // Do not do this - since we might not yet have consumed the buffer which caused EOF right now +//// /* +//// // Add last one also, and release it too - since we are at the end of the buffer with nothing +//// // more pending. +//// if (newIndex >= 0 && currentContainerIndex < containers.length) { +//// needReleaseIndices += newIndex +//// } +//// */ +//// assert (currentContainerIndex >= 0) +//// // releasePendingContainers() +//// return +//// } +//// +//// var index = currentContainerIndex +//// while (0 == currentRemaining0(index) && index < containers.length) { +//// index += 1 +//// } +//// assert (currentContainerIndex < containers.length) +//// toNewContainer(index) +//// assert (0 != currentRemaining()) +//// } +//// +//// private def assertPreconditions(containerIndex: Int) { +//// assert (globalPosition >= bufferPositionStart(containerIndex), +//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// assert (globalPosition < bufferPositionStart(containerIndex + 1), +//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// +//// assert (globalLimit <= globalCapacity) +//// assert (containerIndex < containers.length) +//// } +//// +//// +//// /** +//// * Attempts to return a ByteBuffer of the requested size. +//// * It is possible to return a buffer of size smaller than requested +//// * even though hasRemaining == true +//// * +//// * On return, position would have been moved 'ahead' by the size of the buffer returned : +//// * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer +//// * +//// * +//// * This is used to primarily retrieve content of this buffer to expose via ByteBuffer +//// * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the +//// * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer +//// * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying +//// * container is a disk backed container, and we make subsequent calls to get(), the returned +//// * ByteBuffer can be dispose'ed off +//// * +//// * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
+//// * @return +//// */ +//// +//// private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { +//// fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) +//// } +//// +//// private def fetchBufferOfSizeImpl(maxChunkSize: Int, +//// canReleaseContainers: Boolean): ByteBuffer = { +//// if (canReleaseContainers) releasePendingContainers() +//// assert (maxChunkSize > 0) +//// +//// // not checking for degenerate case of maxChunkSize == 0 +//// if (globalPosition >= globalLimit) { +//// // throw exception +//// throw new BufferUnderflowException() +//// } +//// +//// // Check preconditions : disable these later, since they might be expensive to +//// // evaluate for every IO op +//// assertPreconditions(currentContainerIndex) +//// +//// val currentBufferRemaining = currentRemaining() +//// +//// assert (currentBufferRemaining > 0) +//// +//// val size = math.min(currentBufferRemaining, maxChunkSize) +//// +//// val newBuffer = if (currentBufferRemaining > maxChunkSize) { +//// val currentBuffer = fetchCurrentBuffer() +//// val buff = ByteBufferContainer.createSlice(currentBuffer, +//// currentBuffer.position(), maxChunkSize) +//// assert (buff.remaining() == maxChunkSize) +//// buff +//// } else { +//// val currentBuffer = fetchCurrentBuffer() +//// val buff = currentBuffer.slice() +//// assert (buff.remaining() == currentBufferRemaining) +//// buff +//// } +//// +//// assert (size == newBuffer.remaining()) +//// assert (0 == newBuffer.position()) +//// assert (size == newBuffer.limit()) +//// assert (newBuffer.capacity() == newBuffer.limit()) +//// +//// globalPosition += newBuffer.remaining +//// toNonEmptyBuffer() +//// +//// newBuffer +//// } +//// +//// // Can we service the read/write from the currently active (underlying) bytebuffer or not. +//// // For almost all cases, this will return true allowing us to optimize away the more expensive +//// // computations. +//// private def localReadWritePossible(size: Int) = +//// size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) +//// +//// +//// def getLong(): Long = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 8) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(8)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 8) +//// val retval = buff.getLong +//// globalPosition += 8 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// val buff = readFully(8) +//// buff.getLong +//// } +//// +//// def getInt(): Int = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 4) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(4)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 4) +//// val retval = buff.getInt +//// globalPosition += 4 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// val buff = readFully(4) +//// buff.getInt +//// } +//// +//// def getChar(): Char = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 2) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(2)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 2) +//// val retval = buff.getChar +//// globalPosition += 2 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// // if slice is becoming too expensive, revisit this ... 
+//// val buff = readFully(2) +//// buff.getChar +//// } +//// +//// def get(): Byte = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// // If we have remaining bytes, previous invocations MUST have ensured that we are at +//// // a buffer which has data to be read. +//// assert (localReadWritePossible(1)) +//// +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) +//// val retval = buff.get() +//// globalPosition += 1 +//// toNonEmptyBuffer() +//// +//// retval +//// } +//// +//// def get(arr: Array[Byte], offset: Int, size: Int): Int = { +//// assert (readable) +//// releasePendingContainers() +//// +//// LargeByteBuffer.checkOffsets(arr, offset, size) +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return 0 +//// +//// if (! hasRemaining()) return -1 +//// +//// if (localReadWritePossible(size)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= size) +//// buff.get(arr, offset, size) +//// globalPosition += size +//// toNonEmptyBuffer() +//// return size +//// } +//// +//// var remainingSize = math.min(size, remaining()).asInstanceOf[Int] +//// var currentOffset = offset +//// +//// while (remainingSize > 0) { +//// val buff = fetchBufferOfSize(remainingSize) +//// val toCopy = math.min(buff.remaining(), remainingSize) +//// +//// buff.get(arr, currentOffset, toCopy) +//// currentOffset += toCopy +//// remainingSize -= toCopy +//// } +//// +//// currentOffset - offset +//// } +//// +//// +//// private def createSlice(size: Long): LargeByteBuffer = { +//// +//// releasePendingContainers() +//// +//// if (remaining() < size) { +//// // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +//// +//// val arr = new ArrayBuffer[ByteBufferContainer](2) +//// var totalLeft = size +//// +//// // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) +//// +//// var containerIndex = currentContainerIndex +//// while (totalLeft > 0 && hasRemaining()) { +//// assertPreconditions(containerIndex) +//// val container = containers(containerIndex) +//// val currentLeft = currentRemaining0(containerIndex) +//// +//// assert (globalPosition + currentLeft <= globalLimit) +//// assert (globalPosition >= bufferPositionStart(containerIndex) && +//// (globalPosition < bufferPositionStart(containerIndex + 1))) +//// +//// val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] +//// val sliceSize = math.min(totalLeft, currentLeft) +//// assert (from >= 0) +//// assert (sliceSize > 0 && sliceSize <= Int.MaxValue) +//// +//// val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) +//// arr += slice +//// +//// globalPosition += sliceSize +//// totalLeft -= sliceSize +//// if (currentLeft == sliceSize) containerIndex += 1 +//// } +//// +//// // Using toNonEmptyBuffer instead of directly moving to next here so that +//// // other checks can be performed there. 
+//// toNonEmptyBuffer() +//// // force cleanup - this is fine since we are not using the buffers directly +//// // which are actively needed (the returned value is on containers which can +//// // recreate) +//// releasePendingContainers() +//// // free current container if acquired. +//// if (currentContainerIndex < containers.length) { +//// containers(currentContainerIndex).release() +//// } +//// assert (currentContainerIndex == containerIndex) +//// +//// val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) +//// retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) +//// retval +//// } +//// +//// // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers +//// // This is to be used only for writes : and ensures that writes are done into the appropriate +//// // underlying bytebuffers. +//// def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { +//// assert(writable) +//// assert(size >= 0) +//// +//// createSlice(size) +//// } +//// +//// // get a buffer which is of the specified size and contains data from the underlying buffers +//// // Note, the actual data might be spread across the underlying buffers. +//// // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! +//// private def readFully(size: Int): ByteBuffer = { +//// assert (readable) +//// +//// if (remaining() < size) { +//// // throw exception +//// throw new BufferUnderflowException() +//// } +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER +//// +//// // Expected to be handled elsewhere. +//// assert (! localReadWritePossible(size)) +//// +//// val localBuff = { +//// val buff = fetchBufferOfSize(size) +//// // assert(buff.remaining() <= size) +//// // if (buff.remaining() == size) return buff +//// assert(buff.remaining() < size) +//// ByteBuffer.allocate(size).put(buff) +//// } +//// +//// // assert (localBuff.hasRemaining) +//// +//// while (localBuff.hasRemaining) { +//// val buff = fetchBufferOfSize(localBuff.remaining()) +//// localBuff.put(buff) +//// } +//// +//// localBuff.flip() +//// localBuff +//// } +//// +//// +//// +//// def put(b: Byte) { +//// assert (writable) +//// if (remaining() < 1) { +//// // logInfo("put byte. remaining = " + remaining() + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// assert (currentRemaining() > 0) +//// +//// fetchCurrentBuffer().put(b) +//// globalPosition += 1 +//// // Check to need to bump the index ? +//// toNonEmptyBuffer() +//// } +//// +//// +//// def put(buffer: ByteBuffer) { +//// assert (writable) +//// if (remaining() < buffer.remaining()) { +//// throw new BufferOverflowException +//// } +//// +//// val bufferRemaining = buffer.remaining() +//// if (localReadWritePossible(bufferRemaining)) { +//// +//// assert (currentRemaining() >= bufferRemaining) +//// +//// fetchCurrentBuffer().put(buffer) +//// +//// globalPosition += bufferRemaining +//// toNonEmptyBuffer() +//// return +//// } +//// +//// while (buffer.hasRemaining) { +//// val currentBufferRemaining = currentRemaining() +//// val bufferRemaining = buffer.remaining() +//// +//// if (currentBufferRemaining >= bufferRemaining) { +//// fetchCurrentBuffer().put(buffer) +//// globalPosition += bufferRemaining +//// } else { +//// // Split across buffers. 
+//// val currentBuffer = fetchCurrentBuffer() +//// assert (currentBuffer.remaining() >= currentBufferRemaining) +//// val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), +//// currentBufferRemaining) +//// assert (sliced.remaining() == currentBufferRemaining) +//// currentBuffer.put(sliced) +//// // move buffer pos +//// buffer.position(buffer.position() + currentBufferRemaining) +//// +//// globalPosition += currentBufferRemaining +//// } +//// toNonEmptyBuffer() +//// } +//// +//// assert (! hasRemaining() || currentRemaining() > 0) +//// } +//// +//// def put(other: LargeByteBuffer) { +//// assert (writable) +//// if (this.remaining() < other.remaining()) { +//// throw new BufferOverflowException +//// } +//// +//// while (other.hasRemaining()) { +//// val buffer = other.fetchBufferOfSize(other.currentRemaining()) +//// this.put(buffer) +//// } +//// } +//// +//// +//// def duplicate(): LargeByteBuffer = { +//// val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) +//// // We do a duplicate as part of construction - so avoid double duplicate. +//// // containersCopy ++= containers.map(_.duplicate()) +//// containersCopy ++= containers +//// val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) +//// +//// // set limit and position (in that order) ... +//// retval.limit(this.limit()) +//// retval.position(this.position()) +//// +//// // Now release our containers - if any had been acquired +//// releasePendingContainers() +//// +//// retval +//// } +//// +//// +//// /** +//// * 'read' a LargeByteBuffer of size specified and return that. +//// * Position will be incremented by size +//// * +//// * The name might be slightly confusing : rename ? +//// * +//// * @param size Amount of data to be read from this buffer and returned +//// * @return +//// */ +//// def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { +//// if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException +//// if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException +//// +//// +//// assert (readable) +//// assert (size >= 0) +//// +//// releasePendingContainers() +//// +//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +//// +//// createSlice(size) +//// } +//// +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def readFrom(channel: ReadableByteChannel): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) { +//// // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// var totalBytesRead = 0L +//// +//// while (hasRemaining()) { +//// // read what we can ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = currentRemaining() +//// val bytesRead = channel.read(buffer) +//// +//// if (bytesRead > 0) { +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. 
return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// // Cleanup last buffer ? +//// toNonEmptyBuffer() +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def readFrom(inStrm: InputStream): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// // if (! hasRemaining()) throw new BufferOverflowException +//// if (! hasRemaining()) return 0 +//// +//// var totalBytesRead = 0L +//// +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // read what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +//// // see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val max = math.min(buff.length, bufferRemaining) +//// val bytesRead = inStrm.read(buff, 0, max) +//// +//// if (bytesRead > 0) { +//// buffer.put(buff, 0, bytesRead) +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// // buffer.position(buffer.position + bytesRead) +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce +//// // code for performance reasons. +//// def readFrom(inStrm: DataInput): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// // if (! hasRemaining()) throw new BufferOverflowException +//// if (! hasRemaining()) return 0 +//// +//// var totalBytesRead = 0L +//// +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // read what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +//// // see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val max = math.min(buff.length, bufferRemaining) +//// inStrm.readFully(buff, 0, max) +//// val bytesRead = max +//// +//// if (bytesRead > 0) { +//// buffer.put(buff, 0, bytesRead) +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// // buffer.position(buffer.position() + bytesRead) +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. 
return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// // Note: tries to do it efficiently without needing to load everything into memory +//// // (particularly for diskbacked buffers, etc). +//// def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { +//// +//// assert (readable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// var totalBytesWritten = 0L +//// +//// while (hasRemaining()) { +//// // Write what we can ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// assert (bufferRemaining > 0) +//// val bytesWritten = channel.write(buffer) +//// +//// if (bytesWritten > 0) { +//// totalBytesWritten += bytesWritten +//// // bump position too .. +//// globalPosition += bytesWritten +//// if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() +//// assert (! hasRemaining() || currentRemaining() > 0) +//// } +//// else if (0 == bytesWritten) { +//// return totalBytesWritten +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// assert (! hasRemaining()) +//// if (cleanup) { +//// free() +//// } +//// totalBytesWritten +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { +//// +//// assert (readable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// var totalBytesWritten = 0L +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // write what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from bytearray to buff and from +//// // buff to outputstream. see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val size = math.min(bufferRemaining, buff.length) +//// buffer.get(buff, 0, size) +//// outStrm.write(buff, 0, size) +//// +//// totalBytesWritten += size +//// // bump position too .. +//// globalPosition += size +//// +//// if (size >= bufferRemaining) toNonEmptyBuffer() +//// } +//// +//// toNonEmptyBuffer() +//// if (cleanup) { +//// free() +//// } +//// totalBytesWritten +//// } +//// +//// def asInputStream(): InputStream = { +//// new InputStream() { +//// override def read(): Int = { +//// if (! hasRemaining()) return -1 +//// get() +//// } +//// +//// override def read(arr: Array[Byte], off: Int, len: Int): Int = { +//// if (! hasRemaining()) return -1 +//// +//// get(arr, off, len) +//// } +//// +//// override def available(): Int = { +//// // current remaining is what can be read without blocking +//// // anything higher might need disk access/buffer swapping. +//// /* +//// val left = remaining() +//// math.min(left, Int.MaxValue).asInstanceOf[Int] +//// */ +//// currentRemaining() +//// } +//// } +//// } +//// +//// def getCleaner() = cleaner +//// +//// /** +//// * @param cleaner The previous cleaner, so that the caller can chain them if required. 
+//// * @return +//// */ +//// private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { +//// overrideCleaner(cleaner, allowOverride = true) +//// } +//// +//// private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { +//// if (! this.allowCleanerOverride) { +//// // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free +//// return this.cleaner +//// } +//// +//// this.allowCleanerOverride = allowOverride +//// assert (null != cleaner) +//// val prev = this.cleaner +//// this.cleaner = cleaner +//// // logInfo("Overriding " + prev + " with " + this.cleaner) +//// prev +//// } +//// +//// private def doReleaseAll() { +//// for (container <- containers) { +//// container.release() +//// } +//// } +//// +//// def free(invokeCleaner: Boolean = true) { +//// // logInfo("Free on " + this + ", cleaner = " + cleaner) +//// // always invoking release +//// doReleaseAll() +//// +//// if (invokeCleaner) cleaner.clean(this) +//// } +//// +//// private def doDispose(needRelease: Boolean) { +//// +//// if (disposeLocationThrowable ne null) { +//// logError("Already free'ed earlier at : ", disposeLocationThrowable) +//// logError("Current at ", new Throwable) +//// throw new IllegalStateException("Already freed.") +//// } +//// disposeLocationThrowable = new Throwable() +//// +//// // Forcefully cleanup all +//// if (needRelease) doReleaseAll() +//// +//// // Free in a different loop, in case different containers refer to same resource +//// // to release (like file) +//// for (container <- containers) { +//// container.free() +//// } +//// +//// needReleaseIndices.clear() +//// +//// // We should not use this buffer anymore : set the values such that f +//// // we dont ... +//// globalPosition = 0 +//// globalLimit = 0 +//// globalCapacity = 0 +//// } +//// +//// // copy data over ... MUST be used only for cases where array is known to be +//// // small to begin with. slightly risky method due to that assumption +//// def toByteArray(): Array[Byte] = { +//// val positionBackup = position() +//// val size = remaining() +//// if (size > Int.MaxValue) { +//// throw new IllegalStateException( +//// "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") +//// } +//// +//// val retval = new Array[Byte](size.asInstanceOf[Int]) +//// val readSize = get(retval, 0, retval.length) +//// assert (readSize == retval.length, +//// "readSize = " + readSize + ", retval.length = " + retval.length) +//// +//// position(positionBackup) +//// +//// retval +//// } +//// +//// // copy data over ... MUST be used only for cases where array is known to be +//// // small to begin with. slightly risky method due to that assumption +//// def toByteBuffer(): ByteBuffer = { +//// ByteBuffer.wrap(toByteArray()) +//// } +//// +//// def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { +//// val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) +//// val currentPosition = position() +//// retval.put(this) +//// position(currentPosition) +//// retval.clear() +//// retval +//// } +//// +//// +//// +//// // This is ONLY used for testing : that too as part of development of this and associated classes +//// // remove before contributing to spark. 
+//// def hexDump(): String = { +//// if (remaining() * 64 > Int.MaxValue) { +//// throw new UnsupportedOperationException("buffer too large " + remaining()) +//// } +//// +//// val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) +//// +//// var perLine = 0 +//// var first = true +//// for (b <- toByteArray()) { +//// perLine += 1 +//// if (perLine % 8 == 0) { +//// sb.append('\n') +//// first = true +//// } +//// if (! first) sb.append(' ') +//// first = false +//// sb.append(java.lang.Integer.toHexString(b & 0xff)) +//// } +//// sb.append('\n') +//// sb.toString() +//// } +//// +//// override def toString: String = { +//// val sb: StringBuffer = new StringBuffer +//// sb.append(getClass.getName) +//// sb.append(' ') +//// sb.append(System.identityHashCode(this)) +//// sb.append("@[pos=") +//// sb.append(position()) +//// sb.append(" lim=") +//// sb.append(limit()) +//// sb.append(" cap=") +//// sb.append(capacity()) +//// sb.append("]") +//// sb.toString +//// } +//// +//// +//// +//// override def finalize(): Unit = { +//// var marked = false +//// if (containers ne null) { +//// if (containers.exists(container => container.isAcquired && container.requireRelease())) { +//// marked = true +//// logError("BUG: buffer was not released - and now going out of scope. " + +//// "Potential resource leak. Allocated at ", allocateLocationThrowable) +//// containers.foreach(_.release()) +//// } +//// if (containers.exists(container => !container.isFreed && container.requireFree())) { +//// if (!marked) { +//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", +//// allocateLocationThrowable) +//// } +//// else { +//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak") +//// } +//// containers.foreach(_.free()) +//// } +//// } +//// super.finalize() +//// } +////} +//// +//// +////object LargeByteBuffer extends Logging { +//// +//// private val noopDisposeFunction = new BufferCleaner() { +//// protected def doClean(buffer: LargeByteBuffer) { +//// buffer.free(invokeCleaner = false) +//// } +//// } +//// +//// val enableExpensiveAssert = false +//// private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) +//// val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( +//// new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) +//// // Do not allow anyone else to override cleaner +//// EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) +//// +//// // 8K sufficient ? 
+//// private val TEMP_ARRAY_SIZE = 8192 +//// +//// /** +//// * Create a LargeByteBuffer of specified size which is split across +//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory +//// * ByteBuffer +//// * +//// */ +//// def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { +//// if (0 == totalSize) { +//// return EMPTY_BUFFER +//// } +//// +//// assert (totalSize > 0) +//// +//// val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) +//// val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) +//// +//// assert (lastBlockSize > 0) +//// +//// val bufferArray = { +//// val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// for (index <- 0 until numBlocks - 1) { +//// val buff = ByteBuffer.allocate(blockSize) +//// // buff.clear() +//// arr += new HeapByteBufferContainer(buff, true) +//// } +//// arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) +//// assert (arr.length == numBlocks) +//// arr +//// } +//// +//// new LargeByteBuffer(bufferArray, false, false) +//// } +//// +//// /** +//// * Create a LargeByteBuffer of specified size which is split across +//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk +//// * +//// */ +//// private def allocateDiskBuffer(totalSize: Long, +//// blockManager: BlockManager): LargeByteBuffer = { +//// if (0 == totalSize) { +//// return EMPTY_BUFFER +//// } +//// +//// assert (totalSize > 0) +//// +//// // Create a file of the specified size. +//// val file = blockManager.diskBlockManager.createTempBlock()._2 +//// val raf = new RandomAccessFile(file, "rw") +//// try { +//// raf.setLength(totalSize) +//// } finally { +//// raf.close() +//// } +//// +//// readWriteDiskSegment(new FileSegment(file, 0, totalSize), +//// ephemeralDiskBacked = true, blockManager.ioConf) +//// } +//// +//// // The returned buffer takes up ownership of the underlying buffers +//// // (including dispos'ing that when done) +//// def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { +//// val nonEmpty = buffers.filter(_.hasRemaining) +//// +//// // cleanup the empty buffers +//// buffers.filter(! _.hasRemaining).foreach(b => BlockManager.dispose(b)) +//// +//// +//// if (nonEmpty.isEmpty) { +//// return EMPTY_BUFFER +//// } +//// +//// // slice so that offsets match our requirement +//// new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => +//// new HeapByteBufferContainer(b.slice(), true)), false, false) +//// } +//// +//// def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { +//// // only non empty arrays +//// val arrays = byteArrays.filter(_.length > 0) +//// if (0 == arrays.length) return EMPTY_BUFFER +//// +//// new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => +//// new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) +//// } +//// +//// def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { +//// +//// if (inputBuffers.isEmpty) return EMPTY_BUFFER +//// +//// if (! inputBuffers.exists(_.hasRemaining())) { +//// if (canDispose) inputBuffers.map(_.free()) +//// return EMPTY_BUFFER +//// } +//// +//// // release all temp resources acquired +//// inputBuffers.foreach(buff => buff.releasePendingContainers()) +//// // free current container if acquired. 
+//// inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { +//// buff.containers(buff.currentContainerIndex).release() +//// }) +//// // inputBuffers.foreach(b => b.doReleaseAll()) +//// +//// +//// // Dispose of any empty buffers +//// if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) +//// +//// // Find all containers we need. +//// val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) +//// +//// val containers = buffers.flatMap(_.containers) +//// assert (! containers.isEmpty) +//// // The in order containers of "buffers" seq constitute the required return value +//// val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, +//// // if you cant dispose, then we dont own the buffers : in which case, need duplicate +//// ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) +//// +//// if (canDispose) { +//// // override dispose of all other buffers. +//// val disposeFunctions = inputBuffers.map { +//// buffer => { +//// (buffer, buffer.overrideCleaner(noopDisposeFunction)) +//// } +//// } +//// +//// val cleaner = retval.getCleaner() +//// val newCleaner = new BufferCleaner { +//// protected def doClean(buffer: LargeByteBuffer) { +//// +//// assert (retval == buffer) +//// // default cleaner. +//// cleaner.clean(retval) +//// // not required, since we are within clean anyway. +//// // retval.free(invokeCleaner = false) +//// +//// // retval.doDispose(needRelease = true) +//// +//// // This might actually call dispose twice on some (initially) empty buffers, +//// // which is fine since we now guard against that. +//// disposeFunctions.foreach(v => v._2.clean(v._1)) +//// // Call the free method too : so that buffers are marked free ... +//// disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) +//// } +//// } +//// +//// val prev = retval.overrideCleaner(newCleaner) +//// assert (prev == cleaner) +//// } +//// +//// retval +//// } +//// +//// private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { +//// if (arr == null) { +//// throw new NullPointerException +//// } else if (offset < 0 || size < 0 || offset + size > arr.length) { +//// throw new IndexOutOfBoundsException +//// } +//// } +//// +//// def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { +//// if (size <= blockManager.ioConf.maxInMemSize) { +//// LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) +//// } else { +//// LargeByteBuffer.allocateDiskBuffer(size, blockManager) +//// } +//// } +//// +//// def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, +//// ephemeralDiskBacked: Boolean): LargeByteBuffer = { +//// // Split the block into multiple of BlockStore.maxBlockSize +//// val segmentSize = segment.length +//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +//// +//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// +//// for (index <- 0 until numBlocks - 1) { +//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +//// segment.offset + index * blockSize, blockSize), ioConf) +//// } +//// +//// // Last block +//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) +//// +//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +//// } +//// 
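Both allocateMemoryBuffer and readFromDiskSegment above lean on the same chunking arithmetic: a long totalSize is covered by (numBlocks - 1) chunks of the configured max block size plus one shorter final chunk. A minimal sketch of that arithmetic, using a hypothetical chunkBuffers helper in place of the IOConfig.numBlocks/lastBlockSize calls the commented code relies on:

import java.nio.ByteBuffer

// Illustrative only: split totalSize into blockSize-sized heap chunks plus a
// shorter last chunk, mirroring the allocateMemoryBuffer sketch above.
def chunkBuffers(totalSize: Long, blockSize: Int): Array[ByteBuffer] = {
  require(totalSize > 0 && blockSize > 0)
  // number of chunks needed to cover totalSize
  val numBlocks = ((totalSize + blockSize - 1) / blockSize).toInt
  // whatever is left over goes into the final chunk (blockSize if it divides evenly)
  val lastBlockSize = (totalSize - (numBlocks - 1).toLong * blockSize).toInt
  val chunks = new Array[ByteBuffer](numBlocks)
  for (i <- 0 until numBlocks - 1) {
    chunks(i) = ByteBuffer.allocate(blockSize)
  }
  chunks(numBlocks - 1) = ByteBuffer.allocate(lastBlockSize)
  chunks
}

Keeping each chunk small enough to index with an Int is what lets the aggregate buffer grow past the 2 GB ceiling of a single ByteBuffer.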
+//// def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, +//// ioConf: IOConfig): LargeByteBuffer = { +//// +//// // Split the block into multiple of BlockStore.maxBlockSize +//// val segmentSize = segment.length +//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +//// +//// logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + +//// ", lastBlockSize = " + lastBlockSize) +//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// +//// for (index <- 0 until numBlocks - 1) { +//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +//// segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) +//// } +//// +//// // Last block +//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) +//// +//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +//// } +////} diff --git a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala index dcbda5a8515dd..644544cf869df 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala @@ -24,7 +24,7 @@ import scala.concurrent.{Promise, Await, Future} import scala.concurrent.duration.Duration import org.apache.spark.Logging -import org.apache.spark.network.buffer.{NioManagedBuffer, ManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBufferHelper, NioManagedBuffer, ManagedBuffer} import org.apache.spark.network.shuffle.{ShuffleClient, BlockFetchingListener} import org.apache.spark.storage.{BlockManagerId, BlockId, StorageLevel} @@ -92,9 +92,10 @@ abstract class BlockTransferService extends ShuffleClient with Closeable with Lo result.failure(exception) } override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = { - val ret = ByteBuffer.allocate(data.size.toInt) + val ret = LargeByteBufferHelper.allocate(data.size) ret.put(data.nioByteBuffer()) - ret.flip() + //XXX do we need ret.flip()?? + ret.position(0l) result.success(new NioManagedBuffer(ret)) } }) diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala index b089da8596e2b..86df34920a666 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConversions._ import org.apache.spark.Logging import org.apache.spark.network.BlockDataManager -import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBufferHelper, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager} import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock} @@ -63,7 +63,7 @@ class NettyBlockRpcServer( // StorageLevel is serialized as bytes using our JavaSerializer. 
val level: StorageLevel = serializer.newInstance().deserialize(ByteBuffer.wrap(uploadBlock.metadata)) - val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData)) + val data = new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(uploadBlock.blockData)) blockManager.putBlockData(BlockId(uploadBlock.blockId), data, level) responseContext.onSuccess(new Array[Byte](0)) } diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 3f0950dae1f24..9824c7c38c188 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -115,13 +115,10 @@ class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManage // Convert or copy nio buffer into array in order to serialize it. val nioBuffer = blockData.nioByteBuffer() - val array = if (nioBuffer.hasArray) { - nioBuffer.array() - } else { - val data = new Array[Byte](nioBuffer.remaining()) - nioBuffer.get(data) - data - } + //TODO key change -- multiple uploads here + // this stub is not even efficient when the buffer actually is small + val array = new Array[Byte](nioBuffer.remaining().toInt) + nioBuffer.get(array, 0, nioBuffer.remaining().toInt) client.sendRpc(new UploadBlock(appId, execId, blockId.toString, levelBytes, array).toByteArray, new RpcResponseCallback { diff --git a/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala index b2aec160635c7..d0ba9d8948594 100644 --- a/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/nio/NioBlockTransferService.scala @@ -20,7 +20,7 @@ package org.apache.spark.network.nio import java.nio.ByteBuffer import org.apache.spark.network._ -import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBuffer, LargeByteBufferHelper, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.shuffle.BlockFetchingListener import org.apache.spark.storage.{BlockId, StorageLevel} import org.apache.spark.util.Utils @@ -116,7 +116,7 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa val blockId = blockMessage.getId val networkSize = blockMessage.getData.limit() listener.onBlockFetchSuccess( - blockId.toString, new NioManagedBuffer(blockMessage.getData)) + blockId.toString, new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(blockMessage.getData))) } } } @@ -143,7 +143,7 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa level: StorageLevel) : Future[Unit] = { checkInit() - val msg = PutBlock(blockId, blockData.nioByteBuffer(), level) + val msg = PutBlock(blockId, blockData.nioByteBuffer().firstByteBuffer(), level) val blockMessageArray = new BlockMessageArray(BlockMessage.fromPutBlock(msg)) val remoteCmId = new ConnectionManagerId(hostName, port) val reply = cm.sendMessageReliably(remoteCmId, blockMessageArray.toBufferMessage) @@ -192,7 +192,7 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa if (buffer == null) { return None } - Some(BlockMessage.fromGotBlock(GotBlock(msg.id, buffer))) + Some(BlockMessage.fromGotBlock(GotBlock(msg.id, buffer.firstByteBuffer()))) case _ => None } @@ 
-201,12 +201,12 @@ final class NioBlockTransferService(conf: SparkConf, securityManager: SecurityMa private def putBlock(blockId: BlockId, bytes: ByteBuffer, level: StorageLevel) { val startTimeMs = System.currentTimeMillis() logDebug("PutBlock " + blockId + " started from " + startTimeMs + " with data: " + bytes) - blockDataManager.putBlockData(blockId, new NioManagedBuffer(bytes), level) + blockDataManager.putBlockData(blockId, new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(bytes)), level) logDebug("PutBlock " + blockId + " used " + Utils.getUsedTimeMs(startTimeMs) + " with data size: " + bytes.limit) } - private def getBlock(blockId: BlockId): ByteBuffer = { + private def getBlock(blockId: BlockId): LargeByteBuffer = { val startTimeMs = System.currentTimeMillis() logDebug("GetBlock " + blockId + " started from " + startTimeMs) val buffer = blockDataManager.getBlockData(blockId) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala index 9428273561cd8..1ea26ee8f9946 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala @@ -20,7 +20,7 @@ package org.apache.spark.scheduler import java.nio.ByteBuffer import java.util.concurrent.RejectedExecutionException -import org.apache.spark.io.WrappedLargeByteBuffer +import org.apache.spark.network.buffer.WrappedLargeByteBuffer import scala.language.existentials import scala.util.control.NonFatal @@ -76,7 +76,7 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul } //TODO either change serializer interface, or ... val deserializedResult = serializer.get().deserialize[DirectTaskResult[_]]( - serializedTaskResult.get.asInstanceOf[WrappedLargeByteBuffer].underlying(0)) + serializedTaskResult.get.firstByteBuffer()) sparkEnv.blockManager.master.removeBlock(blockId) (deserializedResult, size) } diff --git a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala index 016964fc274d8..4c0bc9ccec06e 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala @@ -18,17 +18,14 @@ package org.apache.spark.shuffle import java.io.File -import java.nio.ByteBuffer import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicInteger -import org.apache.spark.io.LargeByteBuffer - import scala.collection.JavaConversions._ import org.apache.spark.{Logging, SparkConf, SparkEnv} import org.apache.spark.executor.ShuffleWriteMetrics -import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBuffer, FileSegmentManagedBuffer, ManagedBuffer} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.FileShuffleBlockManager.ShuffleFileGroup @@ -176,7 +173,7 @@ class FileShuffleBlockManager(conf: SparkConf) override def getBytes(blockId: ShuffleBlockId): Option[LargeByteBuffer] = { //TODO val segment = getBlockData(blockId) - Some(LargeByteBuffer.asLargeByteBuffer(segment.nioByteBuffer())) + Some(segment.nioByteBuffer()) } override def getBlockData(blockId: ShuffleBlockId): ManagedBuffer = { diff --git 
a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala index 76c2e4180c838..4dfdf9987a5fe 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala @@ -18,13 +18,11 @@ package org.apache.spark.shuffle import java.io._ -import java.nio.ByteBuffer import com.google.common.io.ByteStreams -import org.apache.spark.io.LargeByteBuffer import org.apache.spark.{SparkConf, SparkEnv} -import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBuffer, FileSegmentManagedBuffer, ManagedBuffer} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.storage._ @@ -99,7 +97,7 @@ class IndexShuffleBlockManager(conf: SparkConf) extends ShuffleBlockManager { } override def getBytes(blockId: ShuffleBlockId): Option[LargeByteBuffer] = { - Some(LargeByteBuffer.asLargeByteBuffer(getBlockData(blockId).nioByteBuffer())) + Some(getBlockData(blockId).nioByteBuffer()) } override def getBlockData(blockId: ShuffleBlockId): ManagedBuffer = { diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala index fa737729b8758..96dde7f53b84b 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockManager.scala @@ -17,8 +17,7 @@ package org.apache.spark.shuffle -import org.apache.spark.io.LargeByteBuffer -import org.apache.spark.network.buffer.ManagedBuffer +import org.apache.spark.network.buffer.{LargeByteBuffer, ManagedBuffer} import org.apache.spark.storage.ShuffleBlockId private[spark] diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 479fcf35283ad..be63f9cb03d29 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -17,7 +17,7 @@ package org.apache.spark.storage -import java.io.{BufferedOutputStream, ByteArrayOutputStream, File, InputStream, OutputStream} +import java.io.{BufferedOutputStream, File, InputStream, OutputStream} import java.nio.{ByteBuffer, MappedByteBuffer} import scala.collection.mutable.{ArrayBuffer, HashMap} @@ -31,9 +31,9 @@ import sun.nio.ch.DirectBuffer import org.apache.spark._ import org.apache.spark.executor._ -import org.apache.spark.io.{WrappedLargeByteBuffer, ChainedLargeByteBuffer, LargeByteBuffer, CompressionCodec} +import org.apache.spark.io.CompressionCodec import org.apache.spark.network._ -import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBufferHelper, LargeByteBuffer, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.ExternalShuffleClient import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo @@ -307,7 +307,7 @@ private[spark] class BlockManager( shuffleManager.shuffleBlockManager.getBlockData(blockId.asInstanceOf[ShuffleBlockId]) } else { val blockBytesOpt = doGetLocal(blockId, asBlockResult = false) - .asInstanceOf[Option[ByteBuffer]] + .asInstanceOf[Option[LargeByteBuffer]] if (blockBytesOpt.isDefined) { val buffer = blockBytesOpt.get new 
NioManagedBuffer(buffer) @@ -321,7 +321,7 @@ private[spark] class BlockManager( * Put the block locally, using the given storage level. */ override def putBlockData(blockId: BlockId, data: ManagedBuffer, level: StorageLevel): Unit = { - putBytes(blockId, LargeByteBuffer.asLargeByteBuffer(data.nioByteBuffer()), level) + putBytes(blockId, data.nioByteBuffer(), level) } /** @@ -538,10 +538,10 @@ private[spark] class BlockManager( /* We'll store the bytes in memory if the block's storage level includes * "memory serialized", or if it should be cached as objects in memory * but we only requested its serialized bytes. */ - val copyForMemory = LargeByteBuffer.allocateOnHeap(bytes.limit, largeByteBufferChunkSize) + val copyForMemory = LargeByteBufferHelper.allocate(bytes.limit) copyForMemory.put(bytes) memoryStore.putBytes(blockId, copyForMemory, level) - bytes.rewind() + bytes.position(0l) } if (!asBlockResult) { return Some(bytes) @@ -595,8 +595,8 @@ private[spark] class BlockManager( for (loc <- locations) { logDebug(s"Getting remote block $blockId from $loc") //TODO the fetch will always be one byte buffer till we fix SPARK-5928 - val data: LargeByteBuffer = LargeByteBuffer.asLargeByteBuffer(blockTransferService.fetchBlockSync( - loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer()) + val data: LargeByteBuffer = blockTransferService.fetchBlockSync( + loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer() if (data != null) { if (asBlockResult) { @@ -791,7 +791,7 @@ private[spark] class BlockManager( case ArrayValues(array) => blockStore.putArray(blockId, array, putLevel, returnValues) case ByteBufferValues(bytes) => - bytes.rewind() + bytes.position(0l) blockStore.putBytes(blockId, bytes, putLevel) } size = result.size @@ -942,7 +942,7 @@ private[spark] class BlockManager( case Some(peer) => try { val onePeerStartTime = System.currentTimeMillis - data.rewind() + data.position(0l) logTrace(s"Trying to replicate $blockId of ${data.limit()} bytes to $peer") //TODO //ACK! here we're stuck -- we can't replicate a large block until we figure out @@ -1201,7 +1201,7 @@ private[spark] class BlockManager( blockId: BlockId, bytes: LargeByteBuffer, serializer: Serializer = defaultSerializer): Iterator[Any] = { - bytes.rewind() + bytes.position(0); val stream = wrapForCompression(blockId, new LargeByteBufferInputStream(bytes, true)) serializer.newInstance().deserializeStream(stream).asIterator } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockStore.scala b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala index 38989f0c07681..e1b48bf11bcc7 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala @@ -17,12 +17,8 @@ package org.apache.spark.storage -import java.nio.ByteBuffer - -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.Logging -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.LargeByteBuffer /** * Abstract class to store blocks. 
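The BlockManager and BlockStore hunks above repeat one pattern: every ByteBuffer.rewind() becomes position(0L) on a LargeByteBuffer, and the in-memory copy is built by allocating a buffer of the same size, put()-ing the source into it, and resetting both positions. A hedged sketch of that pattern, written against the allocate/put/position/limit calls the patch itself uses elsewhere (the helper name makeMemoryCopy is illustrative, not part of the patch):

import org.apache.spark.network.buffer.{LargeByteBuffer, LargeByteBufferHelper}

// Illustrative only: copy a LargeByteBuffer for the memory store and leave the
// source readable again, the way doGetLocal does above.
def makeMemoryCopy(bytes: LargeByteBuffer): LargeByteBuffer = {
  val copy = LargeByteBufferHelper.allocate(bytes.limit())  // same total size
  copy.put(bytes)          // may copy across several underlying chunks
  copy.position(0L)        // stands in for the old ByteBuffer flip()/rewind()
  bytes.position(0L)       // reset the source so the caller can still read it
  copy
}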
diff --git a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala index 180b237a1a1f2..5ef9929feaabf 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala @@ -22,7 +22,7 @@ import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode import org.apache.spark.Logging -import org.apache.spark.io.{WrappedLargeByteBuffer, LargeByteBuffer} +import org.apache.spark.network.buffer.{LargeByteBufferHelper, LargeByteBuffer} import org.apache.spark.serializer.Serializer import org.apache.spark.util.Utils @@ -118,9 +118,9 @@ private[spark] class DiskStore(blockManager: BlockManager, diskManager: DiskBloc } } buf.flip() - Some(LargeByteBuffer.asLargeByteBuffer(buf)) + Some(LargeByteBufferHelper.asLargeByteBuffer(buf)) } else { - Some(LargeByteBuffer.mapFile(channel, MapMode.READ_ONLY, offset, length)) + Some(LargeByteBufferHelper.mapFile(channel, MapMode.READ_ONLY, offset, length)) } } finally { channel.close() diff --git a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala index 8fccc0f3e78d2..074919fdeb706 100644 --- a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala @@ -19,7 +19,7 @@ package org.apache.spark.storage import java.util.LinkedHashMap -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.LargeByteBuffer import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -81,7 +81,7 @@ private[spark] class MemoryStore(blockManager: BlockManager, maxMemory: Long) override def putBytes(blockId: BlockId, _bytes: LargeByteBuffer, level: StorageLevel): PutResult = { // Work on a duplicate - since the original input might be used elsewhere. val bytes = _bytes.duplicate() - bytes.rewind() + bytes.position(0l); if (level.deserialized) { val values = blockManager.dataDeserialize(blockId, bytes) putIterator(blockId, values, level, returnValues = true) diff --git a/core/src/main/scala/org/apache/spark/storage/PutResult.scala b/core/src/main/scala/org/apache/spark/storage/PutResult.scala index 2e00934bde243..aa9176791b319 100644 --- a/core/src/main/scala/org/apache/spark/storage/PutResult.scala +++ b/core/src/main/scala/org/apache/spark/storage/PutResult.scala @@ -17,7 +17,7 @@ package org.apache.spark.storage -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.LargeByteBuffer /** * Result of adding a block into a BlockStore. This case class contains a few things: diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala index 9f964ed456d5e..62cfd9e65eaec 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonStore.scala @@ -21,7 +21,7 @@ import java.io.IOException import java.nio.ByteBuffer import com.google.common.io.ByteStreams -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.{LargeByteBufferHelper, LargeByteBuffer} import tachyon.client.{ReadType, WriteType} import org.apache.spark.Logging @@ -70,7 +70,7 @@ private[spark] class TachyonStore( // So that we do not modify the input offsets ! 
// duplicate does not copy buffer, so inexpensive val byteBuffer = bytes.duplicate() - byteBuffer.rewind() + byteBuffer.position(0l) logDebug(s"Attempting to put block $blockId into Tachyon") val startTime = System.currentTimeMillis val file = tachyonManager.getFile(blockId) @@ -114,7 +114,7 @@ private[spark] class TachyonStore( //TODO val bs = new Array[Byte](size.asInstanceOf[Int]) ByteStreams.readFully(is, bs) - Some(LargeByteBuffer.asLargeByteBuffer(ByteBuffer.wrap(bs))) + Some(LargeByteBufferHelper.asLargeByteBuffer(bs)) } catch { case ioe: IOException => logWarning(s"Failed to fetch the block $blockId from Tachyon", ioe) diff --git a/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala b/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala index 26f2d7848bb29..98e41906b8251 100644 --- a/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/LargeByteBufferInputStream.scala @@ -18,9 +18,8 @@ package org.apache.spark.util import java.io.InputStream -import java.nio.ByteBuffer -import org.apache.spark.io.LargeByteBuffer +import org.apache.spark.network.buffer.LargeByteBuffer import org.apache.spark.storage.BlockManager /** diff --git a/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala b/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala index 2fe904b10c53c..6c61d52ecca75 100644 --- a/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/LargeByteBufferOutputStream.scala @@ -19,7 +19,8 @@ package org.apache.spark.util import java.io.OutputStream -import org.apache.spark.io.{ChainedLargeByteBuffer, LargeByteBuffer} +import org.apache.spark.io.ChainedLargeByteBuffer +import org.apache.spark.network.buffer.LargeByteBuffer import org.apache.spark.util.collection.ChainedBuffer private[spark] diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala index 94bfa67451892..6c01db61168b3 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala @@ -25,7 +25,7 @@ import scala.concurrent.{Await, Promise} import scala.util.{Failure, Success, Try} import org.apache.commons.io.IOUtils -import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.buffer.{LargeByteBufferHelper, LargeByteBuffer, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.shuffle.BlockFetchingListener import org.apache.spark.network.{BlockDataManager, BlockTransferService} import org.apache.spark.storage.{BlockId, ShuffleBlockId} @@ -100,7 +100,7 @@ class NettyBlockTransferSecuritySuite extends FunSuite with MockitoSugar with Sh val blockManager = mock[BlockDataManager] val blockId = ShuffleBlockId(0, 1, 2) val blockString = "Hello, world!" 
- val blockBuffer = new NioManagedBuffer(ByteBuffer.wrap(blockString.getBytes)) + val blockBuffer = new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(blockString.getBytes)) when(blockManager.getBlockData(blockId)).thenReturn(blockBuffer) val securityManager0 = new SecurityManager(conf0) diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala new file mode 100644 index 0000000000000..b242ff593f04a --- /dev/null +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSuite.scala @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.network.netty + +import java.util.concurrent.TimeUnit + +import org.apache.spark.network.BlockDataManager +import org.apache.spark.network.buffer.{ManagedBuffer, LargeByteBufferHelper, NioManagedBuffer} +import org.apache.spark.network.shuffle.BlockFetchingListener +import org.apache.spark.storage.ShuffleBlockId +import org.apache.spark.{SecurityManager, SparkConf} +import org.mockito.Mockito._ +import org.scalatest.mock.MockitoSugar +import org.scalatest.{Matchers, FunSuite} + +import scala.concurrent.duration.FiniteDuration +import scala.concurrent.{Await, Promise} + +class NettyBlockTransferSuite extends FunSuite with Matchers with MockitoSugar { + + val conf = new SparkConf() + .set("spark.app.id", "app-id") + val securityManager = new SecurityManager(conf) + + + + test("simple fetch") { + + val blockManager = mock[BlockDataManager] + val blockId = ShuffleBlockId(0, 1, 2) + val blockString = "Hello, world!" 
+ val blockBuffer = new NioManagedBuffer(LargeByteBufferHelper.asLargeByteBuffer(blockString.getBytes)) + when(blockManager.getBlockData(blockId)).thenReturn(blockBuffer) + + val from = new NettyBlockTransferService(conf, securityManager, numCores = 1) + from.init(blockManager) + val to = new NettyBlockTransferService(conf, securityManager, numCores = 1) + to.init(blockManager) + + try { + val promise = Promise[ManagedBuffer]() + + to.fetchBlocks(from.hostName, from.port, "1", Array(blockId.toString), + new BlockFetchingListener { + override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = { + promise.failure(exception) + } + + override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = { + promise.success(data.retain()) + } + }) + + Await.ready(promise.future, FiniteDuration(1000, TimeUnit.MILLISECONDS)) + } finally { + from.close() + to.close() + } + + + } + + + + +} diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java index 75c426f379238..213013e14b7c6 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java @@ -1,19 +1,19 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ package org.apache.spark.network.buffer; @@ -34,8 +34,8 @@ import org.apache.spark.network.util.TransportConf; /** - * A {@link ManagedBuffer} backed by a segment in a file. - */ +* A {@link ManagedBuffer} backed by a segment in a file. 
+*/ public final class FileSegmentManagedBuffer extends ManagedBuffer { private final TransportConf conf; private final File file; diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java index c56fdebb887a1..4997dcecc3370 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBuffer.java @@ -17,6 +17,7 @@ package org.apache.spark.network.buffer; import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.channels.WritableByteChannel; public interface LargeByteBuffer { @@ -35,6 +36,8 @@ public interface LargeByteBuffer { public void put(LargeByteBuffer bytes); + public long remaining(); + //TODO checks on limit semantics /** @@ -53,4 +56,7 @@ public interface LargeByteBuffer { public long writeTo(WritableByteChannel channel) throws IOException; + //TODO this should be deleted -- just to help me get going + public ByteBuffer firstByteBuffer(); + } diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java index 2585b65b1f969..5fe01d87e36e9 100644 --- a/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java +++ b/network/common/src/main/java/org/apache/spark/network/buffer/LargeByteBufferHelper.java @@ -29,6 +29,23 @@ public static LargeByteBuffer asLargeByteBuffer(ByteBuffer buffer) { return new WrappedLargeByteBuffer(new ByteBuffer[]{buffer}); } + public static LargeByteBuffer asLargeByteBuffer(byte[] bytes) { + return new WrappedLargeByteBuffer(new ByteBuffer[]{ByteBuffer.wrap(bytes)}); + } + + public static LargeByteBuffer allocate(long size) { + ArrayList +//// *Invoke this method before using a sequence of channel-read or +//// * put operations to fill this buffer. +//// * +//// *
This method does not actually erase the data in the buffer, but it +//// * is named as if it did because it will most often be used in situations +//// * in which that might as well be the case.
+//// */ +//// def clear(): Unit +//// +//// /** +//// * Flips this buffer. The limit is set to the current position and then +//// * the position is set to zero. If the mark is defined then it is +//// * discarded. +//// * +//// *After a sequence of channel-read or put operations, invoke +//// * this method to prepare for a sequence of channel-write or relative +//// * get operations. +//// */ +//// def flip(): Unit +// +// /** +// * Rewinds this buffer. The position is set to zero and the mark is +// * discarded. +// * +// *
Invoke this method before a sequence of channel-write or get +// * operations, assuming that the limit has already been set +// * appropriately. +// */ +// def rewind(): Unit +// +// /** +// * Returns the number of elements between the current position and the +// * limit.
+// * +// * @return The number of elements remaining in this buffer +// */ +// def remaining(): Long +//} +//// +////class ChainedLargeByteBuffer(private[network] val underlying: ChainedBuffer) extends LargeByteBuffer { +//// +//// def capacity = underlying.capacity +//// +//// var _pos = 0l +//// +//// def get(dst: Array[Byte],offset: Int,length: Int): Unit = { +//// underlying.read(_pos, dst, offset, length) +//// _pos += length +//// } +//// +//// def get(): Byte = { +//// val b = underlying.read(_pos) +//// _pos += 1 +//// b +//// } +//// +//// def put(bytes: LargeByteBuffer): Unit = { +//// ??? +//// } +//// +//// def position: Long = _pos +//// def position(position: Long): Unit = { +//// _pos = position +//// } +//// def remaining(): Long = { +//// underlying.size - position +//// } +//// +//// def duplicate(): ChainedLargeByteBuffer = { +//// new ChainedLargeByteBuffer(underlying) +//// } +//// +//// def rewind(): Unit = { +//// _pos = 0 +//// } +//// +//// def limit(): Long = { +//// capacity +//// } +//// +//// def limit(newLimit: Long): Unit = { +//// ??? +//// } +//// +//// def writeTo(channel:WritableByteChannel): Long = { +//// var written = 0l +//// underlying.chunks.foreach{bytes => +//// //TODO test this +//// val buffer = ByteBuffer.wrap(bytes) +//// while (buffer.hasRemaining) +//// channel.write(buffer) +//// written += bytes.length +//// } +//// written +//// } +////} +// +//class WrappedLargeByteBuffer(private[spark] val underlying: Array[ByteBuffer]) extends LargeByteBuffer { +// +// val (totalCapacity, chunkOffsets) = { +// var sum = 0l +// val offsets = new Array[Long](underlying.size) +// (0 until underlying.size).foreach{idx => +// offsets(idx) = sum +// sum += underlying(idx).capacity() +// } +// (sum, offsets) +// } +// +// private var _pos = 0l +// private var currentBufferIdx = 0 +// private var currentBuffer = underlying(0) +// private var _limit = totalCapacity +// +// def capacity = totalCapacity +// +// def get(dst: Array[Byte], offset: Int, length: Int): Unit = { +// var moved = 0 +// while (moved < length) { +// val toRead = math.min(length - moved, currentBuffer.remaining()) +// currentBuffer.get(dst, offset, toRead) +// moved += toRead +// updateCurrentBuffer() +// } +// } +// +// def get(): Byte = { +// val r = currentBuffer.get() +// _pos += 1 +// updateCurrentBuffer() +// r +// } +// +// private def updateCurrentBuffer(): Unit = { +// //TODO fix end condition +// while(!currentBuffer.hasRemaining()) { +// currentBufferIdx += 1 +// currentBuffer = underlying(currentBufferIdx) +// } +// } +// +// def put(bytes: LargeByteBuffer): Unit = { +// ??? +// } +// +// def position: Long = _pos +// def position(position: Long): Unit = { +// //XXX check range? +// _pos = position +// } +// def remaining(): Long = { +// totalCapacity - _pos +// } +// +// def duplicate(): WrappedLargeByteBuffer = { +// new WrappedLargeByteBuffer(underlying.map{_.duplicate()}) +// } +// +// def rewind(): Unit = { +// _pos = 0 +// underlying.foreach{_.rewind()} +// } +// +// def limit(): Long = { +// totalCapacity +// } +// +// def limit(newLimit: Long) = { +// //XXX check range? set limits in sub buffers? +// _limit = newLimit +// } +// +// def writeTo(channel: WritableByteChannel): Long = { +// var written = 0l +// underlying.foreach{buffer => +// //TODO test this +// //XXX do we care about respecting the limit here? 
+// written += buffer.remaining() +// while (buffer.hasRemaining) +// channel.write(buffer) +// } +// written +// } +// +//} +// +//object LargeByteBuffer { +// +// def asLargeByteBuffer(byteBuffer: ByteBuffer): LargeByteBuffer = { +// new WrappedLargeByteBuffer(Array(byteBuffer)) +// } +// +// def asLargeByteBuffer(bytes: Array[Byte]): LargeByteBuffer = { +// new WrappedLargeByteBuffer(Array(ByteBuffer.wrap(bytes))) +// } +// +//// +//// def allocateOnHeap(size: Long, maxChunk: Int): LargeByteBuffer = { +//// val buffer = ChainedBuffer.withInitialSize(maxChunk, size) +//// new ChainedLargeByteBuffer(buffer) +//// } +// +// def mapFile( +// channel: FileChannel, +// mode: MapMode, +// offset: Long, +// length: Long, +// maxChunk: Int = Integer.MAX_VALUE - 1e6.toInt +// ): LargeByteBuffer = { +// val offsets = new ArrayBuffer[Long]() +// var curOffset = offset +// val end = offset + length +// while (curOffset < end) { +// offsets += curOffset +// val length = math.min(end - curOffset, maxChunk) +// curOffset += length +// } +// offsets += end +// val chunks = new Array[ByteBuffer](offsets.size - 1) +// (0 until offsets.size - 1).foreach{idx => +// chunks(idx) = channel.map(mode, offsets(idx), offsets(idx + 1) - offsets(idx)) +// } +// new WrappedLargeByteBuffer(chunks) +// } +//} +// +// +//// +/////** +//// * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G +//// * which ByteBuffers are limited to. +//// * Externally, it exposes all the api which java.nio.ByteBuffer exposes. +//// * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. +//// * Not all the data might be loaded into memory (like disk or tachyon data) - so actual +//// * memory footprint - heap and vm could be much lower than capacity. +//// * +//// * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this +//// * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! +//// * +//// * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this +//// * will require the file to be kept open (repeatedly opening/closing file is not good +//// * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is +//// * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) +//// * +//// * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is +//// * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some +//// * cases (when we duplicate/slice them). Currently spark does not need this, but might in future +//// * so relook at it later. +//// */ +////// We should make this constructor private: but for now, +////// leaving it public since TachyonStore needs it +////class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], +//// private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { +//// +//// // TODO: TEMP code: to flush out potential resource leaks. 
REMOVE ME +//// private val allocateLocationThrowable: Throwable = { +//// if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { +//// new Throwable("blockId = " + BlockManager.getLookupBlockId) +//// } else { +//// null +//// } +//// } +//// private var disposeLocationThrowable: Throwable = null +//// +//// @volatile private var allowCleanerOverride = true +//// @volatile private var cleaner: BufferCleaner = new BufferCleaner { +//// override def doClean(buffer: LargeByteBuffer) = { +//// assert (LargeByteBuffer.this == buffer) +//// doDispose(needRelease = false) +//// } +//// } +//// +//// // should not be empty +//// assert (null != inputContainers && ! inputContainers.isEmpty) +//// // should not have any null's +//// assert (inputContainers.find(_ == null).isEmpty) +//// +//// // println("Num containers = " + inputContainers.size) +//// +//// // Position, limit and capacity relevant over the engire LargeByteBuffer +//// @volatile private var globalPosition = 0L +//// @volatile private var globalLimit = 0L +//// @volatile private var currentContainerIndex = 0 +//// +//// // The buffers in which the actual data is held. +//// private var containers: Array[ByteBufferContainer] = null +//// +//// // aggregate capacities of the individual buffers. +//// // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be +//// // sum of capacity of 0th and 1st block buffer +//// private var bufferPositionStart: Array[Long] = null +//// +//// // Contains the indices of a containers which requires release before subsequent invocation of +//// // read/write should be serviced. This is required since current read/write might have moved the +//// // position but since we are returning bytebuffers which depend on the validity of the existing +//// // bytebuffer, we cant release them yet. +//// private var needReleaseIndices = new HashSet[Int]() +//// +//// private val readable = ! inputContainers.exists(! _.isReadable) +//// private val writable = ! inputContainers.exists(! 
_.isWritable) +//// +//// +//// // initialize +//// @volatile private var globalCapacity = { +//// +//// // Ensure that there are no empty buffers : messes up with our code : unless it +//// // is a single buffer (for empty buffer for marker case) +//// assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) +//// +//// containers = { +//// if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray +//// } +//// containers.foreach(_.validate()) +//// +//// def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { +//// val buff = new ArrayBuffer[Long](arr.length + 1) +//// buff += 0L +//// +//// buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) +//// assert (buff.length == arr.length + 1) +//// bufferPositionStart = buff.toArray +//// } +//// +//// initializeBufferPositionStart(containers) +//// +//// // remove references from inputBuffers +//// inputContainers.clear() +//// +//// globalLimit = bufferPositionStart(containers.length) +//// globalPosition = 0L +//// currentContainerIndex = 0 +//// +//// assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) +//// +//// globalLimit +//// } +//// +//// final def position(): Long = globalPosition +//// +//// final def limit(): Long = globalLimit +//// +//// final def capacity(): Long = globalCapacity +//// +//// final def limit(newLimit: Long) { +//// if ((newLimit > capacity()) || (newLimit < 0)) { +//// throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) +//// } +//// +//// globalLimit = newLimit +//// if (position() > newLimit) position(newLimit) +//// } +//// +//// def skip(skipBy: Long) = position(position() + skipBy) +//// +//// private def releasePendingContainers() { +//// if (! needReleaseIndices.isEmpty) { +//// val iter = needReleaseIndices.iterator +//// while (iter.hasNext) { +//// val index = iter.next() +//// assert (index >= 0 && index < containers.length) +//// // It is possible to move from one container to next before the previous +//// // container was acquired. For example, get forcing move to next container +//// // since current was exhausted immediatelly followed by a position() +//// // so the container we moved to was never acquired. +//// +//// // assert (containers(index).isAcquired) +//// // will this always be satisfied ? +//// // assert (index != currentContainerIndex) +//// if (containers(index).isAcquired) containers(index).release() +//// } +//// needReleaseIndices.clear() +//// } +//// } +//// +//// private def toNewContainer(newIndex: Int) { +//// if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { +//// +//// assert (currentContainerIndex >= 0) +//// needReleaseIndices += currentContainerIndex +//// } +//// currentContainerIndex = newIndex +//// } +//// +//// // expensive method, sigh ... optimize it later ? +//// final def position(newPosition: Long) { +//// +//// if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() +//// +//// if (currentContainerIndex < bufferPositionStart.length - 1 && +//// newPosition >= bufferPositionStart(currentContainerIndex) && +//// newPosition < bufferPositionStart(currentContainerIndex + 1)) { +//// // Same buffer - easy method ... +//// globalPosition = newPosition +//// // Changed position - free previously returned buffers. 
+//// releasePendingContainers() +//// return +//// } +//// +//// // Find appropriate currentContainerIndex +//// // Since bufferPositionStart is sorted, can be replaced with binary search if required. +//// // For now, not in the perf critical path since buffers size is very low typically. +//// var index = 0 +//// val cLen = containers.length +//// while (index < cLen) { +//// if (newPosition >= bufferPositionStart(index) && +//// newPosition < bufferPositionStart(index + 1)) { +//// globalPosition = newPosition +//// toNewContainer(index) +//// // Changed position - free earlier and previously returned buffers. +//// releasePendingContainers() +//// return +//// } +//// index += 1 +//// } +//// +//// if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { +//// // boundary. +//// globalPosition = newPosition +//// toNewContainer(cLen) +//// // Changed position - free earlier and previously returned buffers. +//// releasePendingContainers() +//// return +//// } +//// +//// assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) +//// } +//// +//// +//// /** +//// * Clears this buffer. The position is set to zero, the limit is set to +//// * the capacity, and the mark is discarded. +//// * +//// *Invoke this method before using a sequence of channel-read or +//// * put operations to fill this buffer. +//// * +//// *
This method does not actually erase the data in the buffer, but it +//// * is named as if it did because it will most often be used in situations +//// * in which that might as well be the case.
+//// */ +//// final def clear() { +//// // if (0 == globalCapacity) return +//// +//// needReleaseIndices += 0 +//// globalPosition = 0L +//// toNewContainer(0) +//// globalLimit = globalCapacity +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Flips this buffer. The limit is set to the current position and then +//// * the position is set to zero. If the mark is defined then it is +//// * discarded. +//// * +//// *After a sequence of channel-read or put operations, invoke +//// * this method to prepare for a sequence of channel-write or relative +//// * get operations. +//// */ +//// final def flip() { +//// needReleaseIndices += 0 +//// globalLimit = globalPosition +//// globalPosition = 0L +//// toNewContainer(0) +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Rewinds this buffer. The position is set to zero and the mark is +//// * discarded. +//// * +//// *
Invoke this method before a sequence of channel-write or get +//// * operations, assuming that the limit has already been set +//// * appropriately. +//// */ +//// final def rewind() { +//// needReleaseIndices += 0 +//// globalPosition = 0L +//// toNewContainer(0) +//// +//// // Now free all pending containers +//// releasePendingContainers() +//// } +//// +//// /** +//// * Returns the number of elements between the current position and the +//// * limit.
+//// * +//// * @return The number of elements remaining in this buffer +//// */ +//// final def remaining(): Long = { +//// globalLimit - globalPosition +//// } +//// +//// /** +//// * Tells whether there are any elements between the current position and +//// * the limit. +//// * +//// * @return true if, and only if, there is at least one element +//// * remaining in this buffer +//// */ +//// final def hasRemaining() = { +//// globalPosition < globalLimit +//// } +//// +//// // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) +//// +//// // number of bytes remaining in currently active underlying buffer +//// private def currentRemaining(): Int = { +//// if (hasRemaining()) { +//// // validate currentContainerIndex is valid +//// assert (globalPosition >= bufferPositionStart(currentContainerIndex) && +//// globalPosition < bufferPositionStart(currentContainerIndex + 1), +//// "globalPosition = " + globalPosition + +//// ", currentContainerIndex = " + currentContainerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// +//// currentRemaining0(currentContainerIndex) +//// } else 0 +//// } +//// +//// // Without any validation : required when we are bumping the index (when validation will fail) ... +//// private def currentRemaining0(which: Int): Int = { +//// // currentBuffer().remaining() +//// math.max(0, math.min(bufferPositionStart(which + 1), +//// globalLimit) - globalPosition).asInstanceOf[Int] +//// } +//// +//// // Set the approppriate position/limit for the current underlying buffer to mirror our +//// // the LargeByteBuffer's state. +//// private def fetchCurrentBuffer(): ByteBuffer = { +//// releasePendingContainers() +//// +//// assert (currentContainerIndex < containers.length) +//// +//// val container = containers(currentContainerIndex) +//// if (! container.isAcquired) { +//// container.acquire() +//// } +//// +//// assert (container.isAcquired) +//// if (LargeByteBuffer.enableExpensiveAssert) { +//// assert (! containers.exists( b => (b ne container) && b.isAcquired)) +//// } +//// +//// assert (currentContainerIndex < bufferPositionStart.length && +//// globalPosition < bufferPositionStart(currentContainerIndex + 1), +//// "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + +//// bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) +//// +//// val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). +//// asInstanceOf[Int] +//// +//// val buffer = container.getByteBuffer +//// buffer.position(buffPosition) +//// val diff = buffer.capacity - buffPosition +//// val left = remaining() +//// if (diff <= left) { +//// buffer.limit(buffer.capacity()) +//// } else { +//// // Can happen if limit() was called. +//// buffer.limit(buffPosition + left.asInstanceOf[Int]) +//// } +//// +//// buffer +//// } +//// +//// // To be used ONLY to test in suites. +//// private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { +//// if ("1" != System.getProperty("SPARK_TESTING")) { +//// throw new IllegalStateException("This method is to be used ONLY within spark test suites") +//// } +//// +//// fetchCurrentBuffer() +//// } +//// +//// // Expects that the invoker has ensured that this can be safely invoked. +//// // That is, it wont be invoked when the loop wont terminate. +//// private def toNonEmptyBuffer() { +//// +//// if (! hasRemaining()) { +//// var newIndex = currentContainerIndex +//// // Ensure we are in the right block or not. 
+//// while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { +//// newIndex += 1 +//// } +//// toNewContainer(newIndex) +//// // Do not do this - since we might not yet have consumed the buffer which caused EOF right now +//// /* +//// // Add last one also, and release it too - since we are at the end of the buffer with nothing +//// // more pending. +//// if (newIndex >= 0 && currentContainerIndex < containers.length) { +//// needReleaseIndices += newIndex +//// } +//// */ +//// assert (currentContainerIndex >= 0) +//// // releasePendingContainers() +//// return +//// } +//// +//// var index = currentContainerIndex +//// while (0 == currentRemaining0(index) && index < containers.length) { +//// index += 1 +//// } +//// assert (currentContainerIndex < containers.length) +//// toNewContainer(index) +//// assert (0 != currentRemaining()) +//// } +//// +//// private def assertPreconditions(containerIndex: Int) { +//// assert (globalPosition >= bufferPositionStart(containerIndex), +//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// assert (globalPosition < bufferPositionStart(containerIndex + 1), +//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + +//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) +//// +//// assert (globalLimit <= globalCapacity) +//// assert (containerIndex < containers.length) +//// } +//// +//// +//// /** +//// * Attempts to return a ByteBuffer of the requested size. +//// * It is possible to return a buffer of size smaller than requested +//// * even though hasRemaining == true +//// * +//// * On return, position would have been moved 'ahead' by the size of the buffer returned : +//// * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer +//// * +//// * +//// * This is used to primarily retrieve content of this buffer to expose via ByteBuffer +//// * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the +//// * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer +//// * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying +//// * container is a disk backed container, and we make subsequent calls to get(), the returned +//// * ByteBuffer can be dispose'ed off +//// * +//// * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
+//// * @return +//// */ +//// +//// private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { +//// fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) +//// } +//// +//// private def fetchBufferOfSizeImpl(maxChunkSize: Int, +//// canReleaseContainers: Boolean): ByteBuffer = { +//// if (canReleaseContainers) releasePendingContainers() +//// assert (maxChunkSize > 0) +//// +//// // not checking for degenerate case of maxChunkSize == 0 +//// if (globalPosition >= globalLimit) { +//// // throw exception +//// throw new BufferUnderflowException() +//// } +//// +//// // Check preconditions : disable these later, since they might be expensive to +//// // evaluate for every IO op +//// assertPreconditions(currentContainerIndex) +//// +//// val currentBufferRemaining = currentRemaining() +//// +//// assert (currentBufferRemaining > 0) +//// +//// val size = math.min(currentBufferRemaining, maxChunkSize) +//// +//// val newBuffer = if (currentBufferRemaining > maxChunkSize) { +//// val currentBuffer = fetchCurrentBuffer() +//// val buff = ByteBufferContainer.createSlice(currentBuffer, +//// currentBuffer.position(), maxChunkSize) +//// assert (buff.remaining() == maxChunkSize) +//// buff +//// } else { +//// val currentBuffer = fetchCurrentBuffer() +//// val buff = currentBuffer.slice() +//// assert (buff.remaining() == currentBufferRemaining) +//// buff +//// } +//// +//// assert (size == newBuffer.remaining()) +//// assert (0 == newBuffer.position()) +//// assert (size == newBuffer.limit()) +//// assert (newBuffer.capacity() == newBuffer.limit()) +//// +//// globalPosition += newBuffer.remaining +//// toNonEmptyBuffer() +//// +//// newBuffer +//// } +//// +//// // Can we service the read/write from the currently active (underlying) bytebuffer or not. +//// // For almost all cases, this will return true allowing us to optimize away the more expensive +//// // computations. +//// private def localReadWritePossible(size: Int) = +//// size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) +//// +//// +//// def getLong(): Long = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 8) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(8)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 8) +//// val retval = buff.getLong +//// globalPosition += 8 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// val buff = readFully(8) +//// buff.getLong +//// } +//// +//// def getInt(): Int = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 4) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(4)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 4) +//// val retval = buff.getInt +//// globalPosition += 4 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// val buff = readFully(4) +//// buff.getInt +//// } +//// +//// def getChar(): Char = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (remaining() < 2) throw new BufferUnderflowException +//// +//// if (localReadWritePossible(2)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 2) +//// val retval = buff.getChar +//// globalPosition += 2 +//// toNonEmptyBuffer() +//// return retval +//// } +//// +//// // if slice is becoming too expensive, revisit this ... 
+//// val buff = readFully(2) +//// buff.getChar +//// } +//// +//// def get(): Byte = { +//// assert (readable) +//// releasePendingContainers() +//// +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// // If we have remaining bytes, previous invocations MUST have ensured that we are at +//// // a buffer which has data to be read. +//// assert (localReadWritePossible(1)) +//// +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) +//// val retval = buff.get() +//// globalPosition += 1 +//// toNonEmptyBuffer() +//// +//// retval +//// } +//// +//// def get(arr: Array[Byte], offset: Int, size: Int): Int = { +//// assert (readable) +//// releasePendingContainers() +//// +//// LargeByteBuffer.checkOffsets(arr, offset, size) +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return 0 +//// +//// if (! hasRemaining()) return -1 +//// +//// if (localReadWritePossible(size)) { +//// val buff = fetchCurrentBuffer() +//// assert (buff.remaining() >= size) +//// buff.get(arr, offset, size) +//// globalPosition += size +//// toNonEmptyBuffer() +//// return size +//// } +//// +//// var remainingSize = math.min(size, remaining()).asInstanceOf[Int] +//// var currentOffset = offset +//// +//// while (remainingSize > 0) { +//// val buff = fetchBufferOfSize(remainingSize) +//// val toCopy = math.min(buff.remaining(), remainingSize) +//// +//// buff.get(arr, currentOffset, toCopy) +//// currentOffset += toCopy +//// remainingSize -= toCopy +//// } +//// +//// currentOffset - offset +//// } +//// +//// +//// private def createSlice(size: Long): LargeByteBuffer = { +//// +//// releasePendingContainers() +//// +//// if (remaining() < size) { +//// // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +//// +//// val arr = new ArrayBuffer[ByteBufferContainer](2) +//// var totalLeft = size +//// +//// // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) +//// +//// var containerIndex = currentContainerIndex +//// while (totalLeft > 0 && hasRemaining()) { +//// assertPreconditions(containerIndex) +//// val container = containers(containerIndex) +//// val currentLeft = currentRemaining0(containerIndex) +//// +//// assert (globalPosition + currentLeft <= globalLimit) +//// assert (globalPosition >= bufferPositionStart(containerIndex) && +//// (globalPosition < bufferPositionStart(containerIndex + 1))) +//// +//// val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] +//// val sliceSize = math.min(totalLeft, currentLeft) +//// assert (from >= 0) +//// assert (sliceSize > 0 && sliceSize <= Int.MaxValue) +//// +//// val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) +//// arr += slice +//// +//// globalPosition += sliceSize +//// totalLeft -= sliceSize +//// if (currentLeft == sliceSize) containerIndex += 1 +//// } +//// +//// // Using toNonEmptyBuffer instead of directly moving to next here so that +//// // other checks can be performed there. 
+//// toNonEmptyBuffer() +//// // force cleanup - this is fine since we are not using the buffers directly +//// // which are actively needed (the returned value is on containers which can +//// // recreate) +//// releasePendingContainers() +//// // free current container if acquired. +//// if (currentContainerIndex < containers.length) { +//// containers(currentContainerIndex).release() +//// } +//// assert (currentContainerIndex == containerIndex) +//// +//// val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) +//// retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) +//// retval +//// } +//// +//// // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers +//// // This is to be used only for writes : and ensures that writes are done into the appropriate +//// // underlying bytebuffers. +//// def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { +//// assert(writable) +//// assert(size >= 0) +//// +//// createSlice(size) +//// } +//// +//// // get a buffer which is of the specified size and contains data from the underlying buffers +//// // Note, the actual data might be spread across the underlying buffers. +//// // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! +//// private def readFully(size: Int): ByteBuffer = { +//// assert (readable) +//// +//// if (remaining() < size) { +//// // throw exception +//// throw new BufferUnderflowException() +//// } +//// +//// // kyro depends on this it seems ? +//// // assert (size > 0) +//// if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER +//// +//// // Expected to be handled elsewhere. +//// assert (! localReadWritePossible(size)) +//// +//// val localBuff = { +//// val buff = fetchBufferOfSize(size) +//// // assert(buff.remaining() <= size) +//// // if (buff.remaining() == size) return buff +//// assert(buff.remaining() < size) +//// ByteBuffer.allocate(size).put(buff) +//// } +//// +//// // assert (localBuff.hasRemaining) +//// +//// while (localBuff.hasRemaining) { +//// val buff = fetchBufferOfSize(localBuff.remaining()) +//// localBuff.put(buff) +//// } +//// +//// localBuff.flip() +//// localBuff +//// } +//// +//// +//// +//// def put(b: Byte) { +//// assert (writable) +//// if (remaining() < 1) { +//// // logInfo("put byte. remaining = " + remaining() + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// assert (currentRemaining() > 0) +//// +//// fetchCurrentBuffer().put(b) +//// globalPosition += 1 +//// // Check to need to bump the index ? +//// toNonEmptyBuffer() +//// } +//// +//// +//// def put(buffer: ByteBuffer) { +//// assert (writable) +//// if (remaining() < buffer.remaining()) { +//// throw new BufferOverflowException +//// } +//// +//// val bufferRemaining = buffer.remaining() +//// if (localReadWritePossible(bufferRemaining)) { +//// +//// assert (currentRemaining() >= bufferRemaining) +//// +//// fetchCurrentBuffer().put(buffer) +//// +//// globalPosition += bufferRemaining +//// toNonEmptyBuffer() +//// return +//// } +//// +//// while (buffer.hasRemaining) { +//// val currentBufferRemaining = currentRemaining() +//// val bufferRemaining = buffer.remaining() +//// +//// if (currentBufferRemaining >= bufferRemaining) { +//// fetchCurrentBuffer().put(buffer) +//// globalPosition += bufferRemaining +//// } else { +//// // Split across buffers. 
+//// val currentBuffer = fetchCurrentBuffer() +//// assert (currentBuffer.remaining() >= currentBufferRemaining) +//// val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), +//// currentBufferRemaining) +//// assert (sliced.remaining() == currentBufferRemaining) +//// currentBuffer.put(sliced) +//// // move buffer pos +//// buffer.position(buffer.position() + currentBufferRemaining) +//// +//// globalPosition += currentBufferRemaining +//// } +//// toNonEmptyBuffer() +//// } +//// +//// assert (! hasRemaining() || currentRemaining() > 0) +//// } +//// +//// def put(other: LargeByteBuffer) { +//// assert (writable) +//// if (this.remaining() < other.remaining()) { +//// throw new BufferOverflowException +//// } +//// +//// while (other.hasRemaining()) { +//// val buffer = other.fetchBufferOfSize(other.currentRemaining()) +//// this.put(buffer) +//// } +//// } +//// +//// +//// def duplicate(): LargeByteBuffer = { +//// val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) +//// // We do a duplicate as part of construction - so avoid double duplicate. +//// // containersCopy ++= containers.map(_.duplicate()) +//// containersCopy ++= containers +//// val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) +//// +//// // set limit and position (in that order) ... +//// retval.limit(this.limit()) +//// retval.position(this.position()) +//// +//// // Now release our containers - if any had been acquired +//// releasePendingContainers() +//// +//// retval +//// } +//// +//// +//// /** +//// * 'read' a LargeByteBuffer of size specified and return that. +//// * Position will be incremented by size +//// * +//// * The name might be slightly confusing : rename ? +//// * +//// * @param size Amount of data to be read from this buffer and returned +//// * @return +//// */ +//// def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { +//// if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException +//// if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException +//// +//// +//// assert (readable) +//// assert (size >= 0) +//// +//// releasePendingContainers() +//// +//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER +//// +//// createSlice(size) +//// } +//// +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def readFrom(channel: ReadableByteChannel): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) { +//// // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) +//// throw new BufferOverflowException +//// } +//// +//// var totalBytesRead = 0L +//// +//// while (hasRemaining()) { +//// // read what we can ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = currentRemaining() +//// val bytesRead = channel.read(buffer) +//// +//// if (bytesRead > 0) { +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. 
return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// // Cleanup last buffer ? +//// toNonEmptyBuffer() +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def readFrom(inStrm: InputStream): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// // if (! hasRemaining()) throw new BufferOverflowException +//// if (! hasRemaining()) return 0 +//// +//// var totalBytesRead = 0L +//// +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // read what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +//// // see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val max = math.min(buff.length, bufferRemaining) +//// val bytesRead = inStrm.read(buff, 0, max) +//// +//// if (bytesRead > 0) { +//// buffer.put(buff, 0, bytesRead) +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// // buffer.position(buffer.position + bytesRead) +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce +//// // code for performance reasons. +//// def readFrom(inStrm: DataInput): Long = { +//// +//// assert (writable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// // if (! hasRemaining()) throw new BufferOverflowException +//// if (! hasRemaining()) return 0 +//// +//// var totalBytesRead = 0L +//// +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // read what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. +//// // see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val max = math.min(buff.length, bufferRemaining) +//// inStrm.readFully(buff, 0, max) +//// val bytesRead = max +//// +//// if (bytesRead > 0) { +//// buffer.put(buff, 0, bytesRead) +//// totalBytesRead += bytesRead +//// // bump position too .. +//// globalPosition += bytesRead +//// // buffer.position(buffer.position() + bytesRead) +//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() +//// } +//// else if (-1 == bytesRead) { +//// // if we had already read some data in the loop, return that. +//// if (totalBytesRead > 0) return totalBytesRead +//// return -1 +//// } // nothing available to read, retry later. 
return +//// else if (0 == bytesRead) { +//// return totalBytesRead +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// totalBytesRead +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// // Note: tries to do it efficiently without needing to load everything into memory +//// // (particularly for diskbacked buffers, etc). +//// def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { +//// +//// assert (readable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// var totalBytesWritten = 0L +//// +//// while (hasRemaining()) { +//// // Write what we can ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// assert (bufferRemaining > 0) +//// val bytesWritten = channel.write(buffer) +//// +//// if (bytesWritten > 0) { +//// totalBytesWritten += bytesWritten +//// // bump position too .. +//// globalPosition += bytesWritten +//// if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() +//// assert (! hasRemaining() || currentRemaining() > 0) +//// } +//// else if (0 == bytesWritten) { +//// return totalBytesWritten +//// } +//// +//// // toNonEmptyBuffer() +//// } +//// +//// assert (! hasRemaining()) +//// if (cleanup) { +//// free() +//// } +//// totalBytesWritten +//// } +//// +//// // This is essentially a workaround to exposing underlying buffers +//// def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { +//// +//// assert (readable) +//// releasePendingContainers() +//// +//// // this also allows us to avoid nasty corner cases in the loop. +//// if (! hasRemaining()) throw new BufferUnderflowException +//// +//// var totalBytesWritten = 0L +//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) +//// +//// while (hasRemaining()) { +//// // write what we can ... note, since there is no gaurantee that underlying buffer might +//// // expose array() method, we do double copy - from bytearray to buff and from +//// // buff to outputstream. see if we can optimize this later ... +//// val buffer = fetchCurrentBuffer() +//// val bufferRemaining = buffer.remaining() +//// val size = math.min(bufferRemaining, buff.length) +//// buffer.get(buff, 0, size) +//// outStrm.write(buff, 0, size) +//// +//// totalBytesWritten += size +//// // bump position too .. +//// globalPosition += size +//// +//// if (size >= bufferRemaining) toNonEmptyBuffer() +//// } +//// +//// toNonEmptyBuffer() +//// if (cleanup) { +//// free() +//// } +//// totalBytesWritten +//// } +//// +//// def asInputStream(): InputStream = { +//// new InputStream() { +//// override def read(): Int = { +//// if (! hasRemaining()) return -1 +//// get() +//// } +//// +//// override def read(arr: Array[Byte], off: Int, len: Int): Int = { +//// if (! hasRemaining()) return -1 +//// +//// get(arr, off, len) +//// } +//// +//// override def available(): Int = { +//// // current remaining is what can be read without blocking +//// // anything higher might need disk access/buffer swapping. +//// /* +//// val left = remaining() +//// math.min(left, Int.MaxValue).asInstanceOf[Int] +//// */ +//// currentRemaining() +//// } +//// } +//// } +//// +//// def getCleaner() = cleaner +//// +//// /** +//// * @param cleaner The previous cleaner, so that the caller can chain them if required. 
+//// * @return +//// */ +//// private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { +//// overrideCleaner(cleaner, allowOverride = true) +//// } +//// +//// private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { +//// if (! this.allowCleanerOverride) { +//// // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free +//// return this.cleaner +//// } +//// +//// this.allowCleanerOverride = allowOverride +//// assert (null != cleaner) +//// val prev = this.cleaner +//// this.cleaner = cleaner +//// // logInfo("Overriding " + prev + " with " + this.cleaner) +//// prev +//// } +//// +//// private def doReleaseAll() { +//// for (container <- containers) { +//// container.release() +//// } +//// } +//// +//// def free(invokeCleaner: Boolean = true) { +//// // logInfo("Free on " + this + ", cleaner = " + cleaner) +//// // always invoking release +//// doReleaseAll() +//// +//// if (invokeCleaner) cleaner.clean(this) +//// } +//// +//// private def doDispose(needRelease: Boolean) { +//// +//// if (disposeLocationThrowable ne null) { +//// logError("Already free'ed earlier at : ", disposeLocationThrowable) +//// logError("Current at ", new Throwable) +//// throw new IllegalStateException("Already freed.") +//// } +//// disposeLocationThrowable = new Throwable() +//// +//// // Forcefully cleanup all +//// if (needRelease) doReleaseAll() +//// +//// // Free in a different loop, in case different containers refer to same resource +//// // to release (like file) +//// for (container <- containers) { +//// container.free() +//// } +//// +//// needReleaseIndices.clear() +//// +//// // We should not use this buffer anymore : set the values such that f +//// // we dont ... +//// globalPosition = 0 +//// globalLimit = 0 +//// globalCapacity = 0 +//// } +//// +//// // copy data over ... MUST be used only for cases where array is known to be +//// // small to begin with. slightly risky method due to that assumption +//// def toByteArray(): Array[Byte] = { +//// val positionBackup = position() +//// val size = remaining() +//// if (size > Int.MaxValue) { +//// throw new IllegalStateException( +//// "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") +//// } +//// +//// val retval = new Array[Byte](size.asInstanceOf[Int]) +//// val readSize = get(retval, 0, retval.length) +//// assert (readSize == retval.length, +//// "readSize = " + readSize + ", retval.length = " + retval.length) +//// +//// position(positionBackup) +//// +//// retval +//// } +//// +//// // copy data over ... MUST be used only for cases where array is known to be +//// // small to begin with. slightly risky method due to that assumption +//// def toByteBuffer(): ByteBuffer = { +//// ByteBuffer.wrap(toByteArray()) +//// } +//// +//// def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { +//// val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) +//// val currentPosition = position() +//// retval.put(this) +//// position(currentPosition) +//// retval.clear() +//// retval +//// } +//// +//// +//// +//// // This is ONLY used for testing : that too as part of development of this and associated classes +//// // remove before contributing to spark. 
+//// def hexDump(): String = { +//// if (remaining() * 64 > Int.MaxValue) { +//// throw new UnsupportedOperationException("buffer too large " + remaining()) +//// } +//// +//// val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) +//// +//// var perLine = 0 +//// var first = true +//// for (b <- toByteArray()) { +//// perLine += 1 +//// if (perLine % 8 == 0) { +//// sb.append('\n') +//// first = true +//// } +//// if (! first) sb.append(' ') +//// first = false +//// sb.append(java.lang.Integer.toHexString(b & 0xff)) +//// } +//// sb.append('\n') +//// sb.toString() +//// } +//// +//// override def toString: String = { +//// val sb: StringBuffer = new StringBuffer +//// sb.append(getClass.getName) +//// sb.append(' ') +//// sb.append(System.identityHashCode(this)) +//// sb.append("@[pos=") +//// sb.append(position()) +//// sb.append(" lim=") +//// sb.append(limit()) +//// sb.append(" cap=") +//// sb.append(capacity()) +//// sb.append("]") +//// sb.toString +//// } +//// +//// +//// +//// override def finalize(): Unit = { +//// var marked = false +//// if (containers ne null) { +//// if (containers.exists(container => container.isAcquired && container.requireRelease())) { +//// marked = true +//// logError("BUG: buffer was not released - and now going out of scope. " + +//// "Potential resource leak. Allocated at ", allocateLocationThrowable) +//// containers.foreach(_.release()) +//// } +//// if (containers.exists(container => !container.isFreed && container.requireFree())) { +//// if (!marked) { +//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", +//// allocateLocationThrowable) +//// } +//// else { +//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak") +//// } +//// containers.foreach(_.free()) +//// } +//// } +//// super.finalize() +//// } +////} +//// +//// +////object LargeByteBuffer extends Logging { +//// +//// private val noopDisposeFunction = new BufferCleaner() { +//// protected def doClean(buffer: LargeByteBuffer) { +//// buffer.free(invokeCleaner = false) +//// } +//// } +//// +//// val enableExpensiveAssert = false +//// private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) +//// val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( +//// new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) +//// // Do not allow anyone else to override cleaner +//// EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) +//// +//// // 8K sufficient ? 
+//// private val TEMP_ARRAY_SIZE = 8192 +//// +//// /** +//// * Create a LargeByteBuffer of specified size which is split across +//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory +//// * ByteBuffer +//// * +//// */ +//// def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { +//// if (0 == totalSize) { +//// return EMPTY_BUFFER +//// } +//// +//// assert (totalSize > 0) +//// +//// val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) +//// val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) +//// +//// assert (lastBlockSize > 0) +//// +//// val bufferArray = { +//// val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// for (index <- 0 until numBlocks - 1) { +//// val buff = ByteBuffer.allocate(blockSize) +//// // buff.clear() +//// arr += new HeapByteBufferContainer(buff, true) +//// } +//// arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) +//// assert (arr.length == numBlocks) +//// arr +//// } +//// +//// new LargeByteBuffer(bufferArray, false, false) +//// } +//// +//// /** +//// * Create a LargeByteBuffer of specified size which is split across +//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk +//// * +//// */ +//// private def allocateDiskBuffer(totalSize: Long, +//// blockManager: BlockManager): LargeByteBuffer = { +//// if (0 == totalSize) { +//// return EMPTY_BUFFER +//// } +//// +//// assert (totalSize > 0) +//// +//// // Create a file of the specified size. +//// val file = blockManager.diskBlockManager.createTempBlock()._2 +//// val raf = new RandomAccessFile(file, "rw") +//// try { +//// raf.setLength(totalSize) +//// } finally { +//// raf.close() +//// } +//// +//// readWriteDiskSegment(new FileSegment(file, 0, totalSize), +//// ephemeralDiskBacked = true, blockManager.ioConf) +//// } +//// +//// // The returned buffer takes up ownership of the underlying buffers +//// // (including dispos'ing that when done) +//// def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { +//// val nonEmpty = buffers.filter(_.hasRemaining) +//// +//// // cleanup the empty buffers +//// buffers.filter(! _.hasRemaining).foreach(b => BlockManager.dispose(b)) +//// +//// +//// if (nonEmpty.isEmpty) { +//// return EMPTY_BUFFER +//// } +//// +//// // slice so that offsets match our requirement +//// new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => +//// new HeapByteBufferContainer(b.slice(), true)), false, false) +//// } +//// +//// def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { +//// // only non empty arrays +//// val arrays = byteArrays.filter(_.length > 0) +//// if (0 == arrays.length) return EMPTY_BUFFER +//// +//// new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => +//// new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) +//// } +//// +//// def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { +//// +//// if (inputBuffers.isEmpty) return EMPTY_BUFFER +//// +//// if (! inputBuffers.exists(_.hasRemaining())) { +//// if (canDispose) inputBuffers.map(_.free()) +//// return EMPTY_BUFFER +//// } +//// +//// // release all temp resources acquired +//// inputBuffers.foreach(buff => buff.releasePendingContainers()) +//// // free current container if acquired. 
+//// inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { +//// buff.containers(buff.currentContainerIndex).release() +//// }) +//// // inputBuffers.foreach(b => b.doReleaseAll()) +//// +//// +//// // Dispose of any empty buffers +//// if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) +//// +//// // Find all containers we need. +//// val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) +//// +//// val containers = buffers.flatMap(_.containers) +//// assert (! containers.isEmpty) +//// // The in order containers of "buffers" seq constitute the required return value +//// val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, +//// // if you cant dispose, then we dont own the buffers : in which case, need duplicate +//// ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) +//// +//// if (canDispose) { +//// // override dispose of all other buffers. +//// val disposeFunctions = inputBuffers.map { +//// buffer => { +//// (buffer, buffer.overrideCleaner(noopDisposeFunction)) +//// } +//// } +//// +//// val cleaner = retval.getCleaner() +//// val newCleaner = new BufferCleaner { +//// protected def doClean(buffer: LargeByteBuffer) { +//// +//// assert (retval == buffer) +//// // default cleaner. +//// cleaner.clean(retval) +//// // not required, since we are within clean anyway. +//// // retval.free(invokeCleaner = false) +//// +//// // retval.doDispose(needRelease = true) +//// +//// // This might actually call dispose twice on some (initially) empty buffers, +//// // which is fine since we now guard against that. +//// disposeFunctions.foreach(v => v._2.clean(v._1)) +//// // Call the free method too : so that buffers are marked free ... +//// disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) +//// } +//// } +//// +//// val prev = retval.overrideCleaner(newCleaner) +//// assert (prev == cleaner) +//// } +//// +//// retval +//// } +//// +//// private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { +//// if (arr == null) { +//// throw new NullPointerException +//// } else if (offset < 0 || size < 0 || offset + size > arr.length) { +//// throw new IndexOutOfBoundsException +//// } +//// } +//// +//// def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { +//// if (size <= blockManager.ioConf.maxInMemSize) { +//// LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) +//// } else { +//// LargeByteBuffer.allocateDiskBuffer(size, blockManager) +//// } +//// } +//// +//// def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, +//// ephemeralDiskBacked: Boolean): LargeByteBuffer = { +//// // Split the block into multiple of BlockStore.maxBlockSize +//// val segmentSize = segment.length +//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +//// +//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// +//// for (index <- 0 until numBlocks - 1) { +//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +//// segment.offset + index * blockSize, blockSize), ioConf) +//// } +//// +//// // Last block +//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, +//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) +//// +//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +//// } +//// 
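Both readFromDiskSegment above and readWriteDiskSegment below split a FileSegment into numBlocks containers of at most blockSize bytes, with the final container sized by lastBlockSize. Those helpers live on IOConfig and are not shown in this patch; the sketch below is a minimal, self-contained illustration of the ceiling-division/remainder arithmetic they are assumed to perform (the object and method names here are hypothetical stand-ins, not the IOConfig API).

object BlockSplitSketch {
  // Assumed behaviour of ioConf.numBlocks: how many containers of at most
  // blockSize bytes are needed to cover totalSize bytes (ceiling division).
  def numBlocks(blockSize: Int, totalSize: Long): Int =
    ((totalSize + blockSize - 1) / blockSize).toInt

  // Assumed behaviour of ioConf.lastBlockSize: size of the final container,
  // i.e. the remainder, or a full block when totalSize divides evenly.
  def lastBlockSize(blockSize: Int, totalSize: Long): Int = {
    val rem = (totalSize % blockSize).toInt
    if (rem == 0) blockSize else rem
  }

  def main(args: Array[String]): Unit = {
    val blockSize = 64 * 1024 * 1024                // 64 MB containers
    val totalSize = 5L * 1024 * 1024 * 1024 + 1000  // 5 GB + 1000 bytes
    val n = numBlocks(blockSize, totalSize)         // 81
    val last = lastBlockSize(blockSize, totalSize)  // 1000
    // Invariant the factory methods rely on: the blocks cover the segment exactly.
    assert((n - 1).toLong * blockSize + last == totalSize)
    println(s"numBlocks = $n, lastBlockSize = $last")
  }
}

Under these assumptions, a segment of 5 GB + 1000 bytes maps to 80 full 64 MB containers plus one 1000-byte container, which matches the loop structure used by the factory methods (numBlocks - 1 full blocks followed by one block of lastBlockSize).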
+//// def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, +//// ioConf: IOConfig): LargeByteBuffer = { +//// +//// // Split the block into multiple of BlockStore.maxBlockSize +//// val segmentSize = segment.length +//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] +//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) +//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) +//// +//// logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + +//// ", lastBlockSize = " + lastBlockSize) +//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) +//// +//// for (index <- 0 until numBlocks - 1) { +//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +//// segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) +//// } +//// +//// // Last block +//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, +//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) +//// +//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) +//// } +////} From 01cafbf15026fdcbfd58566335802082493a491c Mon Sep 17 00:00:00 2001 From: Imran RashidInvoke this method before using a sequence of channel-read or -//// * put operations to fill this buffer. -//// * -//// *
-//// * This method does not actually erase the data in the buffer, but it
-//// * is named as if it did because it will most often be used in situations
-//// * in which that might as well be the case.
-//// */ -//// def clear(): Unit -//// -//// /** -//// * Flips this buffer. The limit is set to the current position and then -//// * the position is set to zero. If the mark is defined then it is -//// * discarded. -//// * -//// *After a sequence of channel-read or put operations, invoke -//// * this method to prepare for a sequence of channel-write or relative -//// * get operations. -//// */ -//// def flip(): Unit -// -// /** -// * Rewinds this buffer. The position is set to zero and the mark is -// * discarded. -// * -// *
-// * Invoke this method before a sequence of channel-write or get
-// * operations, assuming that the limit has already been set
-// * appropriately.
-// */
-// def rewind(): Unit
-//
-// /**
-// * Returns the number of elements between the current position and the
-// * limit.
-// * -// * @return The number of elements remaining in this buffer -// */ -// def remaining(): Long -//} -//// -////class ChainedLargeByteBuffer(private[network] val underlying: ChainedBuffer) extends LargeByteBuffer { -//// -//// def capacity = underlying.capacity -//// -//// var _pos = 0l -//// -//// def get(dst: Array[Byte],offset: Int,length: Int): Unit = { -//// underlying.read(_pos, dst, offset, length) -//// _pos += length -//// } -//// -//// def get(): Byte = { -//// val b = underlying.read(_pos) -//// _pos += 1 -//// b -//// } -//// -//// def put(bytes: LargeByteBuffer): Unit = { -//// ??? -//// } -//// -//// def position: Long = _pos -//// def position(position: Long): Unit = { -//// _pos = position -//// } -//// def remaining(): Long = { -//// underlying.size - position -//// } -//// -//// def duplicate(): ChainedLargeByteBuffer = { -//// new ChainedLargeByteBuffer(underlying) -//// } -//// -//// def rewind(): Unit = { -//// _pos = 0 -//// } -//// -//// def limit(): Long = { -//// capacity -//// } -//// -//// def limit(newLimit: Long): Unit = { -//// ??? -//// } -//// -//// def writeTo(channel:WritableByteChannel): Long = { -//// var written = 0l -//// underlying.chunks.foreach{bytes => -//// //TODO test this -//// val buffer = ByteBuffer.wrap(bytes) -//// while (buffer.hasRemaining) -//// channel.write(buffer) -//// written += bytes.length -//// } -//// written -//// } -////} -// -//class WrappedLargeByteBuffer(private[spark] val underlying: Array[ByteBuffer]) extends LargeByteBuffer { -// -// val (totalCapacity, chunkOffsets) = { -// var sum = 0l -// val offsets = new Array[Long](underlying.size) -// (0 until underlying.size).foreach{idx => -// offsets(idx) = sum -// sum += underlying(idx).capacity() -// } -// (sum, offsets) -// } -// -// private var _pos = 0l -// private var currentBufferIdx = 0 -// private var currentBuffer = underlying(0) -// private var _limit = totalCapacity -// -// def capacity = totalCapacity -// -// def get(dst: Array[Byte], offset: Int, length: Int): Unit = { -// var moved = 0 -// while (moved < length) { -// val toRead = math.min(length - moved, currentBuffer.remaining()) -// currentBuffer.get(dst, offset, toRead) -// moved += toRead -// updateCurrentBuffer() -// } -// } -// -// def get(): Byte = { -// val r = currentBuffer.get() -// _pos += 1 -// updateCurrentBuffer() -// r -// } -// -// private def updateCurrentBuffer(): Unit = { -// //TODO fix end condition -// while(!currentBuffer.hasRemaining()) { -// currentBufferIdx += 1 -// currentBuffer = underlying(currentBufferIdx) -// } -// } -// -// def put(bytes: LargeByteBuffer): Unit = { -// ??? -// } -// -// def position: Long = _pos -// def position(position: Long): Unit = { -// //XXX check range? -// _pos = position -// } -// def remaining(): Long = { -// totalCapacity - _pos -// } -// -// def duplicate(): WrappedLargeByteBuffer = { -// new WrappedLargeByteBuffer(underlying.map{_.duplicate()}) -// } -// -// def rewind(): Unit = { -// _pos = 0 -// underlying.foreach{_.rewind()} -// } -// -// def limit(): Long = { -// totalCapacity -// } -// -// def limit(newLimit: Long) = { -// //XXX check range? set limits in sub buffers? -// _limit = newLimit -// } -// -// def writeTo(channel: WritableByteChannel): Long = { -// var written = 0l -// underlying.foreach{buffer => -// //TODO test this -// //XXX do we care about respecting the limit here? 
-// written += buffer.remaining() -// while (buffer.hasRemaining) -// channel.write(buffer) -// } -// written -// } -// -//} -// -//object LargeByteBuffer { -// -// def asLargeByteBuffer(byteBuffer: ByteBuffer): LargeByteBuffer = { -// new WrappedLargeByteBuffer(Array(byteBuffer)) -// } -// -// def asLargeByteBuffer(bytes: Array[Byte]): LargeByteBuffer = { -// new WrappedLargeByteBuffer(Array(ByteBuffer.wrap(bytes))) -// } -// -//// -//// def allocateOnHeap(size: Long, maxChunk: Int): LargeByteBuffer = { -//// val buffer = ChainedBuffer.withInitialSize(maxChunk, size) -//// new ChainedLargeByteBuffer(buffer) -//// } -// -// def mapFile( -// channel: FileChannel, -// mode: MapMode, -// offset: Long, -// length: Long, -// maxChunk: Int = Integer.MAX_VALUE - 1e6.toInt -// ): LargeByteBuffer = { -// val offsets = new ArrayBuffer[Long]() -// var curOffset = offset -// val end = offset + length -// while (curOffset < end) { -// offsets += curOffset -// val length = math.min(end - curOffset, maxChunk) -// curOffset += length -// } -// offsets += end -// val chunks = new Array[ByteBuffer](offsets.size - 1) -// (0 until offsets.size - 1).foreach{idx => -// chunks(idx) = channel.map(mode, offsets(idx), offsets(idx + 1) - offsets(idx)) -// } -// new WrappedLargeByteBuffer(chunks) -// } -//} -// -// -//// -/////** -//// * This is a variant of ByteBuffer to be used internally in spark, which is not limited to 2G -//// * which ByteBuffers are limited to. -//// * Externally, it exposes all the api which java.nio.ByteBuffer exposes. -//// * Internally, it maintains a sequence of Containers which manage the ByteBuffer data. -//// * Not all the data might be loaded into memory (like disk or tachyon data) - so actual -//// * memory footprint - heap and vm could be much lower than capacity. -//// * -//// * TODO: Currently we are slightly fast and loose in terms of concurrent modifications to this -//// * buffer, maybe revisit this later ? Note: this is not much different from earlier though ! -//// * -//// * TODO: Explore if (at all) we can leverage zero copy transfers. The issue (currently) is that this -//// * will require the file to be kept open (repeatedly opening/closing file is not good -//// * for each transfer) and this has an impact on ulimit. Not to mention writing of mmap'ed buffer is -//// * pretty quick (it is the first failover in case direct transfer is not possible in file zero copy) -//// * -//// * TODO: After redesign to containers, we got rid of parent containers to free - the side effect is -//// * that if there are direct ByteBuffers, we are not handling explicit cleanup of those in some -//// * cases (when we duplicate/slice them). Currently spark does not need this, but might in future -//// * so relook at it later. -//// */ -////// We should make this constructor private: but for now, -////// leaving it public since TachyonStore needs it -////class LargeByteBuffer private[spark](private val inputContainers: ArrayBuffer[ByteBufferContainer], -//// private val needDuplicate: Boolean, val ephemeralDiskBacked: Boolean) extends Logging { -//// -//// // TODO: TEMP code: to flush out potential resource leaks. 
REMOVE ME -//// private val allocateLocationThrowable: Throwable = { -//// if (inputContainers.exists(c => c.requireRelease() || c.requireFree())) { -//// new Throwable("blockId = " + BlockManager.getLookupBlockId) -//// } else { -//// null -//// } -//// } -//// private var disposeLocationThrowable: Throwable = null -//// -//// @volatile private var allowCleanerOverride = true -//// @volatile private var cleaner: BufferCleaner = new BufferCleaner { -//// override def doClean(buffer: LargeByteBuffer) = { -//// assert (LargeByteBuffer.this == buffer) -//// doDispose(needRelease = false) -//// } -//// } -//// -//// // should not be empty -//// assert (null != inputContainers && ! inputContainers.isEmpty) -//// // should not have any null's -//// assert (inputContainers.find(_ == null).isEmpty) -//// -//// // println("Num containers = " + inputContainers.size) -//// -//// // Position, limit and capacity relevant over the engire LargeByteBuffer -//// @volatile private var globalPosition = 0L -//// @volatile private var globalLimit = 0L -//// @volatile private var currentContainerIndex = 0 -//// -//// // The buffers in which the actual data is held. -//// private var containers: Array[ByteBufferContainer] = null -//// -//// // aggregate capacities of the individual buffers. -//// // bufferPositionStart(0) will be capacity of 1st buffer, bufferPositionStart(1) will be -//// // sum of capacity of 0th and 1st block buffer -//// private var bufferPositionStart: Array[Long] = null -//// -//// // Contains the indices of a containers which requires release before subsequent invocation of -//// // read/write should be serviced. This is required since current read/write might have moved the -//// // position but since we are returning bytebuffers which depend on the validity of the existing -//// // bytebuffer, we cant release them yet. -//// private var needReleaseIndices = new HashSet[Int]() -//// -//// private val readable = ! inputContainers.exists(! _.isReadable) -//// private val writable = ! inputContainers.exists(! 
_.isWritable) -//// -//// -//// // initialize -//// @volatile private var globalCapacity = { -//// -//// // Ensure that there are no empty buffers : messes up with our code : unless it -//// // is a single buffer (for empty buffer for marker case) -//// assert (inputContainers.find(0 == _.capacity()).isEmpty || 1 == inputContainers.length) -//// -//// containers = { -//// if (needDuplicate) inputContainers.map(_.duplicate()).toArray else inputContainers.toArray -//// } -//// containers.foreach(_.validate()) -//// -//// def initializeBufferPositionStart(arr: Array[ByteBufferContainer]) { -//// val buff = new ArrayBuffer[Long](arr.length + 1) -//// buff += 0L -//// -//// buff ++= arr.map(_.capacity().asInstanceOf[Long]).scanLeft(0L)(_ + _).slice(1, arr.length + 1) -//// assert (buff.length == arr.length + 1) -//// bufferPositionStart = buff.toArray -//// } -//// -//// initializeBufferPositionStart(containers) -//// -//// // remove references from inputBuffers -//// inputContainers.clear() -//// -//// globalLimit = bufferPositionStart(containers.length) -//// globalPosition = 0L -//// currentContainerIndex = 0 -//// -//// assert (globalLimit == containers.map(_.capacity().asInstanceOf[Long]).sum) -//// -//// globalLimit -//// } -//// -//// final def position(): Long = globalPosition -//// -//// final def limit(): Long = globalLimit -//// -//// final def capacity(): Long = globalCapacity -//// -//// final def limit(newLimit: Long) { -//// if ((newLimit > capacity()) || (newLimit < 0)) { -//// throw new IllegalArgumentException("newLimit = " + newLimit + ", capacity = " + capacity()) -//// } -//// -//// globalLimit = newLimit -//// if (position() > newLimit) position(newLimit) -//// } -//// -//// def skip(skipBy: Long) = position(position() + skipBy) -//// -//// private def releasePendingContainers() { -//// if (! needReleaseIndices.isEmpty) { -//// val iter = needReleaseIndices.iterator -//// while (iter.hasNext) { -//// val index = iter.next() -//// assert (index >= 0 && index < containers.length) -//// // It is possible to move from one container to next before the previous -//// // container was acquired. For example, get forcing move to next container -//// // since current was exhausted immediatelly followed by a position() -//// // so the container we moved to was never acquired. -//// -//// // assert (containers(index).isAcquired) -//// // will this always be satisfied ? -//// // assert (index != currentContainerIndex) -//// if (containers(index).isAcquired) containers(index).release() -//// } -//// needReleaseIndices.clear() -//// } -//// } -//// -//// private def toNewContainer(newIndex: Int) { -//// if (newIndex != currentContainerIndex && currentContainerIndex < containers.length) { -//// -//// assert (currentContainerIndex >= 0) -//// needReleaseIndices += currentContainerIndex -//// } -//// currentContainerIndex = newIndex -//// } -//// -//// // expensive method, sigh ... optimize it later ? -//// final def position(newPosition: Long) { -//// -//// if ((newPosition > globalLimit) || (newPosition < 0)) throw new IllegalArgumentException() -//// -//// if (currentContainerIndex < bufferPositionStart.length - 1 && -//// newPosition >= bufferPositionStart(currentContainerIndex) && -//// newPosition < bufferPositionStart(currentContainerIndex + 1)) { -//// // Same buffer - easy method ... -//// globalPosition = newPosition -//// // Changed position - free previously returned buffers. 
-//// releasePendingContainers() -//// return -//// } -//// -//// // Find appropriate currentContainerIndex -//// // Since bufferPositionStart is sorted, can be replaced with binary search if required. -//// // For now, not in the perf critical path since buffers size is very low typically. -//// var index = 0 -//// val cLen = containers.length -//// while (index < cLen) { -//// if (newPosition >= bufferPositionStart(index) && -//// newPosition < bufferPositionStart(index + 1)) { -//// globalPosition = newPosition -//// toNewContainer(index) -//// // Changed position - free earlier and previously returned buffers. -//// releasePendingContainers() -//// return -//// } -//// index += 1 -//// } -//// -//// if (newPosition == globalLimit && newPosition == bufferPositionStart(cLen)) { -//// // boundary. -//// globalPosition = newPosition -//// toNewContainer(cLen) -//// // Changed position - free earlier and previously returned buffers. -//// releasePendingContainers() -//// return -//// } -//// -//// assert (assertion = false, "Unexpected to come here .... newPosition = " + newPosition + -//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", "]")) -//// } -//// -//// -//// /** -//// * Clears this buffer. The position is set to zero, the limit is set to -//// * the capacity, and the mark is discarded. -//// * -//// *Invoke this method before using a sequence of channel-read or -//// * put operations to fill this buffer. -//// * -//// *
-//// * This method does not actually erase the data in the buffer, but it
-//// * is named as if it did because it will most often be used in situations
-//// * in which that might as well be the case.
-//// */ -//// final def clear() { -//// // if (0 == globalCapacity) return -//// -//// needReleaseIndices += 0 -//// globalPosition = 0L -//// toNewContainer(0) -//// globalLimit = globalCapacity -//// -//// // Now free all pending containers -//// releasePendingContainers() -//// } -//// -//// /** -//// * Flips this buffer. The limit is set to the current position and then -//// * the position is set to zero. If the mark is defined then it is -//// * discarded. -//// * -//// *After a sequence of channel-read or put operations, invoke -//// * this method to prepare for a sequence of channel-write or relative -//// * get operations. -//// */ -//// final def flip() { -//// needReleaseIndices += 0 -//// globalLimit = globalPosition -//// globalPosition = 0L -//// toNewContainer(0) -//// -//// // Now free all pending containers -//// releasePendingContainers() -//// } -//// -//// /** -//// * Rewinds this buffer. The position is set to zero and the mark is -//// * discarded. -//// * -//// *
-//// * Invoke this method before a sequence of channel-write or get
-//// * operations, assuming that the limit has already been set
-//// * appropriately.
-//// */
-//// final def rewind() {
-//// needReleaseIndices += 0
-//// globalPosition = 0L
-//// toNewContainer(0)
-////
-//// // Now free all pending containers
-//// releasePendingContainers()
-//// }
-////
-//// /**
-//// * Returns the number of elements between the current position and the
-//// * limit.
-//// * -//// * @return The number of elements remaining in this buffer -//// */ -//// final def remaining(): Long = { -//// globalLimit - globalPosition -//// } -//// -//// /** -//// * Tells whether there are any elements between the current position and -//// * the limit. -//// * -//// * @return true if, and only if, there is at least one element -//// * remaining in this buffer -//// */ -//// final def hasRemaining() = { -//// globalPosition < globalLimit -//// } -//// -//// // private def currentBuffer(): ByteBuffer = buffers(currentContainerIndex) -//// -//// // number of bytes remaining in currently active underlying buffer -//// private def currentRemaining(): Int = { -//// if (hasRemaining()) { -//// // validate currentContainerIndex is valid -//// assert (globalPosition >= bufferPositionStart(currentContainerIndex) && -//// globalPosition < bufferPositionStart(currentContainerIndex + 1), -//// "globalPosition = " + globalPosition + -//// ", currentContainerIndex = " + currentContainerIndex + -//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -//// -//// currentRemaining0(currentContainerIndex) -//// } else 0 -//// } -//// -//// // Without any validation : required when we are bumping the index (when validation will fail) ... -//// private def currentRemaining0(which: Int): Int = { -//// // currentBuffer().remaining() -//// math.max(0, math.min(bufferPositionStart(which + 1), -//// globalLimit) - globalPosition).asInstanceOf[Int] -//// } -//// -//// // Set the approppriate position/limit for the current underlying buffer to mirror our -//// // the LargeByteBuffer's state. -//// private def fetchCurrentBuffer(): ByteBuffer = { -//// releasePendingContainers() -//// -//// assert (currentContainerIndex < containers.length) -//// -//// val container = containers(currentContainerIndex) -//// if (! container.isAcquired) { -//// container.acquire() -//// } -//// -//// assert (container.isAcquired) -//// if (LargeByteBuffer.enableExpensiveAssert) { -//// assert (! containers.exists( b => (b ne container) && b.isAcquired)) -//// } -//// -//// assert (currentContainerIndex < bufferPositionStart.length && -//// globalPosition < bufferPositionStart(currentContainerIndex + 1), -//// "currentContainerIndex = " + currentContainerIndex + ", bufferPositionStart = " + -//// bufferPositionStart.mkString("[", ", ", "]") + ", this = " + this) -//// -//// val buffPosition = (globalPosition - bufferPositionStart(currentContainerIndex)). -//// asInstanceOf[Int] -//// -//// val buffer = container.getByteBuffer -//// buffer.position(buffPosition) -//// val diff = buffer.capacity - buffPosition -//// val left = remaining() -//// if (diff <= left) { -//// buffer.limit(buffer.capacity()) -//// } else { -//// // Can happen if limit() was called. -//// buffer.limit(buffPosition + left.asInstanceOf[Int]) -//// } -//// -//// buffer -//// } -//// -//// // To be used ONLY to test in suites. -//// private[spark] def fetchCurrentBufferForTesting(): ByteBuffer = { -//// if ("1" != System.getProperty("SPARK_TESTING")) { -//// throw new IllegalStateException("This method is to be used ONLY within spark test suites") -//// } -//// -//// fetchCurrentBuffer() -//// } -//// -//// // Expects that the invoker has ensured that this can be safely invoked. -//// // That is, it wont be invoked when the loop wont terminate. -//// private def toNonEmptyBuffer() { -//// -//// if (! hasRemaining()) { -//// var newIndex = currentContainerIndex -//// // Ensure we are in the right block or not. 
-//// while (newIndex < containers.length && globalPosition >= bufferPositionStart(newIndex + 1)) { -//// newIndex += 1 -//// } -//// toNewContainer(newIndex) -//// // Do not do this - since we might not yet have consumed the buffer which caused EOF right now -//// /* -//// // Add last one also, and release it too - since we are at the end of the buffer with nothing -//// // more pending. -//// if (newIndex >= 0 && currentContainerIndex < containers.length) { -//// needReleaseIndices += newIndex -//// } -//// */ -//// assert (currentContainerIndex >= 0) -//// // releasePendingContainers() -//// return -//// } -//// -//// var index = currentContainerIndex -//// while (0 == currentRemaining0(index) && index < containers.length) { -//// index += 1 -//// } -//// assert (currentContainerIndex < containers.length) -//// toNewContainer(index) -//// assert (0 != currentRemaining()) -//// } -//// -//// private def assertPreconditions(containerIndex: Int) { -//// assert (globalPosition >= bufferPositionStart(containerIndex), -//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + -//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -//// assert (globalPosition < bufferPositionStart(containerIndex + 1), -//// "globalPosition = " + globalPosition + ", containerIndex = " + containerIndex + -//// ", bufferPositionStart = " + bufferPositionStart.mkString("[", ", ", " ]")) -//// -//// assert (globalLimit <= globalCapacity) -//// assert (containerIndex < containers.length) -//// } -//// -//// -//// /** -//// * Attempts to return a ByteBuffer of the requested size. -//// * It is possible to return a buffer of size smaller than requested -//// * even though hasRemaining == true -//// * -//// * On return, position would have been moved 'ahead' by the size of the buffer returned : -//// * that is, we treat that the returned buffer has been already 'read' from this LargeByteBuffer -//// * -//// * -//// * This is used to primarily retrieve content of this buffer to expose via ByteBuffer -//// * to some other api which is deemed too cumbersome to move to LargeByteBuffer (like the -//// * chunked sending of contents via ConnectionManager) Note that the lifecycle of the ByteBuffer -//// * returned is inherently tied to the state of this LargeByteBuffer. For example,if the underlying -//// * container is a disk backed container, and we make subsequent calls to get(), the returned -//// * ByteBuffer can be dispose'ed off -//// * -//// * @param maxChunkSize Max size of the ByteBuffer to retrieve. 
-//// * @return -//// */ -//// -//// private def fetchBufferOfSize(maxChunkSize: Int): ByteBuffer = { -//// fetchBufferOfSizeImpl(maxChunkSize, canReleaseContainers = true) -//// } -//// -//// private def fetchBufferOfSizeImpl(maxChunkSize: Int, -//// canReleaseContainers: Boolean): ByteBuffer = { -//// if (canReleaseContainers) releasePendingContainers() -//// assert (maxChunkSize > 0) -//// -//// // not checking for degenerate case of maxChunkSize == 0 -//// if (globalPosition >= globalLimit) { -//// // throw exception -//// throw new BufferUnderflowException() -//// } -//// -//// // Check preconditions : disable these later, since they might be expensive to -//// // evaluate for every IO op -//// assertPreconditions(currentContainerIndex) -//// -//// val currentBufferRemaining = currentRemaining() -//// -//// assert (currentBufferRemaining > 0) -//// -//// val size = math.min(currentBufferRemaining, maxChunkSize) -//// -//// val newBuffer = if (currentBufferRemaining > maxChunkSize) { -//// val currentBuffer = fetchCurrentBuffer() -//// val buff = ByteBufferContainer.createSlice(currentBuffer, -//// currentBuffer.position(), maxChunkSize) -//// assert (buff.remaining() == maxChunkSize) -//// buff -//// } else { -//// val currentBuffer = fetchCurrentBuffer() -//// val buff = currentBuffer.slice() -//// assert (buff.remaining() == currentBufferRemaining) -//// buff -//// } -//// -//// assert (size == newBuffer.remaining()) -//// assert (0 == newBuffer.position()) -//// assert (size == newBuffer.limit()) -//// assert (newBuffer.capacity() == newBuffer.limit()) -//// -//// globalPosition += newBuffer.remaining -//// toNonEmptyBuffer() -//// -//// newBuffer -//// } -//// -//// // Can we service the read/write from the currently active (underlying) bytebuffer or not. -//// // For almost all cases, this will return true allowing us to optimize away the more expensive -//// // computations. -//// private def localReadWritePossible(size: Int) = -//// size >= 0 && globalPosition + size <= bufferPositionStart(currentContainerIndex + 1) -//// -//// -//// def getLong(): Long = { -//// assert (readable) -//// releasePendingContainers() -//// -//// if (remaining() < 8) throw new BufferUnderflowException -//// -//// if (localReadWritePossible(8)) { -//// val buff = fetchCurrentBuffer() -//// assert (buff.remaining() >= 8) -//// val retval = buff.getLong -//// globalPosition += 8 -//// toNonEmptyBuffer() -//// return retval -//// } -//// -//// val buff = readFully(8) -//// buff.getLong -//// } -//// -//// def getInt(): Int = { -//// assert (readable) -//// releasePendingContainers() -//// -//// if (remaining() < 4) throw new BufferUnderflowException -//// -//// if (localReadWritePossible(4)) { -//// val buff = fetchCurrentBuffer() -//// assert (buff.remaining() >= 4) -//// val retval = buff.getInt -//// globalPosition += 4 -//// toNonEmptyBuffer() -//// return retval -//// } -//// -//// val buff = readFully(4) -//// buff.getInt -//// } -//// -//// def getChar(): Char = { -//// assert (readable) -//// releasePendingContainers() -//// -//// if (remaining() < 2) throw new BufferUnderflowException -//// -//// if (localReadWritePossible(2)) { -//// val buff = fetchCurrentBuffer() -//// assert (buff.remaining() >= 2) -//// val retval = buff.getChar -//// globalPosition += 2 -//// toNonEmptyBuffer() -//// return retval -//// } -//// -//// // if slice is becoming too expensive, revisit this ... 
-//// val buff = readFully(2) -//// buff.getChar -//// } -//// -//// def get(): Byte = { -//// assert (readable) -//// releasePendingContainers() -//// -//// if (! hasRemaining()) throw new BufferUnderflowException -//// -//// // If we have remaining bytes, previous invocations MUST have ensured that we are at -//// // a buffer which has data to be read. -//// assert (localReadWritePossible(1)) -//// -//// val buff = fetchCurrentBuffer() -//// assert (buff.remaining() >= 1, "buff.remaining = " + buff.remaining()) -//// val retval = buff.get() -//// globalPosition += 1 -//// toNonEmptyBuffer() -//// -//// retval -//// } -//// -//// def get(arr: Array[Byte], offset: Int, size: Int): Int = { -//// assert (readable) -//// releasePendingContainers() -//// -//// LargeByteBuffer.checkOffsets(arr, offset, size) -//// -//// // kyro depends on this it seems ? -//// // assert (size > 0) -//// if (0 == size) return 0 -//// -//// if (! hasRemaining()) return -1 -//// -//// if (localReadWritePossible(size)) { -//// val buff = fetchCurrentBuffer() -//// assert (buff.remaining() >= size) -//// buff.get(arr, offset, size) -//// globalPosition += size -//// toNonEmptyBuffer() -//// return size -//// } -//// -//// var remainingSize = math.min(size, remaining()).asInstanceOf[Int] -//// var currentOffset = offset -//// -//// while (remainingSize > 0) { -//// val buff = fetchBufferOfSize(remainingSize) -//// val toCopy = math.min(buff.remaining(), remainingSize) -//// -//// buff.get(arr, currentOffset, toCopy) -//// currentOffset += toCopy -//// remainingSize -= toCopy -//// } -//// -//// currentOffset - offset -//// } -//// -//// -//// private def createSlice(size: Long): LargeByteBuffer = { -//// -//// releasePendingContainers() -//// -//// if (remaining() < size) { -//// // logInfo("createSlice. remaining = " + remaining() + ", size " + size + ", this = " + this) -//// throw new BufferOverflowException -//// } -//// -//// // kyro depends on this it seems ? -//// // assert (size > 0) -//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER -//// -//// val arr = new ArrayBuffer[ByteBufferContainer](2) -//// var totalLeft = size -//// -//// // assert (currentRemaining() < totalLeft || totalLeft != size || currentAsByteBuffer) -//// -//// var containerIndex = currentContainerIndex -//// while (totalLeft > 0 && hasRemaining()) { -//// assertPreconditions(containerIndex) -//// val container = containers(containerIndex) -//// val currentLeft = currentRemaining0(containerIndex) -//// -//// assert (globalPosition + currentLeft <= globalLimit) -//// assert (globalPosition >= bufferPositionStart(containerIndex) && -//// (globalPosition < bufferPositionStart(containerIndex + 1))) -//// -//// val from = (globalPosition - bufferPositionStart(containerIndex)).asInstanceOf[Int] -//// val sliceSize = math.min(totalLeft, currentLeft) -//// assert (from >= 0) -//// assert (sliceSize > 0 && sliceSize <= Int.MaxValue) -//// -//// val slice = container.createSlice(from, sliceSize.asInstanceOf[Int]) -//// arr += slice -//// -//// globalPosition += sliceSize -//// totalLeft -= sliceSize -//// if (currentLeft == sliceSize) containerIndex += 1 -//// } -//// -//// // Using toNonEmptyBuffer instead of directly moving to next here so that -//// // other checks can be performed there. 
-//// toNonEmptyBuffer() -//// // force cleanup - this is fine since we are not using the buffers directly -//// // which are actively needed (the returned value is on containers which can -//// // recreate) -//// releasePendingContainers() -//// // free current container if acquired. -//// if (currentContainerIndex < containers.length) { -//// containers(currentContainerIndex).release() -//// } -//// assert (currentContainerIndex == containerIndex) -//// -//// val retval = new LargeByteBuffer(arr, false, ephemeralDiskBacked) -//// retval.overrideCleaner(LargeByteBuffer.noopDisposeFunction) -//// retval -//// } -//// -//// // Get a composite sequence of ByteBuffer which might straddle one or more underlying buffers -//// // This is to be used only for writes : and ensures that writes are done into the appropriate -//// // underlying bytebuffers. -//// def getCompositeWriteBuffer(size: Long): LargeByteBuffer = { -//// assert(writable) -//// assert(size >= 0) -//// -//// createSlice(size) -//// } -//// -//// // get a buffer which is of the specified size and contains data from the underlying buffers -//// // Note, the actual data might be spread across the underlying buffers. -//// // This MUST BE used only for specific usecases like getInt, etc. Not for bulk copy ! -//// private def readFully(size: Int): ByteBuffer = { -//// assert (readable) -//// -//// if (remaining() < size) { -//// // throw exception -//// throw new BufferUnderflowException() -//// } -//// -//// // kyro depends on this it seems ? -//// // assert (size > 0) -//// if (0 == size) return LargeByteBuffer.EMPTY_BYTEBUFFER -//// -//// // Expected to be handled elsewhere. -//// assert (! localReadWritePossible(size)) -//// -//// val localBuff = { -//// val buff = fetchBufferOfSize(size) -//// // assert(buff.remaining() <= size) -//// // if (buff.remaining() == size) return buff -//// assert(buff.remaining() < size) -//// ByteBuffer.allocate(size).put(buff) -//// } -//// -//// // assert (localBuff.hasRemaining) -//// -//// while (localBuff.hasRemaining) { -//// val buff = fetchBufferOfSize(localBuff.remaining()) -//// localBuff.put(buff) -//// } -//// -//// localBuff.flip() -//// localBuff -//// } -//// -//// -//// -//// def put(b: Byte) { -//// assert (writable) -//// if (remaining() < 1) { -//// // logInfo("put byte. remaining = " + remaining() + ", this = " + this) -//// throw new BufferOverflowException -//// } -//// -//// assert (currentRemaining() > 0) -//// -//// fetchCurrentBuffer().put(b) -//// globalPosition += 1 -//// // Check to need to bump the index ? -//// toNonEmptyBuffer() -//// } -//// -//// -//// def put(buffer: ByteBuffer) { -//// assert (writable) -//// if (remaining() < buffer.remaining()) { -//// throw new BufferOverflowException -//// } -//// -//// val bufferRemaining = buffer.remaining() -//// if (localReadWritePossible(bufferRemaining)) { -//// -//// assert (currentRemaining() >= bufferRemaining) -//// -//// fetchCurrentBuffer().put(buffer) -//// -//// globalPosition += bufferRemaining -//// toNonEmptyBuffer() -//// return -//// } -//// -//// while (buffer.hasRemaining) { -//// val currentBufferRemaining = currentRemaining() -//// val bufferRemaining = buffer.remaining() -//// -//// if (currentBufferRemaining >= bufferRemaining) { -//// fetchCurrentBuffer().put(buffer) -//// globalPosition += bufferRemaining -//// } else { -//// // Split across buffers. 
-//// val currentBuffer = fetchCurrentBuffer() -//// assert (currentBuffer.remaining() >= currentBufferRemaining) -//// val sliced = ByteBufferContainer.createSlice(buffer, buffer.position(), -//// currentBufferRemaining) -//// assert (sliced.remaining() == currentBufferRemaining) -//// currentBuffer.put(sliced) -//// // move buffer pos -//// buffer.position(buffer.position() + currentBufferRemaining) -//// -//// globalPosition += currentBufferRemaining -//// } -//// toNonEmptyBuffer() -//// } -//// -//// assert (! hasRemaining() || currentRemaining() > 0) -//// } -//// -//// def put(other: LargeByteBuffer) { -//// assert (writable) -//// if (this.remaining() < other.remaining()) { -//// throw new BufferOverflowException -//// } -//// -//// while (other.hasRemaining()) { -//// val buffer = other.fetchBufferOfSize(other.currentRemaining()) -//// this.put(buffer) -//// } -//// } -//// -//// -//// def duplicate(): LargeByteBuffer = { -//// val containersCopy = new ArrayBuffer[ByteBufferContainer](containers.size) -//// // We do a duplicate as part of construction - so avoid double duplicate. -//// // containersCopy ++= containers.map(_.duplicate()) -//// containersCopy ++= containers -//// val retval = new LargeByteBuffer(containersCopy, true, ephemeralDiskBacked) -//// -//// // set limit and position (in that order) ... -//// retval.limit(this.limit()) -//// retval.position(this.position()) -//// -//// // Now release our containers - if any had been acquired -//// releasePendingContainers() -//// -//// retval -//// } -//// -//// -//// /** -//// * 'read' a LargeByteBuffer of size specified and return that. -//// * Position will be incremented by size -//// * -//// * The name might be slightly confusing : rename ? -//// * -//// * @param size Amount of data to be read from this buffer and returned -//// * @return -//// */ -//// def readLargeBuffer(size: Long, partialReadAllowed: Boolean): LargeByteBuffer = { -//// if (! hasRemaining() && ! partialReadAllowed) throw new BufferUnderflowException -//// if (remaining() < size && ! partialReadAllowed) throw new BufferUnderflowException -//// -//// -//// assert (readable) -//// assert (size >= 0) -//// -//// releasePendingContainers() -//// -//// if (0 == size) return LargeByteBuffer.EMPTY_BUFFER -//// -//// createSlice(size) -//// } -//// -//// -//// // This is essentially a workaround to exposing underlying buffers -//// def readFrom(channel: ReadableByteChannel): Long = { -//// -//// assert (writable) -//// releasePendingContainers() -//// -//// // this also allows us to avoid nasty corner cases in the loop. -//// if (! hasRemaining()) { -//// // logInfo("readFrom channel. remaining = " + remaining() + ", this = " + this) -//// throw new BufferOverflowException -//// } -//// -//// var totalBytesRead = 0L -//// -//// while (hasRemaining()) { -//// // read what we can ... -//// val buffer = fetchCurrentBuffer() -//// val bufferRemaining = currentRemaining() -//// val bytesRead = channel.read(buffer) -//// -//// if (bytesRead > 0) { -//// totalBytesRead += bytesRead -//// // bump position too .. -//// globalPosition += bytesRead -//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -//// } -//// else if (-1 == bytesRead) { -//// // if we had already read some data in the loop, return that. -//// if (totalBytesRead > 0) return totalBytesRead -//// return -1 -//// } // nothing available to read, retry later. 
return -//// else if (0 == bytesRead) { -//// return totalBytesRead -//// } -//// -//// // toNonEmptyBuffer() -//// } -//// -//// // Cleanup last buffer ? -//// toNonEmptyBuffer() -//// totalBytesRead -//// } -//// -//// // This is essentially a workaround to exposing underlying buffers -//// def readFrom(inStrm: InputStream): Long = { -//// -//// assert (writable) -//// releasePendingContainers() -//// -//// // this also allows us to avoid nasty corner cases in the loop. -//// // if (! hasRemaining()) throw new BufferOverflowException -//// if (! hasRemaining()) return 0 -//// -//// var totalBytesRead = 0L -//// -//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -//// -//// while (hasRemaining()) { -//// // read what we can ... note, since there is no gaurantee that underlying buffer might -//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. -//// // see if we can optimize this later ... -//// val buffer = fetchCurrentBuffer() -//// val bufferRemaining = buffer.remaining() -//// val max = math.min(buff.length, bufferRemaining) -//// val bytesRead = inStrm.read(buff, 0, max) -//// -//// if (bytesRead > 0) { -//// buffer.put(buff, 0, bytesRead) -//// totalBytesRead += bytesRead -//// // bump position too .. -//// globalPosition += bytesRead -//// // buffer.position(buffer.position + bytesRead) -//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -//// } -//// else if (-1 == bytesRead) { -//// // if we had already read some data in the loop, return that. -//// if (totalBytesRead > 0) return totalBytesRead -//// return -1 -//// } // nothing available to read, retry later. return -//// else if (0 == bytesRead) { -//// return totalBytesRead -//// } -//// -//// // toNonEmptyBuffer() -//// } -//// -//// totalBytesRead -//// } -//// -//// // This is essentially a workaround to exposing underlying buffers -//// // Note: very similar to readFrom(InputStream) : not trying anything fancy to reduce -//// // code for performance reasons. -//// def readFrom(inStrm: DataInput): Long = { -//// -//// assert (writable) -//// releasePendingContainers() -//// -//// // this also allows us to avoid nasty corner cases in the loop. -//// // if (! hasRemaining()) throw new BufferOverflowException -//// if (! hasRemaining()) return 0 -//// -//// var totalBytesRead = 0L -//// -//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -//// -//// while (hasRemaining()) { -//// // read what we can ... note, since there is no gaurantee that underlying buffer might -//// // expose array() method, we do double copy - from stream to buff and from buff to bytearray. -//// // see if we can optimize this later ... -//// val buffer = fetchCurrentBuffer() -//// val bufferRemaining = buffer.remaining() -//// val max = math.min(buff.length, bufferRemaining) -//// inStrm.readFully(buff, 0, max) -//// val bytesRead = max -//// -//// if (bytesRead > 0) { -//// buffer.put(buff, 0, bytesRead) -//// totalBytesRead += bytesRead -//// // bump position too .. -//// globalPosition += bytesRead -//// // buffer.position(buffer.position() + bytesRead) -//// if (bytesRead >= bufferRemaining) toNonEmptyBuffer() -//// } -//// else if (-1 == bytesRead) { -//// // if we had already read some data in the loop, return that. -//// if (totalBytesRead > 0) return totalBytesRead -//// return -1 -//// } // nothing available to read, retry later. 
return -//// else if (0 == bytesRead) { -//// return totalBytesRead -//// } -//// -//// // toNonEmptyBuffer() -//// } -//// -//// totalBytesRead -//// } -//// -//// // This is essentially a workaround to exposing underlying buffers -//// // Note: tries to do it efficiently without needing to load everything into memory -//// // (particularly for diskbacked buffers, etc). -//// def writeTo(channel: WritableByteChannel, cleanup: Boolean): Long = { -//// -//// assert (readable) -//// releasePendingContainers() -//// -//// // this also allows us to avoid nasty corner cases in the loop. -//// if (! hasRemaining()) throw new BufferUnderflowException -//// -//// var totalBytesWritten = 0L -//// -//// while (hasRemaining()) { -//// // Write what we can ... -//// val buffer = fetchCurrentBuffer() -//// val bufferRemaining = buffer.remaining() -//// assert (bufferRemaining > 0) -//// val bytesWritten = channel.write(buffer) -//// -//// if (bytesWritten > 0) { -//// totalBytesWritten += bytesWritten -//// // bump position too .. -//// globalPosition += bytesWritten -//// if (bytesWritten >= bufferRemaining) toNonEmptyBuffer() -//// assert (! hasRemaining() || currentRemaining() > 0) -//// } -//// else if (0 == bytesWritten) { -//// return totalBytesWritten -//// } -//// -//// // toNonEmptyBuffer() -//// } -//// -//// assert (! hasRemaining()) -//// if (cleanup) { -//// free() -//// } -//// totalBytesWritten -//// } -//// -//// // This is essentially a workaround to exposing underlying buffers -//// def writeTo(outStrm: OutputStream, cleanup: Boolean): Long = { -//// -//// assert (readable) -//// releasePendingContainers() -//// -//// // this also allows us to avoid nasty corner cases in the loop. -//// if (! hasRemaining()) throw new BufferUnderflowException -//// -//// var totalBytesWritten = 0L -//// val buff = new Array[Byte](LargeByteBuffer.TEMP_ARRAY_SIZE) -//// -//// while (hasRemaining()) { -//// // write what we can ... note, since there is no gaurantee that underlying buffer might -//// // expose array() method, we do double copy - from bytearray to buff and from -//// // buff to outputstream. see if we can optimize this later ... -//// val buffer = fetchCurrentBuffer() -//// val bufferRemaining = buffer.remaining() -//// val size = math.min(bufferRemaining, buff.length) -//// buffer.get(buff, 0, size) -//// outStrm.write(buff, 0, size) -//// -//// totalBytesWritten += size -//// // bump position too .. -//// globalPosition += size -//// -//// if (size >= bufferRemaining) toNonEmptyBuffer() -//// } -//// -//// toNonEmptyBuffer() -//// if (cleanup) { -//// free() -//// } -//// totalBytesWritten -//// } -//// -//// def asInputStream(): InputStream = { -//// new InputStream() { -//// override def read(): Int = { -//// if (! hasRemaining()) return -1 -//// get() -//// } -//// -//// override def read(arr: Array[Byte], off: Int, len: Int): Int = { -//// if (! hasRemaining()) return -1 -//// -//// get(arr, off, len) -//// } -//// -//// override def available(): Int = { -//// // current remaining is what can be read without blocking -//// // anything higher might need disk access/buffer swapping. -//// /* -//// val left = remaining() -//// math.min(left, Int.MaxValue).asInstanceOf[Int] -//// */ -//// currentRemaining() -//// } -//// } -//// } -//// -//// def getCleaner() = cleaner -//// -//// /** -//// * @param cleaner The previous cleaner, so that the caller can chain them if required. 
-//// * @return -//// */ -//// private[spark] def overrideCleaner(cleaner: BufferCleaner): BufferCleaner = { -//// overrideCleaner(cleaner, allowOverride = true) -//// } -//// -//// private def overrideCleaner(cleaner: BufferCleaner, allowOverride: Boolean): BufferCleaner = { -//// if (! this.allowCleanerOverride) { -//// // allowCleanerOverride = false is used for EMPTY_BUFFER - where we do not allow free -//// return this.cleaner -//// } -//// -//// this.allowCleanerOverride = allowOverride -//// assert (null != cleaner) -//// val prev = this.cleaner -//// this.cleaner = cleaner -//// // logInfo("Overriding " + prev + " with " + this.cleaner) -//// prev -//// } -//// -//// private def doReleaseAll() { -//// for (container <- containers) { -//// container.release() -//// } -//// } -//// -//// def free(invokeCleaner: Boolean = true) { -//// // logInfo("Free on " + this + ", cleaner = " + cleaner) -//// // always invoking release -//// doReleaseAll() -//// -//// if (invokeCleaner) cleaner.clean(this) -//// } -//// -//// private def doDispose(needRelease: Boolean) { -//// -//// if (disposeLocationThrowable ne null) { -//// logError("Already free'ed earlier at : ", disposeLocationThrowable) -//// logError("Current at ", new Throwable) -//// throw new IllegalStateException("Already freed.") -//// } -//// disposeLocationThrowable = new Throwable() -//// -//// // Forcefully cleanup all -//// if (needRelease) doReleaseAll() -//// -//// // Free in a different loop, in case different containers refer to same resource -//// // to release (like file) -//// for (container <- containers) { -//// container.free() -//// } -//// -//// needReleaseIndices.clear() -//// -//// // We should not use this buffer anymore : set the values such that f -//// // we dont ... -//// globalPosition = 0 -//// globalLimit = 0 -//// globalCapacity = 0 -//// } -//// -//// // copy data over ... MUST be used only for cases where array is known to be -//// // small to begin with. slightly risky method due to that assumption -//// def toByteArray(): Array[Byte] = { -//// val positionBackup = position() -//// val size = remaining() -//// if (size > Int.MaxValue) { -//// throw new IllegalStateException( -//// "Attempt to convert LargeByteBuffer to byte array when data held is more than 2G") -//// } -//// -//// val retval = new Array[Byte](size.asInstanceOf[Int]) -//// val readSize = get(retval, 0, retval.length) -//// assert (readSize == retval.length, -//// "readSize = " + readSize + ", retval.length = " + retval.length) -//// -//// position(positionBackup) -//// -//// retval -//// } -//// -//// // copy data over ... MUST be used only for cases where array is known to be -//// // small to begin with. slightly risky method due to that assumption -//// def toByteBuffer(): ByteBuffer = { -//// ByteBuffer.wrap(toByteArray()) -//// } -//// -//// def toInMemoryBuffer(ioConf: IOConfig): LargeByteBuffer = { -//// val retval = LargeByteBuffer.allocateMemoryBuffer(remaining(), ioConf) -//// val currentPosition = position() -//// retval.put(this) -//// position(currentPosition) -//// retval.clear() -//// retval -//// } -//// -//// -//// -//// // This is ONLY used for testing : that too as part of development of this and associated classes -//// // remove before contributing to spark. 
-//// def hexDump(): String = { -//// if (remaining() * 64 > Int.MaxValue) { -//// throw new UnsupportedOperationException("buffer too large " + remaining()) -//// } -//// -//// val sb = new StringBuilder((remaining() * 2).asInstanceOf[Int]) -//// -//// var perLine = 0 -//// var first = true -//// for (b <- toByteArray()) { -//// perLine += 1 -//// if (perLine % 8 == 0) { -//// sb.append('\n') -//// first = true -//// } -//// if (! first) sb.append(' ') -//// first = false -//// sb.append(java.lang.Integer.toHexString(b & 0xff)) -//// } -//// sb.append('\n') -//// sb.toString() -//// } -//// -//// override def toString: String = { -//// val sb: StringBuffer = new StringBuffer -//// sb.append(getClass.getName) -//// sb.append(' ') -//// sb.append(System.identityHashCode(this)) -//// sb.append("@[pos=") -//// sb.append(position()) -//// sb.append(" lim=") -//// sb.append(limit()) -//// sb.append(" cap=") -//// sb.append(capacity()) -//// sb.append("]") -//// sb.toString -//// } -//// -//// -//// -//// override def finalize(): Unit = { -//// var marked = false -//// if (containers ne null) { -//// if (containers.exists(container => container.isAcquired && container.requireRelease())) { -//// marked = true -//// logError("BUG: buffer was not released - and now going out of scope. " + -//// "Potential resource leak. Allocated at ", allocateLocationThrowable) -//// containers.foreach(_.release()) -//// } -//// if (containers.exists(container => !container.isFreed && container.requireFree())) { -//// if (!marked) { -//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak", -//// allocateLocationThrowable) -//// } -//// else { -//// logError("BUG: buffer was not freed - and now going out of scope. Potential resource leak") -//// } -//// containers.foreach(_.free()) -//// } -//// } -//// super.finalize() -//// } -////} -//// -//// -////object LargeByteBuffer extends Logging { -//// -//// private val noopDisposeFunction = new BufferCleaner() { -//// protected def doClean(buffer: LargeByteBuffer) { -//// buffer.free(invokeCleaner = false) -//// } -//// } -//// -//// val enableExpensiveAssert = false -//// private val EMPTY_BYTEBUFFER = ByteBuffer.allocate(0) -//// val EMPTY_BUFFER = new LargeByteBuffer(ArrayBuffer( -//// new HeapByteBufferContainer(EMPTY_BYTEBUFFER, false)), false, false) -//// // Do not allow anyone else to override cleaner -//// EMPTY_BUFFER.overrideCleaner(noopDisposeFunction, allowOverride = false) -//// -//// // 8K sufficient ? 
-//// private val TEMP_ARRAY_SIZE = 8192 -//// -//// /** -//// * Create a LargeByteBuffer of specified size which is split across -//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by in memory -//// * ByteBuffer -//// * -//// */ -//// def allocateMemoryBuffer(totalSize: Long, ioConf: IOConfig): LargeByteBuffer = { -//// if (0 == totalSize) { -//// return EMPTY_BUFFER -//// } -//// -//// assert (totalSize > 0) -//// -//// val blockSize = ioConf.getMaxBlockSize(BufferType.MEMORY) -//// val numBlocks = ioConf.numBlocks(BufferType.MEMORY, totalSize) -//// val lastBlockSize = ioConf.lastBlockSize(BufferType.MEMORY, totalSize) -//// -//// assert (lastBlockSize > 0) -//// -//// val bufferArray = { -//// val arr = new ArrayBuffer[ByteBufferContainer](numBlocks) -//// for (index <- 0 until numBlocks - 1) { -//// val buff = ByteBuffer.allocate(blockSize) -//// // buff.clear() -//// arr += new HeapByteBufferContainer(buff, true) -//// } -//// arr += new HeapByteBufferContainer(ByteBuffer.allocate(lastBlockSize), true) -//// assert (arr.length == numBlocks) -//// arr -//// } -//// -//// new LargeByteBuffer(bufferArray, false, false) -//// } -//// -//// /** -//// * Create a LargeByteBuffer of specified size which is split across -//// * ByteBuffer's of size DiskStore.MAX_BLOCK_SIZE and backed by on disk -//// * -//// */ -//// private def allocateDiskBuffer(totalSize: Long, -//// blockManager: BlockManager): LargeByteBuffer = { -//// if (0 == totalSize) { -//// return EMPTY_BUFFER -//// } -//// -//// assert (totalSize > 0) -//// -//// // Create a file of the specified size. -//// val file = blockManager.diskBlockManager.createTempBlock()._2 -//// val raf = new RandomAccessFile(file, "rw") -//// try { -//// raf.setLength(totalSize) -//// } finally { -//// raf.close() -//// } -//// -//// readWriteDiskSegment(new FileSegment(file, 0, totalSize), -//// ephemeralDiskBacked = true, blockManager.ioConf) -//// } -//// -//// // The returned buffer takes up ownership of the underlying buffers -//// // (including dispos'ing that when done) -//// def fromBuffers(buffers: ByteBuffer*): LargeByteBuffer = { -//// val nonEmpty = buffers.filter(_.hasRemaining) -//// -//// // cleanup the empty buffers -//// buffers.filter(! _.hasRemaining).foreach(b => BlockManager.dispose(b)) -//// -//// -//// if (nonEmpty.isEmpty) { -//// return EMPTY_BUFFER -//// } -//// -//// // slice so that offsets match our requirement -//// new LargeByteBuffer(new ArrayBuffer() ++ nonEmpty.map(b => -//// new HeapByteBufferContainer(b.slice(), true)), false, false) -//// } -//// -//// def fromByteArrays(byteArrays: Array[Byte]*): LargeByteBuffer = { -//// // only non empty arrays -//// val arrays = byteArrays.filter(_.length > 0) -//// if (0 == arrays.length) return EMPTY_BUFFER -//// -//// new LargeByteBuffer(new ArrayBuffer() ++ arrays.map(arr => -//// new HeapByteBufferContainer(ByteBuffer.wrap(arr), true)), false, false) -//// } -//// -//// def fromLargeByteBuffers(canDispose: Boolean, inputBuffers: LargeByteBuffer*): LargeByteBuffer = { -//// -//// if (inputBuffers.isEmpty) return EMPTY_BUFFER -//// -//// if (! inputBuffers.exists(_.hasRemaining())) { -//// if (canDispose) inputBuffers.map(_.free()) -//// return EMPTY_BUFFER -//// } -//// -//// // release all temp resources acquired -//// inputBuffers.foreach(buff => buff.releasePendingContainers()) -//// // free current container if acquired. 
-//// inputBuffers.foreach(buff => if (buff.currentContainerIndex < buff.containers.length) { -//// buff.containers(buff.currentContainerIndex).release() -//// }) -//// // inputBuffers.foreach(b => b.doReleaseAll()) -//// -//// -//// // Dispose of any empty buffers -//// if (canDispose) inputBuffers.filter(! _.hasRemaining()).foreach(_.free()) -//// -//// // Find all containers we need. -//// val buffers = inputBuffers.filter(_.hasRemaining()).map(b => b.createSlice(b.remaining())) -//// -//// val containers = buffers.flatMap(_.containers) -//// assert (! containers.isEmpty) -//// // The in order containers of "buffers" seq constitute the required return value -//// val retval = new LargeByteBuffer(new ArrayBuffer() ++ containers, -//// // if you cant dispose, then we dont own the buffers : in which case, need duplicate -//// ! canDispose, inputBuffers.exists(_.ephemeralDiskBacked)) -//// -//// if (canDispose) { -//// // override dispose of all other buffers. -//// val disposeFunctions = inputBuffers.map { -//// buffer => { -//// (buffer, buffer.overrideCleaner(noopDisposeFunction)) -//// } -//// } -//// -//// val cleaner = retval.getCleaner() -//// val newCleaner = new BufferCleaner { -//// protected def doClean(buffer: LargeByteBuffer) { -//// -//// assert (retval == buffer) -//// // default cleaner. -//// cleaner.clean(retval) -//// // not required, since we are within clean anyway. -//// // retval.free(invokeCleaner = false) -//// -//// // retval.doDispose(needRelease = true) -//// -//// // This might actually call dispose twice on some (initially) empty buffers, -//// // which is fine since we now guard against that. -//// disposeFunctions.foreach(v => v._2.clean(v._1)) -//// // Call the free method too : so that buffers are marked free ... -//// disposeFunctions.foreach(v => v._1.free(invokeCleaner = false)) -//// } -//// } -//// -//// val prev = retval.overrideCleaner(newCleaner) -//// assert (prev == cleaner) -//// } -//// -//// retval -//// } -//// -//// private def checkOffsets(arr: Array[Byte], offset: Int, size: Int) { -//// if (arr == null) { -//// throw new NullPointerException -//// } else if (offset < 0 || size < 0 || offset + size > arr.length) { -//// throw new IndexOutOfBoundsException -//// } -//// } -//// -//// def allocateTransientBuffer(size: Long, blockManager: BlockManager) = { -//// if (size <= blockManager.ioConf.maxInMemSize) { -//// LargeByteBuffer.allocateMemoryBuffer(size, blockManager.ioConf) -//// } else { -//// LargeByteBuffer.allocateDiskBuffer(size, blockManager) -//// } -//// } -//// -//// def readFromDiskSegment(segment: FileSegment, ioConf: IOConfig, -//// ephemeralDiskBacked: Boolean): LargeByteBuffer = { -//// // Split the block into multiple of BlockStore.maxBlockSize -//// val segmentSize = segment.length -//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] -//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) -//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) -//// -//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) -//// -//// for (index <- 0 until numBlocks - 1) { -//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, -//// segment.offset + index * blockSize, blockSize), ioConf) -//// } -//// -//// // Last block -//// buffers += new ReadOnlyFileContainer(new FileSegment(segment.file, -//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ioConf) -//// -//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) -//// } -//// 
-//// def readWriteDiskSegment(segment: FileSegment, ephemeralDiskBacked: Boolean, -//// ioConf: IOConfig): LargeByteBuffer = { -//// -//// // Split the block into multiple of BlockStore.maxBlockSize -//// val segmentSize = segment.length -//// val blockSize = ioConf.getMaxBlockSize(BufferType.DISK).asInstanceOf[Long] -//// val numBlocks = ioConf.numBlocks(BufferType.DISK, segmentSize) -//// val lastBlockSize = ioConf.lastBlockSize(BufferType.DISK, segmentSize) -//// -//// logInfo("readWriteDiskSegment = " + segment + ", numBlocks = " + numBlocks + -//// ", lastBlockSize = " + lastBlockSize) -//// val buffers = new ArrayBuffer[ByteBufferContainer](numBlocks) -//// -//// for (index <- 0 until numBlocks - 1) { -//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, -//// segment.offset + index * blockSize, blockSize), ephemeralDiskBacked, null) -//// } -//// -//// // Last block -//// buffers += new ReadWriteFileContainer(new FileSegment(segment.file, -//// segment.offset + (numBlocks - 1) * blockSize, lastBlockSize), ephemeralDiskBacked, null) -//// -//// new LargeByteBuffer(buffers, false, ephemeralDiskBacked) -//// } -////} diff --git a/network/common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java b/network/common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java index dfb7740344ed0..fde2b78d10a5d 100644 --- a/network/common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java +++ b/network/common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java @@ -37,9 +37,7 @@ import static org.junit.Assert.*; -import org.apache.spark.network.buffer.FileSegmentManagedBuffer; -import org.apache.spark.network.buffer.ManagedBuffer; -import org.apache.spark.network.buffer.NioManagedBuffer; +import org.apache.spark.network.buffer.*; import org.apache.spark.network.client.ChunkReceivedCallback; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.TransportClient; @@ -73,7 +71,8 @@ public static void setUp() throws Exception { buf.put((byte) i); } buf.flip(); - bufferChunk = new NioManagedBuffer(buf); + final LargeByteBuffer lBuf = LargeByteBufferHelper.asLargeByteBuffer(buf); + bufferChunk = new NioManagedBuffer(lBuf); testFile = File.createTempFile("shuffle-test-file", "txt"); testFile.deleteOnExit(); @@ -91,7 +90,7 @@ public static void setUp() throws Exception { public ManagedBuffer getChunk(long streamId, int chunkIndex) { assertEquals(STREAM_ID, streamId); if (chunkIndex == BUFFER_CHUNK_INDEX) { - return new NioManagedBuffer(buf); + return new NioManagedBuffer(lBuf); } else if (chunkIndex == FILE_CHUNK_INDEX) { return new FileSegmentManagedBuffer(conf, testFile, 10, testFile.length() - 25); } else { @@ -222,10 +221,10 @@ private void assertBufferListsEqual(Listget(dst, 0, dst.length)
+ *
+ * @param dst the destination array
+ * @return this buffer
+ */
+ public LargeByteBuffer get(byte[] dst);
+
/**
* Bulk copy data from this buffer into the given array. First checks there is sufficient
* data in this buffer; if not, throws a {@link java.nio.BufferUnderflowException}.
*
- * @param dst
- * @param offset
- * @param length
+ * @param dst the destination array
+ * @param offset the offset within the destination array to write to
+ * @param length how many bytes to copy into the destination array
+ * @return this buffer
*/
- public void get(byte[] dst, int offset, int length);
+ public LargeByteBuffer get(byte[] dst, int offset, int length);
+
public LargeByteBuffer rewind();
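
For reference, a minimal usage sketch of the new single-argument overload. The class name
BulkGetExample and the sample sizes are illustrative only; LargeByteBufferHelper.asLargeByteBuffer
is the same helper already used in the ChunkFetchIntegrationSuite change earlier in this patch,
and get(byte[]) behaves like get(dst, 0, dst.length) while returning this buffer for chaining.

    import java.nio.ByteBuffer;

    import org.apache.spark.network.buffer.LargeByteBuffer;
    import org.apache.spark.network.buffer.LargeByteBufferHelper;

    public class BulkGetExample {
      public static void main(String[] args) {
        // Build a small buffer to read from.
        ByteBuffer src = ByteBuffer.allocate(32);
        for (int i = 0; i < 32; i++) {
          src.put((byte) i);
        }
        src.flip();
        LargeByteBuffer buf = LargeByteBufferHelper.asLargeByteBuffer(src);

        byte[] head = new byte[16];
        byte[] tail = new byte[16];
        // get(byte[]) fills the whole array and returns this buffer,
        // so consecutive bulk reads can be chained.
        buf.get(head).get(tail, 0, tail.length);
      }
    }
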
diff --git a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java
index dbbbc0d0d7a9d..58a621249386f 100644
--- a/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java
+++ b/network/common/src/main/java/org/apache/spark/network/buffer/WrappedLargeByteBuffer.java
@@ -101,8 +101,14 @@ public WrappedLargeByteBuffer(ByteBuffer[] underlying) {
size = sum;
}
+
@Override
- public void get(byte[] dest, int offset, int length) {
+ public WrappedLargeByteBuffer get(byte[] dest) {
+ return get(dest, 0, dest.length);
+ }
+
+ @Override
+ public WrappedLargeByteBuffer get(byte[] dest, int offset, int length) {
if (length > remaining()) {
throw new BufferUnderflowException();
}
@@ -114,6 +120,7 @@ public void get(byte[] dest, int offset, int length) {
updateCurrentBufferIfNeeded();
}
_pos += moved;
+ return this;
}
@Override
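
As a sketch of what the loop above does across chunk boundaries: a bulk get larger than what is
left in the current underlying ByteBuffer drains it, moves on to the next one, and keeps copying
until length bytes have been read. The snippet below assumes the ByteBuffer[] constructor shown in
this hunk accepts arbitrarily sized chunks (the test suite below builds buffers the same way); the
class and variable names are illustrative.

    import java.nio.ByteBuffer;

    import org.apache.spark.network.buffer.WrappedLargeByteBuffer;

    public class CrossChunkGetExample {
      public static void main(String[] args) {
        // Two 4-byte chunks backing one logical 8-byte buffer.
        ByteBuffer[] chunks = {
            ByteBuffer.wrap(new byte[]{0, 1, 2, 3}),
            ByteBuffer.wrap(new byte[]{4, 5, 6, 7})
        };
        WrappedLargeByteBuffer buf = new WrappedLargeByteBuffer(chunks);

        // A 6-byte read spans both chunks.
        byte[] out = new byte[6];
        buf.get(out);
        System.out.println(buf.remaining());  // 2 bytes left after the bulk read
      }
    }
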
diff --git a/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java b/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java
index 2943705c40ecd..3cbd2d8710304 100644
--- a/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java
+++ b/network/common/src/test/java/org/apache/spark/network/buffer/WrappedLargeByteBufferSuite.java
@@ -138,6 +138,16 @@ public void skipAndGet() {
assertConsistent(b);
b.skip(position);
assertConsistent(b);
+
+ int copy2Length = Math.min(20, 500 - position);
+ byte[] copy2 = new byte[copy2Length];
+ b.rewind();
+ b.skip(position);
+ b.get(copy2);
+ assertSubArrayEquals(data, position, copy2, 0, copy2Length);
+
+ b.rewind();
+ b.skip(position);
}
}
@@ -146,10 +156,18 @@ public void get() {
WrappedLargeByteBuffer b = testDataBuf();
byte[] into = new byte[500];
for (int[] offsetAndLength: new int[][]{{0, 200}, {10,10}, {300, 20}, {30, 100}}) {
+ int offset = offsetAndLength[0];
+ int length = offsetAndLength[1];
+ b.rewind();
+ b.get(into, offset, length);
+ assertConsistent(b);
+ assertSubArrayEquals(data, 0, into, offset, length);
+
+ byte[] into2 = new byte[length];
b.rewind();
- b.get(into, offsetAndLength[0], offsetAndLength[1]);
+ b.get(into2);
assertConsistent(b);
- assertSubArrayEquals(data, 0, into, offsetAndLength[0], offsetAndLength[1]);
+ assertSubArrayEquals(data, 0, into2, 0, length);
}
try {
@@ -159,6 +177,15 @@ public void get() {
fail("expected exception");
} catch (BufferUnderflowException bue) {
}
+
+ try {
+ b.rewind();
+ b.skip(1);
+ b.get(into);
+ fail("expected exception");
+ } catch (BufferUnderflowException bue) {
+ }
+
b.rewind();
b.skip(495);
assertEquals(data[495], b.get());
From b6620d0a5516519b073b49e6f191ff929c2a5152 Mon Sep 17 00:00:00 2001
From: Imran Rashid
+ * chunkSize has no effect on the LargeByteBuffer returned by
+ * {@link #largeBuffer()}.
+ *
+ * @param chunkSize size of the byte arrays used by this output stream, in bytes
+ */
public LargeByteBufferOutputStream(int chunkSize) {
output = new ByteArrayChunkOutputStream(chunkSize);
}
@@ -39,6 +51,13 @@ public void write(byte[] bytes, int off, int len) {
output.write(bytes, off, len);
}
+ /**
+ * Get all of the data written to the stream so far as a LargeByteBuffer. This method can be
+ * called multiple times, and each returned buffer will be completely independent (the data
+ * is copied for each returned buffer). It does not close the stream.
+ *
+ * @return the data written to the stream as a LargeByteBuffer
+ */
public LargeByteBuffer largeBuffer() {
return largeBuffer(LargeByteBufferHelper.MAX_CHUNK_SIZE);
}
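
A short usage sketch of the stream. Only the constructor, write, and largeBuffer() shown in the
hunks above are relied on; the org.apache.spark.util.io package in the import is an assumption,
and the class name OutputStreamExample is illustrative.

    import org.apache.spark.network.buffer.LargeByteBuffer;
    // Package assumed for illustration; adjust to where LargeByteBufferOutputStream actually lives.
    import org.apache.spark.util.io.LargeByteBufferOutputStream;

    public class OutputStreamExample {
      public static void main(String[] args) {
        // chunkSize only controls the internal byte arrays; per the javadoc above
        // it has no effect on the buffer returned by largeBuffer().
        LargeByteBufferOutputStream out = new LargeByteBufferOutputStream(8);

        byte[] data = new byte[100];
        out.write(data, 0, data.length);

        // Each call returns an independent copy of everything written so far and
        // leaves the stream open for further writes.
        LargeByteBuffer first = out.largeBuffer();   // holds the first 100 bytes
        out.write(data, 0, data.length);
        LargeByteBuffer second = out.largeBuffer();  // holds all 200 bytes
      }
    }
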
From 54d09af36e66a0fb215a19bf6160d625cbabf8c8 Mon Sep 17 00:00:00 2001
From: Imran Rashid