@@ -49,8 +49,16 @@ private[sql] class UnsafeRowSerializer(numFields: Int) extends Serializer with S
4949
5050private class UnsafeRowSerializerInstance (numFields : Int ) extends SerializerInstance {
5151
52+ /**
53+ * Marks the end of a stream written with [[serializeStream() ]].
54+ */
5255 private [this ] val EOF : Int = - 1
5356
57+ /**
58+ * Serializes a stream of UnsafeRows. Within the stream, each record consists of a record
59+ * length (stored as a 4-byte integer, written high byte first), followed by the record's bytes.
60+ * The end of the stream is denoted by a record with the special length `EOF` (-1).
61+ */
5462 override def serializeStream (out : OutputStream ): SerializationStream = new SerializationStream {
5563 private [this ] var writeBuffer : Array [Byte ] = new Array [Byte ](4096 )
5664 private [this ] val dOut : DataOutputStream = new DataOutputStream (out)
@@ -59,32 +67,31 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
5967 val row = value.asInstanceOf [UnsafeRow ]
6068 assert(row.getPool == null , " UnsafeRowSerializer does not support ObjectPool" )
6169 dOut.writeInt(row.getSizeInBytes)
62- var dataRemaining : Int = row.getSizeInBytes
63- val baseObject = row.getBaseObject
64- var rowReadPosition : Long = row.getBaseOffset
65- while (dataRemaining > 0 ) {
66- val toTransfer : Int = Math .min(writeBuffer.length, dataRemaining)
67- PlatformDependent .copyMemory(
68- baseObject,
69- rowReadPosition,
70- writeBuffer,
71- PlatformDependent .BYTE_ARRAY_OFFSET ,
72- toTransfer)
73- out.write(writeBuffer, 0 , toTransfer)
74- rowReadPosition += toTransfer
75- dataRemaining -= toTransfer
76- }
70+ row.writeToStream(out, writeBuffer)
7771 this
7872 }
73+
/**
 * Accepts the shuffle key without writing anything to the stream.
 *
 * The key is only needed on the map side, where it is used to compute partition ids, so it is
 * never shuffled. The only work done here is a sanity check that the key is an `Int`.
 */
override def writeKey[T: ClassTag](key: T): SerializationStream = {
  val keyIsInt: Boolean = key.isInstanceOf[Int]
  assert(keyIsInt)
  this
}
/**
 * Unsupported operation: this method is never called by shuffle code.
 *
 * @param iter ignored
 * @throws UnsupportedOperationException always; the message names the rejected operation so
 *                                       that an unexpected call is easy to diagnose
 */
override def writeAll[T: ClassTag](iter: Iterator[T]): SerializationStream = {
  throw new UnsupportedOperationException(
    "UnsafeRowSerializer does not support writeAll()")
}
85+
/**
 * Unsupported operation: this method is never called by shuffle code.
 *
 * @param t ignored
 * @throws UnsupportedOperationException always; the message names the rejected operation so
 *                                       that an unexpected call is easy to diagnose
 */
override def writeObject[T: ClassTag](t: T): SerializationStream = {
  throw new UnsupportedOperationException(
    "UnsafeRowSerializer does not support writeObject()")
}
90+
/** Flushes the `DataOutputStream` that wraps the destination stream. */
override def flush(): Unit = dOut.flush()
94+
8895 override def close (): Unit = {
8996 writeBuffer = null
9097 dOut.writeInt(EOF )
@@ -95,6 +102,7 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
95102 override def deserializeStream (in : InputStream ): DeserializationStream = {
96103 new DeserializationStream {
97104 private [this ] val dIn : DataInputStream = new DataInputStream (in)
105+ // 1024 is a default buffer size; this buffer will grow to accommodate larger rows
98106 private [this ] var rowBuffer : Array [Byte ] = new Array [Byte ](1024 )
99107 private [this ] var row : UnsafeRow = new UnsafeRow ()
100108 private [this ] var rowTuple : (Int , UnsafeRow ) = (0 , row)
@@ -126,14 +134,40 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
126134 }
127135 }
128136 }
129- override def asIterator : Iterator [Any ] = throw new UnsupportedOperationException
130- override def readKey [T : ClassTag ](): T = throw new UnsupportedOperationException
131- override def readValue [T : ClassTag ](): T = throw new UnsupportedOperationException
132- override def readObject [T : ClassTag ](): T = throw new UnsupportedOperationException
133- override def close (): Unit = dIn.close()
137+
/**
 * Unsupported operation: this method is never called by shuffle code.
 *
 * @throws UnsupportedOperationException always; the message names the rejected operation so
 *                                       that an unexpected call is easy to diagnose
 */
override def asIterator: Iterator[Any] = {
  throw new UnsupportedOperationException(
    "UnsafeRowSerializer does not support asIterator")
}
142+
/**
 * Returns a placeholder instead of reading a key from the stream.
 *
 * writeKey() skips serialization of the key, so there is nothing to read here; the returned
 * dummy value (`null`) is expected to be discarded.
 */
override def readKey[T: ClassTag](): T = {
  val placeholder: Any = null
  placeholder.asInstanceOf[T]
}
148+
/**
 * Reads the next record from the stream and returns it as a row.
 *
 * A record is a length read with `readInt()` followed by that many bytes of row data. The bytes
 * are read into `rowBuffer`, which is replaced by a larger array when the record does not fit,
 * and the `row` field is then pointed at the buffer and returned; no new row is allocated here.
 *
 * NOTE(review): the length is not compared against the `EOF` marker, so this presumably relies
 * on callers knowing how many records to read -- confirm against the call sites.
 */
override def readValue[T: ClassTag](): T = {
  val numBytes: Int = dIn.readInt()
  if (numBytes > rowBuffer.length) {
    rowBuffer = new Array[Byte](numBytes)
  }
  ByteStreams.readFully(in, rowBuffer, 0, numBytes)
  row.pointTo(rowBuffer, PlatformDependent.BYTE_ARRAY_OFFSET, numFields, numBytes, null)
  row.asInstanceOf[T]
}
158+
/**
 * Unsupported operation: this method is never called by shuffle code.
 *
 * @throws UnsupportedOperationException always; the message names the rejected operation so
 *                                       that an unexpected call is easy to diagnose
 */
override def readObject[T: ClassTag](): T = {
  throw new UnsupportedOperationException(
    "UnsafeRowSerializer does not support readObject()")
}
163+
/** Closes the `DataInputStream` that wraps the source stream. */
override def close(): Unit = dIn.close()
134167 }
135168 }
136169
170+ // These methods are never called by shuffle code.
/**
 * Unsupported operation: single-object serialization is never called by shuffle code.
 *
 * @param t ignored
 * @throws UnsupportedOperationException always; the message names the rejected operation so
 *                                       that an unexpected call is easy to diagnose
 */
override def serialize[T: ClassTag](t: T): ByteBuffer =
  throw new UnsupportedOperationException(
    "UnsafeRowSerializer does not support serialize()")
/**
 * Unsupported operation: single-object deserialization is never called by shuffle code.
 *
 * @param bytes ignored
 * @throws UnsupportedOperationException always; the message names the rejected operation so
 *                                       that an unexpected call is easy to diagnose
 */
override def deserialize[T: ClassTag](bytes: ByteBuffer): T =
  throw new UnsupportedOperationException(
    "UnsafeRowSerializer does not support deserialize()")
0 commit comments