@@ -96,6 +96,9 @@ private[execution] object HashedRelation {
9696
9797 /**
9898 * Create a HashedRelation from an Iterator of InternalRow.
99+ *
100+ * @param isLookupAware reserve one extra boolean in the value to track whether the value has been looked up
101+ * @param value the expressions for the value inserted into the HashedRelation
99102 */
100103 def apply (
101104 input : Iterator [InternalRow ],
@@ -118,6 +121,8 @@ private[execution] object HashedRelation {
118121 if (isNullAware && ! input.hasNext) {
119122 EmptyHashedRelation
120123 } else if (key.length == 1 && key.head.dataType == LongType && ! isLookupAware) {
124+ // NOTE: LongHashedRelation does not support isLookupAware because it cannot
125+ // handle NULL keys
121126 LongHashedRelation (input, key, sizeEstimate, mm, isNullAware)
122127 } else {
123128 UnsafeHashedRelation (input, key, sizeEstimate, mm, isNullAware, isLookupAware, value)
@@ -148,7 +153,7 @@ private[joins] class UnsafeHashedRelation(
148153
149154 override def estimatedSize : Long = binaryMap.getTotalMemoryConsumption
150155
151- // re-used in get()/getValue()
156+ // re-used in get()/getValue()/values()
152157 var resultRow = new UnsafeRow (numFields)
153158
154159 override def get (key : InternalRow ): Iterator [InternalRow ] = {
@@ -186,6 +191,23 @@ private[joins] class UnsafeHashedRelation(
186191 }
187192 }
188193
// Returns an iterator over the value rows stored in `binaryMap`.
// Every call to next() re-points the shared `resultRow` at the current location's value bytes
// and returns that same `resultRow` instance, so a row handed out earlier is overwritten by
// the following next() call.
// next() throws NoSuchElementException once the underlying map iterator is exhausted.
194+ override def values (): Iterator [InternalRow ] = {
195+ val iter = binaryMap.iterator()
196+
197+ new Iterator [InternalRow ] {
198+ override def hasNext : Boolean = iter.hasNext
199+
200+ override def next (): InternalRow = {
// Fail explicitly instead of delegating the exhausted case to the underlying iterator.
201+ if (! hasNext) {
202+ throw new NoSuchElementException (" End of the iterator" )
203+ }
204+ val loc = iter.next()
// Only the value region of the location is exposed; the key bytes are not read here.
205+ resultRow.pointTo(loc.getValueBase, loc.getValueOffset, loc.getValueLength)
206+ resultRow
207+ }
208+ }
209+ }
210+
189211 override def keys (): Iterator [InternalRow ] = {
190212 val iter = binaryMap.iterator()
191213
@@ -312,23 +334,6 @@ private[joins] class UnsafeHashedRelation(
312334 override def read (kryo : Kryo , in : Input ): Unit = Utils .tryOrIOException {
313335 read(() => in.readInt(), () => in.readLong(), in.readBytes)
314336 }
315-
316- override def values (): Iterator [InternalRow ] = {
317- val iter = binaryMap.iterator()
318-
319- new Iterator [InternalRow ] {
320- override def hasNext : Boolean = iter.hasNext
321-
322- override def next (): InternalRow = {
323- if (! hasNext) {
324- throw new NoSuchElementException (" End of the iterator" )
325- }
326- val loc = iter.next()
327- resultRow.pointTo(loc.getValueBase, loc.getValueOffset, loc.getValueLength)
328- resultRow
329- }
330- }
331- }
332337}
333338
334339private [joins] object UnsafeHashedRelation {
@@ -341,6 +346,10 @@ private[joins] object UnsafeHashedRelation {
341346 isNullAware : Boolean = false ,
342347 isLookupAware : Boolean = false ,
343348 value : Option [Seq [Expression ]] = None ): HashedRelation = {
349+ if (isNullAware && isLookupAware) {
350+ throw new SparkException (
351+ " isLookupAware and isNullAware cannot be enabled at same time for UnsafeHashedRelation" )
352+ }
344353
345354 val pageSizeBytes = Option (SparkEnv .get).map(_.memoryManager.pageSizeBytes)
346355 .getOrElse(new SparkConf ().get(BUFFER_PAGESIZE ).getOrElse(16L * 1024 * 1024 ))
@@ -354,44 +363,36 @@ private[joins] object UnsafeHashedRelation {
354363 val keyGenerator = UnsafeProjection .create(key)
355364 var numFields = 0
356365
// Shared insertion helper used by both build loops below: looks up the location for the key's
// bytes in `binaryMap` and appends the (key bytes, value bytes) pair at that location.
// If the append reports failure, the map is freed and a SparkOutOfMemoryError is thrown.
// NOTE(review): the lambda parameters `key` and `value` shadow the enclosing method's `value`
// parameter (and presumably its `key` parameter) -- consider renaming to avoid confusion.
366+ val append = (key : UnsafeRow , value : UnsafeRow ) => {
367+ val loc = binaryMap.lookup(key.getBaseObject, key.getBaseOffset, key.getSizeInBytes)
368+ val success = loc.append(
369+ key.getBaseObject, key.getBaseOffset, key.getSizeInBytes,
370+ value.getBaseObject, value.getBaseOffset, value.getSizeInBytes)
371+ if (! success) {
// Free the map before raising the error.
372+ binaryMap.free()
373+ // scalastyle:off throwerror
374+ throw new SparkOutOfMemoryError (" There is not enough memory to build hash map" )
375+ // scalastyle:on throwerror
376+ }
377+ }
378+
357379 if (isLookupAware) {
358380 // Add one extra boolean value at the end as part of the row,
359381 // to track whether the corresponding key has been looked up or not.
360382 // See `ShuffledHashJoin.fullOuterJoin` for an example of usage.
361383 val valueGenerator = UnsafeProjection .create(value.get :+ Literal (false ))
362-
363384 while (input.hasNext) {
364385 val row = input.next().asInstanceOf [UnsafeRow ]
365386 numFields = row.numFields() + 1
366- val key = keyGenerator(row)
367- val value = valueGenerator(row)
368- val loc = binaryMap.lookup(key.getBaseObject, key.getBaseOffset, key.getSizeInBytes)
369- val success = loc.append(
370- key.getBaseObject, key.getBaseOffset, key.getSizeInBytes,
371- value.getBaseObject, value.getBaseOffset, value.getSizeInBytes)
372- if (! success) {
373- binaryMap.free()
374- // scalastyle:off throwerror
375- throw new SparkOutOfMemoryError (" There is not enough memory to build hash map" )
376- // scalastyle:on throwerror
377- }
387+ append(keyGenerator(row), valueGenerator(row))
378388 }
379389 } else {
380390 while (input.hasNext) {
381391 val row = input.next().asInstanceOf [UnsafeRow ]
382392 numFields = row.numFields()
383393 val key = keyGenerator(row)
384394 if (! key.anyNull) {
385- val loc = binaryMap.lookup(key.getBaseObject, key.getBaseOffset, key.getSizeInBytes)
386- val success = loc.append(
387- key.getBaseObject, key.getBaseOffset, key.getSizeInBytes,
388- row.getBaseObject, row.getBaseOffset, row.getSizeInBytes)
389- if (! success) {
390- binaryMap.free()
391- // scalastyle:off throwerror
392- throw new SparkOutOfMemoryError (" There is not enough memory to build hash map" )
393- // scalastyle:on throwerror
394- }
395+ append(key, row)
395396 } else if (isNullAware) {
396397 return EmptyHashedRelationWithAllNullKeys
397398 }
0 commit comments