@@ -76,6 +76,11 @@ private[execution] sealed trait HashedRelation extends KnownSizeEstimation {
7676 */
7777 def keys (): Iterator [InternalRow ]
7878
79+ /**
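80+ * Returns an iterator over the value rows stored in this relation. Implementations that do not support value iteration throw UnsupportedOperationException.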
81+ */
82+ def values (): Iterator [InternalRow ]
83+
7984 /**
8085 * Returns a read-only copy of this, to be safely used in current thread.
8186 */
@@ -97,7 +102,9 @@ private[execution] object HashedRelation {
97102 key : Seq [Expression ],
98103 sizeEstimate : Int = 64 ,
99104 taskMemoryManager : TaskMemoryManager = null ,
100- isNullAware : Boolean = false ): HashedRelation = {
105+ isNullAware : Boolean = false ,
106+ isLookupAware : Boolean = false ,
107+ value : Option [Seq [Expression ]] = None ): HashedRelation = {
101108 val mm = Option (taskMemoryManager).getOrElse {
102109 new TaskMemoryManager (
103110 new UnifiedMemoryManager (
@@ -110,10 +117,10 @@ private[execution] object HashedRelation {
110117
111118 if (isNullAware && ! input.hasNext) {
112119 EmptyHashedRelation
113- } else if (key.length == 1 && key.head.dataType == LongType ) {
120+ } else if (key.length == 1 && key.head.dataType == LongType && ! isLookupAware ) {
114121 LongHashedRelation (input, key, sizeEstimate, mm, isNullAware)
115122 } else {
116- UnsafeHashedRelation (input, key, sizeEstimate, mm, isNullAware)
123+ UnsafeHashedRelation (input, key, sizeEstimate, mm, isNullAware, isLookupAware, value )
117124 }
118125 }
119126}
@@ -128,15 +135,18 @@ private[execution] object HashedRelation {
128135private [joins] class UnsafeHashedRelation (
129136 private var numKeys : Int ,
130137 private var numFields : Int ,
131- private var binaryMap : BytesToBytesMap )
138+ private var binaryMap : BytesToBytesMap ,
139+ private val isLookupAware : Boolean = false )
132140 extends HashedRelation with Externalizable with KryoSerializable {
133141
// No-argument constructor, needed for serialization.
// NOTE(review): this path always constructs the relation with isLookupAware = false, and the
// field is a val; confirm values() is never expected to work on an instance built this way.
private[joins] def this() =
  this(numKeys = 0, numFields = 0, binaryMap = null, isLookupAware = false)
135143
/**
 * True when the underlying map reports the same number of keys as values,
 * i.e. no key has more than one value attached to it.
 */
override def keyIsUnique: Boolean = {
  val distinctKeys = binaryMap.numKeys()
  val storedValues = binaryMap.numValues()
  distinctKeys == storedValues
}
137147
/**
 * Creates a new relation object over the same key count, field count, underlying map and
 * lookup-aware flag as this one.
 */
override def asReadOnlyCopy(): UnsafeHashedRelation =
  new UnsafeHashedRelation(
    numKeys = numKeys,
    numFields = numFields,
    binaryMap = binaryMap,
    isLookupAware = isLookupAware)
141151
/** Size estimate of this relation: the total memory consumption reported by the underlying map. */
override def estimatedSize: Long = {
  binaryMap.getTotalMemoryConsumption
}
@@ -305,6 +315,27 @@ private[joins] class UnsafeHashedRelation(
/**
 * Kryo deserialization entry point. Delegates to the shared `read` routine, handing it
 * readers backed by the Kryo `in` stream; the whole call runs inside
 * `Utils.tryOrIOException`.
 */
override def read(kryo: Kryo, in: Input): Unit = {
  Utils.tryOrIOException {
    read(() => in.readInt(), () => in.readLong(), in.readBytes)
  }
}
318+
/**
 * Iterates over the value rows stored in the underlying map.
 *
 * Only available when this relation is lookup-aware; otherwise an
 * UnsupportedOperationException is thrown. Every call to `next()` re-points and returns the
 * same `resultRow` instance, so a caller that needs to retain a row must copy it first.
 */
override def values(): Iterator[InternalRow] = {
  if (!isLookupAware) {
    throw new UnsupportedOperationException
  }

  val locations = binaryMap.iterator()

  new Iterator[InternalRow] {
    override def hasNext: Boolean = locations.hasNext

    override def next(): InternalRow =
      if (hasNext) {
        val location = locations.next()
        resultRow.pointTo(
          location.getValueBase, location.getValueOffset, location.getValueLength)
        resultRow
      } else {
        throw new NoSuchElementException(" End of the iterator")
      }
  }
}
308339}
309340
310341private [joins] object UnsafeHashedRelation {
@@ -314,7 +345,9 @@ private[joins] object UnsafeHashedRelation {
314345 key : Seq [Expression ],
315346 sizeEstimate : Int ,
316347 taskMemoryManager : TaskMemoryManager ,
317- isNullAware : Boolean = false ): HashedRelation = {
348+ isNullAware : Boolean = false ,
349+ isLookupAware : Boolean = false ,
350+ value : Option [Seq [Expression ]] = None ): HashedRelation = {
318351
319352 val pageSizeBytes = Option (SparkEnv .get).map(_.memoryManager.pageSizeBytes)
320353 .getOrElse(new SparkConf ().get(BUFFER_PAGESIZE ).getOrElse(16L * 1024 * 1024 ))
@@ -327,27 +360,52 @@ private[joins] object UnsafeHashedRelation {
327360 // Create a mapping of buildKeys -> rows
328361 val keyGenerator = UnsafeProjection .create(key)
// Field count of the rows stored as map values; stays 0 when the input is empty.
var numFields = 0

// Inserts one (key, value) pair into the map. If the map cannot take the record, the map
// is freed and the build fails with SparkOutOfMemoryError.
def appendOrFail(keyRow: UnsafeRow, valueRow: UnsafeRow): Unit = {
  val loc = binaryMap.lookup(
    keyRow.getBaseObject, keyRow.getBaseOffset, keyRow.getSizeInBytes)
  val success = loc.append(
    keyRow.getBaseObject, keyRow.getBaseOffset, keyRow.getSizeInBytes,
    valueRow.getBaseObject, valueRow.getBaseOffset, valueRow.getSizeInBytes)
  if (!success) {
    binaryMap.free()
    // scalastyle:off throwerror
    throw new SparkOutOfMemoryError(" There is not enough memory to build hash map")
    // scalastyle:on throwerror
  }
}

if (isLookupAware) {
  // Fail with a descriptive error instead of a bare `None.get` when the caller asked for a
  // lookup-aware relation without saying which value expressions to store.
  val valueExprs = value.getOrElse {
    throw new IllegalArgumentException(
      "value expressions must be provided when isLookupAware is true")
  }

  // Add one extra boolean value at the end as part of the row,
  // to track the information that whether the corresponding key
  // has been looked up or not. See `ShuffledHashJoin.fullOuterJoin` for example of usage.
  val valueGenerator = UnsafeProjection.create(valueExprs :+ Literal(false))

  // In this mode every input row is stored, including rows whose key contains nulls, and
  // isNullAware is not consulted.
  while (input.hasNext) {
    val row = input.next().asInstanceOf[UnsafeRow]
    val keyRow = keyGenerator(row)
    val valueRow = valueGenerator(row)
    // The stored row is the projected value row (value expressions plus the lookup flag),
    // so its field count is the one to record, not the input row's.
    numFields = valueRow.numFields()
    appendOrFail(keyRow, valueRow)
  }
} else {
  while (input.hasNext) {
    val row = input.next().asInstanceOf[UnsafeRow]
    numFields = row.numFields()
    val keyRow = keyGenerator(row)
    if (!keyRow.anyNull) {
      appendOrFail(keyRow, row)
    } else if (isNullAware) {
      // A null key in null-aware mode short-circuits the whole build.
      return EmptyHashedRelationWithAllNullKeys
    }
  }
}

new UnsafeHashedRelation(key.size, numFields, binaryMap, isLookupAware)
351409 }
352410}
353411
@@ -885,6 +943,10 @@ class LongHashedRelation(
885943 * Returns an iterator for keys of InternalRow type.
886944 */
887945 override def keys (): Iterator [InternalRow ] = map.keys()
946+
/** Value iteration is not supported by this relation; always throws UnsupportedOperationException. */
override def values(): Iterator[InternalRow] =
  throw new UnsupportedOperationException
888950}
889951
890952/**
@@ -939,6 +1001,10 @@ trait NullAwareHashedRelation extends HashedRelation with Externalizable {
9391001 throw new UnsupportedOperationException
9401002 }
9411003
/** Value iteration is not supported by this relation; always throws UnsupportedOperationException. */
override def values(): Iterator[InternalRow] =
  throw new UnsupportedOperationException
1007+
/** No-op: this implementation does nothing on close. */
override def close(): Unit = ()
9431009
/** No-op: nothing is written to `out`. */
override def writeExternal(out: ObjectOutput): Unit = ()
0 commit comments