-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-25724] Add sorting functionality in MapType. #22712
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ import org.json4s.JsonAST.JValue | |
| import org.json4s.JsonDSL._ | ||
|
|
||
| import org.apache.spark.annotation.InterfaceStability | ||
| import org.apache.spark.sql.catalyst.util.{MapData, TypeUtils} | ||
|
|
||
| /** | ||
| * The data type for Maps. Keys in a map are not allowed to have `null` values. | ||
|
|
@@ -73,6 +74,90 @@ case class MapType( | |
| // Returns true when `f` holds for this map type itself or, failing that, recursively for | ||
| // its key type or its value type (checked in that order, stopping at the first match). | ||
| override private[spark] def existsRecursively(f: (DataType) => Boolean): Boolean = { | ||
| if (f(this)) { | ||
| true | ||
| } else { | ||
| Seq(keyType, valueType).exists(_.existsRecursively(f)) | ||
| } | ||
| } | ||
|
|
||
| private[this] class OrderedWrapper { | ||
| var isOrdered: Boolean = false | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I prefer not to make this mutable if we can. That can be a source of some pretty weird errors if we move from an unordered to an ordered map. Why do you need this?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for the quick reply :) Actually, I'm not quite sure about this. The |
||
| } | ||
|
|
||
| // Lazily created holder of the mutable `isOrdered` flag for this map type. | ||
| private[this] lazy val orderedWrapper: OrderedWrapper = { | ||
| new OrderedWrapper | ||
| } | ||
|
|
||
| // Stores `b` as the "ordered" flag held by `orderedWrapper`. | ||
| private[sql] def setOrdered(b: Boolean): Unit = { | ||
| val holder = orderedWrapper | ||
| holder.isOrdered = b | ||
| } | ||
|
|
||
| // Tells whether this map type is flagged as "ordered", i.e. the entries of its maps are | ||
| // sorted. Comparing two maps is only meaningful when both are ordered, so internal | ||
| // ordering operations (e.g. sorting values of `MapType`) consult this flag. | ||
| private[sql] def isOrdered(): Boolean = { | ||
| val holder = orderedWrapper | ||
| holder.isOrdered | ||
| } | ||
|
|
||
| // Ordering over map keys; this is what sorts the entries of a map. | ||
| @transient | ||
| private[sql] lazy val interpretedKeyOrdering: Ordering[Any] = { | ||
| TypeUtils.getInterpretedOrdering(keyType) | ||
| } | ||
|
|
||
| // Ordering over map values, built from `valueType`. | ||
| @transient | ||
| private[this] lazy val interpretedValueOrdering: Ordering[Any] = { | ||
| TypeUtils.getInterpretedOrdering(valueType) | ||
| } | ||
|
|
||
| // Ordering over whole maps. It assumes the entries of both maps are already sorted and then | ||
| // compares them entry by entry: | ||
| // - the first position whose keys differ decides the result; | ||
| // - with equal keys, the values decide, a null value sorting before any non-null value; | ||
| // - if all shared positions are equal, the map with fewer entries is the smaller one. | ||
| @transient | ||
| private[sql] lazy val interpretedOrdering: Ordering[MapData] = new Ordering[MapData] { | ||
| val keyOrdering = interpretedKeyOrdering | ||
| val valueOrdering = interpretedValueOrdering | ||
|
|
||
| def compare(left: MapData, right: MapData): Int = { | ||
| val lKeys = left.keyArray() | ||
| val lVals = left.valueArray() | ||
| val rKeys = right.keyArray() | ||
| val rVals = right.valueArray() | ||
|
|
||
| // Compares the values stored at position `pos`; null sorts before non-null. | ||
| def valueCompAt(pos: Int): Int = { | ||
| val lNull = lVals.isNullAt(pos) | ||
| val rNull = rVals.isNullAt(pos) | ||
| if (lNull || rNull) { | ||
| if (lNull && rNull) 0 else if (lNull) -1 else 1 | ||
| } else { | ||
| valueOrdering.compare(lVals.get(pos, valueType), rVals.get(pos, valueType)) | ||
| } | ||
| } | ||
|
|
||
| val shared = scala.math.min(lKeys.numElements(), rKeys.numElements()) | ||
| var idx = 0 | ||
| while (idx < shared) { | ||
| val byKey = keyOrdering.compare(lKeys.get(idx, keyType), rKeys.get(idx, keyType)) | ||
| if (byKey != 0) { | ||
| return byKey | ||
| } | ||
| val byValue = valueCompAt(idx) | ||
| if (byValue != 0) { | ||
| return byValue | ||
| } | ||
| idx += 1 | ||
| } | ||
| // Every shared position is equal, so the entry counts decide. | ||
| scala.math.signum(left.numElements() - right.numElements()) | ||
| } | ||
| } | ||
|
|
||
| // Renders this type as `MapType(<keyType>,<valueType>,<valueContainsNull>)`. | ||
| override def toString: String = { | ||
| val parts = Seq(keyType.toString, valueType.toString, valueContainsNull.toString) | ||
| parts.mkString("MapType(", ",", ")") | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to care about this ordering direction? Don't we just need comparable maps?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, this is not necessary; it is only there to make the logic complete.
#9718 did the same thing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You mean we can't remove this? If it is not necessary, it would be better to remove it.