Skip to content

Commit 02206cd

Browse files
n-young-db dongjoon-hyun
authored and committed
[SPARK-48047][SQL] Reduce memory pressure of empty TreeNode tags
### What changes were proposed in this pull request? - Changed the `tags` variable of the `TreeNode` class to initialize lazily. This will reduce unnecessary driver memory pressure. ### Why are the changes needed? - Plans with large expression or operator trees are known to cause driver memory pressure; this is one step in alleviating that issue. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing UTs cover the behavior. Outward-facing behavior does not change. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46285 from n-young-db/treenode-tags. Authored-by: Nick Young <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 991763c commit 02206cd

File tree

1 file changed

+20
-4
lines changed
  • sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees

1 file changed

+20
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,16 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
7878
/**
7979
* A mutable map for holding auxiliary information of this tree node. It will be carried over
8080
* when this node is copied via `makeCopy`, or transformed via `transformUp`/`transformDown`.
81+
* We lazily evaluate the `tags` since the default size of a `mutable.Map` is nonzero. This
82+
* will reduce unnecessary memory pressure.
8183
*/
82-
private val tags: mutable.Map[TreeNodeTag[_], Any] = mutable.Map.empty
84+
private[this] var _tags: mutable.Map[TreeNodeTag[_], Any] = null
85+
private def tags: mutable.Map[TreeNodeTag[_], Any] = {
86+
if (_tags eq null) {
87+
_tags = mutable.Map.empty
88+
}
89+
_tags
90+
}
8391

8492
/**
8593
* Default tree pattern [[BitSet]] for a [[TreeNode]].
@@ -147,11 +155,13 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
147155
ineffectiveRules.get(ruleId.id)
148156
}
149157

158+
def isTagsEmpty: Boolean = (_tags eq null) || _tags.isEmpty
159+
150160
def copyTagsFrom(other: BaseType): Unit = {
151161
// SPARK-32753: it only makes sense to copy tags to a new node
152162
// but it's too expensive to detect other cases like node removal
153163
// so we make a compromise here to copy tags to node with no tags
154-
if (tags.isEmpty) {
164+
if (isTagsEmpty && !other.isTagsEmpty) {
155165
tags ++= other.tags
156166
}
157167
}
@@ -161,11 +171,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
161171
}
162172

163173
def getTagValue[T](tag: TreeNodeTag[T]): Option[T] = {
164-
tags.get(tag).map(_.asInstanceOf[T])
174+
if (isTagsEmpty) {
175+
None
176+
} else {
177+
tags.get(tag).map(_.asInstanceOf[T])
178+
}
165179
}
166180

167181
def unsetTagValue[T](tag: TreeNodeTag[T]): Unit = {
168-
tags -= tag
182+
if (!isTagsEmpty) {
183+
tags -= tag
184+
}
169185
}
170186

171187
/**

0 commit comments

Comments
 (0)