-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-44239][SQL] Free memory allocated by large vectors when vectors are reset #41782
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e32996e
4dfbbdb
c97a0c4
9b9305a
7cdf514
ba0d8bc
dd24ec4
fb28922
21f7a58
e47eb8a
4594f34
5d03e79
3e560eb
d17ccc6
0f5f977
bfef94e
88822b3
ccb02c0
c510024
2707803
d429aa0
358f29c
72ce8a5
cc42fd1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -84,9 +84,7 @@ public long valuesNativeAddress() { | |
| return data; | ||
| } | ||
|
|
||
| @Override | ||
| public void close() { | ||
| super.close(); | ||
| protected void releaseMemory() { | ||
| Platform.freeMemory(nulls); | ||
| Platform.freeMemory(data); | ||
| Platform.freeMemory(lengthData); | ||
|
|
@@ -97,6 +95,11 @@ public void close() { | |
| offsetData = 0; | ||
| } | ||
|
|
||
| @Override | ||
| public void close() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this seems like a bug in MiMa... anyway, it's fine to have this workaround for MiMa |
||
| super.close(); | ||
| } | ||
|
|
||
| // | ||
| // APIs dealing with nulls | ||
| // | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,6 +53,8 @@ | |
| public abstract class WritableColumnVector extends ColumnVector { | ||
| private final byte[] byte8 = new byte[8]; | ||
|
|
||
| protected abstract void releaseMemory(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cloud-fan do we treat
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not a public API, I think third-party lib should update and re-compile the code when upgrading Spark versions if private APIs were used.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. got it, thanks for the information. |
||
|
|
||
| /** | ||
| * Resets this column for writing. The currently stored values are no longer accessible. | ||
| */ | ||
|
|
@@ -69,6 +71,12 @@ public void reset() { | |
| putNotNulls(0, capacity); | ||
| numNulls = 0; | ||
| } | ||
|
|
||
| if (hugeVectorThreshold > 0 && capacity > hugeVectorThreshold) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldn't this be
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if hugeVectorThreshold == 0 or hugeVectorThreshold is a small value, the ColumnVector will always releaseMemory() and reserve new memory, which may be slower than before.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know, but according to the doc and impl, this should be > -1, right?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the doc and the code don't match. Sorry.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you send a followup?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry for the late reply; filed a follow-up PR: https://github.com/apache/spark/pull/47988/files |
||
| capacity = defaultCapacity; | ||
| releaseMemory(); | ||
| reserveInternal(capacity); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -85,6 +93,7 @@ public void close() { | |
| dictionaryIds = null; | ||
| } | ||
| dictionary = null; | ||
| releaseMemory(); | ||
| } | ||
|
|
||
| public void reserveAdditional(int additionalCapacity) { | ||
|
|
@@ -95,7 +104,10 @@ public void reserve(int requiredCapacity) { | |
| if (requiredCapacity < 0) { | ||
| throwUnsupportedException(requiredCapacity, null); | ||
| } else if (requiredCapacity > capacity) { | ||
| int newCapacity = (int) Math.min(MAX_CAPACITY, requiredCapacity * 2L); | ||
| int newCapacity = | ||
| hugeVectorThreshold < 0 || requiredCapacity < hugeVectorThreshold ? | ||
| (int) Math.min(MAX_CAPACITY, requiredCapacity * 2L) : | ||
| (int) Math.min(MAX_CAPACITY, requiredCapacity * hugeVectorReserveRatio); | ||
| if (requiredCapacity <= newCapacity) { | ||
| try { | ||
| reserveInternal(newCapacity); | ||
|
|
@@ -846,7 +858,14 @@ public final void addElementsAppended(int num) { | |
| /** | ||
| * Marks this column as being constant. | ||
| */ | ||
| public final void setIsConstant() { isConstant = true; } | ||
| public final void setIsConstant() { | ||
| if (childColumns != null) { | ||
| for (WritableColumnVector c : childColumns) { | ||
| c.setIsConstant(); | ||
|
||
| } | ||
| } | ||
| isConstant = true; | ||
| } | ||
|
|
||
| /** | ||
| * Marks this column only contains null values. | ||
|
|
@@ -867,12 +886,21 @@ public final boolean isAllNull() { | |
| */ | ||
| protected int capacity; | ||
|
|
||
| /** | ||
| * The default number of rows that can be stored in this column. | ||
| */ | ||
| protected final int defaultCapacity; | ||
|
|
||
| /** | ||
| * Upper limit for the maximum capacity for this column. | ||
| */ | ||
| @VisibleForTesting | ||
| protected int MAX_CAPACITY = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH; | ||
|
|
||
| protected int hugeVectorThreshold; | ||
|
|
||
| protected double hugeVectorReserveRatio; | ||
|
|
||
| /** | ||
| * Number of nulls in this column. This is an optimization for the reader, to skip NULL checks. | ||
| */ | ||
|
|
@@ -922,6 +950,9 @@ protected boolean isArray() { | |
| protected WritableColumnVector(int capacity, DataType dataType) { | ||
| super(dataType); | ||
| this.capacity = capacity; | ||
| this.defaultCapacity = capacity; | ||
| this.hugeVectorThreshold = SQLConf.get().vectorizedHugeVectorThreshold(); | ||
| this.hugeVectorReserveRatio = SQLConf.get().vectorizedHugeVectorReserveRatio(); | ||
|
|
||
| if (isArray()) { | ||
| DataType childType; | ||
|
|
||

Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we set this as
`1` and see if there are any test failures? If not, we can change it back to `-1` and merge it.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When VECTORIZED_HUGE_VECTOR_THRESHOLD = 1, there are two UT failures, as expected.