[SPARK-45905][SQL] Least common type between decimal types should retain integral digits first #43781
Changes from all commits
@@ -146,6 +146,18 @@ object DecimalType extends AbstractDataType {
     DecimalType(min(precision, MAX_PRECISION), min(scale, MAX_SCALE))
   }

+  private[sql] def boundedPreferIntegralDigits(precision: Int, scale: Int): DecimalType = {
+    if (precision <= MAX_PRECISION) {
+      DecimalType(precision, scale)
+    } else {
+      // If we have to reduce the precision, we should retain the digits in the integral
+      // part first, as they are more significant to the value. Here we reduce the scale
+      // as well to drop the digits in the fractional part.
+      val diff = precision - MAX_PRECISION
+      DecimalType(MAX_PRECISION, math.max(0, scale - diff))
+    }
+  }
+
   private[sql] def checkNegativeScale(scale: Int): Unit = {
     if (scale < 0 && !SqlApiConf.get.allowNegativeScaleOfDecimalEnabled) {
       throw DataTypeErrors.negativeScaleNotAllowedError(scale)

Contributor (review comment on boundedPreferIntegralDigits): Looks good.
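To make the behavior change concrete, here is a small self-contained sketch of both truncation strategies (not the actual Spark API; the tuple-returning helpers stand in for the real methods, and MAX_PRECISION and MAX_SCALE are both 38 in Spark):

    object DecimalBoundSketch {
      val MaxPrecision = 38
      val MaxScale = 38

      // Legacy DecimalType.bounded: clamp precision and scale independently,
      // which can silently drop integral digits while keeping fractional ones.
      def bounded(precision: Int, scale: Int): (Int, Int) =
        (math.min(precision, MaxPrecision), math.min(scale, MaxScale))

      // New boundedPreferIntegralDigits: when precision overflows, shrink the
      // scale by the overflow so that the integral digits survive.
      def boundedPreferIntegralDigits(precision: Int, scale: Int): (Int, Int) =
        if (precision <= MaxPrecision) (precision, scale)
        else (MaxPrecision, math.max(0, scale - (precision - MaxPrecision)))

      def main(args: Array[String]): Unit = {
        // Widening decimal(38, 0) with decimal(10, 10): scale = max(0, 10) = 10 and
        // the integral range is max(38, 0) = 38, so the unbounded result would be
        // decimal(48, 10), which exceeds the max precision.
        println(bounded(48, 10))                     // (38,10): 10 integral digits lost
        println(boundedPreferIntegralDigits(48, 10)) // (38,0): integral digits kept
      }
    }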
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.analysis
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.Literal._
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
@@ -64,7 +65,11 @@ object DecimalPrecision extends TypeCoercionRule {
   def widerDecimalType(p1: Int, s1: Int, p2: Int, s2: Int): DecimalType = {
     val scale = max(s1, s2)
     val range = max(p1 - s1, p2 - s2)
-    DecimalType.bounded(range + scale, scale)
+    if (conf.getConf(SQLConf.LEGACY_RETAIN_FRACTION_DIGITS_FIRST)) {
+      DecimalType.bounded(range + scale, scale)
+    } else {
+      DecimalType.boundedPreferIntegralDigits(range + scale, scale)
+    }
   }

   override def transform: PartialFunction[Expression, Expression] = {

Member (review comment): There are many usages of …
Contributor (Author): To limit the scope to type coercion only. Some arithmetic operations also call it to determine the result decimal type, and I don't want to change that part.
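One way to observe the two code paths (a hypothetical spark-shell session against a build that contains this patch; coalesce triggers the least-common-type coercion shown above for decimal(38, 0) vs. decimal(10, 10)):

    // New default: integral digits win, fractional digits are dropped.
    spark.conf.set("spark.sql.legacy.decimal.retainFractionDigitsOnTruncate", "false")
    spark.sql("SELECT typeof(coalesce(cast(1 AS decimal(38, 0)), cast(1 AS decimal(10, 10))))").show()
    // expected: decimal(38,0)

    // Legacy behavior: fractional digits are retained, integral digits truncated.
    spark.conf.set("spark.sql.legacy.decimal.retainFractionDigitsOnTruncate", "true")
    spark.sql("SELECT typeof(coalesce(cast(1 AS decimal(38, 0)), cast(1 AS decimal(10, 10))))").show()
    // expected: decimal(38,10)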
@@ -4541,6 +4541,16 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)

+  val LEGACY_RETAIN_FRACTION_DIGITS_FIRST =
+    buildConf("spark.sql.legacy.decimal.retainFractionDigitsOnTruncate")
+      .internal()
+      .doc("When set to true, we will try to retain the fraction digits first rather than " +
+        "integral digits as prior Spark 4.0, when getting a least common type between decimal " +
+        "types, and the result decimal precision exceeds the max precision.")
+      .version("4.0.0")
+      .booleanConf
+      .createWithDefault(false)
+
   /**
    * Holds information about keys that have been deprecated.
    *
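Since it is an internal conf, the flag does not show up in the generated documentation, but it can still be flipped per session (illustrative):

    // revert to the pre-4.0 truncation behavior for the current session
    spark.sql("SET spark.sql.legacy.decimal.retainFractionDigitsOnTruncate=true")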
@@ -5425,7 +5435,7 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
   }

   def legacyRaiseErrorWithoutErrorClass: Boolean =
-      getConf(SQLConf.LEGACY_RAISE_ERROR_WITHOUT_ERROR_CLASS)
+    getConf(SQLConf.LEGACY_RAISE_ERROR_WITHOUT_ERROR_CLASS)

   /** ********************** SQLConf functionality methods ************ */

Contributor (review comment): noise? (whitespace changes best made in a non-bugfix PR?)
Contributor (Author): Since I touched this file, I just fixed the wrong indentation.
Review discussion:

Reviewer: AFAIK, the arithmetic operations did not strictly follow this rule.

Author: Which one does not follow? The final decimal type can be different, as there is one more truncation step.

Reviewer: * and /. For example: …

Author: This is not the Spark SQL multiply. Please take a look at Multiply#resultDecimalType.
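For context on that exchange, here is a standalone sketch of the decimal multiply result-type rule, modeled on Multiply#resultDecimalType and DecimalType.adjustPrecisionScale with spark.sql.decimalOperations.allowPrecisionLoss enabled (simplified: (precision, scale) tuples stand in for real DecimalType values):

    object MultiplyResultTypeSketch {
      val MaxPrecision = 38
      val MinimumAdjustedScale = 6

      // When precision overflows, favor the integral part but keep at least
      // min(scale, 6) fractional digits.
      def adjustPrecisionScale(precision: Int, scale: Int): (Int, Int) =
        if (precision <= MaxPrecision) (precision, scale)
        else {
          val intDigits = precision - scale
          val minScale = math.min(scale, MinimumAdjustedScale)
          val adjustedScale = math.max(MaxPrecision - intDigits, minScale)
          (MaxPrecision, adjustedScale)
        }

      // Multiplication: precision = p1 + p2 + 1, scale = s1 + s2.
      def multiplyResultType(p1: Int, s1: Int, p2: Int, s2: Int): (Int, Int) =
        adjustPrecisionScale(p1 + p2 + 1, s1 + s2)

      def main(args: Array[String]): Unit = {
        // decimal(38, 10) * decimal(38, 10): the unbounded result would be
        // decimal(77, 20), adjusted to decimal(38, 6). A minimum of 6 fraction
        // digits is kept even though the integral part already overflows, which
        // is why arithmetic does not strictly follow "integral digits first".
        println(multiplyResultType(38, 10, 38, 10)) // (38,6)
      }
    }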