From 7da0de4fca158e8ba59f2dc71da654c26acc133a Mon Sep 17 00:00:00 2001 From: Kris Mok Date: Thu, 17 Jan 2019 00:58:35 -0800 Subject: [PATCH] Make DecimalType minimum adjusted scale configurable --- .../catalyst/analysis/DecimalPrecision.scala | 2 +- .../apache/spark/sql/internal/SQLConf.scala | 12 ++ .../apache/spark/sql/types/DecimalType.scala | 13 +- .../native/decimalArithmeticOperations.sql | 31 +++ .../decimalArithmeticOperations.sql.out | 182 ++++++++++++++++-- 5 files changed, 215 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index 82692334544e..cacbb9133554 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -133,7 +133,7 @@ object DecimalPrecision extends TypeCoercionRule { // Precision: p1 - s1 + s2 + max(6, s1 + p2 + 1) // Scale: max(6, s1 + p2 + 1) val intDig = p1 - s1 + s2 - val scale = max(DecimalType.MINIMUM_ADJUSTED_SCALE, s1 + p2 + 1) + val scale = max(DecimalType.minimumAdjustedScale, s1 + p2 + 1) val prec = intDig + scale DecimalType.adjustPrecisionScale(prec, scale) } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ebc8c3705ea2..fb6bef7320e9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1347,6 +1347,15 @@ object SQLConf { .booleanConf .createWithDefault(true) + val DECIMAL_OPERATIONS_MINIMUM_ADJUSTED_SCALE = + buildConf("spark.sql.decimalOperations.minimumAdjustedScale") + .internal() + .doc("Decimal operations' minimum adjusted scale when " + + "spark.sql.decimalOperations.allowPrecisionLoss is true") + .intConf + .checkValue(scale => scale >= 0 && scale < 38, "valid scale should be in [0, 38)") + .createWithDefault(org.apache.spark.sql.types.DecimalType.DEFAULT_MINIMUM_ADJUSTED_SCALE) + val LITERAL_PICK_MINIMUM_PRECISION = buildConf("spark.sql.legacy.literal.pickMinimumPrecision") .internal() @@ -2002,6 +2011,9 @@ class SQLConf extends Serializable with Logging { def decimalOperationsAllowPrecisionLoss: Boolean = getConf(DECIMAL_OPERATIONS_ALLOW_PREC_LOSS) + def decimalOperationsMinimumAdjustedScale: Int = + getConf(DECIMAL_OPERATIONS_MINIMUM_ADJUSTED_SCALE) + def literalPickMinimumPrecision: Boolean = getConf(LITERAL_PICK_MINIMUM_PRECISION) def continuousStreamingExecutorQueueSize: Int = getConf(CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala index 25eddaf06a78..4a7bbbbca87d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala @@ -24,6 +24,7 @@ import scala.reflect.runtime.universe.typeTag import org.apache.spark.annotation.Stable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} +import org.apache.spark.sql.internal.SQLConf /** * The data type representing `java.math.BigDecimal` values. 
@@ -117,7 +118,7 @@ object DecimalType extends AbstractDataType { val MAX_SCALE = 38 val SYSTEM_DEFAULT: DecimalType = DecimalType(MAX_PRECISION, 18) val USER_DEFAULT: DecimalType = DecimalType(10, 0) - val MINIMUM_ADJUSTED_SCALE = 6 + val DEFAULT_MINIMUM_ADJUSTED_SCALE = 6 // The decimal types compatible with other numeric types private[sql] val BooleanDecimal = DecimalType(1, 0) @@ -153,6 +154,10 @@ object DecimalType extends AbstractDataType { DecimalType(min(precision, MAX_PRECISION), min(scale, MAX_SCALE)) } + def minimumAdjustedScale: Int = { + SQLConf.get.decimalOperationsMinimumAdjustedScale + } + /** * Scale adjustment implementation is based on Hive's one, which is itself inspired to * SQLServer's one. In particular, when a result precision is greater than @@ -176,9 +181,9 @@ object DecimalType extends AbstractDataType { } else { // Precision/scale exceed maximum precision. Result must be adjusted to MAX_PRECISION. val intDigits = precision - scale - // If original scale is less than MINIMUM_ADJUSTED_SCALE, use original scale value; otherwise - // preserve at least MINIMUM_ADJUSTED_SCALE fractional digits - val minScaleValue = Math.min(scale, MINIMUM_ADJUSTED_SCALE) + // If original scale is less than minimumAdjustedScale, use original scale value; otherwise + // preserve at least minimumAdjustedScale fractional digits + val minScaleValue = Math.min(scale, minimumAdjustedScale) // The resulting scale is the maximum between what is available without causing a loss of // digits for the integer part of the decimal and the minimum guaranteed scale, which is // computed above diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql index 28a0e20c0f49..a7dc89b2e71d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalArithmeticOperations.sql @@ -54,6 +54,37 @@ select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.1 select 123456789123456789.1234567890 * 1.123456789123456789; select 12345678912345.123456789123 / 0.000000012345678; +-- use a higher minimum adjusted scale and repeat the above +set spark.sql.decimalOperations.minimumAdjustedScale=12; + +-- test decimal operations +select id, a+b, a-b, a*b, a/b from decimals_test order by id; + +-- test operations between decimals and constants +select id, a*10, b/10 from decimals_test order by id; + +-- test operations on constants +select 10.3 * 3.0; +select 10.3000 * 3.0; +select 10.30000 * 30.0; +select 10.300000000000000000 * 3.000000000000000000; +select 10.300000000000000000 * 3.0000000000000000000; +select 2.35E10 * 1.0; + +-- arithmetic operations causing an overflow return NULL +select (5e36 + 0.1) + 5e36; +select (-4e36 - 0.1) - 7e36; +select 12345678901234567890.0 * 12345678901234567890.0; +select 1e35 / 0.1; +select 1.2345678901234567890E30 * 1.2345678901234567890E25; + +-- arithmetic operations causing an overflow at adjusted scale 7, return NULL +select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345; + +-- arithmetic operations causing a precision loss are truncated +select 123456789123456789.1234567890 * 1.123456789123456789; +select 12345678912345.123456789123 / 0.000000012345678; + -- return NULL instead of rounding, according to old Spark versions' behavior set 
spark.sql.decimalOperations.allowPrecisionLoss=false; diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalArithmeticOperations.sql.out index cbf44548b3cc..340e31b4a78a 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalArithmeticOperations.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 40 +-- Number of queries: 57 -- !query 0 @@ -186,33 +186,33 @@ struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 -- !query 22 -set spark.sql.decimalOperations.allowPrecisionLoss=false +set spark.sql.decimalOperations.minimumAdjustedScale=12 -- !query 22 schema struct -- !query 22 output -spark.sql.decimalOperations.allowPrecisionLoss false +spark.sql.decimalOperations.minimumAdjustedScale 12 -- !query 23 select id, a+b, a-b, a*b, a/b from decimals_test order by id -- !query 23 schema -struct +struct -- !query 23 output -1 1099 -899 NULL 0.1001001001001001 -2 24690.246 0 NULL 1 -3 1234.2234567891011 -1233.9765432108989 NULL 0.000100037913541123 -4 123456789123456790.123456789123456789 123456789123456787.876543210876543211 NULL 109890109097814272.043109406191131436 +1 1099 -899 99900 0.1001001001 +2 24690.246 0 152402061.885129 1 +3 1234.2234567891011 -1233.9765432108989 152.358023429668 0.000100037914 +4 123456789123456790.12345678912345679 123456789123456787.87654321087654321 138698367904130467.51562262075 109890109097814272.043109406191 -- !query 24 select id, a*10, b/10 from decimals_test order by id -- !query 24 schema -struct +struct -- !query 24 output 1 1000 99.9 2 123451.23 1234.5123 3 1.234567891011 123.41 -4 1234567891234567890 0.1123456789123456789 +4 1234567891234567890 0.112345678912345679 -- !query 25 @@ -242,7 +242,7 @@ struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11, -- !query 28 select 10.300000000000000000 * 3.000000000000000000 -- !query 28 schema -struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,36)> +struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,34)> -- !query 28 output 30.9 @@ -250,9 +250,9 @@ struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.00000000000000000 -- !query 29 select 10.300000000000000000 * 3.0000000000000000000 -- !query 29 schema -struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,37)> +struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,34)> -- !query 29 output -NULL +30.9 -- !query 30 @@ -290,7 +290,7 @@ NULL -- !query 34 select 1e35 / 0.1 -- !query 34 schema -struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,3)> +struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,12)> -- !query 34 output NULL @@ -314,22 +314,164 @@ NULL -- !query 37 select 123456789123456789.1234567890 * 1.123456789123456789 -- !query 37 schema -struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,28)> +struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,18)> -- 
!query 37 output -NULL +138698367904130467.654320988515622621 -- !query 38 select 12345678912345.123456789123 / 0.000000012345678 -- !query 38 schema -struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,18)> +struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,12)> -- !query 38 output -NULL +1000000073899961059796.725866331521 -- !query 39 -drop table decimals_test +set spark.sql.decimalOperations.allowPrecisionLoss=false -- !query 39 schema -struct<> +struct -- !query 39 output +spark.sql.decimalOperations.allowPrecisionLoss false + + +-- !query 40 +select id, a+b, a-b, a*b, a/b from decimals_test order by id +-- !query 40 schema +struct +-- !query 40 output +1 1099 -899 NULL 0.1001001001001001 +2 24690.246 0 NULL 1 +3 1234.2234567891011 -1233.9765432108989 NULL 0.000100037913541123 +4 123456789123456790.123456789123456789 123456789123456787.876543210876543211 NULL 109890109097814272.043109406191131436 + + +-- !query 41 +select id, a*10, b/10 from decimals_test order by id +-- !query 41 schema +struct +-- !query 41 output +1 1000 99.9 +2 123451.23 1234.5123 +3 1.234567891011 123.41 +4 1234567891234567890 0.1123456789123456789 + + +-- !query 42 +select 10.3 * 3.0 +-- !query 42 schema +struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)> +-- !query 42 output +30.9 + + +-- !query 43 +select 10.3000 * 3.0 +-- !query 43 schema +struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)> +-- !query 43 output +30.9 + + +-- !query 44 +select 10.30000 * 30.0 +-- !query 44 schema +struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,6)> +-- !query 44 output +309 + + +-- !query 45 +select 10.300000000000000000 * 3.000000000000000000 +-- !query 45 schema +struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,36)> +-- !query 45 output +30.9 + + +-- !query 46 +select 10.300000000000000000 * 3.0000000000000000000 +-- !query 46 schema +struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,37)> +-- !query 46 output +NULL + + +-- !query 47 +select 2.35E10 * 1.0 +-- !query 47 schema +struct<(CAST(2.35E+10 AS DECIMAL(12,1)) * CAST(1.0 AS DECIMAL(12,1))):decimal(6,-7)> +-- !query 47 output +23500000000 + + +-- !query 48 +select (5e36 + 0.1) + 5e36 +-- !query 48 schema +struct<(CAST((CAST(5E+36 AS DECIMAL(38,1)) + CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) + CAST(5E+36 AS DECIMAL(38,1))):decimal(38,1)> +-- !query 48 output +NULL + + +-- !query 49 +select (-4e36 - 0.1) - 7e36 +-- !query 49 schema +struct<(CAST((CAST(-4E+36 AS DECIMAL(38,1)) - CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) - CAST(7E+36 AS DECIMAL(38,1))):decimal(38,1)> +-- !query 49 output +NULL + + +-- !query 50 +select 12345678901234567890.0 * 12345678901234567890.0 +-- !query 50 schema +struct<(12345678901234567890.0 * 12345678901234567890.0):decimal(38,2)> +-- !query 50 output +NULL + + +-- !query 51 +select 1e35 / 0.1 +-- !query 51 schema +struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,3)> +-- !query 51 output +NULL + + +-- !query 52 +select 1.2345678901234567890E30 * 1.2345678901234567890E25 +-- !query 52 schema +struct<(CAST(1.2345678901234567890E+30 AS DECIMAL(25,-6)) * CAST(1.2345678901234567890E+25 AS DECIMAL(25,-6))):decimal(38,-17)> +-- !query 52 output +NULL + + +-- !query 53 +select 
12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345 +-- !query 53 schema +struct<(CAST(12345678912345678912345678912.1234567 AS DECIMAL(38,7)) + CAST(9999999999999999999999999999999.12345 AS DECIMAL(38,7))):decimal(38,7)> +-- !query 53 output +NULL + + +-- !query 54 +select 123456789123456789.1234567890 * 1.123456789123456789 +-- !query 54 schema +struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,28)> +-- !query 54 output +NULL + + +-- !query 55 +select 12345678912345.123456789123 / 0.000000012345678 +-- !query 55 schema +struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,18)> +-- !query 55 output +NULL + + +-- !query 56 +drop table decimals_test +-- !query 56 schema +struct<> +-- !query 56 output
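
Note for reviewers: the following is a minimal, self-contained Scala sketch of the scale-adjustment rule this patch parameterizes, useful for checking the new result types without running the query test suite. The object name DecimalScaleAdjustmentSketch and the explicit minAdjustedScale parameter are illustrative only; the real code reads the value from SQLConf via DecimalType.minimumAdjustedScale. The constants and the formula itself come from DecimalType.adjustPrecisionScale and the division rule in DecimalPrecision above.

// Standalone sketch (not Spark code) of DecimalType.adjustPrecisionScale with the
// minimum adjusted scale passed in explicitly instead of being read from SQLConf.
object DecimalScaleAdjustmentSketch {

  val MaxPrecision = 38 // DecimalType.MAX_PRECISION

  // Returns (precision, scale) after the adjustment applied when precision exceeds 38.
  def adjustPrecisionScale(precision: Int, scale: Int, minAdjustedScale: Int): (Int, Int) = {
    if (precision <= MaxPrecision) {
      // No adjustment needed while the result still fits.
      (precision, scale)
    } else if (scale < 0) {
      // Negative scales are only capped, never adjusted, to avoid losing integer digits.
      (MaxPrecision, scale)
    } else {
      val intDigits = precision - scale
      // Keep the original scale if it is already below the configured minimum,
      // otherwise guarantee at least minAdjustedScale fractional digits.
      val minScaleValue = math.min(scale, minAdjustedScale)
      val adjustedScale = math.max(MaxPrecision - intDigits, minScaleValue)
      (MaxPrecision, adjustedScale)
    }
  }

  def main(args: Array[String]): Unit = {
    // decimal(38,18) * decimal(38,18): unadjusted type is decimal(77,36).
    println(adjustPrecisionScale(77, 36, 6))   // (38,6)  default minimum adjusted scale
    println(adjustPrecisionScale(77, 36, 12))  // (38,12) with minimumAdjustedScale=12

    // decimal(38,18) / decimal(38,18): scale = max(minAdjustedScale, s1 + p2 + 1) = 57,
    // precision = (p1 - s1 + s2) + scale = 95 before adjustment.
    println(adjustPrecisionScale(95, 57, 6))   // (38,6)
    println(adjustPrecisionScale(95, 57, 12))  // (38,12)
  }
}

The first pair of calls shows the a*b column type moving from decimal(38,6) under the default to decimal(38,12) under spark.sql.decimalOperations.minimumAdjustedScale=12; the second pair does the same for a/b, which matches the 12-digit fractional scales visible in the new query 23 results above.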