Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions docs/sql-migration-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,32 @@ license: |

- Since Spark 3.0, when casting string value to date, timestamp and interval values, the leading and trailing white spaces(<= ACSII 32) will be trimmed before casing, e.g. `cast('2019-10-10\t as date)` results the date value `2019-10-10`. In Spark version 2.4 and earlier, only the trailing space will be removed, thus, the result is `null`.

- Since Spark 3.0, we pad decimal numbers with trailing zeros to the scale of the column for `spark-sql` interface, for example:
<table class="table">
<tr>
<th>
<b>Query</b>
</th>
<th>
<b>Spark 2.4 or Prior</b>
</th>
<th>
<b>Spark 3.0</b>
</th>
</tr>
<tr>
<td>
<code>SELECT CAST(1 AS decimal(38, 18));</code>
</td>
<td>
<code>1</code>
</td>
<td>
<code>1.000000000000000000</code>
</td>
</tr>
</table>

## Upgrading from Spark SQL 2.4 to 2.4.1

- The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ import org.apache.spark.sql.types._
> SELECT _FUNC_(10.0, array(0.5, 0.4, 0.1), 100);
[10.0,10.0,10.0]
> SELECT _FUNC_(10.0, 0.5, 100);
10
10.0
""",
since = "2.1.0")
case class ApproximatePercentile(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi
examples = """
Examples:
> SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END;
1
1.0
> SELECT CASE WHEN 1 < 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END;
2
2.0
> SELECT CASE WHEN 1 < 0 THEN 1 WHEN 2 < 0 THEN 2.0 END;
NULL
""")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,6 @@ object HiveResult {
result.map(_.zip(types).map(toHiveString)).map(_.mkString("\t"))
}

private def formatDecimal(d: java.math.BigDecimal): String = {
if (d.compareTo(java.math.BigDecimal.ZERO) == 0) {
java.math.BigDecimal.ZERO.toPlainString
} else {
d.stripTrailingZeros().toPlainString // Hive strips trailing zeros
}
}

private val primitiveTypes = Seq(
StringType,
IntegerType,
Expand Down Expand Up @@ -127,7 +119,7 @@ object HiveResult {
case (t: Timestamp, TimestampType) =>
DateTimeUtils.timestampToString(timestampFormatter, DateTimeUtils.fromJavaTimestamp(t))
case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8)
case (decimal: java.math.BigDecimal, DecimalType()) => formatDecimal(decimal)
case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString
case (interval: CalendarInterval, CalendarIntervalType) =>
SQLConf.get.intervalOutputStyle match {
case SQL_STANDARD => toSqlStandardString(interval)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,50 +24,50 @@ select id, a*10, b/10 from decimals_test order by id
-- !query 2 schema
struct<id:int,(CAST(a AS DECIMAL(38,18)) * CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,15),(CAST(b AS DECIMAL(38,18)) / CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,18)>
-- !query 2 output
1 1000 99.9
2 123451.23 1234.5123
3 1.234567891011 123.41
4 1234567891234567890 0.112345678912345679
1 1000.000000000000000 99.900000000000000000
2 123451.230000000000000 1234.512300000000000000
3 1.234567891011000 123.410000000000000000
4 1234567891234567890.000000000000000 0.112345678912345679


-- !query 3
select 10.3 * 3.0
-- !query 3 schema
struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)>
-- !query 3 output
30.9
30.90


-- !query 4
select 10.3000 * 3.0
-- !query 4 schema
struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)>
-- !query 4 output
30.9
30.90000


-- !query 5
select 10.30000 * 30.0
-- !query 5 schema
struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,6)>
-- !query 5 output
309
309.000000


-- !query 6
select 10.300000000000000000 * 3.000000000000000000
-- !query 6 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,34)>
-- !query 6 output
30.9
30.9000000000000000000000000000000000


-- !query 7
select 10.300000000000000000 * 3.0000000000000000000
-- !query 7 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,34)>
-- !query 7 output
30.9
30.9000000000000000000000000000000000


-- !query 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ select 1D, 1.2D, 1e10, 1.5e5, .10D, 0.10D, .1e5, .9e+2, 0.9e+2, 900e-1, 9.e+1
-- !query 17 schema
struct<1.0:double,1.2:double,1E+10:decimal(1,-10),1.5E+5:decimal(2,-4),0.1:double,0.1:double,1E+4:decimal(1,-4),9E+1:decimal(1,-1),9E+1:decimal(1,-1),90.0:decimal(3,1),9E+1:decimal(1,-1)>
-- !query 17 output
1.0 1.2 10000000000 150000 0.1 0.1 10000 90 90 90 90
1.0 1.2 10000000000 150000 0.1 0.1 10000 90 90 90.0 90


-- !query 18
Expand Down Expand Up @@ -339,7 +339,7 @@ select 90912830918230182310293801923652346786BD, 123.0E-28BD, 123.08BD
-- !query 35 schema
struct<90912830918230182310293801923652346786:decimal(38,0),1.230E-26:decimal(29,29),123.08:decimal(5,2)>
-- !query 35 output
90912830918230182310293801923652346786 0.0000000000000000000000000123 123.08
90912830918230182310293801923652346786 0.00000000000000000000000001230 123.08


-- !query 36
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ select id, a+b, a-b, a*b, a/b from decimals_test order by id
-- !query 6 schema
struct<id:int,(a + b):decimal(38,17),(a - b):decimal(38,17),(a * b):decimal(38,6),(a / b):decimal(38,6)>
-- !query 6 output
1 1099 -899 99900 0.1001
2 24690.246 0 152402061.885129 1
3 1234.2234567891011 -1233.9765432108989 152.358023 0.0001
1 1099.00000000000000000 -899.00000000000000000 99900.000000 0.100100
2 24690.24600000000000000 0.00000000000000000 152402061.885129 1.000000
3 1234.22345678910110000 -1233.97654321089890000 152.358023 0.000100
4 123456789123456790.12345678912345679 123456789123456787.87654321087654321 138698367904130467.515623 109890109097814272.043109


Expand All @@ -67,50 +67,50 @@ select id, a*10, b/10 from decimals_test order by id
-- !query 7 schema
struct<id:int,(CAST(a AS DECIMAL(38,18)) * CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,15),(CAST(b AS DECIMAL(38,18)) / CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,18)>
-- !query 7 output
1 1000 99.9
2 123451.23 1234.5123
3 1.234567891011 123.41
4 1234567891234567890 0.112345678912345679
1 1000.000000000000000 99.900000000000000000
2 123451.230000000000000 1234.512300000000000000
3 1.234567891011000 123.410000000000000000
4 1234567891234567890.000000000000000 0.112345678912345679


-- !query 8
select 10.3 * 3.0
-- !query 8 schema
struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)>
-- !query 8 output
30.9
30.90


-- !query 9
select 10.3000 * 3.0
-- !query 9 schema
struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)>
-- !query 9 output
30.9
30.90000


-- !query 10
select 10.30000 * 30.0
-- !query 10 schema
struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,6)>
-- !query 10 output
309
309.000000


-- !query 11
select 10.300000000000000000 * 3.000000000000000000
-- !query 11 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,34)>
-- !query 11 output
30.9
30.9000000000000000000000000000000000


-- !query 12
select 10.300000000000000000 * 3.0000000000000000000
-- !query 12 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,34)>
-- !query 12 output
30.9
30.9000000000000000000000000000000000


-- !query 13
Expand Down Expand Up @@ -198,9 +198,9 @@ select id, a+b, a-b, a*b, a/b from decimals_test order by id
-- !query 23 schema
struct<id:int,(a + b):decimal(38,18),(a - b):decimal(38,18),(a * b):decimal(38,36),(a / b):decimal(38,18)>
-- !query 23 output
1 1099 -899 NULL 0.1001001001001001
2 24690.246 0 NULL 1
3 1234.2234567891011 -1233.9765432108989 NULL 0.000100037913541123
1 1099.000000000000000000 -899.000000000000000000 NULL 0.100100100100100100
2 24690.246000000000000000 0.000000000000000000 NULL 1.000000000000000000
3 1234.223456789101100000 -1233.976543210898900000 NULL 0.000100037913541123
4 123456789123456790.123456789123456789 123456789123456787.876543210876543211 NULL 109890109097814272.043109406191131436


Expand All @@ -209,42 +209,42 @@ select id, a*10, b/10 from decimals_test order by id
-- !query 24 schema
struct<id:int,(CAST(a AS DECIMAL(38,18)) * CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,18),(CAST(b AS DECIMAL(38,18)) / CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,19)>
-- !query 24 output
1 1000 99.9
2 123451.23 1234.5123
3 1.234567891011 123.41
4 1234567891234567890 0.1123456789123456789
1 1000.000000000000000000 99.9000000000000000000
2 123451.230000000000000000 1234.5123000000000000000
3 1.234567891011000000 123.4100000000000000000
4 1234567891234567890.000000000000000000 0.1123456789123456789


-- !query 25
select 10.3 * 3.0
-- !query 25 schema
struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)>
-- !query 25 output
30.9
30.90


-- !query 26
select 10.3000 * 3.0
-- !query 26 schema
struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)>
-- !query 26 output
30.9
30.90000


-- !query 27
select 10.30000 * 30.0
-- !query 27 schema
struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,6)>
-- !query 27 output
309
309.000000


-- !query 28
select 10.300000000000000000 * 3.000000000000000000
-- !query 28 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,36)>
-- !query 28 output
30.9
30.900000000000000000000000000000000000


-- !query 29
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ select 1D, 1.2D, 1e10, 1.5e5, .10D, 0.10D, .1e5, .9e+2, 0.9e+2, 900e-1, 9.e+1
-- !query 17 schema
struct<1.0:double,1.2:double,1E+10:decimal(1,-10),1.5E+5:decimal(2,-4),0.1:double,0.1:double,1E+4:decimal(1,-4),9E+1:decimal(1,-1),9E+1:decimal(1,-1),90.0:decimal(3,1),9E+1:decimal(1,-1)>
-- !query 17 output
1.0 1.2 10000000000 150000 0.1 0.1 10000 90 90 90 90
1.0 1.2 10000000000 150000 0.1 0.1 10000 90 90 90.0 90


-- !query 18
Expand Down Expand Up @@ -339,7 +339,7 @@ select 90912830918230182310293801923652346786BD, 123.0E-28BD, 123.08BD
-- !query 35 schema
struct<90912830918230182310293801923652346786:decimal(38,0),1.230E-26:decimal(29,29),123.08:decimal(5,2)>
-- !query 35 output
90912830918230182310293801923652346786 0.0000000000000000000000000123 123.08
90912830918230182310293801923652346786 0.00000000000000000000000001230 123.08


-- !query 36
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,44 +206,44 @@ select * from spark_10747_mix order by col1 nulls last, col5 nulls last
-- !query 13 schema
struct<col1:string,col2:int,col3:double,col4:decimal(10,2),col5:decimal(20,1)>
-- !query 13 output
a 1 1.0 1 NULL
b 2 1.0 1 10
c 3 2.0 2 15.1
c 3 2.0 2 NULL
d 3 2.0 3 0
d 3 0.0 3 1
d 3 NULL 4 1
NULL 3 0.0 3 1
a 1 1.0 1.00 NULL
b 2 1.0 1.00 10.0
c 3 2.0 2.00 15.1
c 3 2.0 2.00 NULL
d 3 2.0 3.00 0.0
d 3 0.0 3.00 1.0
d 3 NULL 4.00 1.0
NULL 3 0.0 3.00 1.0


-- !query 14
select * from spark_10747_mix order by col1 desc nulls first, col5 desc nulls first
-- !query 14 schema
struct<col1:string,col2:int,col3:double,col4:decimal(10,2),col5:decimal(20,1)>
-- !query 14 output
NULL 3 0.0 3 1
d 3 0.0 3 1
d 3 NULL 4 1
d 3 2.0 3 0
c 3 2.0 2 NULL
c 3 2.0 2 15.1
b 2 1.0 1 10
a 1 1.0 1 NULL
NULL 3 0.0 3.00 1.0
d 3 0.0 3.00 1.0
d 3 NULL 4.00 1.0
d 3 2.0 3.00 0.0
c 3 2.0 2.00 NULL
c 3 2.0 2.00 15.1
b 2 1.0 1.00 10.0
a 1 1.0 1.00 NULL


-- !query 15
select * from spark_10747_mix order by col5 desc nulls first, col3 desc nulls last
-- !query 15 schema
struct<col1:string,col2:int,col3:double,col4:decimal(10,2),col5:decimal(20,1)>
-- !query 15 output
c 3 2.0 2 NULL
a 1 1.0 1 NULL
c 3 2.0 2 15.1
b 2 1.0 1 10
d 3 0.0 3 1
NULL 3 0.0 3 1
d 3 NULL 4 1
d 3 2.0 3 0
c 3 2.0 2.00 NULL
a 1 1.0 1.00 NULL
c 3 2.0 2.00 15.1
b 2 1.0 1.00 10.0
d 3 0.0 3.00 1.0
NULL 3 0.0 3.00 1.0
d 3 NULL 4.00 1.0
d 3 2.0 3.00 0.0


-- !query 16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -956,7 +956,7 @@ SELECT * FROM tt1
struct<a:string,b:string,c:decimal(12,2),d:string>
-- !query 83 output
0123456789 abc 42.12 abc
abc 0123456789 42 abcd
abc 0123456789 42.00 abcd


-- !query 84
Expand Down
Loading