diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index d6b663df9b88..ee8e109cba09 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -113,7 +113,7 @@ license: | ### UDFs and Built-in Functions - - Since Spark 3.0, the `date_add` and `date_sub` functions only accepts int, smallint, tinyint as the 2nd argument, fractional and string types are not valid anymore, e.g. `date_add(cast('1964-05-23' as date), '12.34')` will cause `AnalysisException`. In Spark version 2.4 and earlier, if the 2nd argument is fractional or string value, it will be coerced to int value, and the result will be a date value of `1964-06-04`. + - Since Spark 3.0, the `date_add` and `date_sub` functions only accept int, smallint, tinyint as the 2nd argument, fractional and non-literal string are not valid anymore, e.g. `date_add(cast('1964-05-23' as date), 12.34)` will cause `AnalysisException`. Note that, string literals are still allowed, but Spark will throw Analysis Exception if the string content is not a valid integer. In Spark version 2.4 and earlier, if the 2nd argument is fractional or string value, it will be coerced to int value, and the result will be a date value of `1964-06-04`. - Since Spark 3.0, the function `percentile_approx` and its alias `approx_percentile` only accept integral value with range in `[1, 2147483647]` as its 3rd argument `accuracy`, fractional and string types are disallowed, e.g. `percentile_approx(10.0, 0.2, 1.8D)` will cause `AnalysisException`. In Spark version 2.4 and earlier, if `accuracy` is fractional or string value, it will be coerced to an int value, `percentile_approx(10.0, 0.2, 1.8D)` is operated as `percentile_approx(10.0, 0.2, 1)` which results in `10.0`. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index edcfd6fe8ab6..7817fceef93a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -295,8 +295,8 @@ class Analyzer( case (CalendarIntervalType, CalendarIntervalType) => a case (_, CalendarIntervalType) => Cast(TimeAdd(l, r), l.dataType) case (CalendarIntervalType, _) => Cast(TimeAdd(r, l), r.dataType) - case (DateType, _) => DateAdd(l, r) - case (_, DateType) => DateAdd(r, l) + case (DateType, dt) if dt != StringType => DateAdd(l, r) + case (dt, DateType) if dt != StringType => DateAdd(r, l) case _ => a } case s @ Subtract(l, r) if s.childrenResolved => (l.dataType, r.dataType) match { @@ -305,7 +305,7 @@ class Analyzer( case (TimestampType, _) => SubtractTimestamps(l, r) case (_, TimestampType) => SubtractTimestamps(l, r) case (_, DateType) => SubtractDates(l, r) - case (DateType, _) => DateSub(l, r) + case (DateType, dt) if dt != StringType => DateSub(l, r) case _ => s } case m @ Multiply(l, r) if m.childrenResolved => (l.dataType, r.dataType) match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index e149bf2f4976..5a5d7c6620fb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -23,6 +23,7 @@ import scala.annotation.tailrec import scala.collection.mutable import org.apache.spark.internal.Logging +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -63,6 +64,7 @@ object TypeCoercion { ImplicitTypeCasts :: DateTimeOperations :: WindowFrameCoercion :: + StringLiteralCoercion :: Nil // See https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types. @@ -1043,6 +1045,34 @@ object TypeCoercion { } } } + + /** + * A special rule to support string literal as the second argument of date_add/date_sub functions, + * to keep backward compatibility as a temporary workaround. + * TODO(SPARK-28589): implement ANSI type type coercion and handle string literals. + */ + object StringLiteralCoercion extends TypeCoercionRule { + override protected def coerceTypes(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + // Skip nodes who's children have not been resolved yet. + case e if !e.childrenResolved => e + case DateAdd(l, r) if r.dataType == StringType && r.foldable => + val days = try { + AnsiCast(r, IntegerType).eval().asInstanceOf[Int] + } catch { + case e: NumberFormatException => throw new AnalysisException( + "The second argument of 'date_add' function needs to be an integer.", cause = Some(e)) + } + DateAdd(l, Literal(days)) + case DateSub(l, r) if r.dataType == StringType && r.foldable => + val days = try { + AnsiCast(r, IntegerType).eval().asInstanceOf[Int] + } catch { + case e: NumberFormatException => throw new AnalysisException( + "The second argument of 'date_sub' function needs to be an integer.", cause = Some(e)) + } + DateSub(l, Literal(days)) + } + } } trait TypeCoercionRule extends Rule[LogicalPlan] with Logging { diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 2c4ed64eda80..fd3325085df9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -58,20 +58,30 @@ select date_add('2011-11-11', 1L); select date_add('2011-11-11', 1.0); select date_add('2011-11-11', 1E1); select date_add('2011-11-11', '1'); +select date_add('2011-11-11', '1.2'); select date_add(date'2011-11-11', 1); select date_add(timestamp'2011-11-11', 1); select date_sub(date'2011-11-11', 1); +select date_sub(date'2011-11-11', '1'); +select date_sub(date'2011-11-11', '1.2'); select date_sub(timestamp'2011-11-11', 1); select date_sub(null, 1); select date_sub(date'2011-11-11', null); select date'2011-11-11' + 1E1; +select date'2011-11-11' + '1'; select null + date '2001-09-28'; select date '2001-09-28' + 7Y; select 7S + date '2001-09-28'; select date '2001-10-01' - 7; +select date '2001-10-01' - '7'; select date '2001-09-28' + null; select date '2001-09-28' - null; +-- date add/sub with non-literal string column +create temp view v as select '1' str; +select date_add('2011-11-11', str) from v; +select date_sub('2011-11-11', str) from v; + -- subtract dates select null - date '2019-10-06'; select date '2001-10-01' - date '2001-09-28'; diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index f440b5fa1b89..aec04da4dd54 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 77 +-- Number of queries: 85 -- !query @@ -266,10 +266,18 @@ cannot resolve 'date_add(CAST('2011-11-11' AS DATE), 10.0D)' due to data type mi -- !query select date_add('2011-11-11', '1') -- !query schema +struct +-- !query output +2011-11-12 + + +-- !query +select date_add('2011-11-11', '1.2') +-- !query schema struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'date_add(CAST('2011-11-11' AS DATE), '1')' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, ''1'' is of string type.; line 1 pos 7 +The second argument of 'date_add' function needs to be an integer.; -- !query @@ -296,6 +304,23 @@ struct 2011-11-10 +-- !query +select date_sub(date'2011-11-11', '1') +-- !query schema +struct +-- !query output +2011-11-10 + + +-- !query +select date_sub(date'2011-11-11', '1.2') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +The second argument of 'date_sub' function needs to be an integer.; + + -- !query select date_sub(timestamp'2011-11-11', 1) -- !query schema @@ -329,6 +354,15 @@ org.apache.spark.sql.AnalysisException cannot resolve 'date_add(DATE '2011-11-11', 10.0D)' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, '10.0D' is of double type.; line 1 pos 7 +-- !query +select date'2011-11-11' + '1' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'date_add(DATE '2011-11-11', CAST('1' AS DOUBLE))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST('1' AS DOUBLE)' is of double type.; line 1 pos 7 + + -- !query select null + date '2001-09-28' -- !query schema @@ -361,6 +395,15 @@ struct 2001-09-24 +-- !query +select date '2001-10-01' - '7' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'date_sub(DATE '2001-10-01', CAST('7' AS DOUBLE))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST('7' AS DOUBLE)' is of double type.; line 1 pos 7 + + -- !query select date '2001-09-28' + null -- !query schema @@ -377,6 +420,32 @@ struct NULL +-- !query +create temp view v as select '1' str +-- !query schema +struct<> +-- !query output + + + +-- !query +select date_add('2011-11-11', str) from v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'date_add(CAST('2011-11-11' AS DATE), v.`str`)' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'v.`str`' is of string type.; line 1 pos 7 + + +-- !query +select date_sub('2011-11-11', str) from v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'date_sub(CAST('2011-11-11' AS DATE), v.`str`)' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'v.`str`' is of string type.; line 1 pos 7 + + -- !query select null - date '2019-10-06' -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out index 31353bdedc69..b8c190beeae1 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out @@ -107,7 +107,7 @@ SELECT '1' + cast('2017-12-11 09:30:00' as date) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), '1')' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, ''1'' is of string type.; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), CAST('1' AS DOUBLE))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST('1' AS DOUBLE)' is of double type.; line 1 pos 7 -- !query @@ -698,7 +698,7 @@ SELECT cast('2017-12-11 09:30:00' as date) + '1' FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), '1')' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, ''1'' is of string type.; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), CAST('1' AS DOUBLE))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST('1' AS DOUBLE)' is of double type.; line 1 pos 7 -- !query @@ -790,7 +790,7 @@ SELECT cast('2017-12-11 09:30:00' as date) - '1' FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'date_sub(CAST('2017-12-11 09:30:00' AS DATE), '1')' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, ''1'' is of string type.; line 1 pos 7 +cannot resolve 'date_sub(CAST('2017-12-11 09:30:00' AS DATE), CAST('1' AS DOUBLE))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST('1' AS DOUBLE)' is of double type.; line 1 pos 7 -- !query