diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java index ea6fffaebc9a..6a64cb127e6c 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java @@ -31,35 +31,58 @@ * `usage()` will be used for the function usage in brief way. * * These below are concatenated and used for the function usage in verbose way, suppose arguments, - * examples, note and since will be provided. + * examples, note, since and deprecated will be provided. * - * `arguments()` describes arguments for the expression. This should follow the format as below: + * `arguments()` describes arguments for the expression. * - * Arguments: - * * arg0 - ... - * .... - * * arg1 - ... - * .... - * - * `examples()` describes examples for the expression. This should follow the format as below: - * - * Examples: - * > SELECT ...; - * ... - * > SELECT ...; - * ... + * `examples()` describes examples for the expression. * * `note()` contains some notes for the expression optionally. * * `since()` contains version information for the expression. Version is specified by, * for example, "2.2.0". * - * We can refer the function name by `_FUNC_`, in `usage`, `arguments` and `examples`, as it's - * registered in `FunctionRegistry`. + * `deprecated()` contains deprecation information for the expression optionally, for example, + * "Deprecated since 2.2.0. Use something else instead". + * + * The format, in particular for `arguments()`, `examples()`,`note()`, `since()` and + * `deprecated()`, should strictly be as follows. + * + *
+ * @ExpressionDescription(
+ *   ...
+ *   arguments = """
+ *     Arguments:
+ *       * arg0 - ...
+ *           ....
+ *       * arg1 - ...
+ *           ....
+ *   """,
+ *   examples = """
+ *     Examples:
+ *       > SELECT ...;
+ *        ...
+ *       > SELECT ...;
+ *        ...
+ *   """,
+ *   note = """
+ *     ...
+ *   """,
+ *   since = "3.0.0",
+ *   deprecated = """
+ *     ...
+ *   """)
+ * 
+ * 
+ * + * We can refer to the function name by `_FUNC_`, in `usage()`, `arguments()` and `examples()` as + * it is registered in `FunctionRegistry`. + * + * Note that, if `extended()` is defined, `arguments()`, `examples()`, `note()`, `since()` and + * `deprecated()` should not be defined together. `extended()` exists for backward compatibility. * - * Note that, if `extended()` is defined, `arguments()`, `examples()`, `note()` and `since()` will - * be ignored and `extended()` will be used for the extended description for backward - * compatibility. + * Note these contents are used in the SparkSQL documentation for built-in functions. The contents + * here are considered as Markdown text and then rendered. */ @DeveloperApi @Retention(RetentionPolicy.RUNTIME) @@ -70,4 +93,5 @@ String examples() default ""; String note() default ""; String since() default ""; + String deprecated() default ""; } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java index d5a1b77c0ec8..b0b74dac57ac 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java @@ -30,6 +30,7 @@ public class ExpressionInfo { private String examples; private String note; private String since; + private String deprecated; public String getClassName() { return className; @@ -63,6 +64,10 @@ public String getNote() { return note; } + public String getDeprecated() { + return deprecated; + } + public String getDb() { return db; } @@ -75,13 +80,15 @@ public ExpressionInfo( String arguments, String examples, String note, - String since) { + String since, + String deprecated) { assert name != null; assert arguments != null; assert examples != null; assert examples.isEmpty() || examples.startsWith(System.lineSeparator() + " Examples:"); assert note 
!= null; assert since != null; + assert deprecated != null; this.className = className; this.db = db; @@ -91,6 +98,7 @@ public ExpressionInfo( this.examples = examples; this.note = note; this.since = since; + this.deprecated = deprecated; // Make the extended description. this.extended = arguments + examples; @@ -98,25 +106,44 @@ public ExpressionInfo( this.extended = "\n No example/argument for _FUNC_.\n"; } if (!note.isEmpty()) { + if (!note.contains(" ") || !note.endsWith(" ")) { + throw new IllegalArgumentException("'note' is malformed in the expression [" + + this.name + "]. It should start with a newline and 4 leading spaces; end " + + "with a newline and two spaces; however, got [" + note + "]."); + } this.extended += "\n Note:\n " + note.trim() + "\n"; } if (!since.isEmpty()) { + if (Integer.parseInt(since.split("\\.")[0]) < 0) { + throw new IllegalArgumentException("'since' is malformed in the expression [" + + this.name + "]. It should not start with a negative number; however, " + + "got [" + since + "]."); + } this.extended += "\n Since: " + since + "\n"; } + if (!deprecated.isEmpty()) { + if (!deprecated.contains(" ") || !deprecated.endsWith(" ")) { + throw new IllegalArgumentException("'deprecated' is malformed in the " + + "expression [" + this.name + "]. It should start with a newline and 4 " + + "leading spaces; end with a newline and two spaces; however, got [" + + deprecated + "]."); + } + this.extended += "\n Deprecated:\n " + deprecated.trim() + "\n"; + } } public ExpressionInfo(String className, String name) { - this(className, null, name, null, "", "", "", ""); + this(className, null, name, null, "", "", "", "", ""); } public ExpressionInfo(String className, String db, String name) { - this(className, db, name, null, "", "", "", ""); + this(className, db, name, null, "", "", "", "", ""); } // This is to keep the original constructor just in case. 
public ExpressionInfo(String className, String db, String name, String usage, String extended) { // `arguments` and `examples` are concatenated for the extended description. So, here // simply pass the `extended` as `arguments` and an empty string for `examples`. - this(className, db, name, usage, extended, "", "", ""); + this(className, db, name, usage, extended, "", "", "", ""); } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 46cf0f9d16f2..deb53cf6a4ba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -621,7 +621,7 @@ object FunctionRegistry { val clazz = scala.reflect.classTag[Cast].runtimeClass val usage = "_FUNC_(expr) - Casts the value `expr` to the target data type `_FUNC_`." val expressionInfo = - new ExpressionInfo(clazz.getCanonicalName, null, name, usage, "", "", "", "") + new ExpressionInfo(clazz.getCanonicalName, null, name, usage, "", "", "", "", "") (name, (expressionInfo, builder)) } @@ -641,7 +641,8 @@ object FunctionRegistry { df.arguments(), df.examples(), df.note(), - df.since()) + df.since(), + df.deprecated()) } else { // This exists for the backward compatibility with old `ExpressionDescription`s defining // the extended description in `extended()`. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index f7955bc92339..c61c47d44480 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -964,7 +964,9 @@ case class ArraySort(child: Expression) extends UnaryExpression with ArraySortLi > SELECT _FUNC_(array(1, 20, null, 3)); [20,null,3,1] """, - note = "The function is non-deterministic.", + note = """ + The function is non-deterministic. + """, since = "2.4.0") case class Shuffle(child: Expression, randomSeed: Option[Long] = None) extends UnaryExpression with ExpectsInputTypes with Stateful with ExpressionWithRandomSeed { @@ -1048,7 +1050,9 @@ case class Shuffle(child: Expression, randomSeed: Option[Long] = None) [3,4,1,2] """, since = "1.5.0", - note = "Reverse logic for arrays is available since 2.4.0." + note = """ + Reverse logic for arrays is available since 2.4.0. + """ ) case class Reverse(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { @@ -2062,7 +2066,9 @@ case class ElementAt(left: Expression, right: Expression) > SELECT _FUNC_(array(1, 2, 3), array(4, 5), array(6)); [1,2,3,4,5,6] """, - note = "Concat logic for arrays is available since 2.4.0.") + note = """ + Concat logic for arrays is available since 2.4.0. 
+ """) case class Concat(children: Seq[Expression]) extends ComplexTypeMergingExpression { private def allowedTypes: Seq[AbstractDataType] = Seq(StringType, BinaryType, ArrayType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 4e722c9237a9..8d3a64132b77 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -288,6 +288,7 @@ object CreateStruct extends FunctionBuilder { "", "", "", + "", "") ("struct", (info, this)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 784425e29739..5fb0b852b882 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1018,7 +1018,10 @@ case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[S > SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul'); 2016-08-31 09:00:00 """, - since = "1.5.0") + since = "1.5.0", + deprecated = """ + Deprecated since 3.0.0. See SPARK-25496. + """) // scalastyle:on line.size.limit case class FromUTCTimestamp(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -1229,7 +1232,10 @@ case class MonthsBetween( > SELECT _FUNC_('2016-08-31', 'Asia/Seoul'); 2016-08-30 15:00:00 """, - since = "1.5.0") + since = "1.5.0", + deprecated = """ + Deprecated since 3.0.0. See SPARK-25496. 
+ """) // scalastyle:on line.size.limit case class ToUTCTimestamp(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 1f1decc45a3f..2af2b13ad77f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -125,7 +125,9 @@ case class CurrentDatabase() extends LeafExpression with Unevaluable { > SELECT _FUNC_(); 46707d92-02f4-4817-8116-a4c3b23e6266 """, - note = "The function is non-deterministic.") + note = """ + The function is non-deterministic. + """) // scalastyle:on line.size.limit case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with Stateful with ExpressionWithRandomSeed { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index b70c34141b97..91a8ac07b908 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -78,7 +78,9 @@ trait ExpressionWithRandomSeed { > SELECT _FUNC_(null); 0.8446490682263027 """, - note = "The function is non-deterministic in general case.") + note = """ + The function is non-deterministic in general case. + """) // scalastyle:on line.size.limit case class Rand(child: Expression) extends RDG with ExpressionWithRandomSeed { @@ -118,7 +120,9 @@ object Rand { > SELECT _FUNC_(null); 1.1164209726833079 """, - note = "The function is non-deterministic in general case.") + note = """ + The function is non-deterministic in general case. 
+ """) // scalastyle:on line.size.limit case class Randn(child: Expression) extends RDG with ExpressionWithRandomSeed { diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-markdown.py index fa8124b4513a..e0529f831061 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-markdown.py @@ -20,7 +20,7 @@ from collections import namedtuple ExpressionInfo = namedtuple( - "ExpressionInfo", "className name usage arguments examples note since") + "ExpressionInfo", "className name usage arguments examples note since deprecated") def _list_function_infos(jvm): @@ -42,7 +42,8 @@ def _list_function_infos(jvm): arguments=jinfo.getArguments().replace("_FUNC_", name), examples=jinfo.getExamples().replace("_FUNC_", name), note=jinfo.getNote(), - since=jinfo.getSince())) + since=jinfo.getSince(), + deprecated=jinfo.getDeprecated())) return sorted(infos, key=lambda i: i.name) @@ -136,6 +137,27 @@ def _make_pretty_note(note): return "**Note:**\n%s\n" % note +def _make_pretty_deprecated(deprecated): + """ + Makes the deprecated description pretty and returns a formatted string if `deprecated` + is not an empty string. Otherwise, returns None. + + Expected input: + + ... + + Expected output: + **Deprecated:** + + ... + + """ + + if deprecated != "": + deprecated = "\n".join(map(lambda n: n[4:], deprecated.split("\n"))) + return "**Deprecated:**\n%s\n" % deprecated + + def generate_sql_markdown(jvm, path): """ Generates a markdown file after listing the function information. The output file @@ -162,6 +184,10 @@ def generate_sql_markdown(jvm, path): **Since:** SINCE + **Deprecated:** + + DEPRECATED +
""" @@ -174,6 +200,7 @@ def generate_sql_markdown(jvm, path): examples = _make_pretty_examples(info.examples) note = _make_pretty_note(info.note) since = info.since + deprecated = _make_pretty_deprecated(info.deprecated) mdfile.write("### %s\n\n" % name) if usage is not None: @@ -186,6 +213,8 @@ def generate_sql_markdown(jvm, path): mdfile.write(note) if since is not None and since != "": mdfile.write("**Since:** %s\n\n" % since.strip()) + if deprecated is not None: + mdfile.write(deprecated) mdfile.write("
\n\n")