Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<!-- Automatically generated byExpressionsSchemaSuite -->
## Summary
- Number of queries: 333
- Number of queries: 328
- Number of expressions that missing example: 34
- Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch
## Schema of Built-in Functions
Expand Down Expand Up @@ -123,7 +123,7 @@
| org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual | >= | SELECT 2 >= 1 | struct<(2 >= 1):boolean> |
| org.apache.spark.sql.catalyst.expressions.Greatest | greatest | SELECT greatest(10, 9, 2, 4, 3) | struct<greatest(10, 9, 2, 4, 3):int> |
| org.apache.spark.sql.catalyst.expressions.Grouping | grouping | SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name) | struct<name:string,grouping(name):tinyint,sum(age):bigint> |
| org.apache.spark.sql.catalyst.expressions.GroupingID | grouping_id | SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) | struct<name:string,grouping_id():bigint,sum(age):bigint,avg(height):double> |
| org.apache.spark.sql.catalyst.expressions.GroupingID | grouping_id | SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) | struct<name:string,grouping_id():int,sum(age):bigint,avg(height):double> |
| org.apache.spark.sql.catalyst.expressions.Hex | hex | SELECT hex(17) | struct<hex(CAST(17 AS BIGINT)):string> |
| org.apache.spark.sql.catalyst.expressions.Hour | hour | SELECT hour('2009-07-30 12:58:59') | struct<hour(CAST(2009-07-30 12:58:59 AS TIMESTAMP)):int> |
| org.apache.spark.sql.catalyst.expressions.Hypot | hypot | SELECT hypot(3, 4) | struct<HYPOT(CAST(3 AS DOUBLE), CAST(4 AS DOUBLE)):double> |
Expand All @@ -140,7 +140,6 @@
| org.apache.spark.sql.catalyst.expressions.IsNaN | isnan | SELECT isnan(cast('NaN' as double)) | struct<isnan(CAST(NaN AS DOUBLE)):boolean> |
| org.apache.spark.sql.catalyst.expressions.IsNotNull | isnotnull | SELECT isnotnull(1) | struct<(1 IS NOT NULL):boolean> |
| org.apache.spark.sql.catalyst.expressions.IsNull | isnull | SELECT isnull(1) | struct<(1 IS NULL):boolean> |
| org.apache.spark.sql.catalyst.expressions.JsonObjectKeys | json_object_keys | SELECT json_object_keys('{}') | struct<json_object_keys({}):array<string>> |
| org.apache.spark.sql.catalyst.expressions.JsonToStructs | from_json | SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') | struct<from_json({"a":1, "b":0.8}):struct<a:int,b:double>> |
| org.apache.spark.sql.catalyst.expressions.JsonTuple | json_tuple | SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') | struct<c0:string,c1:string> |
| org.apache.spark.sql.catalyst.expressions.Lag | lag | N/A | N/A |
Expand All @@ -151,7 +150,6 @@
| org.apache.spark.sql.catalyst.expressions.Length | character_length | SELECT character_length('Spark SQL ') | struct<character_length(Spark SQL ):int> |
| org.apache.spark.sql.catalyst.expressions.Length | char_length | SELECT char_length('Spark SQL ') | struct<char_length(Spark SQL ):int> |
| org.apache.spark.sql.catalyst.expressions.Length | length | SELECT length('Spark SQL ') | struct<length(Spark SQL ):int> |
| org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray | json_array_length | SELECT json_array_length('[1,2,3,4]') | struct<json_array_length([1,2,3,4]):int> |
| org.apache.spark.sql.catalyst.expressions.LessThan | < | SELECT 1 < 2 | struct<(1 < 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.LessThanOrEqual | <= | SELECT 2 <= 2 | struct<(2 <= 2):boolean> |
| org.apache.spark.sql.catalyst.expressions.Levenshtein | levenshtein | SELECT levenshtein('kitten', 'sitting') | struct<levenshtein(kitten, sitting):int> |
Expand Down Expand Up @@ -206,9 +204,6 @@
| org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct<POWER(CAST(2 AS DOUBLE), CAST(3 AS DOUBLE)):double> |
| org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct<quarter(CAST(2016-08-31 AS DATE)):int> |
| org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' | struct<%SystemDrive%UsersJohn RLIKE %SystemDrive%\Users.*:boolean> |
| org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct<rand():double> |
| org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct<rand():double> |
| org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct<randn():double> |
Copy link
Member

@HyukjinKwon HyukjinKwon May 1, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think @beliefer meant #28392 is only landed into master and we should keep these just as are at #28194. I assume rand(seed) was printed out because this test was ran against the latest master instead of branch-3.0 at 948a54e (?).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I think this golden file was generated in master, so I re-generated it in branch-3.0. Btw, we need this test suite in branch-3.0? Reverting #28194 from branch-3.0 is another simple solution to recover it, I think.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is useful to track the auto-alias name change history. It would be nice to have it in 3.0.

| org.apache.spark.sql.catalyst.expressions.Rank | rank | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.RegExpExtract | regexp_extract | SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) | struct<regexp_extract(100-200, (\d+)-(\d+), 1):string> |
| org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\\d+)', 'num') | struct<regexp_replace(100-200, (\d+), num):string> |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession {

private val resultFile = new File(baseResourcePath, "sql-expression-schema.md")

private val ignoreSet = Set(
// Output name with a random seed
"org.apache.spark.sql.catalyst.expressions.Rand",
"org.apache.spark.sql.catalyst.expressions.Randn")

/** A single SQL query's SQL and schema. */
protected case class QueryOutput(
className: String,
Expand All @@ -106,7 +111,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession {
val outputs = new ArrayBuffer[QueryOutput]
val missingExamples = new ArrayBuffer[String]

classFunsMap.foreach { kv =>
classFunsMap.filterNot(v => ignoreSet.contains(v._1)).foreach { kv =>
val className = kv._1
kv._2.foreach { funInfo =>
val example = funInfo.getExamples
Expand Down