From 36a7ce84fbce339471ed2917a3a2f60da6e868dd Mon Sep 17 00:00:00 2001 From: beliefer Date: Sun, 12 Apr 2020 13:54:44 +0800 Subject: [PATCH 01/22] Display expression schema for double check. --- .../test/resources/sql-functions/output.out | 3454 +++++++++++++++++ .../spark/sql/ExpressionsSchemaSuite.scala | 190 + 2 files changed, 3644 insertions(+) create mode 100644 sql/core/src/test/resources/sql-functions/output.out create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala diff --git a/sql/core/src/test/resources/sql-functions/output.out b/sql/core/src/test/resources/sql-functions/output.out new file mode 100644 index 0000000000000..40d47209fc2ad --- /dev/null +++ b/sql/core/src/test/resources/sql-functions/output.out @@ -0,0 +1,3454 @@ +-- Automatically generated by ExpressionsSchemaSuite + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringSpace + +-- Function name: space +-- !query +SELECT concat(space(2), '1') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CreateArray + +-- Function name: array +-- !query +SELECT array(1, 2, 3) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayExcept + +-- Function name: array_except +-- !query +SELECT array_except(array(1, 2, 3), array(1, 3, 5)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseXor + +-- Function name: ^ +-- !query +SELECT 3 ^ 5 +-- !query schema +struct<(3 ^ 5):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringRPad + +-- Function name: rpad +-- !query +SELECT rpad('hi', 5, '??') +-- !query schema +struct +-- !query +SELECT rpad('hi', 1, '??') +-- !query schema +struct +-- !query +SELECT rpad('hi', 5) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SchemaOfJson + +-- Function name: schema_of_json +-- !query +SELECT schema_of_json('[{"col":0}]') +-- !query schema +struct +-- !query +SELECT schema_of_json('[{"col":01}]', map('allowNumericLeadingZeros', 'true')) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ParseToTimestamp + +-- Function name: to_timestamp +-- !query +SELECT to_timestamp('2016-12-31 00:12:00') +-- !query schema +struct +-- !query +SELECT to_timestamp('2016-12-31', 'yyyy-MM-dd') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathInt + +-- Function name: xpath_int +-- !query +SELECT xpath_int('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop + +-- Function name: var_pop +-- !query +SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Hex + +-- Function name: hex +-- !query +SELECT hex(17) +-- !query schema +struct +-- !query +SELECT hex('Spark SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArraysZip + +-- Function name: arrays_zip +-- !query +SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)) +-- !query schema +struct>> +-- !query +SELECT arrays_zip(array(1, 2), array(2, 3), array(3, 4)) +-- !query schema +struct>> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.InputFileName + +-- Function name: input_file_name + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID + +-- Function name: monotonically_increasing_id + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DayOfMonth + +-- Function name: day +-- !query +SELECT day('2009-07-30') +-- !query schema +struct + + +-- Function name: dayofmonth +-- !query +SELECT dayofmonth('2009-07-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ElementAt + +-- Function name: element_at +-- !query +SELECT element_at(array(1, 2, 3), 2) +-- !query schema +struct +-- !query +SELECT element_at(map(1, 'a', 2, 'b'), 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.WeekDay + +-- Function name: weekday +-- !query +SELECT weekday('2009-07-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathLong + +-- Function name: xpath_long +-- !query +SELECT xpath_long('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):bigint> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CumeDist + +-- Function name: cume_dist + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayMin + +-- Function name: array_min +-- !query +SELECT array_min(array(1, 20, null, 3)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy + +-- Function name: max_by +-- !query +SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Rank + +-- Function name: rank + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Right + +-- Function name: right +-- !query +SELECT right('Spark SQL', 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Least + +-- Function name: least +-- !query +SELECT least(10, 9, 2, 4, 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Lower + +-- Function name: lcase +-- !query +SELECT lcase('SparkSql') +-- !query schema +struct + + +-- Function name: lower +-- !query +SELECT lower('SparkSql') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Nvl + +-- Function name: nvl +-- !query +SELECT nvl(NULL, array('2')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Pmod + +-- Function name: pmod +-- !query +SELECT pmod(10, 3) +-- !query schema +struct +-- !query +SELECT pmod(-10, 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Chr + +-- Function name: char +-- !query +SELECT char(65) +-- !query schema +struct + + +-- Function name: chr +-- !query +SELECT chr(65) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.GetJsonObject + +-- Function name: get_json_object +-- !query +SELECT get_json_object('{"a":"b"}', '$.a') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ToDegrees + +-- Function name: degrees +-- !query +SELECT degrees(3.141592653589793) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayExists + +-- Function name: exists +-- !query +SELECT exists(array(1, 2, 3), x -> x % 2 == 0) +-- !query schema +struct +-- !query +SELECT exists(array(1, 2, 3), x -> x % 2 == 10) +-- !query schema +struct +-- !query +SELECT exists(array(1, null, 3), x -> x % 2 == 0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Tanh + +-- Function name: tanh +-- !query +SELECT tanh(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringSplit + +-- Function name: split +-- !query +SELECT split('oneAtwoBthreeC', '[ABC]') +-- !query schema +struct> +-- !query +SELECT split('oneAtwoBthreeC', '[ABC]', -1) +-- !query schema +struct> +-- !query +SELECT split('oneAtwoBthreeC', '[ABC]', 2) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.GroupingID + +-- Function name: grouping_id +-- !query +SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Expm1 + +-- Function name: expm1 +-- !query +SELECT expm1(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Quarter + +-- Function name: quarter +-- !query +SELECT quarter('2016-08-31') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Atan2 + +-- Function name: atan2 +-- !query +SELECT atan2(0, 0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TypeOf + +-- Function name: typeof +-- !query +SELECT typeof(1) +-- !query schema +struct +-- !query +SELECT typeof(array(1)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sequence + +-- Function name: sequence +-- !query +SELECT sequence(1, 5) +-- !query schema +struct> +-- !query +SELECT sequence(5, 1) +-- !query schema +struct> +-- !query +SELECT sequence(to_date('2018-01-01'), to_date('2018-03-01'), interval 1 month) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.InputFileBlockStart + +-- Function name: input_file_block_start + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ConcatWs + +-- Function name: concat_ws +-- !query +SELECT concat_ws(' ', 'Spark', 'SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile + +-- Function name: approx_percentile +-- !query +SELECT approx_percentile(10.0, array(0.5, 0.4, 0.1), 100) +-- !query schema +struct> +-- !query +SELECT approx_percentile(10.0, 0.5, 100) +-- !query schema +struct + + +-- Function name: percentile_approx +-- !query +SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100) +-- !query schema +struct> +-- !query +SELECT percentile_approx(10.0, 0.5, 100) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TimeWindow + +-- Function name: window + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringLocate + +-- Function name: position +-- !query +SELECT position('bar', 'foobarbar') +-- !query schema +struct +-- !query +SELECT position('bar', 'foobarbar', 5) +-- !query schema +struct +-- !query +SELECT POSITION('bar' IN 'foobarbar') +-- !query schema +struct + + +-- Function name: locate +-- !query +SELECT locate('bar', 'foobarbar') +-- !query schema +struct +-- !query +SELECT locate('bar', 'foobarbar', 5) +-- !query schema +struct +-- !query +SELECT POSITION('bar' IN 'foobarbar') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.FormatNumber + +-- Function name: format_number +-- !query +SELECT format_number(12332.123456, 4) +-- !query schema +struct +-- !query +SELECT format_number(12332.123456, '##################.###') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.UnaryPositive + +-- Function name: positive + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Corr + +-- Function name: corr +-- !query +SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Md5 + +-- Function name: md5 +-- !query +SELECT md5('Spark') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StructsToJson + +-- Function name: to_json +-- !query +SELECT to_json(named_struct('a', 1, 'b', 2)) +-- !query schema +struct +-- !query +SELECT to_json(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy')) +-- !query schema +struct +-- !query +SELECT to_json(array(named_struct('a', 1, 'b', 2))) +-- !query schema +struct +-- !query +SELECT to_json(map('a', named_struct('b', 1))) +-- !query schema +struct +-- !query +SELECT to_json(map(named_struct('a', 1),named_struct('b', 2))) +-- !query schema +struct +-- !query +SELECT to_json(map('a', 1)) +-- !query schema +struct +-- !query +SELECT to_json(array((map('a', 1)))) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop + +-- Function name: stddev_pop +-- !query +SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Rint + +-- Function name: rint +-- !query +SELECT rint(12.3456) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapFromArrays + +-- Function name: map_from_arrays +-- !query +SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sinh + +-- Function name: sinh +-- !query +SELECT sinh(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Lag + +-- Function name: lag + + +-- Class name: org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual + +-- Function name: >= +-- !query +SELECT 2 >= 1 +-- !query schema +struct<(2 >= 1):boolean> +-- !query +SELECT 2.0 >= '2.1' +-- !query schema +struct<(CAST(2.0 AS DOUBLE) >= CAST(2.1 AS DOUBLE)):boolean> +-- !query +SELECT to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') +-- !query schema +struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> +-- !query +SELECT to_date('2009-07-30 04:17:52') >= to_date('2009-08-01 04:17:52') +-- !query schema +struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-08-01 04:17:52')):boolean> +-- !query +SELECT 1 >= NULL +-- !query schema +struct<(1 >= CAST(NULL AS INT)):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseAnd + +-- Function name: & +-- !query +SELECT 3 & 5 +-- !query schema +struct<(3 & 5):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.First + +-- Function name: first_value +-- !query +SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) +-- !query schema +struct +-- !query +SELECT first_value(col) FROM VALUES (NULL), (5), (20) AS tab(col) +-- !query schema +struct +-- !query +SELECT first_value(col, true) FROM VALUES (NULL), (5), (20) AS tab(col) +-- !query schema +struct + + +-- Function name: first +-- !query +SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) +-- !query schema +struct +-- !query +SELECT first(col) FROM VALUES (NULL), (5), (20) AS tab(col) +-- !query schema +struct +-- !query +SELECT first(col, true) FROM VALUES (NULL), (5), (20) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TruncDate + +-- Function name: trunc +-- !query +SELECT trunc('2019-08-04', 'week') +-- !query schema +struct +-- !query +SELECT trunc('2019-08-04', 'quarter') +-- !query schema +struct +-- !query +SELECT trunc('2009-02-12', 'MM') +-- !query schema +struct +-- !query +SELECT trunc('2015-10-27', 'YEAR') +-- !query schema +struct +-- !query +SELECT trunc('2015-10-27', 'DECADE') +-- !query schema +struct +-- !query +SELECT trunc('1981-01-19', 'century') +-- !query schema +struct +-- !query +SELECT trunc('1981-01-19', 'millennium') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean + +-- Function name: xpath_boolean +-- !query +SELECT xpath_boolean('1','a/b') +-- !query schema +struct1, a/b):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MakeInterval + +-- Function name: make_interval +-- !query +SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) +-- !query schema +struct +-- !query +SELECT make_interval(100, null, 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Atanh + +-- Function name: atanh +-- !query +SELECT atanh(0) +-- !query schema +struct +-- !query +SELECT atanh(2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.FindInSet + +-- Function name: find_in_set +-- !query +SELECT find_in_set('ab','abc,b,ab,c,def') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray + +-- Function name: json_array_length +-- !query +SELECT json_array_length('[1,2,3,4]') +-- !query schema +struct +-- !query +SELECT json_array_length('[1,2,3,{"f1":1,"f2":[5,6]},4]') +-- !query schema +struct +-- !query +SELECT json_array_length('[1,2') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg + +-- Function name: bit_xor +-- !query +SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Decode + +-- Function name: decode +-- !query +SELECT decode(encode('abc', 'utf-8'), 'utf-8') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Coalesce + +-- Function name: coalesce +-- !query +SELECT coalesce(NULL, 1, NULL) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.RegExpReplace + +-- Function name: regexp_replace +-- !query +SELECT regexp_replace('100-200', '(\\d+)', 'num') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp + +-- Function name: var_samp +-- !query +SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Function name: variance +-- !query +SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cos + +-- Function name: cos +-- !query +SELECT cos(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayFilter + +-- Function name: filter +-- !query +SELECT filter(array(1, 2, 3), x -> x % 2 == 1) +-- !query schema +struct> +-- !query +SELECT filter(array(0, 2, 3), (x, i) -> x > i) +-- !query schema +struct namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):array> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.PosExplode + +-- Function name: posexplode_outer +-- !query +SELECT posexplode_outer(array(10,20)) +-- !query schema +struct + + +-- Function name: posexplode +-- !query +SELECT posexplode(array(10,20)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.InputFileBlockLength + +-- Function name: input_file_block_length + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd + +-- Function name: every +-- !query +SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col) +-- !query schema +struct +-- !query +SELECT every(col) FROM VALUES (NULL), (true), (true) AS tab(col) +-- !query schema +struct +-- !query +SELECT every(col) FROM VALUES (true), (false), (true) AS tab(col) +-- !query schema +struct + + +-- Function name: bool_and +-- !query +SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) +-- !query schema +struct +-- !query +SELECT bool_and(col) FROM VALUES (NULL), (true), (true) AS tab(col) +-- !query schema +struct +-- !query +SELECT bool_and(col) FROM VALUES (true), (false), (true) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg + +-- Function name: count_min_sketch + + +-- Class name: org.apache.spark.sql.catalyst.expressions.AssertTrue + +-- Function name: assert_true +-- !query +SELECT assert_true(0 < 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CurrentDate + +-- Function name: current_date + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MonthsBetween + +-- Function name: months_between +-- !query +SELECT months_between('1997-02-28 10:30:00', '1996-10-30') +-- !query schema +struct +-- !query +SELECT months_between('1997-02-28 10:30:00', '1996-10-30', false) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.EqualNullSafe + +-- Function name: <=> +-- !query +SELECT 2 <=> 2 +-- !query schema +struct<(2 <=> 2):boolean> +-- !query +SELECT 1 <=> '1' +-- !query schema +struct<(1 <=> CAST(1 AS INT)):boolean> +-- !query +SELECT true <=> NULL +-- !query schema +struct<(true <=> CAST(NULL AS BOOLEAN)):boolean> +-- !query +SELECT NULL <=> NULL +-- !query schema +struct<(NULL <=> NULL):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Add + +-- Function name: + +-- !query +SELECT 1 + 2 +-- !query schema +struct<(1 + 2):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Multiply + +-- Function name: * +-- !query +SELECT 2 * 3 +-- !query schema +struct<(2 * 3):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DatePart + +-- Function name: date_part +-- !query +SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') +-- !query schema +struct +-- !query +SELECT date_part('week', timestamp'2019-08-12 01:00:00.123456') +-- !query schema +struct +-- !query +SELECT date_part('doy', DATE'2019-08-12') +-- !query schema +struct +-- !query +SELECT date_part('SECONDS', timestamp'2019-10-01 00:00:01.000001') +-- !query schema +struct +-- !query +SELECT date_part('days', interval 1 year 10 months 5 days) +-- !query schema +struct +-- !query +SELECT date_part('seconds', interval 5 hours 30 seconds 1 milliseconds 1 microseconds) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ShiftLeft + +-- Function name: shiftleft +-- !query +SELECT shiftleft(2, 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.GreaterThan + +-- Function name: > +-- !query +SELECT 2 > 1 +-- !query schema +struct<(2 > 1):boolean> +-- !query +SELECT 2 > '1.1' +-- !query schema +struct<(2 > CAST(1.1 AS INT)):boolean> +-- !query +SELECT to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') +-- !query schema +struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean> +-- !query +SELECT to_date('2009-07-30 04:17:52') > to_date('2009-08-01 04:17:52') +-- !query schema +struct<(to_date('2009-07-30 04:17:52') > to_date('2009-08-01 04:17:52')):boolean> +-- !query +SELECT 1 > NULL +-- !query schema +struct<(1 > CAST(NULL AS INT)):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Slice + +-- Function name: slice +-- !query +SELECT slice(array(1, 2, 3, 4), 2, 2) +-- !query schema +struct> +-- !query +SELECT slice(array(1, 2, 3, 4), -2, 2) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sentences + +-- Function name: sentences +-- !query +SELECT sentences('Hi there! Good morning.') +-- !query schema +struct>> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SoundEx + +-- Function name: soundex +-- !query +SELECT soundex('Miller') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.AddMonths + +-- Function name: add_months +-- !query +SELECT add_months('2016-08-31', 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Max + +-- Function name: max +-- !query +SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapFilter + +-- Function name: map_filter +-- !query +SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v) +-- !query schema +struct namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):map> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Crc32 + +-- Function name: crc32 +-- !query +SELECT crc32('Spark') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sha2 + +-- Function name: sha2 +-- !query +SELECT sha2('Spark', 256) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Size + +-- Function name: size +-- !query +SELECT size(array('b', 'd', 'c', 'a')) +-- !query schema +struct +-- !query +SELECT size(map('a', 1, 'b', 2)) +-- !query schema +struct +-- !query +SELECT size(NULL) +-- !query schema +struct + + +-- Function name: cardinality +-- !query +SELECT cardinality(array('b', 'd', 'c', 'a')) +-- !query schema +struct +-- !query +SELECT cardinality(map('a', 1, 'b', 2)) +-- !query schema +struct +-- !query +SELECT cardinality(NULL) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CurrentTimestamp + +-- Function name: current_timestamp + + +-- Function name: now + + +-- Class name: org.apache.spark.sql.catalyst.expressions.In + +-- Function name: in +-- !query +SELECT 1 in(1, 2, 3) +-- !query schema +struct<(1 IN (1, 2, 3)):boolean> +-- !query +SELECT 1 in(2, 3, 4) +-- !query schema +struct<(1 IN (2, 3, 4)):boolean> +-- !query +SELECT named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3)) +-- !query schema +struct<(named_struct(a, 1, b, 2) IN (named_struct(a, 1, b, 1), named_struct(a, 1, b, 3))):boolean> +-- !query +SELECT named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)) +-- !query schema +struct<(named_struct(a, 1, b, 2) IN (named_struct(a, 1, b, 2), named_struct(a, 1, b, 3))):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CurrentDatabase + +-- Function name: current_database +-- !query +SELECT current_database() +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringInstr + +-- Function name: instr +-- !query +SELECT instr('SparkSQL', 'SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Sum + +-- Function name: sum +-- !query +SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) +-- !query schema +struct +-- !query +SELECT sum(col) FROM VALUES (NULL), (10), (15) AS tab(col) +-- !query schema +struct +-- !query +SELECT sum(col) FROM VALUES (NULL), (NULL) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CountIf + +-- Function name: count_if +-- !query +SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) +-- !query schema +struct +-- !query +SELECT count_if(col IS NULL) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MakeTimestamp + +-- Function name: make_timestamp +-- !query +SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) +-- !query schema +struct +-- !query +SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887, 'CET') +-- !query schema +struct +-- !query +SELECT make_timestamp(2019, 6, 30, 23, 59, 60) +-- !query schema +struct +-- !query +SELECT make_timestamp(2019, 13, 1, 10, 11, 12, 'PST') +-- !query schema +struct +-- !query +SELECT make_timestamp(null, 7, 22, 15, 30, 0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CsvToStructs + +-- Function name: from_csv +-- !query +SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') +-- !query schema +struct> +-- !query +SELECT from_csv('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Remainder + +-- Function name: % +-- !query +SELECT 2 % 1.8 +-- !query schema +struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> +-- !query +SELECT MOD(2, 1.8) +-- !query schema +struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> + + +-- Function name: mod +-- !query +SELECT 2 % 1.8 +-- !query schema +struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> +-- !query +SELECT MOD(2, 1.8) +-- !query schema +struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringRepeat + +-- Function name: repeat +-- !query +SELECT repeat('123', 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SubstringIndex + +-- Function name: substring_index +-- !query +SELECT substring_index('www.apache.org', '.', 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringTrimLeft + +-- Function name: ltrim +-- !query +SELECT ltrim(' SparkSQL ') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringTranslate + +-- Function name: translate +-- !query +SELECT translate('AaBbCc', 'abc', '123') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Greatest + +-- Function name: greatest +-- !query +SELECT greatest(10, 9, 2, 4, 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayDistinct + +-- Function name: array_distinct +-- !query +SELECT array_distinct(array(1, 2, 3, null, 3)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringReplace + +-- Function name: replace +-- !query +SELECT replace('ABCabc', 'abc', 'DEF') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathShort + +-- Function name: xpath_short +-- !query +SELECT xpath_short('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):smallint> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr + +-- Function name: bool_or +-- !query +SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) +-- !query schema +struct +-- !query +SELECT bool_or(col) FROM VALUES (NULL), (true), (false) AS tab(col) +-- !query schema +struct +-- !query +SELECT bool_or(col) FROM VALUES (false), (false), (NULL) AS tab(col) +-- !query schema +struct + + +-- Function name: some +-- !query +SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) +-- !query schema +struct +-- !query +SELECT some(col) FROM VALUES (NULL), (true), (false) AS tab(col) +-- !query schema +struct +-- !query +SELECT some(col) FROM VALUES (false), (false), (NULL) AS tab(col) +-- !query schema +struct + + +-- Function name: any +-- !query +SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) +-- !query schema +struct +-- !query +SELECT any(col) FROM VALUES (NULL), (true), (false) AS tab(col) +-- !query schema +struct +-- !query +SELECT any(col) FROM VALUES (false), (false), (NULL) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Murmur3Hash + +-- Function name: hash +-- !query +SELECT hash('Spark', array(123), 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.RLike + +-- Function name: rlike +-- !query +SET spark.sql.parser.escapedStringLiterals=true +-- !query schema +struct +-- !query +SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' +-- !query schema +struct<%SystemDrive%\Users\John RLIKE %SystemDrive%\\Users.*:boolean> +-- !query +SET spark.sql.parser.escapedStringLiterals=false +-- !query schema +struct +-- !query +SELECT '%SystemDrive%\\Users\\John' rlike '%SystemDrive%\\\\Users.*' +-- !query schema +struct<%SystemDrive%\Users\John RLIKE %SystemDrive%\\Users.*:boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.If + +-- Function name: if +-- !query +SELECT if(1 < 2, 'a', 'b') +-- !query schema +struct<(IF((1 < 2), a, b)):string> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Grouping + +-- Function name: grouping +-- !query +SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Abs + +-- Function name: abs +-- !query +SELECT abs(-1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.InitCap + +-- Function name: initcap +-- !query +SELECT initcap('sPark sql') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Percentile + +-- Function name: percentile +-- !query +SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) +-- !query schema +struct +-- !query +SELECT percentile(col, array(0.25, 0.75)) FROM VALUES (0), (10) AS tab(col) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.IsNotNull + +-- Function name: isnotnull +-- !query +SELECT isnotnull(1) +-- !query schema +struct<(1 IS NOT NULL):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cbrt + +-- Function name: cbrt +-- !query +SELECT cbrt(27.0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseNot + +-- Function name: ~ +-- !query +SELECT ~ 0 +-- !query schema +struct<~0:int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Last + +-- Function name: last_value +-- !query +SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) +-- !query schema +struct +-- !query +SELECT last_value(col) FROM VALUES (10), (5), (NULL) AS tab(col) +-- !query schema +struct +-- !query +SELECT last_value(col, true) FROM VALUES (10), (5), (NULL) AS tab(col) +-- !query schema +struct + + +-- Function name: last +-- !query +SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) +-- !query schema +struct +-- !query +SELECT last(col) FROM VALUES (10), (5), (NULL) AS tab(col) +-- !query schema +struct +-- !query +SELECT last(col, true) FROM VALUES (10), (5), (NULL) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.NullIf + +-- Function name: nullif +-- !query +SELECT nullif(2, 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Month + +-- Function name: month +-- !query +SELECT month('2016-07-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Logarithm + +-- Function name: log +-- !query +SELECT log(10, 100) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Subtract + +-- Function name: - +-- !query +SELECT 2 - 1 +-- !query schema +struct<(2 - 1):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DateAdd + +-- Function name: date_add +-- !query +SELECT date_add('2016-07-30', 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MakeDate + +-- Function name: make_date +-- !query +SELECT make_date(2013, 7, 15) +-- !query schema +struct +-- !query +SELECT make_date(2019, 13, 1) +-- !query schema +struct +-- !query +SELECT make_date(2019, 7, NULL) +-- !query schema +struct +-- !query +SELECT make_date(2019, 2, 30) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.JsonToStructs + +-- Function name: from_json +-- !query +SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') +-- !query schema +struct> +-- !query +SELECT from_json('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ZipWith + +-- Function name: zip_with +-- !query +SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) +-- !query schema +struct>> +-- !query +SELECT zip_with(array(1, 2), array(3, 4), (x, y) -> x + y) +-- !query schema +struct> +-- !query +SELECT zip_with(array('a', 'b', 'c'), array('d', 'e', 'f'), (x, y) -> concat(x, y)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.NamedStruct + +-- Function name: struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Tan + +-- Function name: tan +-- !query +SELECT tan(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.EulerNumber + +-- Function name: e +-- !query +SELECT e() +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringToMap + +-- Function name: str_to_map +-- !query +SELECT str_to_map('a:1,b:2,c:3', ',', ':') +-- !query schema +struct> +-- !query +SELECT str_to_map('a') +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArraySort + +-- Function name: array_sort +-- !query +SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) +-- !query schema +struct namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array> +-- !query +SELECT array_sort(array('bc', 'ab', 'dc'), (left, right) -> case when left is null and right is null then 0 when left is null then -1 when right is null then 1 when left < right then 1 when left > right then -1 else 0 end) +-- !query schema +struct namedlambdavariable()) THEN -1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array> +-- !query +SELECT array_sort(array('b', 'd', null, 'c', 'a')) +-- !query schema +struct namedlambdavariable()), 1, 0)))))))))), namedlambdavariable(), namedlambdavariable())):array> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cast + +-- Function name: string + + +-- Function name: cast +-- !query +SELECT cast('10' as int) +-- !query schema +struct + + +-- Function name: tinyint + + +-- Function name: double + + +-- Function name: smallint + + +-- Function name: date + + +-- Function name: decimal + + +-- Function name: boolean + + +-- Function name: float + + +-- Function name: binary + + +-- Function name: bigint + + +-- Function name: int + + +-- Function name: timestamp + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Min + +-- Function name: min +-- !query +SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Average + +-- Function name: avg +-- !query +SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct +-- !query +SELECT avg(col) FROM VALUES (1), (2), (NULL) AS tab(col) +-- !query schema +struct + + +-- Function name: mean +-- !query +SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct +-- !query +SELECT mean(col) FROM VALUES (1), (2), (NULL) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SortArray + +-- Function name: sort_array +-- !query +SELECT sort_array(array('b', 'd', null, 'c', 'a'), true) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.NextDay + +-- Function name: next_day +-- !query +SELECT next_day('2015-01-14', 'TU') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Ascii + +-- Function name: ascii +-- !query +SELECT ascii('222') +-- !query schema +struct +-- !query +SELECT ascii(2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayRemove + +-- Function name: array_remove +-- !query +SELECT array_remove(array(1, 2, 3, null, 3), 3) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Pow + +-- Function name: pow +-- !query +SELECT pow(2, 3) +-- !query schema +struct + + +-- Function name: power +-- !query +SELECT power(2, 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.LessThan + +-- Function name: < +-- !query +SELECT 1 < 2 +-- !query schema +struct<(1 < 2):boolean> +-- !query +SELECT 1.1 < '1' +-- !query schema +struct<(CAST(1.1 AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> +-- !query +SELECT to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') +-- !query schema +struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> +-- !query +SELECT to_date('2009-07-30 04:17:52') < to_date('2009-08-01 04:17:52') +-- !query schema +struct<(to_date('2009-07-30 04:17:52') < to_date('2009-08-01 04:17:52')):boolean> +-- !query +SELECT 1 < NULL +-- !query schema +struct<(1 < CAST(NULL AS INT)):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapKeys + +-- Function name: map_keys +-- !query +SELECT map_keys(map(1, 'a', 2, 'b')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Inline + +-- Function name: inline +-- !query +SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) +-- !query schema +struct + + +-- Function name: inline_outer +-- !query +SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapZipWith + +-- Function name: map_zip_with +-- !query +SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Encode + +-- Function name: encode +-- !query +SELECT encode('abc', 'utf-8') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayJoin + +-- Function name: array_join +-- !query +SELECT array_join(array('hello', 'world'), ' ') +-- !query schema +struct +-- !query +SELECT array_join(array('hello', null ,'world'), ' ') +-- !query schema +struct +-- !query +SELECT array_join(array('hello', null ,'world'), ' ', ',') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.And + +-- Function name: and + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Hypot + +-- Function name: hypot +-- !query +SELECT hypot(3, 4) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Round + +-- Function name: round +-- !query +SELECT round(2.5, 0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CovSample + +-- Function name: covar_samp +-- !query +SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Pi + +-- Function name: pi +-- !query +SELECT pi() +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sqrt + +-- Function name: sqrt +-- !query +SELECT sqrt(4) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TransformKeys + +-- Function name: transform_keys +-- !query +SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) +-- !query schema +struct> +-- !query +SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Substring + +-- Function name: substr +-- !query +SELECT substr('Spark SQL', 5) +-- !query schema +struct +-- !query +SELECT substr('Spark SQL', -3) +-- !query schema +struct +-- !query +SELECT substr('Spark SQL', 5, 1) +-- !query schema +struct + + +-- Function name: substring +-- !query +SELECT substring('Spark SQL', 5) +-- !query schema +struct +-- !query +SELECT substring('Spark SQL', -3) +-- !query schema +struct +-- !query +SELECT substring('Spark SQL', 5, 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Asinh + +-- Function name: asinh +-- !query +SELECT asinh(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Second + +-- Function name: second +-- !query +SELECT second('2009-07-30 12:58:59') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp + +-- Function name: to_utc_timestamp +-- !query +SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Upper + +-- Function name: ucase +-- !query +SELECT ucase('SparkSql') +-- !query schema +struct + + +-- Function name: upper +-- !query +SELECT upper('SparkSql') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg + +-- Function name: bit_and +-- !query +SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Stack + +-- Function name: stack +-- !query +SELECT stack(2, 1, 2, 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.EqualTo + +-- Function name: = +-- !query +SELECT 2 = 2 +-- !query schema +struct<(2 = 2):boolean> +-- !query +SELECT 1 = '1' +-- !query schema +struct<(1 = CAST(1 AS INT)):boolean> +-- !query +SELECT true = NULL +-- !query schema +struct<(true = CAST(NULL AS BOOLEAN)):boolean> +-- !query +SELECT NULL = NULL +-- !query schema +struct<(NULL = NULL):boolean> + + +-- Function name: == +-- !query +SELECT 2 == 2 +-- !query schema +struct<(2 = 2):boolean> +-- !query +SELECT 1 == '1' +-- !query schema +struct<(1 = CAST(1 AS INT)):boolean> +-- !query +SELECT true == NULL +-- !query schema +struct<(true = CAST(NULL AS BOOLEAN)):boolean> +-- !query +SELECT NULL == NULL +-- !query schema +struct<(NULL = NULL):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringLPad + +-- Function name: lpad +-- !query +SELECT lpad('hi', 5, '??') +-- !query schema +struct +-- !query +SELECT lpad('hi', 1, '??') +-- !query schema +struct +-- !query +SELECT lpad('hi', 5) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapFromEntries + +-- Function name: map_from_entries +-- !query +SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cube + +-- Function name: cube +-- !query +SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name, age) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Divide + +-- Function name: / +-- !query +SELECT 3 / 2 +-- !query schema +struct<(CAST(3 AS DOUBLE) / CAST(2 AS DOUBLE)):double> +-- !query +SELECT 2L / 2L +-- !query schema +struct<(CAST(2 AS DOUBLE) / CAST(2 AS DOUBLE)):double> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Like + +-- Function name: like +-- !query +SELECT like('Spark', '_park') +-- !query schema +struct +-- !query +SET spark.sql.parser.escapedStringLiterals=true +-- !query schema +struct +-- !query +SELECT '%SystemDrive%\Users\John' like '\%SystemDrive\%\\Users%' +-- !query schema +struct<%SystemDrive%\Users\John LIKE \%SystemDrive\%\\Users%:boolean> +-- !query +SET spark.sql.parser.escapedStringLiterals=false +-- !query schema +struct +-- !query +SELECT '%SystemDrive%\\Users\\John' like '\%SystemDrive\%\\\\Users%' +-- !query schema +struct<%SystemDrive%\Users\John LIKE \%SystemDrive\%\\Users%:boolean> +-- !query +SELECT '%SystemDrive%/Users/John' like '/%SystemDrive/%//Users%' ESCAPE '/' +-- !query schema +struct<%SystemDrive%/Users/John LIKE /%SystemDrive/%//Users%:boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.OctetLength + +-- Function name: octet_length +-- !query +SELECT octet_length('Spark SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CaseWhen + +-- Function name: when +-- !query +SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END +-- !query schema +struct 0) THEN CAST(1 AS DECIMAL(11,1)) WHEN (2 > 0) THEN CAST(2.0 AS DECIMAL(11,1)) ELSE CAST(1.2 AS DECIMAL(11,1)) END:decimal(11,1)> +-- !query +SELECT CASE WHEN 1 < 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END +-- !query schema +struct 0) THEN CAST(2.0 AS DECIMAL(11,1)) ELSE CAST(1.2 AS DECIMAL(11,1)) END:decimal(11,1)> +-- !query +SELECT CASE WHEN 1 < 0 THEN 1 WHEN 2 < 0 THEN 2.0 END +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Log + +-- Function name: ln +-- !query +SELECT ln(1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseCount + +-- Function name: bit_count +-- !query +SELECT bit_count(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Acos + +-- Function name: acos +-- !query +SELECT acos(1) +-- !query schema +struct +-- !query +SELECT acos(2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SparkPartitionID + +-- Function name: spark_partition_id + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DateFormatClass + +-- Function name: date_format +-- !query +SELECT date_format('2016-04-08', 'y') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.FromUnixTime + +-- Function name: from_unixtime +-- !query +SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Floor + +-- Function name: floor +-- !query +SELECT floor(-0.1) +-- !query schema +struct +-- !query +SELECT floor(5) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SchemaOfCsv + +-- Function name: schema_of_csv +-- !query +SELECT schema_of_csv('1,abc') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Log2 + +-- Function name: log2 +-- !query +SELECT log2(2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DateSub + +-- Function name: date_sub +-- !query +SELECT date_sub('2016-07-30', 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.NTile + +-- Function name: ntile + + +-- Class name: org.apache.spark.sql.catalyst.expressions.RowNumber + +-- Function name: row_number + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CreateMap + +-- Function name: map +-- !query +SELECT map(1.0, '2', 3.0, '4') +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg + +-- Function name: bit_or +-- !query +SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DayOfYear + +-- Function name: dayofyear +-- !query +SELECT dayofyear('2016-04-09') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.IsNull + +-- Function name: isnull +-- !query +SELECT isnull(1) +-- !query schema +struct<(1 IS NULL):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Ceil + +-- Function name: ceil +-- !query +SELECT ceil(-0.1) +-- !query schema +struct +-- !query +SELECT ceil(5) +-- !query schema +struct + + +-- Function name: ceiling +-- !query +SELECT ceiling(-0.1) +-- !query schema +struct +-- !query +SELECT ceiling(5) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Asin + +-- Function name: asin +-- !query +SELECT asin(0) +-- !query schema +struct +-- !query +SELECT asin(2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Count + +-- Function name: count +-- !query +SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) +-- !query schema +struct +-- !query +SELECT count(col) FROM VALUES (NULL), (5), (5), (20) AS tab(col) +-- !query schema +struct +-- !query +SELECT count(DISTINCT col) FROM VALUES (NULL), (5), (5), (10) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Minute + +-- Function name: minute +-- !query +SELECT minute('2009-07-30 12:58:59') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.PercentRank + +-- Function name: percent_rank + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathList + +-- Function name: xpath +-- !query +SELECT xpath('b1b2b3c1c2','a/b/text()') +-- !query schema +structb1b2b3c1c2, a/b/text()):array> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.IntegralDivide + +-- Function name: div +-- !query +SELECT 3 div 2 +-- !query schema +struct<(3 div 2):bigint> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation + +-- Function name: covar_pop +-- !query +SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathDouble + +-- Function name: xpath_number +-- !query +SELECT xpath_number('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):double> + + +-- Function name: xpath_double +-- !query +SELECT xpath_double('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):double> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SparkVersion + +-- Function name: version + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Not + +-- Function name: ! + + +-- Function name: not + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ShiftRight + +-- Function name: shiftright +-- !query +SELECT shiftright(4, 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sin + +-- Function name: sin +-- !query +SELECT sin(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ToRadians + +-- Function name: radians +-- !query +SELECT radians(180) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TransformValues + +-- Function name: transform_values +-- !query +SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1) +-- !query schema +struct> +-- !query +SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayUnion + +-- Function name: array_union +-- !query +SELECT array_union(array(1, 2, 3), array(1, 3, 5)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis + +-- Function name: kurtosis +-- !query +SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) +-- !query schema +struct +-- !query +SELECT kurtosis(col) FROM VALUES (1), (10), (100), (10), (1) as tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Signum + +-- Function name: signum +-- !query +SELECT signum(40) +-- !query schema +struct + + +-- Function name: sign +-- !query +SELECT sign(40) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Overlay + +-- Function name: overlay +-- !query +SELECT overlay('Spark SQL' PLACING '_' FROM 6) +-- !query schema +struct +-- !query +SELECT overlay('Spark SQL' PLACING 'CORE' FROM 7) +-- !query schema +struct +-- !query +SELECT overlay('Spark SQL' PLACING 'ANSI ' FROM 7 FOR 0) +-- !query schema +struct +-- !query +SELECT overlay('Spark SQL' PLACING 'tructured' FROM 2 FOR 4) +-- !query schema +struct +-- !query +SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('_', 'utf-8') FROM 6) +-- !query schema +struct +-- !query +SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('CORE', 'utf-8') FROM 7) +-- !query schema +struct +-- !query +SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('ANSI ', 'utf-8') FROM 7 FOR 0) +-- !query schema +struct +-- !query +SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('tructured', 'utf-8') FROM 2 FOR 4) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sha1 + +-- Function name: sha1 +-- !query +SELECT sha1('Spark') +-- !query schema +struct + + +-- Function name: sha +-- !query +SELECT sha('Spark') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TruncTimestamp + +-- Function name: date_trunc +-- !query +SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') +-- !query schema +struct +-- !query +SELECT date_trunc('MM', '2015-03-05T09:32:05.359') +-- !query schema +struct +-- !query +SELECT date_trunc('DD', '2015-03-05T09:32:05.359') +-- !query schema +struct +-- !query +SELECT date_trunc('HOUR', '2015-03-05T09:32:05.359') +-- !query schema +struct +-- !query +SELECT date_trunc('MILLISECOND', '2015-03-05T09:32:05.123456') +-- !query schema +struct +-- !query +SELECT date_trunc('DECADE', '2015-03-05T09:32:05.123456') +-- !query schema +struct +-- !query +SELECT date_trunc('CENTURY', '2015-03-05T09:32:05.123456') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet + +-- Function name: collect_set +-- !query +SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Factorial + +-- Function name: factorial +-- !query +SELECT factorial(5) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitLength + +-- Function name: bit_length +-- !query +SELECT bit_length('Spark SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StructsToCsv + +-- Function name: to_csv +-- !query +SELECT to_csv(named_struct('a', 1, 'b', 2)) +-- !query schema +struct +-- !query +SELECT to_csv(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy')) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.XxHash64 + +-- Function name: xxhash64 +-- !query +SELECT xxhash64('Spark', array(123), 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.IfNull + +-- Function name: ifnull +-- !query +SELECT ifnull(NULL, array('2')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Flatten + +-- Function name: flatten +-- !query +SELECT flatten(array(array(1, 2), array(3, 4))) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CollectList + +-- Function name: collect_list +-- !query +SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseOr + +-- Function name: | +-- !query +SELECT 3 | 5 +-- !query schema +struct<(3 | 5):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Or + +-- Function name: or + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayRepeat + +-- Function name: array_repeat +-- !query +SELECT array_repeat('123', 2) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathString + +-- Function name: xpath_string +-- !query +SELECT xpath_string('bcc','a/c') +-- !query schema +structbcc, a/c):string> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayMax + +-- Function name: array_max +-- !query +SELECT array_max(array(1, 20, null, 3)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringTrim + +-- Function name: trim +-- !query +SELECT trim(' SparkSQL ') +-- !query schema +struct +-- !query +SELECT trim(BOTH FROM ' SparkSQL ') +-- !query schema +struct +-- !query +SELECT trim(LEADING FROM ' SparkSQL ') +-- !query schema +struct +-- !query +SELECT trim(TRAILING FROM ' SparkSQL ') +-- !query schema +struct +-- !query +SELECT trim('SL' FROM 'SSparkSQLS') +-- !query schema +struct +-- !query +SELECT trim(BOTH 'SL' FROM 'SSparkSQLS') +-- !query schema +struct +-- !query +SELECT trim(LEADING 'SL' FROM 'SSparkSQLS') +-- !query schema +struct +-- !query +SELECT trim(TRAILING 'SL' FROM 'SSparkSQLS') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.IsNaN + +-- Function name: isnan +-- !query +SELECT isnan(cast('NaN' as double)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Levenshtein + +-- Function name: levenshtein +-- !query +SELECT levenshtein('kitten', 'sitting') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus + +-- Function name: approx_count_distinct +-- !query +SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapConcat + +-- Function name: map_concat +-- !query +SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Atan + +-- Function name: atan +-- !query +SELECT atan(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathFloat + +-- Function name: xpath_float +-- !query +SELECT xpath_float('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):float> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Log10 + +-- Function name: log10 +-- !query +SELECT log10(10) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp + +-- Function name: from_utc_timestamp +-- !query +SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CreateNamedStruct + +-- Function name: named_struct +-- !query +SELECT named_struct("a", 1, "b", 2, "c", 3) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BRound + +-- Function name: bround +-- !query +SELECT bround(2.5, 0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Year + +-- Function name: year +-- !query +SELECT year('2016-07-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.WeekOfYear + +-- Function name: weekofyear +-- !query +SELECT weekofyear('2008-02-20') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Hour + +-- Function name: hour +-- !query +SELECT hour('2009-07-30 12:58:59') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DayOfWeek + +-- Function name: dayofweek +-- !query +SELECT dayofweek('2009-07-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayContains + +-- Function name: array_contains +-- !query +SELECT array_contains(array(1, 2, 3), 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Base64 + +-- Function name: base64 +-- !query +SELECT base64('Spark SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.UnaryMinus + +-- Function name: negative +-- !query +SELECT negative(1) +-- !query schema +struct<(- 1):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Explode + +-- Function name: explode +-- !query +SELECT explode(array(10, 20)) +-- !query schema +struct + + +-- Function name: explode_outer +-- !query +SELECT explode_outer(array(10, 20)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ParseToDate + +-- Function name: to_date +-- !query +SELECT to_date('2009-07-30 04:17:52') +-- !query schema +struct +-- !query +SELECT to_date('2016-12-31', 'yyyy-MM-dd') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ParseUrl + +-- Function name: parse_url +-- !query +SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') +-- !query schema +struct +-- !query +SELECT parse_url('http://spark.apache.org/path?query=1', 'QUERY') +-- !query schema +struct +-- !query +SELECT parse_url('http://spark.apache.org/path?query=1', 'QUERY', 'query') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cosh + +-- Function name: cosh +-- !query +SELECT cosh(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayAggregate + +-- Function name: aggregate +-- !query +SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) +-- !query schema +struct +-- !query +SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x, acc -> acc * 10) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned + +-- Function name: shiftrightunsigned +-- !query +SELECT shiftrightunsigned(4, 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Nvl2 + +-- Function name: nvl2 +-- !query +SELECT nvl2(NULL, 2, 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DateDiff + +-- Function name: datediff +-- !query +SELECT datediff('2009-07-31', '2009-07-30') +-- !query schema +struct +-- !query +SELECT datediff('2009-07-30', '2009-07-31') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Log1p + +-- Function name: log1p +-- !query +SELECT log1p(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.NaNvl + +-- Function name: nanvl +-- !query +SELECT nanvl(cast('NaN' as double), 123) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapEntries + +-- Function name: map_entries +-- !query +SELECT map_entries(map(1, 'a', 2, 'b')) +-- !query schema +struct>> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Reverse + +-- Function name: reverse +-- !query +SELECT reverse('Spark SQL') +-- !query schema +struct +-- !query +SELECT reverse(array(2, 1, 4, 3)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayIntersect + +-- Function name: array_intersect +-- !query +SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp + +-- Function name: stddev_samp +-- !query +SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Function name: stddev +-- !query +SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Function name: std +-- !query +SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapValues + +-- Function name: map_values +-- !query +SELECT map_values(map(1, 'a', 2, 'b')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArraysOverlap + +-- Function name: arrays_overlap +-- !query +SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Rollup + +-- Function name: rollup +-- !query +SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY rollup(name, age) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.LessThanOrEqual + +-- Function name: <= +-- !query +SELECT 2 <= 2 +-- !query schema +struct<(2 <= 2):boolean> +-- !query +SELECT 1.0 <= '1' +-- !query schema +struct<(CAST(1.0 AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> +-- !query +SELECT to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') +-- !query schema +struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> +-- !query +SELECT to_date('2009-07-30 04:17:52') <= to_date('2009-08-01 04:17:52') +-- !query schema +struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-08-01 04:17:52')):boolean> +-- !query +SELECT 1 <= NULL +-- !query schema +struct<(1 <= CAST(NULL AS INT)):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Elt + +-- Function name: elt +-- !query +SELECT elt(1, 'scala', 'java') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Skewness + +-- Function name: skewness +-- !query +SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) +-- !query schema +struct +-- !query +SELECT skewness(col) FROM VALUES (-1000), (-100), (10), (20) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Left + +-- Function name: left +-- !query +SELECT left('Spark SQL', 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringTrimRight + +-- Function name: rtrim +-- !query +SELECT rtrim(' SparkSQL ') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Lead + +-- Function name: lead + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayForAll + +-- Function name: forall +-- !query +SELECT forall(array(1, 2, 3), x -> x % 2 == 0) +-- !query schema +struct +-- !query +SELECT forall(array(2, 4, 8), x -> x % 2 == 0) +-- !query schema +struct +-- !query +SELECT forall(array(1, null, 3), x -> x % 2 == 0) +-- !query schema +struct +-- !query +SELECT forall(array(2, null, 8), x -> x % 2 == 0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Bin + +-- Function name: bin +-- !query +SELECT bin(13) +-- !query schema +struct +-- !query +SELECT bin(-13) +-- !query schema +struct +-- !query +SELECT bin(13.3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DenseRank + +-- Function name: dense_rank + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayPosition + +-- Function name: array_position +-- !query +SELECT array_position(array(3, 2, 1), 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayTransform + +-- Function name: transform +-- !query +SELECT transform(array(1, 2, 3), x -> x + 1) +-- !query schema +struct> +-- !query +SELECT transform(array(1, 2, 3), (x, i) -> x + i) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.JsonTuple + +-- Function name: json_tuple +-- !query +SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.RegExpExtract + +-- Function name: regexp_extract +-- !query +SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Length + +-- Function name: character_length +-- !query +SELECT character_length('Spark SQL ') +-- !query schema +struct +-- !query +SELECT CHAR_LENGTH('Spark SQL ') +-- !query schema +struct +-- !query +SELECT CHARACTER_LENGTH('Spark SQL ') +-- !query schema +struct + + +-- Function name: char_length +-- !query +SELECT char_length('Spark SQL ') +-- !query schema +struct +-- !query +SELECT CHAR_LENGTH('Spark SQL ') +-- !query schema +struct +-- !query +SELECT CHARACTER_LENGTH('Spark SQL ') +-- !query schema +struct + + +-- Function name: length +-- !query +SELECT length('Spark SQL ') +-- !query schema +struct +-- !query +SELECT CHAR_LENGTH('Spark SQL ') +-- !query schema +struct +-- !query +SELECT CHARACTER_LENGTH('Spark SQL ') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Unhex + +-- Function name: unhex +-- !query +SELECT decode(unhex('537061726B2053514C'), 'UTF-8') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Conv + +-- Function name: conv +-- !query +SELECT conv('100', 2, 10) +-- !query schema +struct +-- !query +SELECT conv(-10, 16, -10) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.JsonObjectKeys + +-- Function name: json_object_keys +-- !query +Select json_object_keys('{}') +-- !query schema +struct> +-- !query +Select json_object_keys('{"key": "value"}') +-- !query schema +struct> +-- !query +Select json_object_keys('{"f1":"abc","f2":{"f3":"a", "f4":"b"}}') +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.MinBy + +-- Function name: min_by +-- !query +SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cot + +-- Function name: cot +-- !query +SELECT cot(1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.LastDay + +-- Function name: last_day +-- !query +SELECT last_day('2009-01-12') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Exp + +-- Function name: exp +-- !query +SELECT exp(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Concat + +-- Function name: concat +-- !query +SELECT concat('Spark', 'SQL') +-- !query schema +struct +-- !query +SELECT concat(array(1, 2, 3), array(4, 5), array(6)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.UnBase64 + +-- Function name: unbase64 +-- !query +SELECT unbase64('U3BhcmsgU1FM') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Acosh + +-- Function name: acosh +-- !query +SELECT acosh(1) +-- !query schema +struct +-- !query +SELECT acosh(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.FormatString + +-- Function name: printf +-- !query +SELECT printf("Hello World %d %s", 100, "days") +-- !query schema +struct + + +-- Function name: format_string +-- !query +SELECT format_string("Hello World %d %s", 100, "days") +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp + +-- Function name: to_unix_timestamp +-- !query +SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') +-- !query schema +struct + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala new file mode 100644 index 0000000000000..e457bfb6b8880 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.io.File + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.catalyst.expressions.ExpressionInfo +import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} +import org.apache.spark.sql.execution.HiveResult.hiveResultString +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.tags.ExtendedSQLTest + +/** + * End-to-end test cases for SQL schemas of expression examples. + * The golden result file is "spark/sql/core/src/test/resources/sql-functions/output.out". + * + * To run the entire test suite: + * {{{ + * build/sbt "sql/test-only *ExpressionsSchemaSuite" + * }}} + * + * To re-generate golden files for entire suite, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *ExpressionsSchemaSuite" + * }}} + * + * For example: + * {{{ + * ... + * @ExpressionDescription( + * usage = "_FUNC_(str, n) - Returns the string which repeats the given string value n times.", + * examples = """ + * Examples: + * > SELECT _FUNC_('123', 2); + * 123123 + * """, + * since = "1.5.0") + * case class StringRepeat(str: Expression, times: Expression) + * ... + * }}} + * + * The format for golden result files look roughly like: + * {{{ + * ... + * -- Class name: org.apache.spark.sql.catalyst.expressions.StringRepeat + * + * -- Function name: repeat + * -- !query + * SELECT repeat('123', 2) + * -- !query schema + * struct + * ... + * }}} + */ +@ExtendedSQLTest +class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { + + private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1" + + private val baseResourcePath = { + // We use a path based on Spark home for 2 reasons: + // 1. Maven can't get correct resource directory when resources in other jars. + // 2. We test subclasses in the hive-thriftserver module. + val sparkHome = { + assert(sys.props.contains("spark.test.home") || + sys.env.contains("SPARK_HOME"), "spark.test.home or SPARK_HOME is not set.") + sys.props.getOrElse("spark.test.home", sys.env("SPARK_HOME")) + } + + java.nio.file.Paths.get(sparkHome, + "sql", "core", "src", "test", "resources", "sql-functions").toFile + } + + private val resultFile = new File(baseResourcePath, "output.out") + + val ignoreSet = Set( + // One of examples shows getting the current timestamp + "org.apache.spark.sql.catalyst.expressions.UnixTimestamp", + // Random output without a seed + "org.apache.spark.sql.catalyst.expressions.Rand", + "org.apache.spark.sql.catalyst.expressions.Randn", + "org.apache.spark.sql.catalyst.expressions.Shuffle", + "org.apache.spark.sql.catalyst.expressions.Uuid", + // The example calls methods that return unstable results. + "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection") + + /** A single SQL query's SQL and schema. */ + protected case class QueryOutput(sql: String, schema: String) { + override def toString: String = { + // We are explicitly not using multi-line string due to stripMargin removing "|" in output. + s"-- !query\n" + + sql + "\n" + + s"-- !query schema\n" + + schema + } + } + + test("Check schemas for expression examples") { + val exampleRe = """^(.+);\n(?s)(.+)$""".r + val funInfos = spark.sessionState.functionRegistry.listFunction().map { funcId => + spark.sessionState.catalog.lookupFunctionInfo(funcId) + } + + val classFunsMap = funInfos.groupBy(_.getClassName) + val outputBuffer = new ArrayBuffer[String] + val outputs = new ArrayBuffer[QueryOutput] + outputBuffer += s"-- Automatically generated by ${getClass.getSimpleName}\n\n" + + classFunsMap.foreach { kv => + val className = kv._1 + if (!ignoreSet.contains(className)) { + outputBuffer += s"-- Class name: ${kv._1}\n" + kv._2.foreach { funInfo => + outputBuffer += s"-- Function name: ${funInfo.getName}" + val example = funInfo.getExamples + example.split(" > ").toList.foreach(_ match { + case exampleRe(sql, expected) => + val df = spark.sql(sql) + val schema = df.schema.catalogString + val queryOutput = QueryOutput(sql, schema) + outputBuffer += queryOutput.toString + outputs += queryOutput + case _ => + }) + outputBuffer += "\n" + } + } + } + + if (regenerateGoldenFiles) { + val goldenOutput = outputBuffer.mkString("\n") + val parent = resultFile.getParentFile + if (!parent.exists()) { + assert(parent.mkdirs(), "Could not create directory: " + parent) + } + stringToFile(resultFile, goldenOutput) + } + + val expectedOutputs: Seq[QueryOutput] = { + val goldenOutput = fileToString(resultFile) + val classSegments = goldenOutput.split("-- Class name: .*\n") + val functionSegments = classSegments + .flatMap(_.split("-- Function name: .*\n")).map(_.trim).filter(_ != "") + val segments = functionSegments.flatMap(_.split("-- !query.*\n")).filter(_ != "") + + // each query has 2 segments, plus the header + assert(segments.size == outputs.size * 2 + 1, + s"Expected ${outputs.size * 2 + 1} blocks in result file but got ${segments.size}. " + + s"Try regenerate the result files.") + Seq.tabulate(outputs.size) { i => + QueryOutput( + sql = segments(i * 2 + 1).trim, + schema = segments(i * 2 + 2).trim + ) + } + } + + // Compare results. + assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") { + outputs.size + } + + outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => + assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") { + output.sql + } + assertResult(expected.schema, + s"Schema did not match for query #$i\n${expected.sql}: $output") { + output.schema + } + } + } +} From 8d976a20ca14550fc2d43c58903bd60682892d8d Mon Sep 17 00:00:00 2001 From: beliefer Date: Sun, 12 Apr 2020 14:50:36 +0800 Subject: [PATCH 02/22] Optimize code --- .../test/resources/sql-functions/output.out | 49 +------------------ .../spark/sql/ExpressionsSchemaSuite.scala | 12 +++-- 2 files changed, 9 insertions(+), 52 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/output.out b/sql/core/src/test/resources/sql-functions/output.out index 40d47209fc2ad..5ff22f6deceff 100644 --- a/sql/core/src/test/resources/sql-functions/output.out +++ b/sql/core/src/test/resources/sql-functions/output.out @@ -1,5 +1,5 @@ -- Automatically generated by ExpressionsSchemaSuite - +-- Number of queries: 480 -- Class name: org.apache.spark.sql.catalyst.expressions.StringSpace @@ -142,7 +142,6 @@ SELECT day('2009-07-30') -- !query schema struct - -- Function name: dayofmonth -- !query SELECT dayofmonth('2009-07-30') @@ -235,7 +234,6 @@ SELECT lcase('SparkSql') -- !query schema struct - -- Function name: lower -- !query SELECT lower('SparkSql') @@ -273,7 +271,6 @@ SELECT char(65) -- !query schema struct - -- Function name: chr -- !query SELECT chr(65) @@ -434,7 +431,6 @@ SELECT approx_percentile(10.0, 0.5, 100) -- !query schema struct - -- Function name: percentile_approx -- !query SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100) @@ -467,7 +463,6 @@ SELECT POSITION('bar' IN 'foobarbar') -- !query schema struct - -- Function name: locate -- !query SELECT locate('bar', 'foobarbar') @@ -643,7 +638,6 @@ SELECT first_value(col, true) FROM VALUES (NULL), (5), (20) AS tab(col) -- !query schema struct - -- Function name: first -- !query SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) @@ -797,7 +791,6 @@ SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) -- !query schema struct - -- Function name: variance -- !query SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) @@ -835,7 +828,6 @@ SELECT posexplode_outer(array(10,20)) -- !query schema struct - -- Function name: posexplode -- !query SELECT posexplode(array(10,20)) @@ -864,7 +856,6 @@ SELECT every(col) FROM VALUES (true), (false), (true) AS tab(col) -- !query schema struct - -- Function name: bool_and -- !query SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) @@ -1106,7 +1097,6 @@ SELECT size(NULL) -- !query schema struct - -- Function name: cardinality -- !query SELECT cardinality(array('b', 'd', 'c', 'a')) @@ -1126,7 +1116,6 @@ struct -- Function name: current_timestamp - -- Function name: now @@ -1249,7 +1238,6 @@ SELECT MOD(2, 1.8) -- !query schema struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> - -- Function name: mod -- !query SELECT 2 % 1.8 @@ -1349,7 +1337,6 @@ SELECT bool_or(col) FROM VALUES (false), (false), (NULL) AS tab(col) -- !query schema struct - -- Function name: some -- !query SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) @@ -1364,7 +1351,6 @@ SELECT some(col) FROM VALUES (false), (false), (NULL) AS tab(col) -- !query schema struct - -- Function name: any -- !query SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) @@ -1502,7 +1488,6 @@ SELECT last_value(col, true) FROM VALUES (10), (5), (NULL) AS tab(col) -- !query schema struct - -- Function name: last -- !query SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) @@ -1671,44 +1656,32 @@ struct - -- Function name: tinyint - -- Function name: double - -- Function name: smallint - -- Function name: date - -- Function name: decimal - -- Function name: boolean - -- Function name: float - -- Function name: binary - -- Function name: bigint - -- Function name: int - -- Function name: timestamp @@ -1733,7 +1706,6 @@ SELECT avg(col) FROM VALUES (1), (2), (NULL) AS tab(col) -- !query schema struct - -- Function name: mean -- !query SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) @@ -1793,7 +1765,6 @@ SELECT pow(2, 3) -- !query schema struct - -- Function name: power -- !query SELECT power(2, 3) @@ -1843,7 +1814,6 @@ SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) -- !query schema struct - -- Function name: inline_outer -- !query SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) @@ -1965,7 +1935,6 @@ SELECT substr('Spark SQL', 5, 1) -- !query schema struct - -- Function name: substring -- !query SELECT substring('Spark SQL', 5) @@ -2016,7 +1985,6 @@ SELECT ucase('SparkSql') -- !query schema struct - -- Function name: upper -- !query SELECT upper('SparkSql') @@ -2062,7 +2030,6 @@ SELECT NULL = NULL -- !query schema struct<(NULL = NULL):boolean> - -- Function name: == -- !query SELECT 2 == 2 @@ -2337,7 +2304,6 @@ SELECT ceil(5) -- !query schema struct - -- Function name: ceiling -- !query SELECT ceiling(-0.1) @@ -2428,7 +2394,6 @@ SELECT xpath_number('12', 'sum(a/b)') -- !query schema struct12, sum(a/b)):double> - -- Function name: xpath_double -- !query SELECT xpath_double('12', 'sum(a/b)') @@ -2445,7 +2410,6 @@ struct12, sum(a/b)):double> -- Function name: ! - -- Function name: not @@ -2519,7 +2483,6 @@ SELECT signum(40) -- !query schema struct - -- Function name: sign -- !query SELECT sign(40) @@ -2572,7 +2535,6 @@ SELECT sha1('Spark') -- !query schema struct - -- Function name: sha -- !query SELECT sha('Spark') @@ -2928,7 +2890,6 @@ SELECT explode(array(10, 20)) -- !query schema struct - -- Function name: explode_outer -- !query SELECT explode_outer(array(10, 20)) @@ -3076,14 +3037,12 @@ SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) -- !query schema struct - -- Function name: stddev -- !query SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) -- !query schema struct - -- Function name: std -- !query SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) @@ -3287,7 +3246,6 @@ SELECT CHARACTER_LENGTH('Spark SQL ') -- !query schema struct - -- Function name: char_length -- !query SELECT char_length('Spark SQL ') @@ -3302,7 +3260,6 @@ SELECT CHARACTER_LENGTH('Spark SQL ') -- !query schema struct - -- Function name: length -- !query SELECT length('Spark SQL ') @@ -3436,7 +3393,6 @@ SELECT printf("Hello World %d %s", 100, "days") -- !query schema struct - -- Function name: format_string -- !query SELECT format_string("Hello World %d %s", 100, "days") @@ -3450,5 +3406,4 @@ struct -- !query SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') -- !query schema -struct - +struct \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index e457bfb6b8880..d719b5ab58e72 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -121,14 +121,13 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { val classFunsMap = funInfos.groupBy(_.getClassName) val outputBuffer = new ArrayBuffer[String] val outputs = new ArrayBuffer[QueryOutput] - outputBuffer += s"-- Automatically generated by ${getClass.getSimpleName}\n\n" classFunsMap.foreach { kv => val className = kv._1 if (!ignoreSet.contains(className)) { - outputBuffer += s"-- Class name: ${kv._1}\n" + outputBuffer += s"\n\n-- Class name: $className" kv._2.foreach { funInfo => - outputBuffer += s"-- Function name: ${funInfo.getName}" + outputBuffer += s"\n-- Function name: ${funInfo.getName}" val example = funInfo.getExamples example.split(" > ").toList.foreach(_ match { case exampleRe(sql, expected) => @@ -139,13 +138,16 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { outputs += queryOutput case _ => }) - outputBuffer += "\n" } } } if (regenerateGoldenFiles) { - val goldenOutput = outputBuffer.mkString("\n") + val goldenOutput = { + s"-- Automatically generated by ${getClass.getSimpleName}\n" + + s"-- Number of queries: ${outputs.size}" + + outputBuffer.mkString("\n") + } val parent = resultFile.getParentFile if (!parent.exists()) { assert(parent.mkdirs(), "Could not create directory: " + parent) From 31c79844fcff50f55434f1e1efbb68fdb237616a Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 13 Apr 2020 13:29:43 +0800 Subject: [PATCH 03/22] Only preserve the first SQL and its schema. --- .../test/resources/sql-functions/output.out | 776 +----------------- .../spark/sql/ExpressionsSchemaSuite.scala | 6 +- 2 files changed, 7 insertions(+), 775 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/output.out b/sql/core/src/test/resources/sql-functions/output.out index 5ff22f6deceff..58defe4a26851 100644 --- a/sql/core/src/test/resources/sql-functions/output.out +++ b/sql/core/src/test/resources/sql-functions/output.out @@ -1,5 +1,5 @@ -- Automatically generated by ExpressionsSchemaSuite --- Number of queries: 480 +-- Number of queries: 287 -- Class name: org.apache.spark.sql.catalyst.expressions.StringSpace @@ -44,14 +44,6 @@ struct<(3 ^ 5):int> SELECT rpad('hi', 5, '??') -- !query schema struct --- !query -SELECT rpad('hi', 1, '??') --- !query schema -struct --- !query -SELECT rpad('hi', 5) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.SchemaOfJson @@ -61,10 +53,6 @@ struct SELECT schema_of_json('[{"col":0}]') -- !query schema struct --- !query -SELECT schema_of_json('[{"col":01}]', map('allowNumericLeadingZeros', 'true')) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.ParseToTimestamp @@ -74,10 +62,6 @@ struct SELECT to_timestamp('2016-12-31 00:12:00') -- !query schema struct --- !query -SELECT to_timestamp('2016-12-31', 'yyyy-MM-dd') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathInt @@ -105,10 +89,6 @@ struct SELECT hex(17) -- !query schema struct --- !query -SELECT hex('Spark SQL') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.ArraysZip @@ -118,10 +98,6 @@ struct SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)) -- !query schema struct>> --- !query -SELECT arrays_zip(array(1, 2), array(2, 3), array(3, 4)) --- !query schema -struct>> -- Class name: org.apache.spark.sql.catalyst.expressions.InputFileName @@ -156,10 +132,6 @@ struct SELECT element_at(array(1, 2, 3), 2) -- !query schema struct --- !query -SELECT element_at(map(1, 'a', 2, 'b'), 2) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.WeekDay @@ -257,10 +229,6 @@ struct> SELECT pmod(10, 3) -- !query schema struct --- !query -SELECT pmod(-10, 3) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Chr @@ -303,14 +271,6 @@ struct SELECT exists(array(1, 2, 3), x -> x % 2 == 0) -- !query schema struct --- !query -SELECT exists(array(1, 2, 3), x -> x % 2 == 10) --- !query schema -struct --- !query -SELECT exists(array(1, null, 3), x -> x % 2 == 0) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Tanh @@ -329,14 +289,6 @@ struct SELECT split('oneAtwoBthreeC', '[ABC]') -- !query schema struct> --- !query -SELECT split('oneAtwoBthreeC', '[ABC]', -1) --- !query schema -struct> --- !query -SELECT split('oneAtwoBthreeC', '[ABC]', 2) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.GroupingID @@ -382,10 +334,6 @@ struct SELECT typeof(1) -- !query schema struct --- !query -SELECT typeof(array(1)) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Sequence @@ -395,14 +343,6 @@ struct SELECT sequence(1, 5) -- !query schema struct> --- !query -SELECT sequence(5, 1) --- !query schema -struct> --- !query -SELECT sequence(to_date('2018-01-01'), to_date('2018-03-01'), interval 1 month) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.InputFileBlockStart @@ -426,20 +366,12 @@ struct SELECT approx_percentile(10.0, array(0.5, 0.4, 0.1), 100) -- !query schema struct> --- !query -SELECT approx_percentile(10.0, 0.5, 100) --- !query schema -struct -- Function name: percentile_approx -- !query SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100) -- !query schema struct> --- !query -SELECT percentile_approx(10.0, 0.5, 100) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.TimeWindow @@ -454,28 +386,12 @@ struct SELECT position('bar', 'foobarbar') -- !query schema struct --- !query -SELECT position('bar', 'foobarbar', 5) --- !query schema -struct --- !query -SELECT POSITION('bar' IN 'foobarbar') --- !query schema -struct -- Function name: locate -- !query SELECT locate('bar', 'foobarbar') -- !query schema struct --- !query -SELECT locate('bar', 'foobarbar', 5) --- !query schema -struct --- !query -SELECT POSITION('bar' IN 'foobarbar') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.FormatNumber @@ -485,10 +401,6 @@ struct SELECT format_number(12332.123456, 4) -- !query schema struct --- !query -SELECT format_number(12332.123456, '##################.###') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.UnaryPositive @@ -521,30 +433,6 @@ struct SELECT to_json(named_struct('a', 1, 'b', 2)) -- !query schema struct --- !query -SELECT to_json(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy')) --- !query schema -struct --- !query -SELECT to_json(array(named_struct('a', 1, 'b', 2))) --- !query schema -struct --- !query -SELECT to_json(map('a', named_struct('b', 1))) --- !query schema -struct --- !query -SELECT to_json(map(named_struct('a', 1),named_struct('b', 2))) --- !query schema -struct --- !query -SELECT to_json(map('a', 1)) --- !query schema -struct --- !query -SELECT to_json(array((map('a', 1)))) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop @@ -595,22 +483,6 @@ struct SELECT 2 >= 1 -- !query schema struct<(2 >= 1):boolean> --- !query -SELECT 2.0 >= '2.1' --- !query schema -struct<(CAST(2.0 AS DOUBLE) >= CAST(2.1 AS DOUBLE)):boolean> --- !query -SELECT to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') --- !query schema -struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> --- !query -SELECT to_date('2009-07-30 04:17:52') >= to_date('2009-08-01 04:17:52') --- !query schema -struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-08-01 04:17:52')):boolean> --- !query -SELECT 1 >= NULL --- !query schema -struct<(1 >= CAST(NULL AS INT)):boolean> -- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseAnd @@ -629,28 +501,12 @@ struct<(3 & 5):int> SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) -- !query schema struct --- !query -SELECT first_value(col) FROM VALUES (NULL), (5), (20) AS tab(col) --- !query schema -struct --- !query -SELECT first_value(col, true) FROM VALUES (NULL), (5), (20) AS tab(col) --- !query schema -struct -- Function name: first -- !query SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) -- !query schema struct --- !query -SELECT first(col) FROM VALUES (NULL), (5), (20) AS tab(col) --- !query schema -struct --- !query -SELECT first(col, true) FROM VALUES (NULL), (5), (20) AS tab(col) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.TruncDate @@ -660,30 +516,6 @@ struct SELECT trunc('2019-08-04', 'week') -- !query schema struct --- !query -SELECT trunc('2019-08-04', 'quarter') --- !query schema -struct --- !query -SELECT trunc('2009-02-12', 'MM') --- !query schema -struct --- !query -SELECT trunc('2015-10-27', 'YEAR') --- !query schema -struct --- !query -SELECT trunc('2015-10-27', 'DECADE') --- !query schema -struct --- !query -SELECT trunc('1981-01-19', 'century') --- !query schema -struct --- !query -SELECT trunc('1981-01-19', 'millennium') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean @@ -702,10 +534,6 @@ struct1, a/b):boolean> SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) -- !query schema struct --- !query -SELECT make_interval(100, null, 3) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Atanh @@ -715,10 +543,6 @@ struct SELECT atanh(0) -- !query schema struct --- !query -SELECT atanh(2) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.FindInSet @@ -737,14 +561,6 @@ struct SELECT json_array_length('[1,2,3,4]') -- !query schema struct --- !query -SELECT json_array_length('[1,2,3,{"f1":1,"f2":[5,6]},4]') --- !query schema -struct --- !query -SELECT json_array_length('[1,2') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg @@ -814,10 +630,6 @@ struct SELECT filter(array(1, 2, 3), x -> x % 2 == 1) -- !query schema struct> --- !query -SELECT filter(array(0, 2, 3), (x, i) -> x > i) --- !query schema -struct namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):array> -- Class name: org.apache.spark.sql.catalyst.expressions.PosExplode @@ -847,28 +659,12 @@ struct SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col) -- !query schema struct --- !query -SELECT every(col) FROM VALUES (NULL), (true), (true) AS tab(col) --- !query schema -struct --- !query -SELECT every(col) FROM VALUES (true), (false), (true) AS tab(col) --- !query schema -struct -- Function name: bool_and -- !query SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) -- !query schema struct --- !query -SELECT bool_and(col) FROM VALUES (NULL), (true), (true) AS tab(col) --- !query schema -struct --- !query -SELECT bool_and(col) FROM VALUES (true), (false), (true) AS tab(col) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg @@ -897,10 +693,6 @@ struct SELECT months_between('1997-02-28 10:30:00', '1996-10-30') -- !query schema struct --- !query -SELECT months_between('1997-02-28 10:30:00', '1996-10-30', false) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.EqualNullSafe @@ -910,18 +702,6 @@ struct 2 -- !query schema struct<(2 <=> 2):boolean> --- !query -SELECT 1 <=> '1' --- !query schema -struct<(1 <=> CAST(1 AS INT)):boolean> --- !query -SELECT true <=> NULL --- !query schema -struct<(true <=> CAST(NULL AS BOOLEAN)):boolean> --- !query -SELECT NULL <=> NULL --- !query schema -struct<(NULL <=> NULL):boolean> -- Class name: org.apache.spark.sql.catalyst.expressions.Add @@ -949,26 +729,6 @@ struct<(2 * 3):int> SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') -- !query schema struct --- !query -SELECT date_part('week', timestamp'2019-08-12 01:00:00.123456') --- !query schema -struct --- !query -SELECT date_part('doy', DATE'2019-08-12') --- !query schema -struct --- !query -SELECT date_part('SECONDS', timestamp'2019-10-01 00:00:01.000001') --- !query schema -struct --- !query -SELECT date_part('days', interval 1 year 10 months 5 days) --- !query schema -struct --- !query -SELECT date_part('seconds', interval 5 hours 30 seconds 1 milliseconds 1 microseconds) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.ShiftLeft @@ -987,22 +747,6 @@ struct SELECT 2 > 1 -- !query schema struct<(2 > 1):boolean> --- !query -SELECT 2 > '1.1' --- !query schema -struct<(2 > CAST(1.1 AS INT)):boolean> --- !query -SELECT to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') --- !query schema -struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean> --- !query -SELECT to_date('2009-07-30 04:17:52') > to_date('2009-08-01 04:17:52') --- !query schema -struct<(to_date('2009-07-30 04:17:52') > to_date('2009-08-01 04:17:52')):boolean> --- !query -SELECT 1 > NULL --- !query schema -struct<(1 > CAST(NULL AS INT)):boolean> -- Class name: org.apache.spark.sql.catalyst.expressions.Slice @@ -1012,10 +756,6 @@ struct<(1 > CAST(NULL AS INT)):boolean> SELECT slice(array(1, 2, 3, 4), 2, 2) -- !query schema struct> --- !query -SELECT slice(array(1, 2, 3, 4), -2, 2) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.Sentences @@ -1088,28 +828,12 @@ struct SELECT size(array('b', 'd', 'c', 'a')) -- !query schema struct --- !query -SELECT size(map('a', 1, 'b', 2)) --- !query schema -struct --- !query -SELECT size(NULL) --- !query schema -struct -- Function name: cardinality -- !query SELECT cardinality(array('b', 'd', 'c', 'a')) -- !query schema struct --- !query -SELECT cardinality(map('a', 1, 'b', 2)) --- !query schema -struct --- !query -SELECT cardinality(NULL) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.CurrentTimestamp @@ -1126,18 +850,6 @@ struct SELECT 1 in(1, 2, 3) -- !query schema struct<(1 IN (1, 2, 3)):boolean> --- !query -SELECT 1 in(2, 3, 4) --- !query schema -struct<(1 IN (2, 3, 4)):boolean> --- !query -SELECT named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3)) --- !query schema -struct<(named_struct(a, 1, b, 2) IN (named_struct(a, 1, b, 1), named_struct(a, 1, b, 3))):boolean> --- !query -SELECT named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)) --- !query schema -struct<(named_struct(a, 1, b, 2) IN (named_struct(a, 1, b, 2), named_struct(a, 1, b, 3))):boolean> -- Class name: org.apache.spark.sql.catalyst.expressions.CurrentDatabase @@ -1165,14 +877,6 @@ struct SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) -- !query schema struct --- !query -SELECT sum(col) FROM VALUES (NULL), (10), (15) AS tab(col) --- !query schema -struct --- !query -SELECT sum(col) FROM VALUES (NULL), (NULL) AS tab(col) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CountIf @@ -1182,10 +886,6 @@ struct SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) -- !query schema struct --- !query -SELECT count_if(col IS NULL) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.MakeTimestamp @@ -1195,22 +895,6 @@ struct SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) -- !query schema struct --- !query -SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887, 'CET') --- !query schema -struct --- !query -SELECT make_timestamp(2019, 6, 30, 23, 59, 60) --- !query schema -struct --- !query -SELECT make_timestamp(2019, 13, 1, 10, 11, 12, 'PST') --- !query schema -struct --- !query -SELECT make_timestamp(null, 7, 22, 15, 30, 0) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.CsvToStructs @@ -1220,10 +904,6 @@ struct> --- !query -SELECT from_csv('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.Remainder @@ -1233,20 +913,12 @@ struct> SELECT 2 % 1.8 -- !query schema struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> --- !query -SELECT MOD(2, 1.8) --- !query schema -struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> -- Function name: mod -- !query SELECT 2 % 1.8 -- !query schema struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> --- !query -SELECT MOD(2, 1.8) --- !query schema -struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> -- Class name: org.apache.spark.sql.catalyst.expressions.StringRepeat @@ -1328,42 +1000,18 @@ struct12, sum(a/b)):smallint> SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) -- !query schema struct --- !query -SELECT bool_or(col) FROM VALUES (NULL), (true), (false) AS tab(col) --- !query schema -struct --- !query -SELECT bool_or(col) FROM VALUES (false), (false), (NULL) AS tab(col) --- !query schema -struct -- Function name: some -- !query SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) -- !query schema struct --- !query -SELECT some(col) FROM VALUES (NULL), (true), (false) AS tab(col) --- !query schema -struct --- !query -SELECT some(col) FROM VALUES (false), (false), (NULL) AS tab(col) --- !query schema -struct -- Function name: any -- !query SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) -- !query schema struct --- !query -SELECT any(col) FROM VALUES (NULL), (true), (false) AS tab(col) --- !query schema -struct --- !query -SELECT any(col) FROM VALUES (false), (false), (NULL) AS tab(col) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Murmur3Hash @@ -1382,18 +1030,6 @@ struct SET spark.sql.parser.escapedStringLiterals=true -- !query schema struct --- !query -SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' --- !query schema -struct<%SystemDrive%\Users\John RLIKE %SystemDrive%\\Users.*:boolean> --- !query -SET spark.sql.parser.escapedStringLiterals=false --- !query schema -struct --- !query -SELECT '%SystemDrive%\\Users\\John' rlike '%SystemDrive%\\\\Users.*' --- !query schema -struct<%SystemDrive%\Users\John RLIKE %SystemDrive%\\Users.*:boolean> -- Class name: org.apache.spark.sql.catalyst.expressions.If @@ -1439,10 +1075,6 @@ struct SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) -- !query schema struct --- !query -SELECT percentile(col, array(0.25, 0.75)) FROM VALUES (0), (10) AS tab(col) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.IsNotNull @@ -1479,28 +1111,12 @@ struct<~0:int> SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) -- !query schema struct --- !query -SELECT last_value(col) FROM VALUES (10), (5), (NULL) AS tab(col) --- !query schema -struct --- !query -SELECT last_value(col, true) FROM VALUES (10), (5), (NULL) AS tab(col) --- !query schema -struct -- Function name: last -- !query SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) -- !query schema struct --- !query -SELECT last(col) FROM VALUES (10), (5), (NULL) AS tab(col) --- !query schema -struct --- !query -SELECT last(col, true) FROM VALUES (10), (5), (NULL) AS tab(col) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.NullIf @@ -1555,18 +1171,6 @@ struct SELECT make_date(2013, 7, 15) -- !query schema struct --- !query -SELECT make_date(2019, 13, 1) --- !query schema -struct --- !query -SELECT make_date(2019, 7, NULL) --- !query schema -struct --- !query -SELECT make_date(2019, 2, 30) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.JsonToStructs @@ -1576,10 +1180,6 @@ struct SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') -- !query schema struct> --- !query -SELECT from_json('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.ZipWith @@ -1589,14 +1189,6 @@ struct> SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) -- !query schema struct>> --- !query -SELECT zip_with(array(1, 2), array(3, 4), (x, y) -> x + y) --- !query schema -struct> --- !query -SELECT zip_with(array('a', 'b', 'c'), array('d', 'e', 'f'), (x, y) -> concat(x, y)) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.NamedStruct @@ -1629,10 +1221,6 @@ struct SELECT str_to_map('a:1,b:2,c:3', ',', ':') -- !query schema struct> --- !query -SELECT str_to_map('a') --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.ArraySort @@ -1642,14 +1230,6 @@ struct> SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) -- !query schema struct namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array> --- !query -SELECT array_sort(array('bc', 'ab', 'dc'), (left, right) -> case when left is null and right is null then 0 when left is null then -1 when right is null then 1 when left < right then 1 when left > right then -1 else 0 end) --- !query schema -struct namedlambdavariable()) THEN -1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array> --- !query -SELECT array_sort(array('b', 'd', null, 'c', 'a')) --- !query schema -struct namedlambdavariable()), 1, 0)))))))))), namedlambdavariable(), namedlambdavariable())):array> -- Class name: org.apache.spark.sql.catalyst.expressions.Cast @@ -1701,20 +1281,12 @@ struct SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) -- !query schema struct --- !query -SELECT avg(col) FROM VALUES (1), (2), (NULL) AS tab(col) --- !query schema -struct -- Function name: mean -- !query SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) -- !query schema struct --- !query -SELECT mean(col) FROM VALUES (1), (2), (NULL) AS tab(col) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.SortArray @@ -1742,10 +1314,6 @@ struct SELECT ascii('222') -- !query schema struct --- !query -SELECT ascii(2) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.ArrayRemove @@ -1779,22 +1347,6 @@ struct SELECT 1 < 2 -- !query schema struct<(1 < 2):boolean> --- !query -SELECT 1.1 < '1' --- !query schema -struct<(CAST(1.1 AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> --- !query -SELECT to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') --- !query schema -struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> --- !query -SELECT to_date('2009-07-30 04:17:52') < to_date('2009-08-01 04:17:52') --- !query schema -struct<(to_date('2009-07-30 04:17:52') < to_date('2009-08-01 04:17:52')):boolean> --- !query -SELECT 1 < NULL --- !query schema -struct<(1 < CAST(NULL AS INT)):boolean> -- Class name: org.apache.spark.sql.catalyst.expressions.MapKeys @@ -1846,14 +1398,6 @@ struct SELECT array_join(array('hello', 'world'), ' ') -- !query schema struct --- !query -SELECT array_join(array('hello', null ,'world'), ' ') --- !query schema -struct --- !query -SELECT array_join(array('hello', null ,'world'), ' ', ',') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.And @@ -1913,10 +1457,6 @@ struct SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) -- !query schema struct> --- !query -SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.Substring @@ -1926,28 +1466,12 @@ struct --- !query -SELECT substr('Spark SQL', -3) --- !query schema -struct --- !query -SELECT substr('Spark SQL', 5, 1) --- !query schema -struct -- Function name: substring -- !query SELECT substring('Spark SQL', 5) -- !query schema struct --- !query -SELECT substring('Spark SQL', -3) --- !query schema -struct --- !query -SELECT substring('Spark SQL', 5, 1) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Asinh @@ -2017,36 +1541,12 @@ struct SELECT 2 = 2 -- !query schema struct<(2 = 2):boolean> --- !query -SELECT 1 = '1' --- !query schema -struct<(1 = CAST(1 AS INT)):boolean> --- !query -SELECT true = NULL --- !query schema -struct<(true = CAST(NULL AS BOOLEAN)):boolean> --- !query -SELECT NULL = NULL --- !query schema -struct<(NULL = NULL):boolean> -- Function name: == -- !query SELECT 2 == 2 -- !query schema struct<(2 = 2):boolean> --- !query -SELECT 1 == '1' --- !query schema -struct<(1 = CAST(1 AS INT)):boolean> --- !query -SELECT true == NULL --- !query schema -struct<(true = CAST(NULL AS BOOLEAN)):boolean> --- !query -SELECT NULL == NULL --- !query schema -struct<(NULL = NULL):boolean> -- Class name: org.apache.spark.sql.catalyst.expressions.StringLPad @@ -2056,14 +1556,6 @@ struct<(NULL = NULL):boolean> SELECT lpad('hi', 5, '??') -- !query schema struct --- !query -SELECT lpad('hi', 1, '??') --- !query schema -struct --- !query -SELECT lpad('hi', 5) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.MapFromEntries @@ -2091,10 +1583,6 @@ struct SELECT 3 / 2 -- !query schema struct<(CAST(3 AS DOUBLE) / CAST(2 AS DOUBLE)):double> --- !query -SELECT 2L / 2L --- !query schema -struct<(CAST(2 AS DOUBLE) / CAST(2 AS DOUBLE)):double> -- Class name: org.apache.spark.sql.catalyst.expressions.Like @@ -2104,26 +1592,6 @@ struct<(CAST(2 AS DOUBLE) / CAST(2 AS DOUBLE)):double> SELECT like('Spark', '_park') -- !query schema struct --- !query -SET spark.sql.parser.escapedStringLiterals=true --- !query schema -struct --- !query -SELECT '%SystemDrive%\Users\John' like '\%SystemDrive\%\\Users%' --- !query schema -struct<%SystemDrive%\Users\John LIKE \%SystemDrive\%\\Users%:boolean> --- !query -SET spark.sql.parser.escapedStringLiterals=false --- !query schema -struct --- !query -SELECT '%SystemDrive%\\Users\\John' like '\%SystemDrive\%\\\\Users%' --- !query schema -struct<%SystemDrive%\Users\John LIKE \%SystemDrive\%\\Users%:boolean> --- !query -SELECT '%SystemDrive%/Users/John' like '/%SystemDrive/%//Users%' ESCAPE '/' --- !query schema -struct<%SystemDrive%/Users/John LIKE /%SystemDrive/%//Users%:boolean> -- Class name: org.apache.spark.sql.catalyst.expressions.OctetLength @@ -2142,14 +1610,6 @@ struct SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END -- !query schema struct 0) THEN CAST(1 AS DECIMAL(11,1)) WHEN (2 > 0) THEN CAST(2.0 AS DECIMAL(11,1)) ELSE CAST(1.2 AS DECIMAL(11,1)) END:decimal(11,1)> --- !query -SELECT CASE WHEN 1 < 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END --- !query schema -struct 0) THEN CAST(2.0 AS DECIMAL(11,1)) ELSE CAST(1.2 AS DECIMAL(11,1)) END:decimal(11,1)> --- !query -SELECT CASE WHEN 1 < 0 THEN 1 WHEN 2 < 0 THEN 2.0 END --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Log @@ -2177,10 +1637,6 @@ struct SELECT acos(1) -- !query schema struct --- !query -SELECT acos(2) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.SparkPartitionID @@ -2213,10 +1669,6 @@ struct SELECT floor(-0.1) -- !query schema struct --- !query -SELECT floor(5) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.SchemaOfCsv @@ -2299,20 +1751,12 @@ struct<(1 IS NULL):boolean> SELECT ceil(-0.1) -- !query schema struct --- !query -SELECT ceil(5) --- !query schema -struct -- Function name: ceiling -- !query SELECT ceiling(-0.1) -- !query schema struct --- !query -SELECT ceiling(5) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Asin @@ -2322,10 +1766,6 @@ struct SELECT asin(0) -- !query schema struct --- !query -SELECT asin(2) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Count @@ -2335,14 +1775,6 @@ struct SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) -- !query schema struct --- !query -SELECT count(col) FROM VALUES (NULL), (5), (5), (20) AS tab(col) --- !query schema -struct --- !query -SELECT count(DISTINCT col) FROM VALUES (NULL), (5), (5), (10) AS tab(col) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Minute @@ -2447,10 +1879,6 @@ struct SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1) -- !query schema struct> --- !query -SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.ArrayUnion @@ -2469,10 +1897,6 @@ struct> SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) -- !query schema struct --- !query -SELECT kurtosis(col) FROM VALUES (1), (10), (100), (10), (1) as tab(col) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Signum @@ -2497,34 +1921,6 @@ struct SELECT overlay('Spark SQL' PLACING '_' FROM 6) -- !query schema struct --- !query -SELECT overlay('Spark SQL' PLACING 'CORE' FROM 7) --- !query schema -struct --- !query -SELECT overlay('Spark SQL' PLACING 'ANSI ' FROM 7 FOR 0) --- !query schema -struct --- !query -SELECT overlay('Spark SQL' PLACING 'tructured' FROM 2 FOR 4) --- !query schema -struct --- !query -SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('_', 'utf-8') FROM 6) --- !query schema -struct --- !query -SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('CORE', 'utf-8') FROM 7) --- !query schema -struct --- !query -SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('ANSI ', 'utf-8') FROM 7 FOR 0) --- !query schema -struct --- !query -SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('tructured', 'utf-8') FROM 2 FOR 4) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Sha1 @@ -2549,30 +1945,6 @@ struct SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') -- !query schema struct --- !query -SELECT date_trunc('MM', '2015-03-05T09:32:05.359') --- !query schema -struct --- !query -SELECT date_trunc('DD', '2015-03-05T09:32:05.359') --- !query schema -struct --- !query -SELECT date_trunc('HOUR', '2015-03-05T09:32:05.359') --- !query schema -struct --- !query -SELECT date_trunc('MILLISECOND', '2015-03-05T09:32:05.123456') --- !query schema -struct --- !query -SELECT date_trunc('DECADE', '2015-03-05T09:32:05.123456') --- !query schema -struct --- !query -SELECT date_trunc('CENTURY', '2015-03-05T09:32:05.123456') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet @@ -2609,10 +1981,6 @@ struct SELECT to_csv(named_struct('a', 1, 'b', 2)) -- !query schema struct --- !query -SELECT to_csv(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy')) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.XxHash64 @@ -2699,34 +2067,6 @@ struct SELECT trim(' SparkSQL ') -- !query schema struct --- !query -SELECT trim(BOTH FROM ' SparkSQL ') --- !query schema -struct --- !query -SELECT trim(LEADING FROM ' SparkSQL ') --- !query schema -struct --- !query -SELECT trim(TRAILING FROM ' SparkSQL ') --- !query schema -struct --- !query -SELECT trim('SL' FROM 'SSparkSQLS') --- !query schema -struct --- !query -SELECT trim(BOTH 'SL' FROM 'SSparkSQLS') --- !query schema -struct --- !query -SELECT trim(LEADING 'SL' FROM 'SSparkSQLS') --- !query schema -struct --- !query -SELECT trim(TRAILING 'SL' FROM 'SSparkSQLS') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.IsNaN @@ -2904,10 +2244,6 @@ struct SELECT to_date('2009-07-30 04:17:52') -- !query schema struct --- !query -SELECT to_date('2016-12-31', 'yyyy-MM-dd') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.ParseUrl @@ -2917,14 +2253,6 @@ struct SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') -- !query schema struct --- !query -SELECT parse_url('http://spark.apache.org/path?query=1', 'QUERY') --- !query schema -struct --- !query -SELECT parse_url('http://spark.apache.org/path?query=1', 'QUERY', 'query') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Cosh @@ -2943,10 +2271,6 @@ struct SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) -- !query schema struct --- !query -SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x, acc -> acc * 10) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned @@ -2974,10 +2298,6 @@ struct SELECT datediff('2009-07-31', '2009-07-30') -- !query schema struct --- !query -SELECT datediff('2009-07-30', '2009-07-31') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Log1p @@ -3014,10 +2334,6 @@ struct>> SELECT reverse('Spark SQL') -- !query schema struct --- !query -SELECT reverse(array(2, 1, 4, 3)) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.ArrayIntersect @@ -3084,22 +2400,6 @@ struct SELECT 2 <= 2 -- !query schema struct<(2 <= 2):boolean> --- !query -SELECT 1.0 <= '1' --- !query schema -struct<(CAST(1.0 AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> --- !query -SELECT to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') --- !query schema -struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> --- !query -SELECT to_date('2009-07-30 04:17:52') <= to_date('2009-08-01 04:17:52') --- !query schema -struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-08-01 04:17:52')):boolean> --- !query -SELECT 1 <= NULL --- !query schema -struct<(1 <= CAST(NULL AS INT)):boolean> -- Class name: org.apache.spark.sql.catalyst.expressions.Elt @@ -3118,10 +2418,6 @@ struct SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) -- !query schema struct --- !query -SELECT skewness(col) FROM VALUES (-1000), (-100), (10), (20) AS tab(col) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Left @@ -3154,18 +2450,6 @@ struct SELECT forall(array(1, 2, 3), x -> x % 2 == 0) -- !query schema struct --- !query -SELECT forall(array(2, 4, 8), x -> x % 2 == 0) --- !query schema -struct --- !query -SELECT forall(array(1, null, 3), x -> x % 2 == 0) --- !query schema -struct --- !query -SELECT forall(array(2, null, 8), x -> x % 2 == 0) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Bin @@ -3175,14 +2459,6 @@ struct --- !query -SELECT bin(-13) --- !query schema -struct --- !query -SELECT bin(13.3) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.DenseRank @@ -3206,10 +2482,6 @@ struct SELECT transform(array(1, 2, 3), x -> x + 1) -- !query schema struct> --- !query -SELECT transform(array(1, 2, 3), (x, i) -> x + i) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.JsonTuple @@ -3227,7 +2499,7 @@ struct -- !query SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) -- !query schema -struct +struct -- Class name: org.apache.spark.sql.catalyst.expressions.Length @@ -3237,42 +2509,18 @@ struct SELECT character_length('Spark SQL ') -- !query schema struct --- !query -SELECT CHAR_LENGTH('Spark SQL ') --- !query schema -struct --- !query -SELECT CHARACTER_LENGTH('Spark SQL ') --- !query schema -struct -- Function name: char_length -- !query SELECT char_length('Spark SQL ') -- !query schema struct --- !query -SELECT CHAR_LENGTH('Spark SQL ') --- !query schema -struct --- !query -SELECT CHARACTER_LENGTH('Spark SQL ') --- !query schema -struct -- Function name: length -- !query SELECT length('Spark SQL ') -- !query schema struct --- !query -SELECT CHAR_LENGTH('Spark SQL ') --- !query schema -struct --- !query -SELECT CHARACTER_LENGTH('Spark SQL ') --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.Unhex @@ -3291,10 +2539,6 @@ struct SELECT conv('100', 2, 10) -- !query schema struct --- !query -SELECT conv(-10, 16, -10) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.JsonObjectKeys @@ -3304,14 +2548,6 @@ struct Select json_object_keys('{}') -- !query schema struct> --- !query -Select json_object_keys('{"key": "value"}') --- !query schema -struct> --- !query -Select json_object_keys('{"f1":"abc","f2":{"f3":"a", "f4":"b"}}') --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.MinBy @@ -3357,10 +2593,6 @@ struct SELECT concat('Spark', 'SQL') -- !query schema struct --- !query -SELECT concat(array(1, 2, 3), array(4, 5), array(6)) --- !query schema -struct> -- Class name: org.apache.spark.sql.catalyst.expressions.UnBase64 @@ -3379,10 +2611,6 @@ struct SELECT acosh(1) -- !query schema struct --- !query -SELECT acosh(0) --- !query schema -struct -- Class name: org.apache.spark.sql.catalyst.expressions.FormatString diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index d719b5ab58e72..02631ac063c02 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -129,7 +129,11 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { kv._2.foreach { funInfo => outputBuffer += s"\n-- Function name: ${funInfo.getName}" val example = funInfo.getExamples - example.split(" > ").toList.foreach(_ match { + + // If expression exists 'Examples' segment, the first element is 'Examples'. Because + // this test case is only used to print aliases of expressions for double checking. + // Therefore, we only need to output the first SQL and its corresponding schema. + example.split(" > ").take(2).toList.foreach(_ match { case exampleRe(sql, expected) => val df = spark.sql(sql) val schema = df.schema.catalogString From de6f7ad8502456ec33b81f41c216dbcf41cfc207 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 13 Apr 2020 17:36:45 +0800 Subject: [PATCH 04/22] Optimize code --- .../test/resources/sql-functions/schema.out | 2637 +++++++++++++++++ .../spark/sql/ExpressionsSchemaSuite.scala | 25 +- 2 files changed, 2651 insertions(+), 11 deletions(-) create mode 100644 sql/core/src/test/resources/sql-functions/schema.out diff --git a/sql/core/src/test/resources/sql-functions/schema.out b/sql/core/src/test/resources/sql-functions/schema.out new file mode 100644 index 0000000000000..218f3ea8634a6 --- /dev/null +++ b/sql/core/src/test/resources/sql-functions/schema.out @@ -0,0 +1,2637 @@ +-- Automatically generated by ExpressionsSchemaSuite +-- Number of queries: 287 + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringSpace + +-- Function name: space +-- !query +SELECT concat(space(2), '1') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CreateArray + +-- Function name: array +-- !query +SELECT array(1, 2, 3) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayExcept + +-- Function name: array_except +-- !query +SELECT array_except(array(1, 2, 3), array(1, 3, 5)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseXor + +-- Function name: ^ +-- !query +SELECT 3 ^ 5 +-- !query schema +struct<(3 ^ 5):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringRPad + +-- Function name: rpad +-- !query +SELECT rpad('hi', 5, '??') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SchemaOfJson + +-- Function name: schema_of_json +-- !query +SELECT schema_of_json('[{"col":0}]') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ParseToTimestamp + +-- Function name: to_timestamp +-- !query +SELECT to_timestamp('2016-12-31 00:12:00') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathInt + +-- Function name: xpath_int +-- !query +SELECT xpath_int('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop + +-- Function name: var_pop +-- !query +SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Hex + +-- Function name: hex +-- !query +SELECT hex(17) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArraysZip + +-- Function name: arrays_zip +-- !query +SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)) +-- !query schema +struct>> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.InputFileName + +-- Function name: input_file_name + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID + +-- Function name: monotonically_increasing_id + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DayOfMonth + +-- Function name: day +-- !query +SELECT day('2009-07-30') +-- !query schema +struct + +-- Function name: dayofmonth +-- !query +SELECT dayofmonth('2009-07-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ElementAt + +-- Function name: element_at +-- !query +SELECT element_at(array(1, 2, 3), 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.WeekDay + +-- Function name: weekday +-- !query +SELECT weekday('2009-07-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathLong + +-- Function name: xpath_long +-- !query +SELECT xpath_long('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):bigint> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CumeDist + +-- Function name: cume_dist + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayMin + +-- Function name: array_min +-- !query +SELECT array_min(array(1, 20, null, 3)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy + +-- Function name: max_by +-- !query +SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Rank + +-- Function name: rank + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Right + +-- Function name: right +-- !query +SELECT right('Spark SQL', 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Least + +-- Function name: least +-- !query +SELECT least(10, 9, 2, 4, 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Lower + +-- Function name: lcase +-- !query +SELECT lcase('SparkSql') +-- !query schema +struct + +-- Function name: lower +-- !query +SELECT lower('SparkSql') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Nvl + +-- Function name: nvl +-- !query +SELECT nvl(NULL, array('2')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Pmod + +-- Function name: pmod +-- !query +SELECT pmod(10, 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Chr + +-- Function name: char +-- !query +SELECT char(65) +-- !query schema +struct + +-- Function name: chr +-- !query +SELECT chr(65) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.GetJsonObject + +-- Function name: get_json_object +-- !query +SELECT get_json_object('{"a":"b"}', '$.a') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ToDegrees + +-- Function name: degrees +-- !query +SELECT degrees(3.141592653589793) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayExists + +-- Function name: exists +-- !query +SELECT exists(array(1, 2, 3), x -> x % 2 == 0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Tanh + +-- Function name: tanh +-- !query +SELECT tanh(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringSplit + +-- Function name: split +-- !query +SELECT split('oneAtwoBthreeC', '[ABC]') +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.GroupingID + +-- Function name: grouping_id +-- !query +SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Expm1 + +-- Function name: expm1 +-- !query +SELECT expm1(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Quarter + +-- Function name: quarter +-- !query +SELECT quarter('2016-08-31') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Atan2 + +-- Function name: atan2 +-- !query +SELECT atan2(0, 0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TypeOf + +-- Function name: typeof +-- !query +SELECT typeof(1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sequence + +-- Function name: sequence +-- !query +SELECT sequence(1, 5) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.InputFileBlockStart + +-- Function name: input_file_block_start + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ConcatWs + +-- Function name: concat_ws +-- !query +SELECT concat_ws(' ', 'Spark', 'SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile + +-- Function name: approx_percentile +-- !query +SELECT approx_percentile(10.0, array(0.5, 0.4, 0.1), 100) +-- !query schema +struct> + +-- Function name: percentile_approx +-- !query +SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TimeWindow + +-- Function name: window + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringLocate + +-- Function name: position +-- !query +SELECT position('bar', 'foobarbar') +-- !query schema +struct + +-- Function name: locate +-- !query +SELECT locate('bar', 'foobarbar') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.FormatNumber + +-- Function name: format_number +-- !query +SELECT format_number(12332.123456, 4) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.UnaryPositive + +-- Function name: positive + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Corr + +-- Function name: corr +-- !query +SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Md5 + +-- Function name: md5 +-- !query +SELECT md5('Spark') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StructsToJson + +-- Function name: to_json +-- !query +SELECT to_json(named_struct('a', 1, 'b', 2)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop + +-- Function name: stddev_pop +-- !query +SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Rint + +-- Function name: rint +-- !query +SELECT rint(12.3456) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapFromArrays + +-- Function name: map_from_arrays +-- !query +SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sinh + +-- Function name: sinh +-- !query +SELECT sinh(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Lag + +-- Function name: lag + + +-- Class name: org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual + +-- Function name: >= +-- !query +SELECT 2 >= 1 +-- !query schema +struct<(2 >= 1):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseAnd + +-- Function name: & +-- !query +SELECT 3 & 5 +-- !query schema +struct<(3 & 5):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.First + +-- Function name: first_value +-- !query +SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) +-- !query schema +struct + +-- Function name: first +-- !query +SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TruncDate + +-- Function name: trunc +-- !query +SELECT trunc('2019-08-04', 'week') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean + +-- Function name: xpath_boolean +-- !query +SELECT xpath_boolean('1','a/b') +-- !query schema +struct1, a/b):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MakeInterval + +-- Function name: make_interval +-- !query +SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Atanh + +-- Function name: atanh +-- !query +SELECT atanh(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.FindInSet + +-- Function name: find_in_set +-- !query +SELECT find_in_set('ab','abc,b,ab,c,def') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray + +-- Function name: json_array_length +-- !query +SELECT json_array_length('[1,2,3,4]') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg + +-- Function name: bit_xor +-- !query +SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Decode + +-- Function name: decode +-- !query +SELECT decode(encode('abc', 'utf-8'), 'utf-8') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Coalesce + +-- Function name: coalesce +-- !query +SELECT coalesce(NULL, 1, NULL) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.RegExpReplace + +-- Function name: regexp_replace +-- !query +SELECT regexp_replace('100-200', '(\\d+)', 'num') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp + +-- Function name: var_samp +-- !query +SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + +-- Function name: variance +-- !query +SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cos + +-- Function name: cos +-- !query +SELECT cos(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayFilter + +-- Function name: filter +-- !query +SELECT filter(array(1, 2, 3), x -> x % 2 == 1) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.PosExplode + +-- Function name: posexplode_outer +-- !query +SELECT posexplode_outer(array(10,20)) +-- !query schema +struct + +-- Function name: posexplode +-- !query +SELECT posexplode(array(10,20)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.InputFileBlockLength + +-- Function name: input_file_block_length + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd + +-- Function name: every +-- !query +SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col) +-- !query schema +struct + +-- Function name: bool_and +-- !query +SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg + +-- Function name: count_min_sketch + + +-- Class name: org.apache.spark.sql.catalyst.expressions.AssertTrue + +-- Function name: assert_true +-- !query +SELECT assert_true(0 < 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CurrentDate + +-- Function name: current_date + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MonthsBetween + +-- Function name: months_between +-- !query +SELECT months_between('1997-02-28 10:30:00', '1996-10-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.EqualNullSafe + +-- Function name: <=> +-- !query +SELECT 2 <=> 2 +-- !query schema +struct<(2 <=> 2):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Add + +-- Function name: + +-- !query +SELECT 1 + 2 +-- !query schema +struct<(1 + 2):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Multiply + +-- Function name: * +-- !query +SELECT 2 * 3 +-- !query schema +struct<(2 * 3):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DatePart + +-- Function name: date_part +-- !query +SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ShiftLeft + +-- Function name: shiftleft +-- !query +SELECT shiftleft(2, 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.GreaterThan + +-- Function name: > +-- !query +SELECT 2 > 1 +-- !query schema +struct<(2 > 1):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Slice + +-- Function name: slice +-- !query +SELECT slice(array(1, 2, 3, 4), 2, 2) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sentences + +-- Function name: sentences +-- !query +SELECT sentences('Hi there! Good morning.') +-- !query schema +struct>> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SoundEx + +-- Function name: soundex +-- !query +SELECT soundex('Miller') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.AddMonths + +-- Function name: add_months +-- !query +SELECT add_months('2016-08-31', 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Max + +-- Function name: max +-- !query +SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapFilter + +-- Function name: map_filter +-- !query +SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v) +-- !query schema +struct namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):map> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Crc32 + +-- Function name: crc32 +-- !query +SELECT crc32('Spark') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sha2 + +-- Function name: sha2 +-- !query +SELECT sha2('Spark', 256) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Size + +-- Function name: size +-- !query +SELECT size(array('b', 'd', 'c', 'a')) +-- !query schema +struct + +-- Function name: cardinality +-- !query +SELECT cardinality(array('b', 'd', 'c', 'a')) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CurrentTimestamp + +-- Function name: current_timestamp + +-- Function name: now + + +-- Class name: org.apache.spark.sql.catalyst.expressions.In + +-- Function name: in +-- !query +SELECT 1 in(1, 2, 3) +-- !query schema +struct<(1 IN (1, 2, 3)):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CurrentDatabase + +-- Function name: current_database +-- !query +SELECT current_database() +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringInstr + +-- Function name: instr +-- !query +SELECT instr('SparkSQL', 'SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Sum + +-- Function name: sum +-- !query +SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CountIf + +-- Function name: count_if +-- !query +SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MakeTimestamp + +-- Function name: make_timestamp +-- !query +SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CsvToStructs + +-- Function name: from_csv +-- !query +SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Remainder + +-- Function name: % +-- !query +SELECT 2 % 1.8 +-- !query schema +struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> + +-- Function name: mod +-- !query +SELECT 2 % 1.8 +-- !query schema +struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringRepeat + +-- Function name: repeat +-- !query +SELECT repeat('123', 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SubstringIndex + +-- Function name: substring_index +-- !query +SELECT substring_index('www.apache.org', '.', 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringTrimLeft + +-- Function name: ltrim +-- !query +SELECT ltrim(' SparkSQL ') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringTranslate + +-- Function name: translate +-- !query +SELECT translate('AaBbCc', 'abc', '123') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Greatest + +-- Function name: greatest +-- !query +SELECT greatest(10, 9, 2, 4, 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayDistinct + +-- Function name: array_distinct +-- !query +SELECT array_distinct(array(1, 2, 3, null, 3)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringReplace + +-- Function name: replace +-- !query +SELECT replace('ABCabc', 'abc', 'DEF') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathShort + +-- Function name: xpath_short +-- !query +SELECT xpath_short('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):smallint> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr + +-- Function name: bool_or +-- !query +SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) +-- !query schema +struct + +-- Function name: some +-- !query +SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) +-- !query schema +struct + +-- Function name: any +-- !query +SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Murmur3Hash + +-- Function name: hash +-- !query +SELECT hash('Spark', array(123), 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.RLike + +-- Function name: rlike +-- !query +SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' +-- !query schema +struct<%SystemDrive%UsersJohn RLIKE %SystemDrive%\Users.*:boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.If + +-- Function name: if +-- !query +SELECT if(1 < 2, 'a', 'b') +-- !query schema +struct<(IF((1 < 2), a, b)):string> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Grouping + +-- Function name: grouping +-- !query +SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Abs + +-- Function name: abs +-- !query +SELECT abs(-1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.InitCap + +-- Function name: initcap +-- !query +SELECT initcap('sPark sql') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Percentile + +-- Function name: percentile +-- !query +SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.IsNotNull + +-- Function name: isnotnull +-- !query +SELECT isnotnull(1) +-- !query schema +struct<(1 IS NOT NULL):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cbrt + +-- Function name: cbrt +-- !query +SELECT cbrt(27.0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseNot + +-- Function name: ~ +-- !query +SELECT ~ 0 +-- !query schema +struct<~0:int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Last + +-- Function name: last_value +-- !query +SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) +-- !query schema +struct + +-- Function name: last +-- !query +SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.NullIf + +-- Function name: nullif +-- !query +SELECT nullif(2, 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Month + +-- Function name: month +-- !query +SELECT month('2016-07-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Logarithm + +-- Function name: log +-- !query +SELECT log(10, 100) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Subtract + +-- Function name: - +-- !query +SELECT 2 - 1 +-- !query schema +struct<(2 - 1):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DateAdd + +-- Function name: date_add +-- !query +SELECT date_add('2016-07-30', 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MakeDate + +-- Function name: make_date +-- !query +SELECT make_date(2013, 7, 15) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.JsonToStructs + +-- Function name: from_json +-- !query +SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ZipWith + +-- Function name: zip_with +-- !query +SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) +-- !query schema +struct>> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.NamedStruct + +-- Function name: struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Tan + +-- Function name: tan +-- !query +SELECT tan(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.EulerNumber + +-- Function name: e +-- !query +SELECT e() +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringToMap + +-- Function name: str_to_map +-- !query +SELECT str_to_map('a:1,b:2,c:3', ',', ':') +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArraySort + +-- Function name: array_sort +-- !query +SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) +-- !query schema +struct namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cast + +-- Function name: string + +-- Function name: cast +-- !query +SELECT cast('10' as int) +-- !query schema +struct + +-- Function name: tinyint + +-- Function name: double + +-- Function name: smallint + +-- Function name: date + +-- Function name: decimal + +-- Function name: boolean + +-- Function name: float + +-- Function name: binary + +-- Function name: bigint + +-- Function name: int + +-- Function name: timestamp + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Min + +-- Function name: min +-- !query +SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Average + +-- Function name: avg +-- !query +SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + +-- Function name: mean +-- !query +SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SortArray + +-- Function name: sort_array +-- !query +SELECT sort_array(array('b', 'd', null, 'c', 'a'), true) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.NextDay + +-- Function name: next_day +-- !query +SELECT next_day('2015-01-14', 'TU') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Ascii + +-- Function name: ascii +-- !query +SELECT ascii('222') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayRemove + +-- Function name: array_remove +-- !query +SELECT array_remove(array(1, 2, 3, null, 3), 3) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Pow + +-- Function name: pow +-- !query +SELECT pow(2, 3) +-- !query schema +struct + +-- Function name: power +-- !query +SELECT power(2, 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.LessThan + +-- Function name: < +-- !query +SELECT 1 < 2 +-- !query schema +struct<(1 < 2):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapKeys + +-- Function name: map_keys +-- !query +SELECT map_keys(map(1, 'a', 2, 'b')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Inline + +-- Function name: inline +-- !query +SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) +-- !query schema +struct + +-- Function name: inline_outer +-- !query +SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapZipWith + +-- Function name: map_zip_with +-- !query +SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Encode + +-- Function name: encode +-- !query +SELECT encode('abc', 'utf-8') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayJoin + +-- Function name: array_join +-- !query +SELECT array_join(array('hello', 'world'), ' ') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.And + +-- Function name: and + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Hypot + +-- Function name: hypot +-- !query +SELECT hypot(3, 4) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Round + +-- Function name: round +-- !query +SELECT round(2.5, 0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CovSample + +-- Function name: covar_samp +-- !query +SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Pi + +-- Function name: pi +-- !query +SELECT pi() +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sqrt + +-- Function name: sqrt +-- !query +SELECT sqrt(4) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TransformKeys + +-- Function name: transform_keys +-- !query +SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Substring + +-- Function name: substr +-- !query +SELECT substr('Spark SQL', 5) +-- !query schema +struct + +-- Function name: substring +-- !query +SELECT substring('Spark SQL', 5) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Asinh + +-- Function name: asinh +-- !query +SELECT asinh(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Second + +-- Function name: second +-- !query +SELECT second('2009-07-30 12:58:59') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp + +-- Function name: to_utc_timestamp +-- !query +SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Upper + +-- Function name: ucase +-- !query +SELECT ucase('SparkSql') +-- !query schema +struct + +-- Function name: upper +-- !query +SELECT upper('SparkSql') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg + +-- Function name: bit_and +-- !query +SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Stack + +-- Function name: stack +-- !query +SELECT stack(2, 1, 2, 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.EqualTo + +-- Function name: = +-- !query +SELECT 2 = 2 +-- !query schema +struct<(2 = 2):boolean> + +-- Function name: == +-- !query +SELECT 2 == 2 +-- !query schema +struct<(2 = 2):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringLPad + +-- Function name: lpad +-- !query +SELECT lpad('hi', 5, '??') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapFromEntries + +-- Function name: map_from_entries +-- !query +SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cube + +-- Function name: cube +-- !query +SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name, age) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Divide + +-- Function name: / +-- !query +SELECT 3 / 2 +-- !query schema +struct<(CAST(3 AS DOUBLE) / CAST(2 AS DOUBLE)):double> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Like + +-- Function name: like +-- !query +SELECT like('Spark', '_park') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.OctetLength + +-- Function name: octet_length +-- !query +SELECT octet_length('Spark SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CaseWhen + +-- Function name: when +-- !query +SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END +-- !query schema +struct 0) THEN CAST(1 AS DECIMAL(11,1)) WHEN (2 > 0) THEN CAST(2.0 AS DECIMAL(11,1)) ELSE CAST(1.2 AS DECIMAL(11,1)) END:decimal(11,1)> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Log + +-- Function name: ln +-- !query +SELECT ln(1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseCount + +-- Function name: bit_count +-- !query +SELECT bit_count(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Acos + +-- Function name: acos +-- !query +SELECT acos(1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SparkPartitionID + +-- Function name: spark_partition_id + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DateFormatClass + +-- Function name: date_format +-- !query +SELECT date_format('2016-04-08', 'y') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.FromUnixTime + +-- Function name: from_unixtime +-- !query +SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Floor + +-- Function name: floor +-- !query +SELECT floor(-0.1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SchemaOfCsv + +-- Function name: schema_of_csv +-- !query +SELECT schema_of_csv('1,abc') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Log2 + +-- Function name: log2 +-- !query +SELECT log2(2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DateSub + +-- Function name: date_sub +-- !query +SELECT date_sub('2016-07-30', 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.NTile + +-- Function name: ntile + + +-- Class name: org.apache.spark.sql.catalyst.expressions.RowNumber + +-- Function name: row_number + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CreateMap + +-- Function name: map +-- !query +SELECT map(1.0, '2', 3.0, '4') +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg + +-- Function name: bit_or +-- !query +SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DayOfYear + +-- Function name: dayofyear +-- !query +SELECT dayofyear('2016-04-09') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.IsNull + +-- Function name: isnull +-- !query +SELECT isnull(1) +-- !query schema +struct<(1 IS NULL):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Ceil + +-- Function name: ceil +-- !query +SELECT ceil(-0.1) +-- !query schema +struct + +-- Function name: ceiling +-- !query +SELECT ceiling(-0.1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Asin + +-- Function name: asin +-- !query +SELECT asin(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Count + +-- Function name: count +-- !query +SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Minute + +-- Function name: minute +-- !query +SELECT minute('2009-07-30 12:58:59') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.PercentRank + +-- Function name: percent_rank + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathList + +-- Function name: xpath +-- !query +SELECT xpath('b1b2b3c1c2','a/b/text()') +-- !query schema +structb1b2b3c1c2, a/b/text()):array> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.IntegralDivide + +-- Function name: div +-- !query +SELECT 3 div 2 +-- !query schema +struct<(3 div 2):bigint> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation + +-- Function name: covar_pop +-- !query +SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathDouble + +-- Function name: xpath_number +-- !query +SELECT xpath_number('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):double> + +-- Function name: xpath_double +-- !query +SELECT xpath_double('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):double> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.SparkVersion + +-- Function name: version + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Not + +-- Function name: ! + +-- Function name: not + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ShiftRight + +-- Function name: shiftright +-- !query +SELECT shiftright(4, 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sin + +-- Function name: sin +-- !query +SELECT sin(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ToRadians + +-- Function name: radians +-- !query +SELECT radians(180) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TransformValues + +-- Function name: transform_values +-- !query +SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayUnion + +-- Function name: array_union +-- !query +SELECT array_union(array(1, 2, 3), array(1, 3, 5)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis + +-- Function name: kurtosis +-- !query +SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Signum + +-- Function name: signum +-- !query +SELECT signum(40) +-- !query schema +struct + +-- Function name: sign +-- !query +SELECT sign(40) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Overlay + +-- Function name: overlay +-- !query +SELECT overlay('Spark SQL' PLACING '_' FROM 6) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Sha1 + +-- Function name: sha1 +-- !query +SELECT sha1('Spark') +-- !query schema +struct + +-- Function name: sha +-- !query +SELECT sha('Spark') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.TruncTimestamp + +-- Function name: date_trunc +-- !query +SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet + +-- Function name: collect_set +-- !query +SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Factorial + +-- Function name: factorial +-- !query +SELECT factorial(5) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitLength + +-- Function name: bit_length +-- !query +SELECT bit_length('Spark SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StructsToCsv + +-- Function name: to_csv +-- !query +SELECT to_csv(named_struct('a', 1, 'b', 2)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.XxHash64 + +-- Function name: xxhash64 +-- !query +SELECT xxhash64('Spark', array(123), 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.IfNull + +-- Function name: ifnull +-- !query +SELECT ifnull(NULL, array('2')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Flatten + +-- Function name: flatten +-- !query +SELECT flatten(array(array(1, 2), array(3, 4))) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CollectList + +-- Function name: collect_list +-- !query +SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseOr + +-- Function name: | +-- !query +SELECT 3 | 5 +-- !query schema +struct<(3 | 5):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Or + +-- Function name: or + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayRepeat + +-- Function name: array_repeat +-- !query +SELECT array_repeat('123', 2) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathString + +-- Function name: xpath_string +-- !query +SELECT xpath_string('bcc','a/c') +-- !query schema +structbcc, a/c):string> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayMax + +-- Function name: array_max +-- !query +SELECT array_max(array(1, 20, null, 3)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringTrim + +-- Function name: trim +-- !query +SELECT trim(' SparkSQL ') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.IsNaN + +-- Function name: isnan +-- !query +SELECT isnan(cast('NaN' as double)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Levenshtein + +-- Function name: levenshtein +-- !query +SELECT levenshtein('kitten', 'sitting') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus + +-- Function name: approx_count_distinct +-- !query +SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapConcat + +-- Function name: map_concat +-- !query +SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Atan + +-- Function name: atan +-- !query +SELECT atan(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathFloat + +-- Function name: xpath_float +-- !query +SELECT xpath_float('12', 'sum(a/b)') +-- !query schema +struct12, sum(a/b)):float> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Log10 + +-- Function name: log10 +-- !query +SELECT log10(10) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp + +-- Function name: from_utc_timestamp +-- !query +SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.CreateNamedStruct + +-- Function name: named_struct +-- !query +SELECT named_struct("a", 1, "b", 2, "c", 3) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.BRound + +-- Function name: bround +-- !query +SELECT bround(2.5, 0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Year + +-- Function name: year +-- !query +SELECT year('2016-07-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.WeekOfYear + +-- Function name: weekofyear +-- !query +SELECT weekofyear('2008-02-20') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Hour + +-- Function name: hour +-- !query +SELECT hour('2009-07-30 12:58:59') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DayOfWeek + +-- Function name: dayofweek +-- !query +SELECT dayofweek('2009-07-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayContains + +-- Function name: array_contains +-- !query +SELECT array_contains(array(1, 2, 3), 2) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Base64 + +-- Function name: base64 +-- !query +SELECT base64('Spark SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.UnaryMinus + +-- Function name: negative +-- !query +SELECT negative(1) +-- !query schema +struct<(- 1):int> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Explode + +-- Function name: explode +-- !query +SELECT explode(array(10, 20)) +-- !query schema +struct + +-- Function name: explode_outer +-- !query +SELECT explode_outer(array(10, 20)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ParseToDate + +-- Function name: to_date +-- !query +SELECT to_date('2009-07-30 04:17:52') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ParseUrl + +-- Function name: parse_url +-- !query +SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cosh + +-- Function name: cosh +-- !query +SELECT cosh(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayAggregate + +-- Function name: aggregate +-- !query +SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned + +-- Function name: shiftrightunsigned +-- !query +SELECT shiftrightunsigned(4, 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Nvl2 + +-- Function name: nvl2 +-- !query +SELECT nvl2(NULL, 2, 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DateDiff + +-- Function name: datediff +-- !query +SELECT datediff('2009-07-31', '2009-07-30') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Log1p + +-- Function name: log1p +-- !query +SELECT log1p(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.NaNvl + +-- Function name: nanvl +-- !query +SELECT nanvl(cast('NaN' as double), 123) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapEntries + +-- Function name: map_entries +-- !query +SELECT map_entries(map(1, 'a', 2, 'b')) +-- !query schema +struct>> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Reverse + +-- Function name: reverse +-- !query +SELECT reverse('Spark SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayIntersect + +-- Function name: array_intersect +-- !query +SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp + +-- Function name: stddev_samp +-- !query +SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + +-- Function name: stddev +-- !query +SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + +-- Function name: std +-- !query +SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.MapValues + +-- Function name: map_values +-- !query +SELECT map_values(map(1, 'a', 2, 'b')) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArraysOverlap + +-- Function name: arrays_overlap +-- !query +SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Rollup + +-- Function name: rollup +-- !query +SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY rollup(name, age) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.LessThanOrEqual + +-- Function name: <= +-- !query +SELECT 2 <= 2 +-- !query schema +struct<(2 <= 2):boolean> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Elt + +-- Function name: elt +-- !query +SELECT elt(1, 'scala', 'java') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Skewness + +-- Function name: skewness +-- !query +SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Left + +-- Function name: left +-- !query +SELECT left('Spark SQL', 3) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.StringTrimRight + +-- Function name: rtrim +-- !query +SELECT rtrim(' SparkSQL ') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Lead + +-- Function name: lead + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayForAll + +-- Function name: forall +-- !query +SELECT forall(array(1, 2, 3), x -> x % 2 == 0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Bin + +-- Function name: bin +-- !query +SELECT bin(13) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.DenseRank + +-- Function name: dense_rank + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayPosition + +-- Function name: array_position +-- !query +SELECT array_position(array(3, 2, 1), 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ArrayTransform + +-- Function name: transform +-- !query +SELECT transform(array(1, 2, 3), x -> x + 1) +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.JsonTuple + +-- Function name: json_tuple +-- !query +SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.RegExpExtract + +-- Function name: regexp_extract +-- !query +SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Length + +-- Function name: character_length +-- !query +SELECT character_length('Spark SQL ') +-- !query schema +struct + +-- Function name: char_length +-- !query +SELECT char_length('Spark SQL ') +-- !query schema +struct + +-- Function name: length +-- !query +SELECT length('Spark SQL ') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Unhex + +-- Function name: unhex +-- !query +SELECT decode(unhex('537061726B2053514C'), 'UTF-8') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Conv + +-- Function name: conv +-- !query +SELECT conv('100', 2, 10) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.JsonObjectKeys + +-- Function name: json_object_keys +-- !query +Select json_object_keys('{}') +-- !query schema +struct> + + +-- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.MinBy + +-- Function name: min_by +-- !query +SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Cot + +-- Function name: cot +-- !query +SELECT cot(1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.LastDay + +-- Function name: last_day +-- !query +SELECT last_day('2009-01-12') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Exp + +-- Function name: exp +-- !query +SELECT exp(0) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Concat + +-- Function name: concat +-- !query +SELECT concat('Spark', 'SQL') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.UnBase64 + +-- Function name: unbase64 +-- !query +SELECT unbase64('U3BhcmsgU1FM') +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.Acosh + +-- Function name: acosh +-- !query +SELECT acosh(1) +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.FormatString + +-- Function name: printf +-- !query +SELECT printf("Hello World %d %s", 100, "days") +-- !query schema +struct + +-- Function name: format_string +-- !query +SELECT format_string("Hello World %d %s", 100, "days") +-- !query schema +struct + + +-- Class name: org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp + +-- Function name: to_unix_timestamp +-- !query +SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') +-- !query schema +struct \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 02631ac063c02..aff62e9005923 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -88,7 +88,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { "sql", "core", "src", "test", "resources", "sql-functions").toFile } - private val resultFile = new File(baseResourcePath, "output.out") + private val resultFile = new File(baseResourcePath, "schema.out") val ignoreSet = Set( // One of examples shows getting the current timestamp @@ -133,16 +133,19 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { // If expression exists 'Examples' segment, the first element is 'Examples'. Because // this test case is only used to print aliases of expressions for double checking. // Therefore, we only need to output the first SQL and its corresponding schema. - example.split(" > ").take(2).toList.foreach(_ match { - case exampleRe(sql, expected) => - val df = spark.sql(sql) - val schema = df.schema.catalogString - val queryOutput = QueryOutput(sql, schema) - outputBuffer += queryOutput.toString - outputs += queryOutput - case _ => - }) - } + // Note: We need to filter out the commands that set the parameters, such as: + // SET spark.sql.parser.escapedStringLiterals=true + example.split(" > ").tail + .filterNot(_.trim.startsWith("SET")).take(2).toList.foreach(_ match { + case exampleRe(sql, expected) => + val df = spark.sql(sql) + val schema = df.schema.catalogString + val queryOutput = QueryOutput(sql, schema) + outputBuffer += queryOutput.toString + outputs += queryOutput + case _ => + }) + } } } From c7b565b0741e9253c09116d923e471e6739cbd24 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 13 Apr 2020 17:38:33 +0800 Subject: [PATCH 05/22] Optimize code --- .../test/resources/sql-functions/output.out | 2637 ----------------- 1 file changed, 2637 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-functions/output.out diff --git a/sql/core/src/test/resources/sql-functions/output.out b/sql/core/src/test/resources/sql-functions/output.out deleted file mode 100644 index 58defe4a26851..0000000000000 --- a/sql/core/src/test/resources/sql-functions/output.out +++ /dev/null @@ -1,2637 +0,0 @@ --- Automatically generated by ExpressionsSchemaSuite --- Number of queries: 287 - --- Class name: org.apache.spark.sql.catalyst.expressions.StringSpace - --- Function name: space --- !query -SELECT concat(space(2), '1') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CreateArray - --- Function name: array --- !query -SELECT array(1, 2, 3) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayExcept - --- Function name: array_except --- !query -SELECT array_except(array(1, 2, 3), array(1, 3, 5)) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseXor - --- Function name: ^ --- !query -SELECT 3 ^ 5 --- !query schema -struct<(3 ^ 5):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringRPad - --- Function name: rpad --- !query -SELECT rpad('hi', 5, '??') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.SchemaOfJson - --- Function name: schema_of_json --- !query -SELECT schema_of_json('[{"col":0}]') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ParseToTimestamp - --- Function name: to_timestamp --- !query -SELECT to_timestamp('2016-12-31 00:12:00') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathInt - --- Function name: xpath_int --- !query -SELECT xpath_int('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop - --- Function name: var_pop --- !query -SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Hex - --- Function name: hex --- !query -SELECT hex(17) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArraysZip - --- Function name: arrays_zip --- !query -SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)) --- !query schema -struct>> - - --- Class name: org.apache.spark.sql.catalyst.expressions.InputFileName - --- Function name: input_file_name - - --- Class name: org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID - --- Function name: monotonically_increasing_id - - --- Class name: org.apache.spark.sql.catalyst.expressions.DayOfMonth - --- Function name: day --- !query -SELECT day('2009-07-30') --- !query schema -struct - --- Function name: dayofmonth --- !query -SELECT dayofmonth('2009-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ElementAt - --- Function name: element_at --- !query -SELECT element_at(array(1, 2, 3), 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.WeekDay - --- Function name: weekday --- !query -SELECT weekday('2009-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathLong - --- Function name: xpath_long --- !query -SELECT xpath_long('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):bigint> - - --- Class name: org.apache.spark.sql.catalyst.expressions.CumeDist - --- Function name: cume_dist - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayMin - --- Function name: array_min --- !query -SELECT array_min(array(1, 20, null, 3)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy - --- Function name: max_by --- !query -SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Rank - --- Function name: rank - - --- Class name: org.apache.spark.sql.catalyst.expressions.Right - --- Function name: right --- !query -SELECT right('Spark SQL', 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Least - --- Function name: least --- !query -SELECT least(10, 9, 2, 4, 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Lower - --- Function name: lcase --- !query -SELECT lcase('SparkSql') --- !query schema -struct - --- Function name: lower --- !query -SELECT lower('SparkSql') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Nvl - --- Function name: nvl --- !query -SELECT nvl(NULL, array('2')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Pmod - --- Function name: pmod --- !query -SELECT pmod(10, 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Chr - --- Function name: char --- !query -SELECT char(65) --- !query schema -struct - --- Function name: chr --- !query -SELECT chr(65) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.GetJsonObject - --- Function name: get_json_object --- !query -SELECT get_json_object('{"a":"b"}', '$.a') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ToDegrees - --- Function name: degrees --- !query -SELECT degrees(3.141592653589793) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayExists - --- Function name: exists --- !query -SELECT exists(array(1, 2, 3), x -> x % 2 == 0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Tanh - --- Function name: tanh --- !query -SELECT tanh(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringSplit - --- Function name: split --- !query -SELECT split('oneAtwoBthreeC', '[ABC]') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.GroupingID - --- Function name: grouping_id --- !query -SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Expm1 - --- Function name: expm1 --- !query -SELECT expm1(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Quarter - --- Function name: quarter --- !query -SELECT quarter('2016-08-31') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Atan2 - --- Function name: atan2 --- !query -SELECT atan2(0, 0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.TypeOf - --- Function name: typeof --- !query -SELECT typeof(1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sequence - --- Function name: sequence --- !query -SELECT sequence(1, 5) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.InputFileBlockStart - --- Function name: input_file_block_start - - --- Class name: org.apache.spark.sql.catalyst.expressions.ConcatWs - --- Function name: concat_ws --- !query -SELECT concat_ws(' ', 'Spark', 'SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile - --- Function name: approx_percentile --- !query -SELECT approx_percentile(10.0, array(0.5, 0.4, 0.1), 100) --- !query schema -struct> - --- Function name: percentile_approx --- !query -SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.TimeWindow - --- Function name: window - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringLocate - --- Function name: position --- !query -SELECT position('bar', 'foobarbar') --- !query schema -struct - --- Function name: locate --- !query -SELECT locate('bar', 'foobarbar') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.FormatNumber - --- Function name: format_number --- !query -SELECT format_number(12332.123456, 4) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.UnaryPositive - --- Function name: positive - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Corr - --- Function name: corr --- !query -SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Md5 - --- Function name: md5 --- !query -SELECT md5('Spark') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StructsToJson - --- Function name: to_json --- !query -SELECT to_json(named_struct('a', 1, 'b', 2)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop - --- Function name: stddev_pop --- !query -SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Rint - --- Function name: rint --- !query -SELECT rint(12.3456) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapFromArrays - --- Function name: map_from_arrays --- !query -SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sinh - --- Function name: sinh --- !query -SELECT sinh(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Lag - --- Function name: lag - - --- Class name: org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual - --- Function name: >= --- !query -SELECT 2 >= 1 --- !query schema -struct<(2 >= 1):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseAnd - --- Function name: & --- !query -SELECT 3 & 5 --- !query schema -struct<(3 & 5):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.First - --- Function name: first_value --- !query -SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) --- !query schema -struct - --- Function name: first --- !query -SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.TruncDate - --- Function name: trunc --- !query -SELECT trunc('2019-08-04', 'week') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean - --- Function name: xpath_boolean --- !query -SELECT xpath_boolean('1','a/b') --- !query schema -struct1, a/b):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.MakeInterval - --- Function name: make_interval --- !query -SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Atanh - --- Function name: atanh --- !query -SELECT atanh(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.FindInSet - --- Function name: find_in_set --- !query -SELECT find_in_set('ab','abc,b,ab,c,def') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray - --- Function name: json_array_length --- !query -SELECT json_array_length('[1,2,3,4]') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg - --- Function name: bit_xor --- !query -SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Decode - --- Function name: decode --- !query -SELECT decode(encode('abc', 'utf-8'), 'utf-8') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Coalesce - --- Function name: coalesce --- !query -SELECT coalesce(NULL, 1, NULL) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.RegExpReplace - --- Function name: regexp_replace --- !query -SELECT regexp_replace('100-200', '(\\d+)', 'num') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp - --- Function name: var_samp --- !query -SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - --- Function name: variance --- !query -SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cos - --- Function name: cos --- !query -SELECT cos(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayFilter - --- Function name: filter --- !query -SELECT filter(array(1, 2, 3), x -> x % 2 == 1) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.PosExplode - --- Function name: posexplode_outer --- !query -SELECT posexplode_outer(array(10,20)) --- !query schema -struct - --- Function name: posexplode --- !query -SELECT posexplode(array(10,20)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.InputFileBlockLength - --- Function name: input_file_block_length - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd - --- Function name: every --- !query -SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col) --- !query schema -struct - --- Function name: bool_and --- !query -SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg - --- Function name: count_min_sketch - - --- Class name: org.apache.spark.sql.catalyst.expressions.AssertTrue - --- Function name: assert_true --- !query -SELECT assert_true(0 < 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CurrentDate - --- Function name: current_date - - --- Class name: org.apache.spark.sql.catalyst.expressions.MonthsBetween - --- Function name: months_between --- !query -SELECT months_between('1997-02-28 10:30:00', '1996-10-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.EqualNullSafe - --- Function name: <=> --- !query -SELECT 2 <=> 2 --- !query schema -struct<(2 <=> 2):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Add - --- Function name: + --- !query -SELECT 1 + 2 --- !query schema -struct<(1 + 2):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Multiply - --- Function name: * --- !query -SELECT 2 * 3 --- !query schema -struct<(2 * 3):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.DatePart - --- Function name: date_part --- !query -SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ShiftLeft - --- Function name: shiftleft --- !query -SELECT shiftleft(2, 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.GreaterThan - --- Function name: > --- !query -SELECT 2 > 1 --- !query schema -struct<(2 > 1):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Slice - --- Function name: slice --- !query -SELECT slice(array(1, 2, 3, 4), 2, 2) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sentences - --- Function name: sentences --- !query -SELECT sentences('Hi there! Good morning.') --- !query schema -struct>> - - --- Class name: org.apache.spark.sql.catalyst.expressions.SoundEx - --- Function name: soundex --- !query -SELECT soundex('Miller') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.AddMonths - --- Function name: add_months --- !query -SELECT add_months('2016-08-31', 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Max - --- Function name: max --- !query -SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapFilter - --- Function name: map_filter --- !query -SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v) --- !query schema -struct namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):map> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Crc32 - --- Function name: crc32 --- !query -SELECT crc32('Spark') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sha2 - --- Function name: sha2 --- !query -SELECT sha2('Spark', 256) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Size - --- Function name: size --- !query -SELECT size(array('b', 'd', 'c', 'a')) --- !query schema -struct - --- Function name: cardinality --- !query -SELECT cardinality(array('b', 'd', 'c', 'a')) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CurrentTimestamp - --- Function name: current_timestamp - --- Function name: now - - --- Class name: org.apache.spark.sql.catalyst.expressions.In - --- Function name: in --- !query -SELECT 1 in(1, 2, 3) --- !query schema -struct<(1 IN (1, 2, 3)):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.CurrentDatabase - --- Function name: current_database --- !query -SELECT current_database() --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringInstr - --- Function name: instr --- !query -SELECT instr('SparkSQL', 'SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Sum - --- Function name: sum --- !query -SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CountIf - --- Function name: count_if --- !query -SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MakeTimestamp - --- Function name: make_timestamp --- !query -SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CsvToStructs - --- Function name: from_csv --- !query -SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Remainder - --- Function name: % --- !query -SELECT 2 % 1.8 --- !query schema -struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> - --- Function name: mod --- !query -SELECT 2 % 1.8 --- !query schema -struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringRepeat - --- Function name: repeat --- !query -SELECT repeat('123', 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.SubstringIndex - --- Function name: substring_index --- !query -SELECT substring_index('www.apache.org', '.', 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringTrimLeft - --- Function name: ltrim --- !query -SELECT ltrim(' SparkSQL ') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringTranslate - --- Function name: translate --- !query -SELECT translate('AaBbCc', 'abc', '123') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Greatest - --- Function name: greatest --- !query -SELECT greatest(10, 9, 2, 4, 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayDistinct - --- Function name: array_distinct --- !query -SELECT array_distinct(array(1, 2, 3, null, 3)) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringReplace - --- Function name: replace --- !query -SELECT replace('ABCabc', 'abc', 'DEF') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathShort - --- Function name: xpath_short --- !query -SELECT xpath_short('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):smallint> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr - --- Function name: bool_or --- !query -SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) --- !query schema -struct - --- Function name: some --- !query -SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) --- !query schema -struct - --- Function name: any --- !query -SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Murmur3Hash - --- Function name: hash --- !query -SELECT hash('Spark', array(123), 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.RLike - --- Function name: rlike --- !query -SET spark.sql.parser.escapedStringLiterals=true --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.If - --- Function name: if --- !query -SELECT if(1 < 2, 'a', 'b') --- !query schema -struct<(IF((1 < 2), a, b)):string> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Grouping - --- Function name: grouping --- !query -SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Abs - --- Function name: abs --- !query -SELECT abs(-1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.InitCap - --- Function name: initcap --- !query -SELECT initcap('sPark sql') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Percentile - --- Function name: percentile --- !query -SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.IsNotNull - --- Function name: isnotnull --- !query -SELECT isnotnull(1) --- !query schema -struct<(1 IS NOT NULL):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cbrt - --- Function name: cbrt --- !query -SELECT cbrt(27.0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseNot - --- Function name: ~ --- !query -SELECT ~ 0 --- !query schema -struct<~0:int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Last - --- Function name: last_value --- !query -SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) --- !query schema -struct - --- Function name: last --- !query -SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.NullIf - --- Function name: nullif --- !query -SELECT nullif(2, 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Month - --- Function name: month --- !query -SELECT month('2016-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Logarithm - --- Function name: log --- !query -SELECT log(10, 100) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Subtract - --- Function name: - --- !query -SELECT 2 - 1 --- !query schema -struct<(2 - 1):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.DateAdd - --- Function name: date_add --- !query -SELECT date_add('2016-07-30', 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MakeDate - --- Function name: make_date --- !query -SELECT make_date(2013, 7, 15) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.JsonToStructs - --- Function name: from_json --- !query -SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ZipWith - --- Function name: zip_with --- !query -SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) --- !query schema -struct>> - - --- Class name: org.apache.spark.sql.catalyst.expressions.NamedStruct - --- Function name: struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Tan - --- Function name: tan --- !query -SELECT tan(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.EulerNumber - --- Function name: e --- !query -SELECT e() --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringToMap - --- Function name: str_to_map --- !query -SELECT str_to_map('a:1,b:2,c:3', ',', ':') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArraySort - --- Function name: array_sort --- !query -SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) --- !query schema -struct namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cast - --- Function name: string - --- Function name: cast --- !query -SELECT cast('10' as int) --- !query schema -struct - --- Function name: tinyint - --- Function name: double - --- Function name: smallint - --- Function name: date - --- Function name: decimal - --- Function name: boolean - --- Function name: float - --- Function name: binary - --- Function name: bigint - --- Function name: int - --- Function name: timestamp - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Min - --- Function name: min --- !query -SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Average - --- Function name: avg --- !query -SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - --- Function name: mean --- !query -SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.SortArray - --- Function name: sort_array --- !query -SELECT sort_array(array('b', 'd', null, 'c', 'a'), true) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.NextDay - --- Function name: next_day --- !query -SELECT next_day('2015-01-14', 'TU') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Ascii - --- Function name: ascii --- !query -SELECT ascii('222') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayRemove - --- Function name: array_remove --- !query -SELECT array_remove(array(1, 2, 3, null, 3), 3) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Pow - --- Function name: pow --- !query -SELECT pow(2, 3) --- !query schema -struct - --- Function name: power --- !query -SELECT power(2, 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.LessThan - --- Function name: < --- !query -SELECT 1 < 2 --- !query schema -struct<(1 < 2):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapKeys - --- Function name: map_keys --- !query -SELECT map_keys(map(1, 'a', 2, 'b')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Inline - --- Function name: inline --- !query -SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) --- !query schema -struct - --- Function name: inline_outer --- !query -SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapZipWith - --- Function name: map_zip_with --- !query -SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Encode - --- Function name: encode --- !query -SELECT encode('abc', 'utf-8') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayJoin - --- Function name: array_join --- !query -SELECT array_join(array('hello', 'world'), ' ') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.And - --- Function name: and - - --- Class name: org.apache.spark.sql.catalyst.expressions.Hypot - --- Function name: hypot --- !query -SELECT hypot(3, 4) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Round - --- Function name: round --- !query -SELECT round(2.5, 0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CovSample - --- Function name: covar_samp --- !query -SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Pi - --- Function name: pi --- !query -SELECT pi() --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sqrt - --- Function name: sqrt --- !query -SELECT sqrt(4) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.TransformKeys - --- Function name: transform_keys --- !query -SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Substring - --- Function name: substr --- !query -SELECT substr('Spark SQL', 5) --- !query schema -struct - --- Function name: substring --- !query -SELECT substring('Spark SQL', 5) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Asinh - --- Function name: asinh --- !query -SELECT asinh(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Second - --- Function name: second --- !query -SELECT second('2009-07-30 12:58:59') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp - --- Function name: to_utc_timestamp --- !query -SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Upper - --- Function name: ucase --- !query -SELECT ucase('SparkSql') --- !query schema -struct - --- Function name: upper --- !query -SELECT upper('SparkSql') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg - --- Function name: bit_and --- !query -SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Stack - --- Function name: stack --- !query -SELECT stack(2, 1, 2, 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.EqualTo - --- Function name: = --- !query -SELECT 2 = 2 --- !query schema -struct<(2 = 2):boolean> - --- Function name: == --- !query -SELECT 2 == 2 --- !query schema -struct<(2 = 2):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringLPad - --- Function name: lpad --- !query -SELECT lpad('hi', 5, '??') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapFromEntries - --- Function name: map_from_entries --- !query -SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cube - --- Function name: cube --- !query -SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name, age) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Divide - --- Function name: / --- !query -SELECT 3 / 2 --- !query schema -struct<(CAST(3 AS DOUBLE) / CAST(2 AS DOUBLE)):double> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Like - --- Function name: like --- !query -SELECT like('Spark', '_park') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.OctetLength - --- Function name: octet_length --- !query -SELECT octet_length('Spark SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CaseWhen - --- Function name: when --- !query -SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END --- !query schema -struct 0) THEN CAST(1 AS DECIMAL(11,1)) WHEN (2 > 0) THEN CAST(2.0 AS DECIMAL(11,1)) ELSE CAST(1.2 AS DECIMAL(11,1)) END:decimal(11,1)> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Log - --- Function name: ln --- !query -SELECT ln(1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseCount - --- Function name: bit_count --- !query -SELECT bit_count(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Acos - --- Function name: acos --- !query -SELECT acos(1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.SparkPartitionID - --- Function name: spark_partition_id - - --- Class name: org.apache.spark.sql.catalyst.expressions.DateFormatClass - --- Function name: date_format --- !query -SELECT date_format('2016-04-08', 'y') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.FromUnixTime - --- Function name: from_unixtime --- !query -SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Floor - --- Function name: floor --- !query -SELECT floor(-0.1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.SchemaOfCsv - --- Function name: schema_of_csv --- !query -SELECT schema_of_csv('1,abc') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Log2 - --- Function name: log2 --- !query -SELECT log2(2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.DateSub - --- Function name: date_sub --- !query -SELECT date_sub('2016-07-30', 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.NTile - --- Function name: ntile - - --- Class name: org.apache.spark.sql.catalyst.expressions.RowNumber - --- Function name: row_number - - --- Class name: org.apache.spark.sql.catalyst.expressions.CreateMap - --- Function name: map --- !query -SELECT map(1.0, '2', 3.0, '4') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg - --- Function name: bit_or --- !query -SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.DayOfYear - --- Function name: dayofyear --- !query -SELECT dayofyear('2016-04-09') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.IsNull - --- Function name: isnull --- !query -SELECT isnull(1) --- !query schema -struct<(1 IS NULL):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Ceil - --- Function name: ceil --- !query -SELECT ceil(-0.1) --- !query schema -struct - --- Function name: ceiling --- !query -SELECT ceiling(-0.1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Asin - --- Function name: asin --- !query -SELECT asin(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Count - --- Function name: count --- !query -SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Minute - --- Function name: minute --- !query -SELECT minute('2009-07-30 12:58:59') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.PercentRank - --- Function name: percent_rank - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathList - --- Function name: xpath --- !query -SELECT xpath('b1b2b3c1c2','a/b/text()') --- !query schema -structb1b2b3c1c2, a/b/text()):array> - - --- Class name: org.apache.spark.sql.catalyst.expressions.IntegralDivide - --- Function name: div --- !query -SELECT 3 div 2 --- !query schema -struct<(3 div 2):bigint> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation - --- Function name: covar_pop --- !query -SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathDouble - --- Function name: xpath_number --- !query -SELECT xpath_number('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):double> - --- Function name: xpath_double --- !query -SELECT xpath_double('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):double> - - --- Class name: org.apache.spark.sql.catalyst.expressions.SparkVersion - --- Function name: version - - --- Class name: org.apache.spark.sql.catalyst.expressions.Not - --- Function name: ! - --- Function name: not - - --- Class name: org.apache.spark.sql.catalyst.expressions.ShiftRight - --- Function name: shiftright --- !query -SELECT shiftright(4, 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sin - --- Function name: sin --- !query -SELECT sin(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ToRadians - --- Function name: radians --- !query -SELECT radians(180) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.TransformValues - --- Function name: transform_values --- !query -SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayUnion - --- Function name: array_union --- !query -SELECT array_union(array(1, 2, 3), array(1, 3, 5)) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis - --- Function name: kurtosis --- !query -SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Signum - --- Function name: signum --- !query -SELECT signum(40) --- !query schema -struct - --- Function name: sign --- !query -SELECT sign(40) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Overlay - --- Function name: overlay --- !query -SELECT overlay('Spark SQL' PLACING '_' FROM 6) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sha1 - --- Function name: sha1 --- !query -SELECT sha1('Spark') --- !query schema -struct - --- Function name: sha --- !query -SELECT sha('Spark') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.TruncTimestamp - --- Function name: date_trunc --- !query -SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet - --- Function name: collect_set --- !query -SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Factorial - --- Function name: factorial --- !query -SELECT factorial(5) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitLength - --- Function name: bit_length --- !query -SELECT bit_length('Spark SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StructsToCsv - --- Function name: to_csv --- !query -SELECT to_csv(named_struct('a', 1, 'b', 2)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.XxHash64 - --- Function name: xxhash64 --- !query -SELECT xxhash64('Spark', array(123), 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.IfNull - --- Function name: ifnull --- !query -SELECT ifnull(NULL, array('2')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Flatten - --- Function name: flatten --- !query -SELECT flatten(array(array(1, 2), array(3, 4))) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CollectList - --- Function name: collect_list --- !query -SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseOr - --- Function name: | --- !query -SELECT 3 | 5 --- !query schema -struct<(3 | 5):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Or - --- Function name: or - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayRepeat - --- Function name: array_repeat --- !query -SELECT array_repeat('123', 2) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathString - --- Function name: xpath_string --- !query -SELECT xpath_string('bcc','a/c') --- !query schema -structbcc, a/c):string> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayMax - --- Function name: array_max --- !query -SELECT array_max(array(1, 20, null, 3)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringTrim - --- Function name: trim --- !query -SELECT trim(' SparkSQL ') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.IsNaN - --- Function name: isnan --- !query -SELECT isnan(cast('NaN' as double)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Levenshtein - --- Function name: levenshtein --- !query -SELECT levenshtein('kitten', 'sitting') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus - --- Function name: approx_count_distinct --- !query -SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapConcat - --- Function name: map_concat --- !query -SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Atan - --- Function name: atan --- !query -SELECT atan(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathFloat - --- Function name: xpath_float --- !query -SELECT xpath_float('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):float> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Log10 - --- Function name: log10 --- !query -SELECT log10(10) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp - --- Function name: from_utc_timestamp --- !query -SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CreateNamedStruct - --- Function name: named_struct --- !query -SELECT named_struct("a", 1, "b", 2, "c", 3) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.BRound - --- Function name: bround --- !query -SELECT bround(2.5, 0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Year - --- Function name: year --- !query -SELECT year('2016-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.WeekOfYear - --- Function name: weekofyear --- !query -SELECT weekofyear('2008-02-20') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Hour - --- Function name: hour --- !query -SELECT hour('2009-07-30 12:58:59') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.DayOfWeek - --- Function name: dayofweek --- !query -SELECT dayofweek('2009-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayContains - --- Function name: array_contains --- !query -SELECT array_contains(array(1, 2, 3), 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Base64 - --- Function name: base64 --- !query -SELECT base64('Spark SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.UnaryMinus - --- Function name: negative --- !query -SELECT negative(1) --- !query schema -struct<(- 1):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Explode - --- Function name: explode --- !query -SELECT explode(array(10, 20)) --- !query schema -struct - --- Function name: explode_outer --- !query -SELECT explode_outer(array(10, 20)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ParseToDate - --- Function name: to_date --- !query -SELECT to_date('2009-07-30 04:17:52') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ParseUrl - --- Function name: parse_url --- !query -SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cosh - --- Function name: cosh --- !query -SELECT cosh(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayAggregate - --- Function name: aggregate --- !query -SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned - --- Function name: shiftrightunsigned --- !query -SELECT shiftrightunsigned(4, 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Nvl2 - --- Function name: nvl2 --- !query -SELECT nvl2(NULL, 2, 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.DateDiff - --- Function name: datediff --- !query -SELECT datediff('2009-07-31', '2009-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Log1p - --- Function name: log1p --- !query -SELECT log1p(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.NaNvl - --- Function name: nanvl --- !query -SELECT nanvl(cast('NaN' as double), 123) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapEntries - --- Function name: map_entries --- !query -SELECT map_entries(map(1, 'a', 2, 'b')) --- !query schema -struct>> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Reverse - --- Function name: reverse --- !query -SELECT reverse('Spark SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayIntersect - --- Function name: array_intersect --- !query -SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp - --- Function name: stddev_samp --- !query -SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - --- Function name: stddev --- !query -SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - --- Function name: std --- !query -SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapValues - --- Function name: map_values --- !query -SELECT map_values(map(1, 'a', 2, 'b')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArraysOverlap - --- Function name: arrays_overlap --- !query -SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Rollup - --- Function name: rollup --- !query -SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY rollup(name, age) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.LessThanOrEqual - --- Function name: <= --- !query -SELECT 2 <= 2 --- !query schema -struct<(2 <= 2):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Elt - --- Function name: elt --- !query -SELECT elt(1, 'scala', 'java') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Skewness - --- Function name: skewness --- !query -SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Left - --- Function name: left --- !query -SELECT left('Spark SQL', 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringTrimRight - --- Function name: rtrim --- !query -SELECT rtrim(' SparkSQL ') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Lead - --- Function name: lead - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayForAll - --- Function name: forall --- !query -SELECT forall(array(1, 2, 3), x -> x % 2 == 0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Bin - --- Function name: bin --- !query -SELECT bin(13) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.DenseRank - --- Function name: dense_rank - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayPosition - --- Function name: array_position --- !query -SELECT array_position(array(3, 2, 1), 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayTransform - --- Function name: transform --- !query -SELECT transform(array(1, 2, 3), x -> x + 1) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.JsonTuple - --- Function name: json_tuple --- !query -SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.RegExpExtract - --- Function name: regexp_extract --- !query -SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Length - --- Function name: character_length --- !query -SELECT character_length('Spark SQL ') --- !query schema -struct - --- Function name: char_length --- !query -SELECT char_length('Spark SQL ') --- !query schema -struct - --- Function name: length --- !query -SELECT length('Spark SQL ') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Unhex - --- Function name: unhex --- !query -SELECT decode(unhex('537061726B2053514C'), 'UTF-8') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Conv - --- Function name: conv --- !query -SELECT conv('100', 2, 10) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.JsonObjectKeys - --- Function name: json_object_keys --- !query -Select json_object_keys('{}') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.MinBy - --- Function name: min_by --- !query -SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cot - --- Function name: cot --- !query -SELECT cot(1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.LastDay - --- Function name: last_day --- !query -SELECT last_day('2009-01-12') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Exp - --- Function name: exp --- !query -SELECT exp(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Concat - --- Function name: concat --- !query -SELECT concat('Spark', 'SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.UnBase64 - --- Function name: unbase64 --- !query -SELECT unbase64('U3BhcmsgU1FM') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Acosh - --- Function name: acosh --- !query -SELECT acosh(1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.FormatString - --- Function name: printf --- !query -SELECT printf("Hello World %d %s", 100, "days") --- !query schema -struct - --- Function name: format_string --- !query -SELECT format_string("Hello World %d %s", 100, "days") --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp - --- Function name: to_unix_timestamp --- !query -SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') --- !query schema -struct \ No newline at end of file From 42910c25cf68d56e32c4b11a844f79fe19f8ea11 Mon Sep 17 00:00:00 2001 From: beliefer Date: Tue, 14 Apr 2020 10:05:42 +0800 Subject: [PATCH 06/22] Optimize code --- .../scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index aff62e9005923..cd74dac531126 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -136,7 +136,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { // Note: We need to filter out the commands that set the parameters, such as: // SET spark.sql.parser.escapedStringLiterals=true example.split(" > ").tail - .filterNot(_.trim.startsWith("SET")).take(2).toList.foreach(_ match { + .filterNot(_.trim.startsWith("SET")).take(1).foreach(_ match { case exampleRe(sql, expected) => val df = spark.sql(sql) val schema = df.schema.catalogString From c6ed1259c805b0aec831e727935bbb08c4086751 Mon Sep 17 00:00:00 2001 From: beliefer Date: Thu, 16 Apr 2020 17:38:40 +0800 Subject: [PATCH 07/22] Change golden file to markdown --- .../test/resources/sql-functions/schema.out | 2637 ----------------- .../sql-functions/sql-expression-schema.md | 331 +++ .../spark/sql/ExpressionsSchemaSuite.scala | 99 +- 3 files changed, 396 insertions(+), 2671 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-functions/schema.out create mode 100644 sql/core/src/test/resources/sql-functions/sql-expression-schema.md diff --git a/sql/core/src/test/resources/sql-functions/schema.out b/sql/core/src/test/resources/sql-functions/schema.out deleted file mode 100644 index 218f3ea8634a6..0000000000000 --- a/sql/core/src/test/resources/sql-functions/schema.out +++ /dev/null @@ -1,2637 +0,0 @@ --- Automatically generated by ExpressionsSchemaSuite --- Number of queries: 287 - --- Class name: org.apache.spark.sql.catalyst.expressions.StringSpace - --- Function name: space --- !query -SELECT concat(space(2), '1') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CreateArray - --- Function name: array --- !query -SELECT array(1, 2, 3) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayExcept - --- Function name: array_except --- !query -SELECT array_except(array(1, 2, 3), array(1, 3, 5)) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseXor - --- Function name: ^ --- !query -SELECT 3 ^ 5 --- !query schema -struct<(3 ^ 5):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringRPad - --- Function name: rpad --- !query -SELECT rpad('hi', 5, '??') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.SchemaOfJson - --- Function name: schema_of_json --- !query -SELECT schema_of_json('[{"col":0}]') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ParseToTimestamp - --- Function name: to_timestamp --- !query -SELECT to_timestamp('2016-12-31 00:12:00') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathInt - --- Function name: xpath_int --- !query -SELECT xpath_int('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop - --- Function name: var_pop --- !query -SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Hex - --- Function name: hex --- !query -SELECT hex(17) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArraysZip - --- Function name: arrays_zip --- !query -SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)) --- !query schema -struct>> - - --- Class name: org.apache.spark.sql.catalyst.expressions.InputFileName - --- Function name: input_file_name - - --- Class name: org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID - --- Function name: monotonically_increasing_id - - --- Class name: org.apache.spark.sql.catalyst.expressions.DayOfMonth - --- Function name: day --- !query -SELECT day('2009-07-30') --- !query schema -struct - --- Function name: dayofmonth --- !query -SELECT dayofmonth('2009-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ElementAt - --- Function name: element_at --- !query -SELECT element_at(array(1, 2, 3), 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.WeekDay - --- Function name: weekday --- !query -SELECT weekday('2009-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathLong - --- Function name: xpath_long --- !query -SELECT xpath_long('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):bigint> - - --- Class name: org.apache.spark.sql.catalyst.expressions.CumeDist - --- Function name: cume_dist - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayMin - --- Function name: array_min --- !query -SELECT array_min(array(1, 20, null, 3)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy - --- Function name: max_by --- !query -SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Rank - --- Function name: rank - - --- Class name: org.apache.spark.sql.catalyst.expressions.Right - --- Function name: right --- !query -SELECT right('Spark SQL', 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Least - --- Function name: least --- !query -SELECT least(10, 9, 2, 4, 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Lower - --- Function name: lcase --- !query -SELECT lcase('SparkSql') --- !query schema -struct - --- Function name: lower --- !query -SELECT lower('SparkSql') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Nvl - --- Function name: nvl --- !query -SELECT nvl(NULL, array('2')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Pmod - --- Function name: pmod --- !query -SELECT pmod(10, 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Chr - --- Function name: char --- !query -SELECT char(65) --- !query schema -struct - --- Function name: chr --- !query -SELECT chr(65) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.GetJsonObject - --- Function name: get_json_object --- !query -SELECT get_json_object('{"a":"b"}', '$.a') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ToDegrees - --- Function name: degrees --- !query -SELECT degrees(3.141592653589793) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayExists - --- Function name: exists --- !query -SELECT exists(array(1, 2, 3), x -> x % 2 == 0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Tanh - --- Function name: tanh --- !query -SELECT tanh(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringSplit - --- Function name: split --- !query -SELECT split('oneAtwoBthreeC', '[ABC]') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.GroupingID - --- Function name: grouping_id --- !query -SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Expm1 - --- Function name: expm1 --- !query -SELECT expm1(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Quarter - --- Function name: quarter --- !query -SELECT quarter('2016-08-31') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Atan2 - --- Function name: atan2 --- !query -SELECT atan2(0, 0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.TypeOf - --- Function name: typeof --- !query -SELECT typeof(1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sequence - --- Function name: sequence --- !query -SELECT sequence(1, 5) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.InputFileBlockStart - --- Function name: input_file_block_start - - --- Class name: org.apache.spark.sql.catalyst.expressions.ConcatWs - --- Function name: concat_ws --- !query -SELECT concat_ws(' ', 'Spark', 'SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile - --- Function name: approx_percentile --- !query -SELECT approx_percentile(10.0, array(0.5, 0.4, 0.1), 100) --- !query schema -struct> - --- Function name: percentile_approx --- !query -SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.TimeWindow - --- Function name: window - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringLocate - --- Function name: position --- !query -SELECT position('bar', 'foobarbar') --- !query schema -struct - --- Function name: locate --- !query -SELECT locate('bar', 'foobarbar') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.FormatNumber - --- Function name: format_number --- !query -SELECT format_number(12332.123456, 4) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.UnaryPositive - --- Function name: positive - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Corr - --- Function name: corr --- !query -SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Md5 - --- Function name: md5 --- !query -SELECT md5('Spark') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StructsToJson - --- Function name: to_json --- !query -SELECT to_json(named_struct('a', 1, 'b', 2)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop - --- Function name: stddev_pop --- !query -SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Rint - --- Function name: rint --- !query -SELECT rint(12.3456) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapFromArrays - --- Function name: map_from_arrays --- !query -SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sinh - --- Function name: sinh --- !query -SELECT sinh(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Lag - --- Function name: lag - - --- Class name: org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual - --- Function name: >= --- !query -SELECT 2 >= 1 --- !query schema -struct<(2 >= 1):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseAnd - --- Function name: & --- !query -SELECT 3 & 5 --- !query schema -struct<(3 & 5):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.First - --- Function name: first_value --- !query -SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) --- !query schema -struct - --- Function name: first --- !query -SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.TruncDate - --- Function name: trunc --- !query -SELECT trunc('2019-08-04', 'week') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean - --- Function name: xpath_boolean --- !query -SELECT xpath_boolean('1','a/b') --- !query schema -struct1, a/b):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.MakeInterval - --- Function name: make_interval --- !query -SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Atanh - --- Function name: atanh --- !query -SELECT atanh(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.FindInSet - --- Function name: find_in_set --- !query -SELECT find_in_set('ab','abc,b,ab,c,def') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray - --- Function name: json_array_length --- !query -SELECT json_array_length('[1,2,3,4]') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg - --- Function name: bit_xor --- !query -SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Decode - --- Function name: decode --- !query -SELECT decode(encode('abc', 'utf-8'), 'utf-8') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Coalesce - --- Function name: coalesce --- !query -SELECT coalesce(NULL, 1, NULL) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.RegExpReplace - --- Function name: regexp_replace --- !query -SELECT regexp_replace('100-200', '(\\d+)', 'num') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp - --- Function name: var_samp --- !query -SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - --- Function name: variance --- !query -SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cos - --- Function name: cos --- !query -SELECT cos(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayFilter - --- Function name: filter --- !query -SELECT filter(array(1, 2, 3), x -> x % 2 == 1) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.PosExplode - --- Function name: posexplode_outer --- !query -SELECT posexplode_outer(array(10,20)) --- !query schema -struct - --- Function name: posexplode --- !query -SELECT posexplode(array(10,20)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.InputFileBlockLength - --- Function name: input_file_block_length - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd - --- Function name: every --- !query -SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col) --- !query schema -struct - --- Function name: bool_and --- !query -SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg - --- Function name: count_min_sketch - - --- Class name: org.apache.spark.sql.catalyst.expressions.AssertTrue - --- Function name: assert_true --- !query -SELECT assert_true(0 < 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CurrentDate - --- Function name: current_date - - --- Class name: org.apache.spark.sql.catalyst.expressions.MonthsBetween - --- Function name: months_between --- !query -SELECT months_between('1997-02-28 10:30:00', '1996-10-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.EqualNullSafe - --- Function name: <=> --- !query -SELECT 2 <=> 2 --- !query schema -struct<(2 <=> 2):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Add - --- Function name: + --- !query -SELECT 1 + 2 --- !query schema -struct<(1 + 2):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Multiply - --- Function name: * --- !query -SELECT 2 * 3 --- !query schema -struct<(2 * 3):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.DatePart - --- Function name: date_part --- !query -SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ShiftLeft - --- Function name: shiftleft --- !query -SELECT shiftleft(2, 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.GreaterThan - --- Function name: > --- !query -SELECT 2 > 1 --- !query schema -struct<(2 > 1):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Slice - --- Function name: slice --- !query -SELECT slice(array(1, 2, 3, 4), 2, 2) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sentences - --- Function name: sentences --- !query -SELECT sentences('Hi there! Good morning.') --- !query schema -struct>> - - --- Class name: org.apache.spark.sql.catalyst.expressions.SoundEx - --- Function name: soundex --- !query -SELECT soundex('Miller') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.AddMonths - --- Function name: add_months --- !query -SELECT add_months('2016-08-31', 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Max - --- Function name: max --- !query -SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapFilter - --- Function name: map_filter --- !query -SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v) --- !query schema -struct namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):map> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Crc32 - --- Function name: crc32 --- !query -SELECT crc32('Spark') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sha2 - --- Function name: sha2 --- !query -SELECT sha2('Spark', 256) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Size - --- Function name: size --- !query -SELECT size(array('b', 'd', 'c', 'a')) --- !query schema -struct - --- Function name: cardinality --- !query -SELECT cardinality(array('b', 'd', 'c', 'a')) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CurrentTimestamp - --- Function name: current_timestamp - --- Function name: now - - --- Class name: org.apache.spark.sql.catalyst.expressions.In - --- Function name: in --- !query -SELECT 1 in(1, 2, 3) --- !query schema -struct<(1 IN (1, 2, 3)):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.CurrentDatabase - --- Function name: current_database --- !query -SELECT current_database() --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringInstr - --- Function name: instr --- !query -SELECT instr('SparkSQL', 'SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Sum - --- Function name: sum --- !query -SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CountIf - --- Function name: count_if --- !query -SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MakeTimestamp - --- Function name: make_timestamp --- !query -SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CsvToStructs - --- Function name: from_csv --- !query -SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Remainder - --- Function name: % --- !query -SELECT 2 % 1.8 --- !query schema -struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> - --- Function name: mod --- !query -SELECT 2 % 1.8 --- !query schema -struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringRepeat - --- Function name: repeat --- !query -SELECT repeat('123', 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.SubstringIndex - --- Function name: substring_index --- !query -SELECT substring_index('www.apache.org', '.', 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringTrimLeft - --- Function name: ltrim --- !query -SELECT ltrim(' SparkSQL ') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringTranslate - --- Function name: translate --- !query -SELECT translate('AaBbCc', 'abc', '123') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Greatest - --- Function name: greatest --- !query -SELECT greatest(10, 9, 2, 4, 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayDistinct - --- Function name: array_distinct --- !query -SELECT array_distinct(array(1, 2, 3, null, 3)) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringReplace - --- Function name: replace --- !query -SELECT replace('ABCabc', 'abc', 'DEF') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathShort - --- Function name: xpath_short --- !query -SELECT xpath_short('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):smallint> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr - --- Function name: bool_or --- !query -SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) --- !query schema -struct - --- Function name: some --- !query -SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) --- !query schema -struct - --- Function name: any --- !query -SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Murmur3Hash - --- Function name: hash --- !query -SELECT hash('Spark', array(123), 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.RLike - --- Function name: rlike --- !query -SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' --- !query schema -struct<%SystemDrive%UsersJohn RLIKE %SystemDrive%\Users.*:boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.If - --- Function name: if --- !query -SELECT if(1 < 2, 'a', 'b') --- !query schema -struct<(IF((1 < 2), a, b)):string> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Grouping - --- Function name: grouping --- !query -SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Abs - --- Function name: abs --- !query -SELECT abs(-1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.InitCap - --- Function name: initcap --- !query -SELECT initcap('sPark sql') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Percentile - --- Function name: percentile --- !query -SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.IsNotNull - --- Function name: isnotnull --- !query -SELECT isnotnull(1) --- !query schema -struct<(1 IS NOT NULL):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cbrt - --- Function name: cbrt --- !query -SELECT cbrt(27.0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseNot - --- Function name: ~ --- !query -SELECT ~ 0 --- !query schema -struct<~0:int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Last - --- Function name: last_value --- !query -SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) --- !query schema -struct - --- Function name: last --- !query -SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.NullIf - --- Function name: nullif --- !query -SELECT nullif(2, 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Month - --- Function name: month --- !query -SELECT month('2016-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Logarithm - --- Function name: log --- !query -SELECT log(10, 100) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Subtract - --- Function name: - --- !query -SELECT 2 - 1 --- !query schema -struct<(2 - 1):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.DateAdd - --- Function name: date_add --- !query -SELECT date_add('2016-07-30', 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MakeDate - --- Function name: make_date --- !query -SELECT make_date(2013, 7, 15) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.JsonToStructs - --- Function name: from_json --- !query -SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ZipWith - --- Function name: zip_with --- !query -SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) --- !query schema -struct>> - - --- Class name: org.apache.spark.sql.catalyst.expressions.NamedStruct - --- Function name: struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Tan - --- Function name: tan --- !query -SELECT tan(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.EulerNumber - --- Function name: e --- !query -SELECT e() --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringToMap - --- Function name: str_to_map --- !query -SELECT str_to_map('a:1,b:2,c:3', ',', ':') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArraySort - --- Function name: array_sort --- !query -SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) --- !query schema -struct namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cast - --- Function name: string - --- Function name: cast --- !query -SELECT cast('10' as int) --- !query schema -struct - --- Function name: tinyint - --- Function name: double - --- Function name: smallint - --- Function name: date - --- Function name: decimal - --- Function name: boolean - --- Function name: float - --- Function name: binary - --- Function name: bigint - --- Function name: int - --- Function name: timestamp - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Min - --- Function name: min --- !query -SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Average - --- Function name: avg --- !query -SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - --- Function name: mean --- !query -SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.SortArray - --- Function name: sort_array --- !query -SELECT sort_array(array('b', 'd', null, 'c', 'a'), true) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.NextDay - --- Function name: next_day --- !query -SELECT next_day('2015-01-14', 'TU') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Ascii - --- Function name: ascii --- !query -SELECT ascii('222') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayRemove - --- Function name: array_remove --- !query -SELECT array_remove(array(1, 2, 3, null, 3), 3) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Pow - --- Function name: pow --- !query -SELECT pow(2, 3) --- !query schema -struct - --- Function name: power --- !query -SELECT power(2, 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.LessThan - --- Function name: < --- !query -SELECT 1 < 2 --- !query schema -struct<(1 < 2):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapKeys - --- Function name: map_keys --- !query -SELECT map_keys(map(1, 'a', 2, 'b')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Inline - --- Function name: inline --- !query -SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) --- !query schema -struct - --- Function name: inline_outer --- !query -SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapZipWith - --- Function name: map_zip_with --- !query -SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Encode - --- Function name: encode --- !query -SELECT encode('abc', 'utf-8') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayJoin - --- Function name: array_join --- !query -SELECT array_join(array('hello', 'world'), ' ') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.And - --- Function name: and - - --- Class name: org.apache.spark.sql.catalyst.expressions.Hypot - --- Function name: hypot --- !query -SELECT hypot(3, 4) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Round - --- Function name: round --- !query -SELECT round(2.5, 0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CovSample - --- Function name: covar_samp --- !query -SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Pi - --- Function name: pi --- !query -SELECT pi() --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sqrt - --- Function name: sqrt --- !query -SELECT sqrt(4) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.TransformKeys - --- Function name: transform_keys --- !query -SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Substring - --- Function name: substr --- !query -SELECT substr('Spark SQL', 5) --- !query schema -struct - --- Function name: substring --- !query -SELECT substring('Spark SQL', 5) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Asinh - --- Function name: asinh --- !query -SELECT asinh(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Second - --- Function name: second --- !query -SELECT second('2009-07-30 12:58:59') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp - --- Function name: to_utc_timestamp --- !query -SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Upper - --- Function name: ucase --- !query -SELECT ucase('SparkSql') --- !query schema -struct - --- Function name: upper --- !query -SELECT upper('SparkSql') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg - --- Function name: bit_and --- !query -SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Stack - --- Function name: stack --- !query -SELECT stack(2, 1, 2, 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.EqualTo - --- Function name: = --- !query -SELECT 2 = 2 --- !query schema -struct<(2 = 2):boolean> - --- Function name: == --- !query -SELECT 2 == 2 --- !query schema -struct<(2 = 2):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringLPad - --- Function name: lpad --- !query -SELECT lpad('hi', 5, '??') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapFromEntries - --- Function name: map_from_entries --- !query -SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cube - --- Function name: cube --- !query -SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name, age) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Divide - --- Function name: / --- !query -SELECT 3 / 2 --- !query schema -struct<(CAST(3 AS DOUBLE) / CAST(2 AS DOUBLE)):double> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Like - --- Function name: like --- !query -SELECT like('Spark', '_park') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.OctetLength - --- Function name: octet_length --- !query -SELECT octet_length('Spark SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CaseWhen - --- Function name: when --- !query -SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END --- !query schema -struct 0) THEN CAST(1 AS DECIMAL(11,1)) WHEN (2 > 0) THEN CAST(2.0 AS DECIMAL(11,1)) ELSE CAST(1.2 AS DECIMAL(11,1)) END:decimal(11,1)> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Log - --- Function name: ln --- !query -SELECT ln(1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseCount - --- Function name: bit_count --- !query -SELECT bit_count(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Acos - --- Function name: acos --- !query -SELECT acos(1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.SparkPartitionID - --- Function name: spark_partition_id - - --- Class name: org.apache.spark.sql.catalyst.expressions.DateFormatClass - --- Function name: date_format --- !query -SELECT date_format('2016-04-08', 'y') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.FromUnixTime - --- Function name: from_unixtime --- !query -SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Floor - --- Function name: floor --- !query -SELECT floor(-0.1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.SchemaOfCsv - --- Function name: schema_of_csv --- !query -SELECT schema_of_csv('1,abc') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Log2 - --- Function name: log2 --- !query -SELECT log2(2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.DateSub - --- Function name: date_sub --- !query -SELECT date_sub('2016-07-30', 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.NTile - --- Function name: ntile - - --- Class name: org.apache.spark.sql.catalyst.expressions.RowNumber - --- Function name: row_number - - --- Class name: org.apache.spark.sql.catalyst.expressions.CreateMap - --- Function name: map --- !query -SELECT map(1.0, '2', 3.0, '4') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg - --- Function name: bit_or --- !query -SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.DayOfYear - --- Function name: dayofyear --- !query -SELECT dayofyear('2016-04-09') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.IsNull - --- Function name: isnull --- !query -SELECT isnull(1) --- !query schema -struct<(1 IS NULL):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Ceil - --- Function name: ceil --- !query -SELECT ceil(-0.1) --- !query schema -struct - --- Function name: ceiling --- !query -SELECT ceiling(-0.1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Asin - --- Function name: asin --- !query -SELECT asin(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Count - --- Function name: count --- !query -SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Minute - --- Function name: minute --- !query -SELECT minute('2009-07-30 12:58:59') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.PercentRank - --- Function name: percent_rank - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathList - --- Function name: xpath --- !query -SELECT xpath('b1b2b3c1c2','a/b/text()') --- !query schema -structb1b2b3c1c2, a/b/text()):array> - - --- Class name: org.apache.spark.sql.catalyst.expressions.IntegralDivide - --- Function name: div --- !query -SELECT 3 div 2 --- !query schema -struct<(3 div 2):bigint> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation - --- Function name: covar_pop --- !query -SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathDouble - --- Function name: xpath_number --- !query -SELECT xpath_number('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):double> - --- Function name: xpath_double --- !query -SELECT xpath_double('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):double> - - --- Class name: org.apache.spark.sql.catalyst.expressions.SparkVersion - --- Function name: version - - --- Class name: org.apache.spark.sql.catalyst.expressions.Not - --- Function name: ! - --- Function name: not - - --- Class name: org.apache.spark.sql.catalyst.expressions.ShiftRight - --- Function name: shiftright --- !query -SELECT shiftright(4, 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sin - --- Function name: sin --- !query -SELECT sin(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ToRadians - --- Function name: radians --- !query -SELECT radians(180) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.TransformValues - --- Function name: transform_values --- !query -SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayUnion - --- Function name: array_union --- !query -SELECT array_union(array(1, 2, 3), array(1, 3, 5)) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis - --- Function name: kurtosis --- !query -SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Signum - --- Function name: signum --- !query -SELECT signum(40) --- !query schema -struct - --- Function name: sign --- !query -SELECT sign(40) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Overlay - --- Function name: overlay --- !query -SELECT overlay('Spark SQL' PLACING '_' FROM 6) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Sha1 - --- Function name: sha1 --- !query -SELECT sha1('Spark') --- !query schema -struct - --- Function name: sha --- !query -SELECT sha('Spark') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.TruncTimestamp - --- Function name: date_trunc --- !query -SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet - --- Function name: collect_set --- !query -SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Factorial - --- Function name: factorial --- !query -SELECT factorial(5) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitLength - --- Function name: bit_length --- !query -SELECT bit_length('Spark SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StructsToCsv - --- Function name: to_csv --- !query -SELECT to_csv(named_struct('a', 1, 'b', 2)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.XxHash64 - --- Function name: xxhash64 --- !query -SELECT xxhash64('Spark', array(123), 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.IfNull - --- Function name: ifnull --- !query -SELECT ifnull(NULL, array('2')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Flatten - --- Function name: flatten --- !query -SELECT flatten(array(array(1, 2), array(3, 4))) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.CollectList - --- Function name: collect_list --- !query -SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.BitwiseOr - --- Function name: | --- !query -SELECT 3 | 5 --- !query schema -struct<(3 | 5):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Or - --- Function name: or - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayRepeat - --- Function name: array_repeat --- !query -SELECT array_repeat('123', 2) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathString - --- Function name: xpath_string --- !query -SELECT xpath_string('bcc','a/c') --- !query schema -structbcc, a/c):string> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayMax - --- Function name: array_max --- !query -SELECT array_max(array(1, 20, null, 3)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringTrim - --- Function name: trim --- !query -SELECT trim(' SparkSQL ') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.IsNaN - --- Function name: isnan --- !query -SELECT isnan(cast('NaN' as double)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Levenshtein - --- Function name: levenshtein --- !query -SELECT levenshtein('kitten', 'sitting') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus - --- Function name: approx_count_distinct --- !query -SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapConcat - --- Function name: map_concat --- !query -SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Atan - --- Function name: atan --- !query -SELECT atan(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.xml.XPathFloat - --- Function name: xpath_float --- !query -SELECT xpath_float('12', 'sum(a/b)') --- !query schema -struct12, sum(a/b)):float> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Log10 - --- Function name: log10 --- !query -SELECT log10(10) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp - --- Function name: from_utc_timestamp --- !query -SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.CreateNamedStruct - --- Function name: named_struct --- !query -SELECT named_struct("a", 1, "b", 2, "c", 3) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.BRound - --- Function name: bround --- !query -SELECT bround(2.5, 0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Year - --- Function name: year --- !query -SELECT year('2016-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.WeekOfYear - --- Function name: weekofyear --- !query -SELECT weekofyear('2008-02-20') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Hour - --- Function name: hour --- !query -SELECT hour('2009-07-30 12:58:59') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.DayOfWeek - --- Function name: dayofweek --- !query -SELECT dayofweek('2009-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayContains - --- Function name: array_contains --- !query -SELECT array_contains(array(1, 2, 3), 2) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Base64 - --- Function name: base64 --- !query -SELECT base64('Spark SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.UnaryMinus - --- Function name: negative --- !query -SELECT negative(1) --- !query schema -struct<(- 1):int> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Explode - --- Function name: explode --- !query -SELECT explode(array(10, 20)) --- !query schema -struct - --- Function name: explode_outer --- !query -SELECT explode_outer(array(10, 20)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ParseToDate - --- Function name: to_date --- !query -SELECT to_date('2009-07-30 04:17:52') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ParseUrl - --- Function name: parse_url --- !query -SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cosh - --- Function name: cosh --- !query -SELECT cosh(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayAggregate - --- Function name: aggregate --- !query -SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned - --- Function name: shiftrightunsigned --- !query -SELECT shiftrightunsigned(4, 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Nvl2 - --- Function name: nvl2 --- !query -SELECT nvl2(NULL, 2, 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.DateDiff - --- Function name: datediff --- !query -SELECT datediff('2009-07-31', '2009-07-30') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Log1p - --- Function name: log1p --- !query -SELECT log1p(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.NaNvl - --- Function name: nanvl --- !query -SELECT nanvl(cast('NaN' as double), 123) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapEntries - --- Function name: map_entries --- !query -SELECT map_entries(map(1, 'a', 2, 'b')) --- !query schema -struct>> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Reverse - --- Function name: reverse --- !query -SELECT reverse('Spark SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayIntersect - --- Function name: array_intersect --- !query -SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp - --- Function name: stddev_samp --- !query -SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - --- Function name: stddev --- !query -SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - --- Function name: std --- !query -SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.MapValues - --- Function name: map_values --- !query -SELECT map_values(map(1, 'a', 2, 'b')) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArraysOverlap - --- Function name: arrays_overlap --- !query -SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Rollup - --- Function name: rollup --- !query -SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY rollup(name, age) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.LessThanOrEqual - --- Function name: <= --- !query -SELECT 2 <= 2 --- !query schema -struct<(2 <= 2):boolean> - - --- Class name: org.apache.spark.sql.catalyst.expressions.Elt - --- Function name: elt --- !query -SELECT elt(1, 'scala', 'java') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.Skewness - --- Function name: skewness --- !query -SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Left - --- Function name: left --- !query -SELECT left('Spark SQL', 3) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.StringTrimRight - --- Function name: rtrim --- !query -SELECT rtrim(' SparkSQL ') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Lead - --- Function name: lead - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayForAll - --- Function name: forall --- !query -SELECT forall(array(1, 2, 3), x -> x % 2 == 0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Bin - --- Function name: bin --- !query -SELECT bin(13) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.DenseRank - --- Function name: dense_rank - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayPosition - --- Function name: array_position --- !query -SELECT array_position(array(3, 2, 1), 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ArrayTransform - --- Function name: transform --- !query -SELECT transform(array(1, 2, 3), x -> x + 1) --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.JsonTuple - --- Function name: json_tuple --- !query -SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.RegExpExtract - --- Function name: regexp_extract --- !query -SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Length - --- Function name: character_length --- !query -SELECT character_length('Spark SQL ') --- !query schema -struct - --- Function name: char_length --- !query -SELECT char_length('Spark SQL ') --- !query schema -struct - --- Function name: length --- !query -SELECT length('Spark SQL ') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Unhex - --- Function name: unhex --- !query -SELECT decode(unhex('537061726B2053514C'), 'UTF-8') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Conv - --- Function name: conv --- !query -SELECT conv('100', 2, 10) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.JsonObjectKeys - --- Function name: json_object_keys --- !query -Select json_object_keys('{}') --- !query schema -struct> - - --- Class name: org.apache.spark.sql.catalyst.expressions.aggregate.MinBy - --- Function name: min_by --- !query -SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Cot - --- Function name: cot --- !query -SELECT cot(1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.LastDay - --- Function name: last_day --- !query -SELECT last_day('2009-01-12') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Exp - --- Function name: exp --- !query -SELECT exp(0) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Concat - --- Function name: concat --- !query -SELECT concat('Spark', 'SQL') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.UnBase64 - --- Function name: unbase64 --- !query -SELECT unbase64('U3BhcmsgU1FM') --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.Acosh - --- Function name: acosh --- !query -SELECT acosh(1) --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.FormatString - --- Function name: printf --- !query -SELECT printf("Hello World %d %s", 100, "days") --- !query schema -struct - --- Function name: format_string --- !query -SELECT format_string("Hello World %d %s", 100, "days") --- !query schema -struct - - --- Class name: org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp - --- Function name: to_unix_timestamp --- !query -SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') --- !query schema -struct \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md new file mode 100644 index 0000000000000..171b0b1e5b5cd --- /dev/null +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -0,0 +1,331 @@ +## Summary + - Number of queries: 324 + - Number of expressions that missing example: 37 + - Expressions for all missing examples include and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,current_date,current_timestamp,now,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch +## Schema of Built-in Functions +| No | Class name | Function name or alias | Query example | Output schema | +| -- | ---------- | ---------------------- | ------------- | ------------- | +| 1 | org.apache.spark.sql.catalyst.expressions.Abs | abs | SELECT abs(-1) | struct | +| 2 | org.apache.spark.sql.catalyst.expressions.Acos | acos | SELECT acos(1) | struct | +| 3 | org.apache.spark.sql.catalyst.expressions.Acosh | acosh | SELECT acosh(1) | struct | +| 4 | org.apache.spark.sql.catalyst.expressions.Add | + | SELECT 1 + 2 | struct<(1 + 2):int> | +| 5 | org.apache.spark.sql.catalyst.expressions.AddMonths | add_months | SELECT add_months('2016-08-31', 1) | struct | +| 6 | org.apache.spark.sql.catalyst.expressions.And | and | Example is missing | Example is missing | +| 7 | org.apache.spark.sql.catalyst.expressions.ArrayAggregate | aggregate | SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct | +| 8 | org.apache.spark.sql.catalyst.expressions.ArrayContains | array_contains | SELECT array_contains(array(1, 2, 3), 2) | struct | +| 9 | org.apache.spark.sql.catalyst.expressions.ArrayDistinct | array_distinct | SELECT array_distinct(array(1, 2, 3, null, 3)) | struct> | +| 10 | org.apache.spark.sql.catalyst.expressions.ArrayExcept | array_except | SELECT array_except(array(1, 2, 3), array(1, 3, 5)) | struct> | +| 11 | org.apache.spark.sql.catalyst.expressions.ArrayExists | exists | SELECT exists(array(1, 2, 3), x -> x % 2 == 0) | struct | +| 12 | org.apache.spark.sql.catalyst.expressions.ArrayFilter | filter | SELECT filter(array(1, 2, 3), x -> x % 2 == 1) | struct> | +| 13 | org.apache.spark.sql.catalyst.expressions.ArrayForAll | forall | SELECT forall(array(1, 2, 3), x -> x % 2 == 0) | struct | +| 14 | org.apache.spark.sql.catalyst.expressions.ArrayIntersect | array_intersect | SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)) | struct> | +| 15 | org.apache.spark.sql.catalyst.expressions.ArrayJoin | array_join | SELECT array_join(array('hello', 'world'), ' ') | struct | +| 16 | org.apache.spark.sql.catalyst.expressions.ArrayMax | array_max | SELECT array_max(array(1, 20, null, 3)) | struct | +| 17 | org.apache.spark.sql.catalyst.expressions.ArrayMin | array_min | SELECT array_min(array(1, 20, null, 3)) | struct | +| 18 | org.apache.spark.sql.catalyst.expressions.ArrayPosition | array_position | SELECT array_position(array(3, 2, 1), 1) | struct | +| 19 | org.apache.spark.sql.catalyst.expressions.ArrayRemove | array_remove | SELECT array_remove(array(1, 2, 3, null, 3), 3) | struct> | +| 20 | org.apache.spark.sql.catalyst.expressions.ArrayRepeat | array_repeat | SELECT array_repeat('123', 2) | struct> | +| 21 | org.apache.spark.sql.catalyst.expressions.ArraySort | array_sort | SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) | struct namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array> | +| 22 | org.apache.spark.sql.catalyst.expressions.ArrayTransform | transform | SELECT transform(array(1, 2, 3), x -> x + 1) | struct> | +| 23 | org.apache.spark.sql.catalyst.expressions.ArrayUnion | array_union | SELECT array_union(array(1, 2, 3), array(1, 3, 5)) | struct> | +| 24 | org.apache.spark.sql.catalyst.expressions.ArraysOverlap | arrays_overlap | SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) | struct | +| 25 | org.apache.spark.sql.catalyst.expressions.ArraysZip | arrays_zip | SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)) | struct>> | +| 26 | org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct | +| 27 | org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct | +| 28 | org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct | +| 29 | org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct | +| 30 | org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct | +| 31 | org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct | +| 32 | org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct | +| 33 | org.apache.spark.sql.catalyst.expressions.BRound | bround | SELECT bround(2.5, 0) | struct | +| 34 | org.apache.spark.sql.catalyst.expressions.Base64 | base64 | SELECT base64('Spark SQL') | struct | +| 35 | org.apache.spark.sql.catalyst.expressions.Bin | bin | SELECT bin(13) | struct | +| 36 | org.apache.spark.sql.catalyst.expressions.BitLength | bit_length | SELECT bit_length('Spark SQL') | struct | +| 37 | org.apache.spark.sql.catalyst.expressions.BitwiseAnd | & | SELECT 3 & 5 | struct<(3 & 5):int> | +| 38 | org.apache.spark.sql.catalyst.expressions.BitwiseCount | bit_count | SELECT bit_count(0) | struct | +| 39 | org.apache.spark.sql.catalyst.expressions.BitwiseNot | ~ | SELECT ~ 0 | struct<~0:int> | +| 40 | org.apache.spark.sql.catalyst.expressions.BitwiseOr | | | SELECT 3 | 5 | struct<(3 | 5):int> | +| 41 | org.apache.spark.sql.catalyst.expressions.BitwiseXor | ^ | SELECT 3 ^ 5 | struct<(3 ^ 5):int> | +| 42 | org.apache.spark.sql.catalyst.expressions.CaseWhen | when | SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END | struct 0) THEN CAST(1 AS DECIMAL(11,1)) WHEN (2 > 0) THEN CAST(2.0 AS DECIMAL(11,1)) ELSE CAST(1.2 AS DECIMAL(11,1)) END:decimal(11,1)> | +| 43 | org.apache.spark.sql.catalyst.expressions.Cast | string | Example is missing | Example is missing | +| 44 | org.apache.spark.sql.catalyst.expressions.Cast | cast | SELECT cast('10' as int) | struct | +| 45 | org.apache.spark.sql.catalyst.expressions.Cast | tinyint | Example is missing | Example is missing | +| 46 | org.apache.spark.sql.catalyst.expressions.Cast | double | Example is missing | Example is missing | +| 47 | org.apache.spark.sql.catalyst.expressions.Cast | smallint | Example is missing | Example is missing | +| 48 | org.apache.spark.sql.catalyst.expressions.Cast | date | Example is missing | Example is missing | +| 49 | org.apache.spark.sql.catalyst.expressions.Cast | decimal | Example is missing | Example is missing | +| 50 | org.apache.spark.sql.catalyst.expressions.Cast | boolean | Example is missing | Example is missing | +| 51 | org.apache.spark.sql.catalyst.expressions.Cast | float | Example is missing | Example is missing | +| 52 | org.apache.spark.sql.catalyst.expressions.Cast | binary | Example is missing | Example is missing | +| 53 | org.apache.spark.sql.catalyst.expressions.Cast | bigint | Example is missing | Example is missing | +| 54 | org.apache.spark.sql.catalyst.expressions.Cast | int | Example is missing | Example is missing | +| 55 | org.apache.spark.sql.catalyst.expressions.Cast | timestamp | Example is missing | Example is missing | +| 56 | org.apache.spark.sql.catalyst.expressions.Cbrt | cbrt | SELECT cbrt(27.0) | struct | +| 57 | org.apache.spark.sql.catalyst.expressions.Ceil | ceil | SELECT ceil(-0.1) | struct | +| 58 | org.apache.spark.sql.catalyst.expressions.Ceil | ceiling | SELECT ceiling(-0.1) | struct | +| 59 | org.apache.spark.sql.catalyst.expressions.Chr | char | SELECT char(65) | struct | +| 60 | org.apache.spark.sql.catalyst.expressions.Chr | chr | SELECT chr(65) | struct | +| 61 | org.apache.spark.sql.catalyst.expressions.Coalesce | coalesce | SELECT coalesce(NULL, 1, NULL) | struct | +| 62 | org.apache.spark.sql.catalyst.expressions.Concat | concat | SELECT concat('Spark', 'SQL') | struct | +| 63 | org.apache.spark.sql.catalyst.expressions.ConcatWs | concat_ws | SELECT concat_ws(' ', 'Spark', 'SQL') | struct | +| 64 | org.apache.spark.sql.catalyst.expressions.Conv | conv | SELECT conv('100', 2, 10) | struct | +| 65 | org.apache.spark.sql.catalyst.expressions.Cos | cos | SELECT cos(0) | struct | +| 66 | org.apache.spark.sql.catalyst.expressions.Cosh | cosh | SELECT cosh(0) | struct | +| 67 | org.apache.spark.sql.catalyst.expressions.Cot | cot | SELECT cot(1) | struct | +| 68 | org.apache.spark.sql.catalyst.expressions.Crc32 | crc32 | SELECT crc32('Spark') | struct | +| 69 | org.apache.spark.sql.catalyst.expressions.CreateArray | array | SELECT array(1, 2, 3) | struct> | +| 70 | org.apache.spark.sql.catalyst.expressions.CreateMap | map | SELECT map(1.0, '2', 3.0, '4') | struct> | +| 71 | org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | named_struct | SELECT named_struct("a", 1, "b", 2, "c", 3) | struct> | +| 72 | org.apache.spark.sql.catalyst.expressions.CsvToStructs | from_csv | SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') | struct> | +| 73 | org.apache.spark.sql.catalyst.expressions.Cube | cube | SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name, age) | struct | +| 74 | org.apache.spark.sql.catalyst.expressions.CumeDist | cume_dist | Example is missing | Example is missing | +| 75 | org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct | +| 76 | org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | Example is missing | Example is missing | +| 77 | org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | Example is missing | Example is missing | +| 78 | org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | now | Example is missing | Example is missing | +| 79 | org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct | +| 80 | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | +| 81 | org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct | +| 82 | org.apache.spark.sql.catalyst.expressions.DatePart | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct | +| 83 | org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct | +| 84 | org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct | +| 85 | org.apache.spark.sql.catalyst.expressions.DayOfMonth | dayofmonth | SELECT dayofmonth('2009-07-30') | struct | +| 86 | org.apache.spark.sql.catalyst.expressions.DayOfWeek | dayofweek | SELECT dayofweek('2009-07-30') | struct | +| 87 | org.apache.spark.sql.catalyst.expressions.DayOfYear | dayofyear | SELECT dayofyear('2016-04-09') | struct | +| 88 | org.apache.spark.sql.catalyst.expressions.Decode | decode | SELECT decode(encode('abc', 'utf-8'), 'utf-8') | struct | +| 89 | org.apache.spark.sql.catalyst.expressions.DenseRank | dense_rank | Example is missing | Example is missing | +| 90 | org.apache.spark.sql.catalyst.expressions.Divide | / | SELECT 3 / 2 | struct<(CAST(3 AS DOUBLE) / CAST(2 AS DOUBLE)):double> | +| 91 | org.apache.spark.sql.catalyst.expressions.ElementAt | element_at | SELECT element_at(array(1, 2, 3), 2) | struct | +| 92 | org.apache.spark.sql.catalyst.expressions.Elt | elt | SELECT elt(1, 'scala', 'java') | struct | +| 93 | org.apache.spark.sql.catalyst.expressions.Encode | encode | SELECT encode('abc', 'utf-8') | struct | +| 94 | org.apache.spark.sql.catalyst.expressions.EqualNullSafe | <=> | SELECT 2 <=> 2 | struct<(2 <=> 2):boolean> | +| 95 | org.apache.spark.sql.catalyst.expressions.EqualTo | = | SELECT 2 = 2 | struct<(2 = 2):boolean> | +| 96 | org.apache.spark.sql.catalyst.expressions.EqualTo | == | SELECT 2 == 2 | struct<(2 = 2):boolean> | +| 97 | org.apache.spark.sql.catalyst.expressions.EulerNumber | e | SELECT e() | struct | +| 98 | org.apache.spark.sql.catalyst.expressions.Exp | exp | SELECT exp(0) | struct | +| 99 | org.apache.spark.sql.catalyst.expressions.Explode | explode | SELECT explode(array(10, 20)) | struct | +| 100 | org.apache.spark.sql.catalyst.expressions.Explode | explode_outer | SELECT explode_outer(array(10, 20)) | struct | +| 101 | org.apache.spark.sql.catalyst.expressions.Expm1 | expm1 | SELECT expm1(0) | struct | +| 102 | org.apache.spark.sql.catalyst.expressions.Factorial | factorial | SELECT factorial(5) | struct | +| 103 | org.apache.spark.sql.catalyst.expressions.FindInSet | find_in_set | SELECT find_in_set('ab','abc,b,ab,c,def') | struct | +| 104 | org.apache.spark.sql.catalyst.expressions.Flatten | flatten | SELECT flatten(array(array(1, 2), array(3, 4))) | struct> | +| 105 | org.apache.spark.sql.catalyst.expressions.Floor | floor | SELECT floor(-0.1) | struct | +| 106 | org.apache.spark.sql.catalyst.expressions.FormatNumber | format_number | SELECT format_number(12332.123456, 4) | struct | +| 107 | org.apache.spark.sql.catalyst.expressions.FormatString | printf | SELECT printf("Hello World %d %s", 100, "days") | struct | +| 108 | org.apache.spark.sql.catalyst.expressions.FormatString | format_string | SELECT format_string("Hello World %d %s", 100, "days") | struct | +| 109 | org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp | from_utc_timestamp | SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct | +| 110 | org.apache.spark.sql.catalyst.expressions.FromUnixTime | from_unixtime | SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') | struct | +| 111 | org.apache.spark.sql.catalyst.expressions.GetJsonObject | get_json_object | SELECT get_json_object('{"a":"b"}', '$.a') | struct | +| 112 | org.apache.spark.sql.catalyst.expressions.GreaterThan | > | SELECT 2 > 1 | struct<(2 > 1):boolean> | +| 113 | org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual | >= | SELECT 2 >= 1 | struct<(2 >= 1):boolean> | +| 114 | org.apache.spark.sql.catalyst.expressions.Greatest | greatest | SELECT greatest(10, 9, 2, 4, 3) | struct | +| 115 | org.apache.spark.sql.catalyst.expressions.Grouping | grouping | SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name) | struct | +| 116 | org.apache.spark.sql.catalyst.expressions.GroupingID | grouping_id | SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) | struct | +| 117 | org.apache.spark.sql.catalyst.expressions.Hex | hex | SELECT hex(17) | struct | +| 118 | org.apache.spark.sql.catalyst.expressions.Hour | hour | SELECT hour('2009-07-30 12:58:59') | struct | +| 119 | org.apache.spark.sql.catalyst.expressions.Hypot | hypot | SELECT hypot(3, 4) | struct | +| 120 | org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> | +| 121 | org.apache.spark.sql.catalyst.expressions.IfNull | ifnull | SELECT ifnull(NULL, array('2')) | struct> | +| 122 | org.apache.spark.sql.catalyst.expressions.In | in | SELECT 1 in(1, 2, 3) | struct<(1 IN (1, 2, 3)):boolean> | +| 123 | org.apache.spark.sql.catalyst.expressions.InitCap | initcap | SELECT initcap('sPark sql') | struct | +| 124 | org.apache.spark.sql.catalyst.expressions.Inline | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct | +| 125 | org.apache.spark.sql.catalyst.expressions.Inline | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct | +| 126 | org.apache.spark.sql.catalyst.expressions.InputFileBlockLength | input_file_block_length | Example is missing | Example is missing | +| 127 | org.apache.spark.sql.catalyst.expressions.InputFileBlockStart | input_file_block_start | Example is missing | Example is missing | +| 128 | org.apache.spark.sql.catalyst.expressions.InputFileName | input_file_name | Example is missing | Example is missing | +| 129 | org.apache.spark.sql.catalyst.expressions.IntegralDivide | div | SELECT 3 div 2 | struct<(3 div 2):bigint> | +| 130 | org.apache.spark.sql.catalyst.expressions.IsNaN | isnan | SELECT isnan(cast('NaN' as double)) | struct | +| 131 | org.apache.spark.sql.catalyst.expressions.IsNotNull | isnotnull | SELECT isnotnull(1) | struct<(1 IS NOT NULL):boolean> | +| 132 | org.apache.spark.sql.catalyst.expressions.IsNull | isnull | SELECT isnull(1) | struct<(1 IS NULL):boolean> | +| 133 | org.apache.spark.sql.catalyst.expressions.JsonObjectKeys | json_object_keys | Select json_object_keys('{}') | struct> | +| 134 | org.apache.spark.sql.catalyst.expressions.JsonToStructs | from_json | SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') | struct> | +| 135 | org.apache.spark.sql.catalyst.expressions.JsonTuple | json_tuple | SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') | struct | +| 136 | org.apache.spark.sql.catalyst.expressions.Lag | lag | Example is missing | Example is missing | +| 137 | org.apache.spark.sql.catalyst.expressions.LastDay | last_day | SELECT last_day('2009-01-12') | struct | +| 138 | org.apache.spark.sql.catalyst.expressions.Lead | lead | Example is missing | Example is missing | +| 139 | org.apache.spark.sql.catalyst.expressions.Least | least | SELECT least(10, 9, 2, 4, 3) | struct | +| 140 | org.apache.spark.sql.catalyst.expressions.Left | left | SELECT left('Spark SQL', 3) | struct | +| 141 | org.apache.spark.sql.catalyst.expressions.Length | character_length | SELECT character_length('Spark SQL ') | struct | +| 142 | org.apache.spark.sql.catalyst.expressions.Length | char_length | SELECT char_length('Spark SQL ') | struct | +| 143 | org.apache.spark.sql.catalyst.expressions.Length | length | SELECT length('Spark SQL ') | struct | +| 144 | org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray | json_array_length | SELECT json_array_length('[1,2,3,4]') | struct | +| 145 | org.apache.spark.sql.catalyst.expressions.LessThan | < | SELECT 1 < 2 | struct<(1 < 2):boolean> | +| 146 | org.apache.spark.sql.catalyst.expressions.LessThanOrEqual | <= | SELECT 2 <= 2 | struct<(2 <= 2):boolean> | +| 147 | org.apache.spark.sql.catalyst.expressions.Levenshtein | levenshtein | SELECT levenshtein('kitten', 'sitting') | struct | +| 148 | org.apache.spark.sql.catalyst.expressions.Like | like | SELECT like('Spark', '_park') | struct | +| 149 | org.apache.spark.sql.catalyst.expressions.Log | ln | SELECT ln(1) | struct | +| 150 | org.apache.spark.sql.catalyst.expressions.Log10 | log10 | SELECT log10(10) | struct | +| 151 | org.apache.spark.sql.catalyst.expressions.Log1p | log1p | SELECT log1p(0) | struct | +| 152 | org.apache.spark.sql.catalyst.expressions.Log2 | log2 | SELECT log2(2) | struct | +| 153 | org.apache.spark.sql.catalyst.expressions.Logarithm | log | SELECT log(10, 100) | struct | +| 154 | org.apache.spark.sql.catalyst.expressions.Lower | lcase | SELECT lcase('SparkSql') | struct | +| 155 | org.apache.spark.sql.catalyst.expressions.Lower | lower | SELECT lower('SparkSql') | struct | +| 156 | org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct | +| 157 | org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct | +| 158 | org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct | +| 159 | org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct> | +| 160 | org.apache.spark.sql.catalyst.expressions.MapEntries | map_entries | SELECT map_entries(map(1, 'a', 2, 'b')) | struct>> | +| 161 | org.apache.spark.sql.catalyst.expressions.MapFilter | map_filter | SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v) | struct namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):map> | +| 162 | org.apache.spark.sql.catalyst.expressions.MapFromArrays | map_from_arrays | SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')) | struct> | +| 163 | org.apache.spark.sql.catalyst.expressions.MapFromEntries | map_from_entries | SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))) | struct> | +| 164 | org.apache.spark.sql.catalyst.expressions.MapKeys | map_keys | SELECT map_keys(map(1, 'a', 2, 'b')) | struct> | +| 165 | org.apache.spark.sql.catalyst.expressions.MapValues | map_values | SELECT map_values(map(1, 'a', 2, 'b')) | struct> | +| 166 | org.apache.spark.sql.catalyst.expressions.MapZipWith | map_zip_with | SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) | struct> | +| 167 | org.apache.spark.sql.catalyst.expressions.Md5 | md5 | SELECT md5('Spark') | struct | +| 168 | org.apache.spark.sql.catalyst.expressions.Minute | minute | SELECT minute('2009-07-30 12:58:59') | struct | +| 169 | org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID | monotonically_increasing_id | Example is missing | Example is missing | +| 170 | org.apache.spark.sql.catalyst.expressions.Month | month | SELECT month('2016-07-30') | struct | +| 171 | org.apache.spark.sql.catalyst.expressions.MonthsBetween | months_between | SELECT months_between('1997-02-28 10:30:00', '1996-10-30') | struct | +| 172 | org.apache.spark.sql.catalyst.expressions.Multiply | * | SELECT 2 * 3 | struct<(2 * 3):int> | +| 173 | org.apache.spark.sql.catalyst.expressions.Murmur3Hash | hash | SELECT hash('Spark', array(123), 2) | struct | +| 174 | org.apache.spark.sql.catalyst.expressions.NTile | ntile | Example is missing | Example is missing | +| 175 | org.apache.spark.sql.catalyst.expressions.NaNvl | nanvl | SELECT nanvl(cast('NaN' as double), 123) | struct | +| 176 | org.apache.spark.sql.catalyst.expressions.NamedStruct | struct | Example is missing | Example is missing | +| 177 | org.apache.spark.sql.catalyst.expressions.NextDay | next_day | SELECT next_day('2015-01-14', 'TU') | struct | +| 178 | org.apache.spark.sql.catalyst.expressions.Not | ! | Example is missing | Example is missing | +| 179 | org.apache.spark.sql.catalyst.expressions.Not | not | Example is missing | Example is missing | +| 180 | org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct | +| 181 | org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct> | +| 182 | org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct | +| 183 | org.apache.spark.sql.catalyst.expressions.OctetLength | octet_length | SELECT octet_length('Spark SQL') | struct | +| 184 | org.apache.spark.sql.catalyst.expressions.Or | or | Example is missing | Example is missing | +| 185 | org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct | +| 186 | org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct | +| 187 | org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct | +| 188 | org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct | +| 189 | org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | Example is missing | Example is missing | +| 190 | org.apache.spark.sql.catalyst.expressions.Pi | pi | SELECT pi() | struct | +| 191 | org.apache.spark.sql.catalyst.expressions.Pmod | pmod | SELECT pmod(10, 3) | struct | +| 192 | org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode_outer | SELECT posexplode_outer(array(10,20)) | struct | +| 193 | org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode | SELECT posexplode(array(10,20)) | struct | +| 194 | org.apache.spark.sql.catalyst.expressions.Pow | pow | SELECT pow(2, 3) | struct | +| 195 | org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct | +| 196 | org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct | +| 197 | org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' | struct<%SystemDrive%UsersJohn RLIKE %SystemDrive%\Users.*:boolean> | +| 198 | org.apache.spark.sql.catalyst.expressions.Rank | rank | Example is missing | Example is missing | +| 199 | org.apache.spark.sql.catalyst.expressions.RegExpExtract | regexp_extract | SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) | struct | +| 200 | org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\\d+)', 'num') | struct | +| 201 | org.apache.spark.sql.catalyst.expressions.Remainder | % | SELECT 2 % 1.8 | struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> | +| 202 | org.apache.spark.sql.catalyst.expressions.Remainder | mod | SELECT 2 % 1.8 | struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> | +| 203 | org.apache.spark.sql.catalyst.expressions.Reverse | reverse | SELECT reverse('Spark SQL') | struct | +| 204 | org.apache.spark.sql.catalyst.expressions.Right | right | SELECT right('Spark SQL', 3) | struct | +| 205 | org.apache.spark.sql.catalyst.expressions.Rint | rint | SELECT rint(12.3456) | struct | +| 206 | org.apache.spark.sql.catalyst.expressions.Rollup | rollup | SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY rollup(name, age) | struct | +| 207 | org.apache.spark.sql.catalyst.expressions.Round | round | SELECT round(2.5, 0) | struct | +| 208 | org.apache.spark.sql.catalyst.expressions.RowNumber | row_number | Example is missing | Example is missing | +| 209 | org.apache.spark.sql.catalyst.expressions.SchemaOfCsv | schema_of_csv | SELECT schema_of_csv('1,abc') | struct | +| 210 | org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json | SELECT schema_of_json('[{"col":0}]') | struct | +| 211 | org.apache.spark.sql.catalyst.expressions.Second | second | SELECT second('2009-07-30 12:58:59') | struct | +| 212 | org.apache.spark.sql.catalyst.expressions.Sentences | sentences | SELECT sentences('Hi there! Good morning.') | struct>> | +| 213 | org.apache.spark.sql.catalyst.expressions.Sequence | sequence | SELECT sequence(1, 5) | struct> | +| 214 | org.apache.spark.sql.catalyst.expressions.Sha1 | sha1 | SELECT sha1('Spark') | struct | +| 215 | org.apache.spark.sql.catalyst.expressions.Sha1 | sha | SELECT sha('Spark') | struct | +| 216 | org.apache.spark.sql.catalyst.expressions.Sha2 | sha2 | SELECT sha2('Spark', 256) | struct | +| 217 | org.apache.spark.sql.catalyst.expressions.ShiftLeft | shiftleft | SELECT shiftleft(2, 1) | struct | +| 218 | org.apache.spark.sql.catalyst.expressions.ShiftRight | shiftright | SELECT shiftright(4, 1) | struct | +| 219 | org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned | shiftrightunsigned | SELECT shiftrightunsigned(4, 1) | struct | +| 220 | org.apache.spark.sql.catalyst.expressions.Signum | signum | SELECT signum(40) | struct | +| 221 | org.apache.spark.sql.catalyst.expressions.Signum | sign | SELECT sign(40) | struct | +| 222 | org.apache.spark.sql.catalyst.expressions.Sin | sin | SELECT sin(0) | struct | +| 223 | org.apache.spark.sql.catalyst.expressions.Sinh | sinh | SELECT sinh(0) | struct | +| 224 | org.apache.spark.sql.catalyst.expressions.Size | size | SELECT size(array('b', 'd', 'c', 'a')) | struct | +| 225 | org.apache.spark.sql.catalyst.expressions.Size | cardinality | SELECT cardinality(array('b', 'd', 'c', 'a')) | struct | +| 226 | org.apache.spark.sql.catalyst.expressions.Slice | slice | SELECT slice(array(1, 2, 3, 4), 2, 2) | struct> | +| 227 | org.apache.spark.sql.catalyst.expressions.SortArray | sort_array | SELECT sort_array(array('b', 'd', null, 'c', 'a'), true) | struct> | +| 228 | org.apache.spark.sql.catalyst.expressions.SoundEx | soundex | SELECT soundex('Miller') | struct | +| 229 | org.apache.spark.sql.catalyst.expressions.SparkPartitionID | spark_partition_id | Example is missing | Example is missing | +| 230 | org.apache.spark.sql.catalyst.expressions.SparkVersion | version | Example is missing | Example is missing | +| 231 | org.apache.spark.sql.catalyst.expressions.Sqrt | sqrt | SELECT sqrt(4) | struct | +| 232 | org.apache.spark.sql.catalyst.expressions.Stack | stack | SELECT stack(2, 1, 2, 3) | struct | +| 233 | org.apache.spark.sql.catalyst.expressions.StringInstr | instr | SELECT instr('SparkSQL', 'SQL') | struct | +| 234 | org.apache.spark.sql.catalyst.expressions.StringLPad | lpad | SELECT lpad('hi', 5, '??') | struct | +| 235 | org.apache.spark.sql.catalyst.expressions.StringLocate | position | SELECT position('bar', 'foobarbar') | struct | +| 236 | org.apache.spark.sql.catalyst.expressions.StringLocate | locate | SELECT locate('bar', 'foobarbar') | struct | +| 237 | org.apache.spark.sql.catalyst.expressions.StringRPad | rpad | SELECT rpad('hi', 5, '??') | struct | +| 238 | org.apache.spark.sql.catalyst.expressions.StringRepeat | repeat | SELECT repeat('123', 2) | struct | +| 239 | org.apache.spark.sql.catalyst.expressions.StringReplace | replace | SELECT replace('ABCabc', 'abc', 'DEF') | struct | +| 240 | org.apache.spark.sql.catalyst.expressions.StringSpace | space | SELECT concat(space(2), '1') | struct | +| 241 | org.apache.spark.sql.catalyst.expressions.StringSplit | split | SELECT split('oneAtwoBthreeC', '[ABC]') | struct> | +| 242 | org.apache.spark.sql.catalyst.expressions.StringToMap | str_to_map | SELECT str_to_map('a:1,b:2,c:3', ',', ':') | struct> | +| 243 | org.apache.spark.sql.catalyst.expressions.StringTranslate | translate | SELECT translate('AaBbCc', 'abc', '123') | struct | +| 244 | org.apache.spark.sql.catalyst.expressions.StringTrim | trim | SELECT trim(' SparkSQL ') | struct | +| 245 | org.apache.spark.sql.catalyst.expressions.StringTrimLeft | ltrim | SELECT ltrim(' SparkSQL ') | struct | +| 246 | org.apache.spark.sql.catalyst.expressions.StringTrimRight | rtrim | SELECT rtrim(' SparkSQL ') | struct | +| 247 | org.apache.spark.sql.catalyst.expressions.StructsToCsv | to_csv | SELECT to_csv(named_struct('a', 1, 'b', 2)) | struct | +| 248 | org.apache.spark.sql.catalyst.expressions.StructsToJson | to_json | SELECT to_json(named_struct('a', 1, 'b', 2)) | struct | +| 249 | org.apache.spark.sql.catalyst.expressions.Substring | substr | SELECT substr('Spark SQL', 5) | struct | +| 250 | org.apache.spark.sql.catalyst.expressions.Substring | substring | SELECT substring('Spark SQL', 5) | struct | +| 251 | org.apache.spark.sql.catalyst.expressions.SubstringIndex | substring_index | SELECT substring_index('www.apache.org', '.', 2) | struct | +| 252 | org.apache.spark.sql.catalyst.expressions.Subtract | - | SELECT 2 - 1 | struct<(2 - 1):int> | +| 253 | org.apache.spark.sql.catalyst.expressions.Tan | tan | SELECT tan(0) | struct | +| 254 | org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct | +| 255 | org.apache.spark.sql.catalyst.expressions.TimeWindow | window | Example is missing | Example is missing | +| 256 | org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct | +| 257 | org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT radians(180) | struct | +| 258 | org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp | to_utc_timestamp | SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct | +| 259 | org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp | to_unix_timestamp | SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') | struct | +| 260 | org.apache.spark.sql.catalyst.expressions.TransformKeys | transform_keys | SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) | struct> | +| 261 | org.apache.spark.sql.catalyst.expressions.TransformValues | transform_values | SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1) | struct> | +| 262 | org.apache.spark.sql.catalyst.expressions.TruncDate | trunc | SELECT trunc('2019-08-04', 'week') | struct | +| 263 | org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct | +| 264 | org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct | +| 265 | org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct | +| 266 | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<(- 1):int> | +| 267 | org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | Example is missing | Example is missing | +| 268 | org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct | +| 269 | org.apache.spark.sql.catalyst.expressions.Upper | ucase | SELECT ucase('SparkSql') | struct | +| 270 | org.apache.spark.sql.catalyst.expressions.Upper | upper | SELECT upper('SparkSql') | struct | +| 271 | org.apache.spark.sql.catalyst.expressions.WeekDay | weekday | SELECT weekday('2009-07-30') | struct | +| 272 | org.apache.spark.sql.catalyst.expressions.WeekOfYear | weekofyear | SELECT weekofyear('2008-02-20') | struct | +| 273 | org.apache.spark.sql.catalyst.expressions.XxHash64 | xxhash64 | SELECT xxhash64('Spark', array(123), 2) | struct | +| 274 | org.apache.spark.sql.catalyst.expressions.Year | year | SELECT year('2016-07-30') | struct | +| 275 | org.apache.spark.sql.catalyst.expressions.ZipWith | zip_with | SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) | struct>> | +| 276 | org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | approx_percentile | SELECT approx_percentile(10.0, array(0.5, 0.4, 0.1), 100) | struct> | +| 277 | org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | percentile_approx | SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100) | struct> | +| 278 | org.apache.spark.sql.catalyst.expressions.aggregate.Average | avg | SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| 279 | org.apache.spark.sql.catalyst.expressions.aggregate.Average | mean | SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| 280 | org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg | bit_and | SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col) | struct | +| 281 | org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg | bit_or | SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col) | struct | +| 282 | org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg | bit_xor | SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col) | struct | +| 283 | org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd | every | SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col) | struct | +| 284 | org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd | bool_and | SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) | struct | +| 285 | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | bool_or | SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | +| 286 | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | some | SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | +| 287 | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | any | SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | +| 288 | org.apache.spark.sql.catalyst.expressions.aggregate.CollectList | collect_list | SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | +| 289 | org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet | collect_set | SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | +| 290 | org.apache.spark.sql.catalyst.expressions.aggregate.Corr | corr | SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) | struct | +| 291 | org.apache.spark.sql.catalyst.expressions.aggregate.Count | count | SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) | struct | +| 292 | org.apache.spark.sql.catalyst.expressions.aggregate.CountIf | count_if | SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) | struct | +| 293 | org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg | count_min_sketch | Example is missing | Example is missing | +| 294 | org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation | covar_pop | SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct | +| 295 | org.apache.spark.sql.catalyst.expressions.aggregate.CovSample | covar_samp | SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct | +| 296 | org.apache.spark.sql.catalyst.expressions.aggregate.First | first_value | SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | +| 297 | org.apache.spark.sql.catalyst.expressions.aggregate.First | first | SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | +| 298 | org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus | approx_count_distinct | SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1) | struct | +| 299 | org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis | kurtosis | SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct | +| 300 | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last_value | SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | +| 301 | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last | SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | +| 302 | org.apache.spark.sql.catalyst.expressions.aggregate.Max | max | SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) | struct | +| 303 | org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct | +| 304 | org.apache.spark.sql.catalyst.expressions.aggregate.Min | min | SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) | struct | +| 305 | org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct | +| 306 | org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile | SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) | struct | +| 307 | org.apache.spark.sql.catalyst.expressions.aggregate.Skewness | skewness | SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct | +| 308 | org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop | stddev_pop | SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| 309 | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev_samp | SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| 310 | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev | SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| 311 | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | std | SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| 312 | org.apache.spark.sql.catalyst.expressions.aggregate.Sum | sum | SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct | +| 313 | org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop | var_pop | SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| 314 | org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | var_samp | SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| 315 | org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | variance | SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| 316 | org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean | xpath_boolean | SELECT xpath_boolean('1','a/b') | struct1, a/b):boolean> | +| 317 | org.apache.spark.sql.catalyst.expressions.xml.XPathDouble | xpath_number | SELECT xpath_number('12', 'sum(a/b)') | struct12, sum(a/b)):double> | +| 318 | org.apache.spark.sql.catalyst.expressions.xml.XPathDouble | xpath_double | SELECT xpath_double('12', 'sum(a/b)') | struct12, sum(a/b)):double> | +| 319 | org.apache.spark.sql.catalyst.expressions.xml.XPathFloat | xpath_float | SELECT xpath_float('12', 'sum(a/b)') | struct12, sum(a/b)):float> | +| 320 | org.apache.spark.sql.catalyst.expressions.xml.XPathInt | xpath_int | SELECT xpath_int('12', 'sum(a/b)') | struct12, sum(a/b)):int> | +| 321 | org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('b1b2b3c1c2','a/b/text()') | structb1b2b3c1c2, a/b/text()):array> | +| 322 | org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('12', 'sum(a/b)') | struct12, sum(a/b)):bigint> | +| 323 | org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('12', 'sum(a/b)') | struct12, sum(a/b)):smallint> | +| 324 | org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('bcc','a/c') | structbcc, a/c):string> | \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index cd74dac531126..80de9853692d6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -27,9 +27,10 @@ import org.apache.spark.sql.execution.HiveResult.hiveResultString import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.tags.ExtendedSQLTest +// scalastyle:off line.size.limit /** * End-to-end test cases for SQL schemas of expression examples. - * The golden result file is "spark/sql/core/src/test/resources/sql-functions/output.out". + * The golden result file is "spark/sql/core/src/test/resources/sql-functions/sql-expression-schema.md". * * To run the entire test suite: * {{{ @@ -59,16 +60,11 @@ import org.apache.spark.tags.ExtendedSQLTest * The format for golden result files look roughly like: * {{{ * ... - * -- Class name: org.apache.spark.sql.catalyst.expressions.StringRepeat - * - * -- Function name: repeat - * -- !query - * SELECT repeat('123', 2) - * -- !query schema - * struct + * | 238 | org.apache.spark.sql.catalyst.expressions.StringRepeat | repeat | SELECT repeat('123', 2) | struct | * ... * }}} */ +// scalastyle:on line.size.limit @ExtendedSQLTest class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { @@ -88,7 +84,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { "sql", "core", "src", "test", "resources", "sql-functions").toFile } - private val resultFile = new File(baseResourcePath, "schema.out") + private val resultFile = new File(baseResourcePath, "sql-expression-schema.md") val ignoreSet = Set( // One of examples shows getting the current timestamp @@ -101,14 +97,17 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { // The example calls methods that return unstable results. "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection") + val MISSING_EXAMPLE = "Example is missing" + /** A single SQL query's SQL and schema. */ - protected case class QueryOutput(sql: String, schema: String) { + protected case class QueryOutput( + number: String = "0", + className: String, + funcName: String, + sql: String = MISSING_EXAMPLE, + schema: String = MISSING_EXAMPLE) { override def toString: String = { - // We are explicitly not using multi-line string due to stripMargin removing "|" in output. - s"-- !query\n" + - sql + "\n" + - s"-- !query schema\n" + - schema + s"| $number | $className | $funcName | $sql | $schema |" } } @@ -118,17 +117,28 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { spark.sessionState.catalog.lookupFunctionInfo(funcId) } - val classFunsMap = funInfos.groupBy(_.getClassName) + val classFunsMap = funInfos.groupBy(_.getClassName).toSeq.sortBy(_._1) val outputBuffer = new ArrayBuffer[String] val outputs = new ArrayBuffer[QueryOutput] + val missingExamples = new ArrayBuffer[String] + + var _curNumber = 0 + def curNumber: String = { + _curNumber += 1 + _curNumber.toString + } classFunsMap.foreach { kv => val className = kv._1 if (!ignoreSet.contains(className)) { - outputBuffer += s"\n\n-- Class name: $className" kv._2.foreach { funInfo => - outputBuffer += s"\n-- Function name: ${funInfo.getName}" val example = funInfo.getExamples + if (example == "") { + val queryOutput = QueryOutput(curNumber, className, funInfo.getName) + outputBuffer += queryOutput.toString + outputs += queryOutput + missingExamples += queryOutput.funcName + } // If expression exists 'Examples' segment, the first element is 'Examples'. Because // this test case is only used to print aliases of expressions for double checking. @@ -140,7 +150,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { case exampleRe(sql, expected) => val df = spark.sql(sql) val schema = df.schema.catalogString - val queryOutput = QueryOutput(sql, schema) + val queryOutput = QueryOutput(curNumber, className, funInfo.getName, sql, schema) outputBuffer += queryOutput.toString outputs += queryOutput case _ => @@ -150,9 +160,15 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { } if (regenerateGoldenFiles) { + val missingExampleStr = missingExamples.mkString(",") val goldenOutput = { - s"-- Automatically generated by ${getClass.getSimpleName}\n" + - s"-- Number of queries: ${outputs.size}" + + "## Summary\n" + + s" - Number of queries: ${outputs.size}\n" + + s" - Number of expressions that missing example: ${missingExamples.size}\n" + + s" - Expressions for all missing examples include $missingExampleStr\n" + + "## Schema of Built-in Functions\n" + + "| No | Class name | Function name or alias | Query example | Output schema |\n" + + "| -- | ---------- | ---------------------- | ------------- | ------------- |\n" + outputBuffer.mkString("\n") } val parent = resultFile.getParentFile @@ -164,20 +180,34 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { val expectedOutputs: Seq[QueryOutput] = { val goldenOutput = fileToString(resultFile) - val classSegments = goldenOutput.split("-- Class name: .*\n") - val functionSegments = classSegments - .flatMap(_.split("-- Function name: .*\n")).map(_.trim).filter(_ != "") - val segments = functionSegments.flatMap(_.split("-- !query.*\n")).filter(_ != "") - - // each query has 2 segments, plus the header - assert(segments.size == outputs.size * 2 + 1, - s"Expected ${outputs.size * 2 + 1} blocks in result file but got ${segments.size}. " + + val lines = goldenOutput.split("\n") + + // The summary has 4 lines, plus the header of schema table has 3 lines + assert(lines.size == outputs.size + 7, + s"Expected ${outputs.size + 7} blocks in result file but got ${lines.size}. " + s"Try regenerate the result files.") + Seq.tabulate(outputs.size) { i => - QueryOutput( - sql = segments(i * 2 + 1).trim, - schema = segments(i * 2 + 2).trim - ) + val segments = lines(i + 7).split('|') + if (segments(2).trim == "org.apache.spark.sql.catalyst.expressions.BitwiseOr") { + // scalastyle:off line.size.limit + // The name of `BitwiseOr` is '|', so the line in golden file looks like below. + // | 40 | org.apache.spark.sql.catalyst.expressions.BitwiseOr | | | SELECT 3 | 5 | struct<(3 | 5):int> | + QueryOutput( + className = segments(2).trim, + funcName = "|", + sql = (segments(5) + "|" + segments(6)).trim, + schema = (segments(7) + "|" + segments(8)).trim) + } else { + // The lines most expressions output to a file are in the following format + // | 1 | org.apache.spark.sql.catalyst.expressions.Abs | abs | SELECT abs(-1) | struct | + // scalastyle:on line.size.limit + QueryOutput( + className = segments(2).trim, + funcName = segments(3).trim, + sql = segments(4).trim, + schema = segments(5).trim) + } } } @@ -187,7 +217,8 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { } outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => - assertResult(expected.sql, s"SQL query did not match for query #$i\n${expected.sql}") { + assertResult(expected.sql, + s"SQL query did not match for query #$i\n${expected.sql}") { output.sql } assertResult(expected.schema, From 55b60bb2461ae7b0631c9cb63ec623e419952b4a Mon Sep 17 00:00:00 2001 From: beliefer Date: Thu, 16 Apr 2020 21:05:12 +0800 Subject: [PATCH 08/22] Optimize code --- .../scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 80de9853692d6..7e8509c2b2665 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -165,7 +165,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { "## Summary\n" + s" - Number of queries: ${outputs.size}\n" + s" - Number of expressions that missing example: ${missingExamples.size}\n" + - s" - Expressions for all missing examples include $missingExampleStr\n" + + s" - Expressions missing examples: $missingExampleStr\n" + "## Schema of Built-in Functions\n" + "| No | Class name | Function name or alias | Query example | Output schema |\n" + "| -- | ---------- | ---------------------- | ------------- | ------------- |\n" + From 57f78fd531a856104197de980a87e6da34854bab Mon Sep 17 00:00:00 2001 From: beliefer Date: Thu, 16 Apr 2020 21:08:19 +0800 Subject: [PATCH 09/22] Optimize code --- .../src/test/resources/sql-functions/sql-expression-schema.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 171b0b1e5b5cd..c6a9ed385340d 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,7 +1,7 @@ ## Summary - Number of queries: 324 - Number of expressions that missing example: 37 - - Expressions for all missing examples include and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,current_date,current_timestamp,now,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch + - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,current_date,current_timestamp,now,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions | No | Class name | Function name or alias | Query example | Output schema | | -- | ---------- | ---------------------- | ------------- | ------------- | From da0adbab67b2805b947520d53b99069ce4b5c425 Mon Sep 17 00:00:00 2001 From: beliefer Date: Mon, 27 Apr 2020 22:58:41 +0800 Subject: [PATCH 10/22] Fix bug and optimize code. --- .../sql-functions/sql-expression-schema.md | 652 +++++++++--------- .../spark/sql/ExpressionsSchemaSuite.scala | 77 +-- 2 files changed, 354 insertions(+), 375 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index c6a9ed385340d..5e597e797528d 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -3,329 +3,329 @@ - Number of expressions that missing example: 37 - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,current_date,current_timestamp,now,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions -| No | Class name | Function name or alias | Query example | Output schema | -| -- | ---------- | ---------------------- | ------------- | ------------- | -| 1 | org.apache.spark.sql.catalyst.expressions.Abs | abs | SELECT abs(-1) | struct | -| 2 | org.apache.spark.sql.catalyst.expressions.Acos | acos | SELECT acos(1) | struct | -| 3 | org.apache.spark.sql.catalyst.expressions.Acosh | acosh | SELECT acosh(1) | struct | -| 4 | org.apache.spark.sql.catalyst.expressions.Add | + | SELECT 1 + 2 | struct<(1 + 2):int> | -| 5 | org.apache.spark.sql.catalyst.expressions.AddMonths | add_months | SELECT add_months('2016-08-31', 1) | struct | -| 6 | org.apache.spark.sql.catalyst.expressions.And | and | Example is missing | Example is missing | -| 7 | org.apache.spark.sql.catalyst.expressions.ArrayAggregate | aggregate | SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct | -| 8 | org.apache.spark.sql.catalyst.expressions.ArrayContains | array_contains | SELECT array_contains(array(1, 2, 3), 2) | struct | -| 9 | org.apache.spark.sql.catalyst.expressions.ArrayDistinct | array_distinct | SELECT array_distinct(array(1, 2, 3, null, 3)) | struct> | -| 10 | org.apache.spark.sql.catalyst.expressions.ArrayExcept | array_except | SELECT array_except(array(1, 2, 3), array(1, 3, 5)) | struct> | -| 11 | org.apache.spark.sql.catalyst.expressions.ArrayExists | exists | SELECT exists(array(1, 2, 3), x -> x % 2 == 0) | struct | -| 12 | org.apache.spark.sql.catalyst.expressions.ArrayFilter | filter | SELECT filter(array(1, 2, 3), x -> x % 2 == 1) | struct> | -| 13 | org.apache.spark.sql.catalyst.expressions.ArrayForAll | forall | SELECT forall(array(1, 2, 3), x -> x % 2 == 0) | struct | -| 14 | org.apache.spark.sql.catalyst.expressions.ArrayIntersect | array_intersect | SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)) | struct> | -| 15 | org.apache.spark.sql.catalyst.expressions.ArrayJoin | array_join | SELECT array_join(array('hello', 'world'), ' ') | struct | -| 16 | org.apache.spark.sql.catalyst.expressions.ArrayMax | array_max | SELECT array_max(array(1, 20, null, 3)) | struct | -| 17 | org.apache.spark.sql.catalyst.expressions.ArrayMin | array_min | SELECT array_min(array(1, 20, null, 3)) | struct | -| 18 | org.apache.spark.sql.catalyst.expressions.ArrayPosition | array_position | SELECT array_position(array(3, 2, 1), 1) | struct | -| 19 | org.apache.spark.sql.catalyst.expressions.ArrayRemove | array_remove | SELECT array_remove(array(1, 2, 3, null, 3), 3) | struct> | -| 20 | org.apache.spark.sql.catalyst.expressions.ArrayRepeat | array_repeat | SELECT array_repeat('123', 2) | struct> | -| 21 | org.apache.spark.sql.catalyst.expressions.ArraySort | array_sort | SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) | struct namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array> | -| 22 | org.apache.spark.sql.catalyst.expressions.ArrayTransform | transform | SELECT transform(array(1, 2, 3), x -> x + 1) | struct> | -| 23 | org.apache.spark.sql.catalyst.expressions.ArrayUnion | array_union | SELECT array_union(array(1, 2, 3), array(1, 3, 5)) | struct> | -| 24 | org.apache.spark.sql.catalyst.expressions.ArraysOverlap | arrays_overlap | SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) | struct | -| 25 | org.apache.spark.sql.catalyst.expressions.ArraysZip | arrays_zip | SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)) | struct>> | -| 26 | org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct | -| 27 | org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct | -| 28 | org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct | -| 29 | org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct | -| 30 | org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct | -| 31 | org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct | -| 32 | org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct | -| 33 | org.apache.spark.sql.catalyst.expressions.BRound | bround | SELECT bround(2.5, 0) | struct | -| 34 | org.apache.spark.sql.catalyst.expressions.Base64 | base64 | SELECT base64('Spark SQL') | struct | -| 35 | org.apache.spark.sql.catalyst.expressions.Bin | bin | SELECT bin(13) | struct | -| 36 | org.apache.spark.sql.catalyst.expressions.BitLength | bit_length | SELECT bit_length('Spark SQL') | struct | -| 37 | org.apache.spark.sql.catalyst.expressions.BitwiseAnd | & | SELECT 3 & 5 | struct<(3 & 5):int> | -| 38 | org.apache.spark.sql.catalyst.expressions.BitwiseCount | bit_count | SELECT bit_count(0) | struct | -| 39 | org.apache.spark.sql.catalyst.expressions.BitwiseNot | ~ | SELECT ~ 0 | struct<~0:int> | -| 40 | org.apache.spark.sql.catalyst.expressions.BitwiseOr | | | SELECT 3 | 5 | struct<(3 | 5):int> | -| 41 | org.apache.spark.sql.catalyst.expressions.BitwiseXor | ^ | SELECT 3 ^ 5 | struct<(3 ^ 5):int> | -| 42 | org.apache.spark.sql.catalyst.expressions.CaseWhen | when | SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END | struct 0) THEN CAST(1 AS DECIMAL(11,1)) WHEN (2 > 0) THEN CAST(2.0 AS DECIMAL(11,1)) ELSE CAST(1.2 AS DECIMAL(11,1)) END:decimal(11,1)> | -| 43 | org.apache.spark.sql.catalyst.expressions.Cast | string | Example is missing | Example is missing | -| 44 | org.apache.spark.sql.catalyst.expressions.Cast | cast | SELECT cast('10' as int) | struct | -| 45 | org.apache.spark.sql.catalyst.expressions.Cast | tinyint | Example is missing | Example is missing | -| 46 | org.apache.spark.sql.catalyst.expressions.Cast | double | Example is missing | Example is missing | -| 47 | org.apache.spark.sql.catalyst.expressions.Cast | smallint | Example is missing | Example is missing | -| 48 | org.apache.spark.sql.catalyst.expressions.Cast | date | Example is missing | Example is missing | -| 49 | org.apache.spark.sql.catalyst.expressions.Cast | decimal | Example is missing | Example is missing | -| 50 | org.apache.spark.sql.catalyst.expressions.Cast | boolean | Example is missing | Example is missing | -| 51 | org.apache.spark.sql.catalyst.expressions.Cast | float | Example is missing | Example is missing | -| 52 | org.apache.spark.sql.catalyst.expressions.Cast | binary | Example is missing | Example is missing | -| 53 | org.apache.spark.sql.catalyst.expressions.Cast | bigint | Example is missing | Example is missing | -| 54 | org.apache.spark.sql.catalyst.expressions.Cast | int | Example is missing | Example is missing | -| 55 | org.apache.spark.sql.catalyst.expressions.Cast | timestamp | Example is missing | Example is missing | -| 56 | org.apache.spark.sql.catalyst.expressions.Cbrt | cbrt | SELECT cbrt(27.0) | struct | -| 57 | org.apache.spark.sql.catalyst.expressions.Ceil | ceil | SELECT ceil(-0.1) | struct | -| 58 | org.apache.spark.sql.catalyst.expressions.Ceil | ceiling | SELECT ceiling(-0.1) | struct | -| 59 | org.apache.spark.sql.catalyst.expressions.Chr | char | SELECT char(65) | struct | -| 60 | org.apache.spark.sql.catalyst.expressions.Chr | chr | SELECT chr(65) | struct | -| 61 | org.apache.spark.sql.catalyst.expressions.Coalesce | coalesce | SELECT coalesce(NULL, 1, NULL) | struct | -| 62 | org.apache.spark.sql.catalyst.expressions.Concat | concat | SELECT concat('Spark', 'SQL') | struct | -| 63 | org.apache.spark.sql.catalyst.expressions.ConcatWs | concat_ws | SELECT concat_ws(' ', 'Spark', 'SQL') | struct | -| 64 | org.apache.spark.sql.catalyst.expressions.Conv | conv | SELECT conv('100', 2, 10) | struct | -| 65 | org.apache.spark.sql.catalyst.expressions.Cos | cos | SELECT cos(0) | struct | -| 66 | org.apache.spark.sql.catalyst.expressions.Cosh | cosh | SELECT cosh(0) | struct | -| 67 | org.apache.spark.sql.catalyst.expressions.Cot | cot | SELECT cot(1) | struct | -| 68 | org.apache.spark.sql.catalyst.expressions.Crc32 | crc32 | SELECT crc32('Spark') | struct | -| 69 | org.apache.spark.sql.catalyst.expressions.CreateArray | array | SELECT array(1, 2, 3) | struct> | -| 70 | org.apache.spark.sql.catalyst.expressions.CreateMap | map | SELECT map(1.0, '2', 3.0, '4') | struct> | -| 71 | org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | named_struct | SELECT named_struct("a", 1, "b", 2, "c", 3) | struct> | -| 72 | org.apache.spark.sql.catalyst.expressions.CsvToStructs | from_csv | SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') | struct> | -| 73 | org.apache.spark.sql.catalyst.expressions.Cube | cube | SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name, age) | struct | -| 74 | org.apache.spark.sql.catalyst.expressions.CumeDist | cume_dist | Example is missing | Example is missing | -| 75 | org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct | -| 76 | org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | Example is missing | Example is missing | -| 77 | org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | Example is missing | Example is missing | -| 78 | org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | now | Example is missing | Example is missing | -| 79 | org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct | -| 80 | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | -| 81 | org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct | -| 82 | org.apache.spark.sql.catalyst.expressions.DatePart | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct | -| 83 | org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct | -| 84 | org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct | -| 85 | org.apache.spark.sql.catalyst.expressions.DayOfMonth | dayofmonth | SELECT dayofmonth('2009-07-30') | struct | -| 86 | org.apache.spark.sql.catalyst.expressions.DayOfWeek | dayofweek | SELECT dayofweek('2009-07-30') | struct | -| 87 | org.apache.spark.sql.catalyst.expressions.DayOfYear | dayofyear | SELECT dayofyear('2016-04-09') | struct | -| 88 | org.apache.spark.sql.catalyst.expressions.Decode | decode | SELECT decode(encode('abc', 'utf-8'), 'utf-8') | struct | -| 89 | org.apache.spark.sql.catalyst.expressions.DenseRank | dense_rank | Example is missing | Example is missing | -| 90 | org.apache.spark.sql.catalyst.expressions.Divide | / | SELECT 3 / 2 | struct<(CAST(3 AS DOUBLE) / CAST(2 AS DOUBLE)):double> | -| 91 | org.apache.spark.sql.catalyst.expressions.ElementAt | element_at | SELECT element_at(array(1, 2, 3), 2) | struct | -| 92 | org.apache.spark.sql.catalyst.expressions.Elt | elt | SELECT elt(1, 'scala', 'java') | struct | -| 93 | org.apache.spark.sql.catalyst.expressions.Encode | encode | SELECT encode('abc', 'utf-8') | struct | -| 94 | org.apache.spark.sql.catalyst.expressions.EqualNullSafe | <=> | SELECT 2 <=> 2 | struct<(2 <=> 2):boolean> | -| 95 | org.apache.spark.sql.catalyst.expressions.EqualTo | = | SELECT 2 = 2 | struct<(2 = 2):boolean> | -| 96 | org.apache.spark.sql.catalyst.expressions.EqualTo | == | SELECT 2 == 2 | struct<(2 = 2):boolean> | -| 97 | org.apache.spark.sql.catalyst.expressions.EulerNumber | e | SELECT e() | struct | -| 98 | org.apache.spark.sql.catalyst.expressions.Exp | exp | SELECT exp(0) | struct | -| 99 | org.apache.spark.sql.catalyst.expressions.Explode | explode | SELECT explode(array(10, 20)) | struct | -| 100 | org.apache.spark.sql.catalyst.expressions.Explode | explode_outer | SELECT explode_outer(array(10, 20)) | struct | -| 101 | org.apache.spark.sql.catalyst.expressions.Expm1 | expm1 | SELECT expm1(0) | struct | -| 102 | org.apache.spark.sql.catalyst.expressions.Factorial | factorial | SELECT factorial(5) | struct | -| 103 | org.apache.spark.sql.catalyst.expressions.FindInSet | find_in_set | SELECT find_in_set('ab','abc,b,ab,c,def') | struct | -| 104 | org.apache.spark.sql.catalyst.expressions.Flatten | flatten | SELECT flatten(array(array(1, 2), array(3, 4))) | struct> | -| 105 | org.apache.spark.sql.catalyst.expressions.Floor | floor | SELECT floor(-0.1) | struct | -| 106 | org.apache.spark.sql.catalyst.expressions.FormatNumber | format_number | SELECT format_number(12332.123456, 4) | struct | -| 107 | org.apache.spark.sql.catalyst.expressions.FormatString | printf | SELECT printf("Hello World %d %s", 100, "days") | struct | -| 108 | org.apache.spark.sql.catalyst.expressions.FormatString | format_string | SELECT format_string("Hello World %d %s", 100, "days") | struct | -| 109 | org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp | from_utc_timestamp | SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct | -| 110 | org.apache.spark.sql.catalyst.expressions.FromUnixTime | from_unixtime | SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') | struct | -| 111 | org.apache.spark.sql.catalyst.expressions.GetJsonObject | get_json_object | SELECT get_json_object('{"a":"b"}', '$.a') | struct | -| 112 | org.apache.spark.sql.catalyst.expressions.GreaterThan | > | SELECT 2 > 1 | struct<(2 > 1):boolean> | -| 113 | org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual | >= | SELECT 2 >= 1 | struct<(2 >= 1):boolean> | -| 114 | org.apache.spark.sql.catalyst.expressions.Greatest | greatest | SELECT greatest(10, 9, 2, 4, 3) | struct | -| 115 | org.apache.spark.sql.catalyst.expressions.Grouping | grouping | SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name) | struct | -| 116 | org.apache.spark.sql.catalyst.expressions.GroupingID | grouping_id | SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) | struct | -| 117 | org.apache.spark.sql.catalyst.expressions.Hex | hex | SELECT hex(17) | struct | -| 118 | org.apache.spark.sql.catalyst.expressions.Hour | hour | SELECT hour('2009-07-30 12:58:59') | struct | -| 119 | org.apache.spark.sql.catalyst.expressions.Hypot | hypot | SELECT hypot(3, 4) | struct | -| 120 | org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> | -| 121 | org.apache.spark.sql.catalyst.expressions.IfNull | ifnull | SELECT ifnull(NULL, array('2')) | struct> | -| 122 | org.apache.spark.sql.catalyst.expressions.In | in | SELECT 1 in(1, 2, 3) | struct<(1 IN (1, 2, 3)):boolean> | -| 123 | org.apache.spark.sql.catalyst.expressions.InitCap | initcap | SELECT initcap('sPark sql') | struct | -| 124 | org.apache.spark.sql.catalyst.expressions.Inline | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct | -| 125 | org.apache.spark.sql.catalyst.expressions.Inline | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct | -| 126 | org.apache.spark.sql.catalyst.expressions.InputFileBlockLength | input_file_block_length | Example is missing | Example is missing | -| 127 | org.apache.spark.sql.catalyst.expressions.InputFileBlockStart | input_file_block_start | Example is missing | Example is missing | -| 128 | org.apache.spark.sql.catalyst.expressions.InputFileName | input_file_name | Example is missing | Example is missing | -| 129 | org.apache.spark.sql.catalyst.expressions.IntegralDivide | div | SELECT 3 div 2 | struct<(3 div 2):bigint> | -| 130 | org.apache.spark.sql.catalyst.expressions.IsNaN | isnan | SELECT isnan(cast('NaN' as double)) | struct | -| 131 | org.apache.spark.sql.catalyst.expressions.IsNotNull | isnotnull | SELECT isnotnull(1) | struct<(1 IS NOT NULL):boolean> | -| 132 | org.apache.spark.sql.catalyst.expressions.IsNull | isnull | SELECT isnull(1) | struct<(1 IS NULL):boolean> | -| 133 | org.apache.spark.sql.catalyst.expressions.JsonObjectKeys | json_object_keys | Select json_object_keys('{}') | struct> | -| 134 | org.apache.spark.sql.catalyst.expressions.JsonToStructs | from_json | SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') | struct> | -| 135 | org.apache.spark.sql.catalyst.expressions.JsonTuple | json_tuple | SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') | struct | -| 136 | org.apache.spark.sql.catalyst.expressions.Lag | lag | Example is missing | Example is missing | -| 137 | org.apache.spark.sql.catalyst.expressions.LastDay | last_day | SELECT last_day('2009-01-12') | struct | -| 138 | org.apache.spark.sql.catalyst.expressions.Lead | lead | Example is missing | Example is missing | -| 139 | org.apache.spark.sql.catalyst.expressions.Least | least | SELECT least(10, 9, 2, 4, 3) | struct | -| 140 | org.apache.spark.sql.catalyst.expressions.Left | left | SELECT left('Spark SQL', 3) | struct | -| 141 | org.apache.spark.sql.catalyst.expressions.Length | character_length | SELECT character_length('Spark SQL ') | struct | -| 142 | org.apache.spark.sql.catalyst.expressions.Length | char_length | SELECT char_length('Spark SQL ') | struct | -| 143 | org.apache.spark.sql.catalyst.expressions.Length | length | SELECT length('Spark SQL ') | struct | -| 144 | org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray | json_array_length | SELECT json_array_length('[1,2,3,4]') | struct | -| 145 | org.apache.spark.sql.catalyst.expressions.LessThan | < | SELECT 1 < 2 | struct<(1 < 2):boolean> | -| 146 | org.apache.spark.sql.catalyst.expressions.LessThanOrEqual | <= | SELECT 2 <= 2 | struct<(2 <= 2):boolean> | -| 147 | org.apache.spark.sql.catalyst.expressions.Levenshtein | levenshtein | SELECT levenshtein('kitten', 'sitting') | struct | -| 148 | org.apache.spark.sql.catalyst.expressions.Like | like | SELECT like('Spark', '_park') | struct | -| 149 | org.apache.spark.sql.catalyst.expressions.Log | ln | SELECT ln(1) | struct | -| 150 | org.apache.spark.sql.catalyst.expressions.Log10 | log10 | SELECT log10(10) | struct | -| 151 | org.apache.spark.sql.catalyst.expressions.Log1p | log1p | SELECT log1p(0) | struct | -| 152 | org.apache.spark.sql.catalyst.expressions.Log2 | log2 | SELECT log2(2) | struct | -| 153 | org.apache.spark.sql.catalyst.expressions.Logarithm | log | SELECT log(10, 100) | struct | -| 154 | org.apache.spark.sql.catalyst.expressions.Lower | lcase | SELECT lcase('SparkSql') | struct | -| 155 | org.apache.spark.sql.catalyst.expressions.Lower | lower | SELECT lower('SparkSql') | struct | -| 156 | org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct | -| 157 | org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct | -| 158 | org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct | -| 159 | org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct> | -| 160 | org.apache.spark.sql.catalyst.expressions.MapEntries | map_entries | SELECT map_entries(map(1, 'a', 2, 'b')) | struct>> | -| 161 | org.apache.spark.sql.catalyst.expressions.MapFilter | map_filter | SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v) | struct namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):map> | -| 162 | org.apache.spark.sql.catalyst.expressions.MapFromArrays | map_from_arrays | SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')) | struct> | -| 163 | org.apache.spark.sql.catalyst.expressions.MapFromEntries | map_from_entries | SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))) | struct> | -| 164 | org.apache.spark.sql.catalyst.expressions.MapKeys | map_keys | SELECT map_keys(map(1, 'a', 2, 'b')) | struct> | -| 165 | org.apache.spark.sql.catalyst.expressions.MapValues | map_values | SELECT map_values(map(1, 'a', 2, 'b')) | struct> | -| 166 | org.apache.spark.sql.catalyst.expressions.MapZipWith | map_zip_with | SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) | struct> | -| 167 | org.apache.spark.sql.catalyst.expressions.Md5 | md5 | SELECT md5('Spark') | struct | -| 168 | org.apache.spark.sql.catalyst.expressions.Minute | minute | SELECT minute('2009-07-30 12:58:59') | struct | -| 169 | org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID | monotonically_increasing_id | Example is missing | Example is missing | -| 170 | org.apache.spark.sql.catalyst.expressions.Month | month | SELECT month('2016-07-30') | struct | -| 171 | org.apache.spark.sql.catalyst.expressions.MonthsBetween | months_between | SELECT months_between('1997-02-28 10:30:00', '1996-10-30') | struct | -| 172 | org.apache.spark.sql.catalyst.expressions.Multiply | * | SELECT 2 * 3 | struct<(2 * 3):int> | -| 173 | org.apache.spark.sql.catalyst.expressions.Murmur3Hash | hash | SELECT hash('Spark', array(123), 2) | struct | -| 174 | org.apache.spark.sql.catalyst.expressions.NTile | ntile | Example is missing | Example is missing | -| 175 | org.apache.spark.sql.catalyst.expressions.NaNvl | nanvl | SELECT nanvl(cast('NaN' as double), 123) | struct | -| 176 | org.apache.spark.sql.catalyst.expressions.NamedStruct | struct | Example is missing | Example is missing | -| 177 | org.apache.spark.sql.catalyst.expressions.NextDay | next_day | SELECT next_day('2015-01-14', 'TU') | struct | -| 178 | org.apache.spark.sql.catalyst.expressions.Not | ! | Example is missing | Example is missing | -| 179 | org.apache.spark.sql.catalyst.expressions.Not | not | Example is missing | Example is missing | -| 180 | org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct | -| 181 | org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct> | -| 182 | org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct | -| 183 | org.apache.spark.sql.catalyst.expressions.OctetLength | octet_length | SELECT octet_length('Spark SQL') | struct | -| 184 | org.apache.spark.sql.catalyst.expressions.Or | or | Example is missing | Example is missing | -| 185 | org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct | -| 186 | org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct | -| 187 | org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct | -| 188 | org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct | -| 189 | org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | Example is missing | Example is missing | -| 190 | org.apache.spark.sql.catalyst.expressions.Pi | pi | SELECT pi() | struct | -| 191 | org.apache.spark.sql.catalyst.expressions.Pmod | pmod | SELECT pmod(10, 3) | struct | -| 192 | org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode_outer | SELECT posexplode_outer(array(10,20)) | struct | -| 193 | org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode | SELECT posexplode(array(10,20)) | struct | -| 194 | org.apache.spark.sql.catalyst.expressions.Pow | pow | SELECT pow(2, 3) | struct | -| 195 | org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct | -| 196 | org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct | -| 197 | org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' | struct<%SystemDrive%UsersJohn RLIKE %SystemDrive%\Users.*:boolean> | -| 198 | org.apache.spark.sql.catalyst.expressions.Rank | rank | Example is missing | Example is missing | -| 199 | org.apache.spark.sql.catalyst.expressions.RegExpExtract | regexp_extract | SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) | struct | -| 200 | org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\\d+)', 'num') | struct | -| 201 | org.apache.spark.sql.catalyst.expressions.Remainder | % | SELECT 2 % 1.8 | struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> | -| 202 | org.apache.spark.sql.catalyst.expressions.Remainder | mod | SELECT 2 % 1.8 | struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> | -| 203 | org.apache.spark.sql.catalyst.expressions.Reverse | reverse | SELECT reverse('Spark SQL') | struct | -| 204 | org.apache.spark.sql.catalyst.expressions.Right | right | SELECT right('Spark SQL', 3) | struct | -| 205 | org.apache.spark.sql.catalyst.expressions.Rint | rint | SELECT rint(12.3456) | struct | -| 206 | org.apache.spark.sql.catalyst.expressions.Rollup | rollup | SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY rollup(name, age) | struct | -| 207 | org.apache.spark.sql.catalyst.expressions.Round | round | SELECT round(2.5, 0) | struct | -| 208 | org.apache.spark.sql.catalyst.expressions.RowNumber | row_number | Example is missing | Example is missing | -| 209 | org.apache.spark.sql.catalyst.expressions.SchemaOfCsv | schema_of_csv | SELECT schema_of_csv('1,abc') | struct | -| 210 | org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json | SELECT schema_of_json('[{"col":0}]') | struct | -| 211 | org.apache.spark.sql.catalyst.expressions.Second | second | SELECT second('2009-07-30 12:58:59') | struct | -| 212 | org.apache.spark.sql.catalyst.expressions.Sentences | sentences | SELECT sentences('Hi there! Good morning.') | struct>> | -| 213 | org.apache.spark.sql.catalyst.expressions.Sequence | sequence | SELECT sequence(1, 5) | struct> | -| 214 | org.apache.spark.sql.catalyst.expressions.Sha1 | sha1 | SELECT sha1('Spark') | struct | -| 215 | org.apache.spark.sql.catalyst.expressions.Sha1 | sha | SELECT sha('Spark') | struct | -| 216 | org.apache.spark.sql.catalyst.expressions.Sha2 | sha2 | SELECT sha2('Spark', 256) | struct | -| 217 | org.apache.spark.sql.catalyst.expressions.ShiftLeft | shiftleft | SELECT shiftleft(2, 1) | struct | -| 218 | org.apache.spark.sql.catalyst.expressions.ShiftRight | shiftright | SELECT shiftright(4, 1) | struct | -| 219 | org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned | shiftrightunsigned | SELECT shiftrightunsigned(4, 1) | struct | -| 220 | org.apache.spark.sql.catalyst.expressions.Signum | signum | SELECT signum(40) | struct | -| 221 | org.apache.spark.sql.catalyst.expressions.Signum | sign | SELECT sign(40) | struct | -| 222 | org.apache.spark.sql.catalyst.expressions.Sin | sin | SELECT sin(0) | struct | -| 223 | org.apache.spark.sql.catalyst.expressions.Sinh | sinh | SELECT sinh(0) | struct | -| 224 | org.apache.spark.sql.catalyst.expressions.Size | size | SELECT size(array('b', 'd', 'c', 'a')) | struct | -| 225 | org.apache.spark.sql.catalyst.expressions.Size | cardinality | SELECT cardinality(array('b', 'd', 'c', 'a')) | struct | -| 226 | org.apache.spark.sql.catalyst.expressions.Slice | slice | SELECT slice(array(1, 2, 3, 4), 2, 2) | struct> | -| 227 | org.apache.spark.sql.catalyst.expressions.SortArray | sort_array | SELECT sort_array(array('b', 'd', null, 'c', 'a'), true) | struct> | -| 228 | org.apache.spark.sql.catalyst.expressions.SoundEx | soundex | SELECT soundex('Miller') | struct | -| 229 | org.apache.spark.sql.catalyst.expressions.SparkPartitionID | spark_partition_id | Example is missing | Example is missing | -| 230 | org.apache.spark.sql.catalyst.expressions.SparkVersion | version | Example is missing | Example is missing | -| 231 | org.apache.spark.sql.catalyst.expressions.Sqrt | sqrt | SELECT sqrt(4) | struct | -| 232 | org.apache.spark.sql.catalyst.expressions.Stack | stack | SELECT stack(2, 1, 2, 3) | struct | -| 233 | org.apache.spark.sql.catalyst.expressions.StringInstr | instr | SELECT instr('SparkSQL', 'SQL') | struct | -| 234 | org.apache.spark.sql.catalyst.expressions.StringLPad | lpad | SELECT lpad('hi', 5, '??') | struct | -| 235 | org.apache.spark.sql.catalyst.expressions.StringLocate | position | SELECT position('bar', 'foobarbar') | struct | -| 236 | org.apache.spark.sql.catalyst.expressions.StringLocate | locate | SELECT locate('bar', 'foobarbar') | struct | -| 237 | org.apache.spark.sql.catalyst.expressions.StringRPad | rpad | SELECT rpad('hi', 5, '??') | struct | -| 238 | org.apache.spark.sql.catalyst.expressions.StringRepeat | repeat | SELECT repeat('123', 2) | struct | -| 239 | org.apache.spark.sql.catalyst.expressions.StringReplace | replace | SELECT replace('ABCabc', 'abc', 'DEF') | struct | -| 240 | org.apache.spark.sql.catalyst.expressions.StringSpace | space | SELECT concat(space(2), '1') | struct | -| 241 | org.apache.spark.sql.catalyst.expressions.StringSplit | split | SELECT split('oneAtwoBthreeC', '[ABC]') | struct> | -| 242 | org.apache.spark.sql.catalyst.expressions.StringToMap | str_to_map | SELECT str_to_map('a:1,b:2,c:3', ',', ':') | struct> | -| 243 | org.apache.spark.sql.catalyst.expressions.StringTranslate | translate | SELECT translate('AaBbCc', 'abc', '123') | struct | -| 244 | org.apache.spark.sql.catalyst.expressions.StringTrim | trim | SELECT trim(' SparkSQL ') | struct | -| 245 | org.apache.spark.sql.catalyst.expressions.StringTrimLeft | ltrim | SELECT ltrim(' SparkSQL ') | struct | -| 246 | org.apache.spark.sql.catalyst.expressions.StringTrimRight | rtrim | SELECT rtrim(' SparkSQL ') | struct | -| 247 | org.apache.spark.sql.catalyst.expressions.StructsToCsv | to_csv | SELECT to_csv(named_struct('a', 1, 'b', 2)) | struct | -| 248 | org.apache.spark.sql.catalyst.expressions.StructsToJson | to_json | SELECT to_json(named_struct('a', 1, 'b', 2)) | struct | -| 249 | org.apache.spark.sql.catalyst.expressions.Substring | substr | SELECT substr('Spark SQL', 5) | struct | -| 250 | org.apache.spark.sql.catalyst.expressions.Substring | substring | SELECT substring('Spark SQL', 5) | struct | -| 251 | org.apache.spark.sql.catalyst.expressions.SubstringIndex | substring_index | SELECT substring_index('www.apache.org', '.', 2) | struct | -| 252 | org.apache.spark.sql.catalyst.expressions.Subtract | - | SELECT 2 - 1 | struct<(2 - 1):int> | -| 253 | org.apache.spark.sql.catalyst.expressions.Tan | tan | SELECT tan(0) | struct | -| 254 | org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct | -| 255 | org.apache.spark.sql.catalyst.expressions.TimeWindow | window | Example is missing | Example is missing | -| 256 | org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct | -| 257 | org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT radians(180) | struct | -| 258 | org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp | to_utc_timestamp | SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct | -| 259 | org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp | to_unix_timestamp | SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') | struct | -| 260 | org.apache.spark.sql.catalyst.expressions.TransformKeys | transform_keys | SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) | struct> | -| 261 | org.apache.spark.sql.catalyst.expressions.TransformValues | transform_values | SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1) | struct> | -| 262 | org.apache.spark.sql.catalyst.expressions.TruncDate | trunc | SELECT trunc('2019-08-04', 'week') | struct | -| 263 | org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct | -| 264 | org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct | -| 265 | org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct | -| 266 | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<(- 1):int> | -| 267 | org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | Example is missing | Example is missing | -| 268 | org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct | -| 269 | org.apache.spark.sql.catalyst.expressions.Upper | ucase | SELECT ucase('SparkSql') | struct | -| 270 | org.apache.spark.sql.catalyst.expressions.Upper | upper | SELECT upper('SparkSql') | struct | -| 271 | org.apache.spark.sql.catalyst.expressions.WeekDay | weekday | SELECT weekday('2009-07-30') | struct | -| 272 | org.apache.spark.sql.catalyst.expressions.WeekOfYear | weekofyear | SELECT weekofyear('2008-02-20') | struct | -| 273 | org.apache.spark.sql.catalyst.expressions.XxHash64 | xxhash64 | SELECT xxhash64('Spark', array(123), 2) | struct | -| 274 | org.apache.spark.sql.catalyst.expressions.Year | year | SELECT year('2016-07-30') | struct | -| 275 | org.apache.spark.sql.catalyst.expressions.ZipWith | zip_with | SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) | struct>> | -| 276 | org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | approx_percentile | SELECT approx_percentile(10.0, array(0.5, 0.4, 0.1), 100) | struct> | -| 277 | org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | percentile_approx | SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100) | struct> | -| 278 | org.apache.spark.sql.catalyst.expressions.aggregate.Average | avg | SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | -| 279 | org.apache.spark.sql.catalyst.expressions.aggregate.Average | mean | SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | -| 280 | org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg | bit_and | SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col) | struct | -| 281 | org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg | bit_or | SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col) | struct | -| 282 | org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg | bit_xor | SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col) | struct | -| 283 | org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd | every | SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col) | struct | -| 284 | org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd | bool_and | SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) | struct | -| 285 | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | bool_or | SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | -| 286 | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | some | SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | -| 287 | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | any | SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | -| 288 | org.apache.spark.sql.catalyst.expressions.aggregate.CollectList | collect_list | SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | -| 289 | org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet | collect_set | SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | -| 290 | org.apache.spark.sql.catalyst.expressions.aggregate.Corr | corr | SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) | struct | -| 291 | org.apache.spark.sql.catalyst.expressions.aggregate.Count | count | SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) | struct | -| 292 | org.apache.spark.sql.catalyst.expressions.aggregate.CountIf | count_if | SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) | struct | -| 293 | org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg | count_min_sketch | Example is missing | Example is missing | -| 294 | org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation | covar_pop | SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct | -| 295 | org.apache.spark.sql.catalyst.expressions.aggregate.CovSample | covar_samp | SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct | -| 296 | org.apache.spark.sql.catalyst.expressions.aggregate.First | first_value | SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | -| 297 | org.apache.spark.sql.catalyst.expressions.aggregate.First | first | SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | -| 298 | org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus | approx_count_distinct | SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1) | struct | -| 299 | org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis | kurtosis | SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct | -| 300 | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last_value | SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | -| 301 | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last | SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | -| 302 | org.apache.spark.sql.catalyst.expressions.aggregate.Max | max | SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) | struct | -| 303 | org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct | -| 304 | org.apache.spark.sql.catalyst.expressions.aggregate.Min | min | SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) | struct | -| 305 | org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct | -| 306 | org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile | SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) | struct | -| 307 | org.apache.spark.sql.catalyst.expressions.aggregate.Skewness | skewness | SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct | -| 308 | org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop | stddev_pop | SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | -| 309 | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev_samp | SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | -| 310 | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev | SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | -| 311 | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | std | SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | -| 312 | org.apache.spark.sql.catalyst.expressions.aggregate.Sum | sum | SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct | -| 313 | org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop | var_pop | SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | -| 314 | org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | var_samp | SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | -| 315 | org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | variance | SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | -| 316 | org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean | xpath_boolean | SELECT xpath_boolean('1','a/b') | struct1, a/b):boolean> | -| 317 | org.apache.spark.sql.catalyst.expressions.xml.XPathDouble | xpath_number | SELECT xpath_number('12', 'sum(a/b)') | struct12, sum(a/b)):double> | -| 318 | org.apache.spark.sql.catalyst.expressions.xml.XPathDouble | xpath_double | SELECT xpath_double('12', 'sum(a/b)') | struct12, sum(a/b)):double> | -| 319 | org.apache.spark.sql.catalyst.expressions.xml.XPathFloat | xpath_float | SELECT xpath_float('12', 'sum(a/b)') | struct12, sum(a/b)):float> | -| 320 | org.apache.spark.sql.catalyst.expressions.xml.XPathInt | xpath_int | SELECT xpath_int('12', 'sum(a/b)') | struct12, sum(a/b)):int> | -| 321 | org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('b1b2b3c1c2','a/b/text()') | structb1b2b3c1c2, a/b/text()):array> | -| 322 | org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('12', 'sum(a/b)') | struct12, sum(a/b)):bigint> | -| 323 | org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('12', 'sum(a/b)') | struct12, sum(a/b)):smallint> | -| 324 | org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('bcc','a/c') | structbcc, a/c):string> | \ No newline at end of file +| Class name | Function name or alias | Query example | Output schema | +| ---------- | ---------------------- | ------------- | ------------- | + | org.apache.spark.sql.catalyst.expressions.Abs | abs | SELECT abs(-1) | struct | +| org.apache.spark.sql.catalyst.expressions.Acos | acos | SELECT acos(1) | struct | +| org.apache.spark.sql.catalyst.expressions.Acosh | acosh | SELECT acosh(1) | struct | +| org.apache.spark.sql.catalyst.expressions.Add | + | SELECT 1 + 2 | struct<(1 + 2):int> | +| org.apache.spark.sql.catalyst.expressions.AddMonths | add_months | SELECT add_months('2016-08-31', 1) | struct | +| org.apache.spark.sql.catalyst.expressions.And | and | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.ArrayAggregate | aggregate | SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct | +| org.apache.spark.sql.catalyst.expressions.ArrayContains | array_contains | SELECT array_contains(array(1, 2, 3), 2) | struct | +| org.apache.spark.sql.catalyst.expressions.ArrayDistinct | array_distinct | SELECT array_distinct(array(1, 2, 3, null, 3)) | struct> | +| org.apache.spark.sql.catalyst.expressions.ArrayExcept | array_except | SELECT array_except(array(1, 2, 3), array(1, 3, 5)) | struct> | +| org.apache.spark.sql.catalyst.expressions.ArrayExists | exists | SELECT exists(array(1, 2, 3), x -> x % 2 == 0) | struct | +| org.apache.spark.sql.catalyst.expressions.ArrayFilter | filter | SELECT filter(array(1, 2, 3), x -> x % 2 == 1) | struct> | +| org.apache.spark.sql.catalyst.expressions.ArrayForAll | forall | SELECT forall(array(1, 2, 3), x -> x % 2 == 0) | struct | +| org.apache.spark.sql.catalyst.expressions.ArrayIntersect | array_intersect | SELECT array_intersect(array(1, 2, 3), array(1, 3, 5)) | struct> | +| org.apache.spark.sql.catalyst.expressions.ArrayJoin | array_join | SELECT array_join(array('hello', 'world'), ' ') | struct | +| org.apache.spark.sql.catalyst.expressions.ArrayMax | array_max | SELECT array_max(array(1, 20, null, 3)) | struct | +| org.apache.spark.sql.catalyst.expressions.ArrayMin | array_min | SELECT array_min(array(1, 20, null, 3)) | struct | +| org.apache.spark.sql.catalyst.expressions.ArrayPosition | array_position | SELECT array_position(array(3, 2, 1), 1) | struct | +| org.apache.spark.sql.catalyst.expressions.ArrayRemove | array_remove | SELECT array_remove(array(1, 2, 3, null, 3), 3) | struct> | +| org.apache.spark.sql.catalyst.expressions.ArrayRepeat | array_repeat | SELECT array_repeat('123', 2) | struct> | +| org.apache.spark.sql.catalyst.expressions.ArraySort | array_sort | SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end) | struct namedlambdavariable()) THEN 1 ELSE 0 END, namedlambdavariable(), namedlambdavariable())):array> | +| org.apache.spark.sql.catalyst.expressions.ArrayTransform | transform | SELECT transform(array(1, 2, 3), x -> x + 1) | struct> | +| org.apache.spark.sql.catalyst.expressions.ArrayUnion | array_union | SELECT array_union(array(1, 2, 3), array(1, 3, 5)) | struct> | +| org.apache.spark.sql.catalyst.expressions.ArraysOverlap | arrays_overlap | SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5)) | struct | +| org.apache.spark.sql.catalyst.expressions.ArraysZip | arrays_zip | SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4)) | struct>> | +| org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct | +| org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct | +| org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct | +| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct | +| org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct | +| org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct | +| org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct | +| org.apache.spark.sql.catalyst.expressions.BRound | bround | SELECT bround(2.5, 0) | struct | +| org.apache.spark.sql.catalyst.expressions.Base64 | base64 | SELECT base64('Spark SQL') | struct | +| org.apache.spark.sql.catalyst.expressions.Bin | bin | SELECT bin(13) | struct | +| org.apache.spark.sql.catalyst.expressions.BitLength | bit_length | SELECT bit_length('Spark SQL') | struct | +| org.apache.spark.sql.catalyst.expressions.BitwiseAnd | & | SELECT 3 & 5 | struct<(3 & 5):int> | +| org.apache.spark.sql.catalyst.expressions.BitwiseCount | bit_count | SELECT bit_count(0) | struct | +| org.apache.spark.sql.catalyst.expressions.BitwiseNot | ~ | SELECT ~ 0 | struct<~0:int> | +| org.apache.spark.sql.catalyst.expressions.BitwiseOr | | | SELECT 3 | 5 | struct<(3 | 5):int> | +| org.apache.spark.sql.catalyst.expressions.BitwiseXor | ^ | SELECT 3 ^ 5 | struct<(3 ^ 5):int> | +| org.apache.spark.sql.catalyst.expressions.CaseWhen | when | SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END | struct 0) THEN CAST(1 AS DECIMAL(11,1)) WHEN (2 > 0) THEN CAST(2.0 AS DECIMAL(11,1)) ELSE CAST(1.2 AS DECIMAL(11,1)) END:decimal(11,1)> | +| org.apache.spark.sql.catalyst.expressions.Cast | string | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cast | cast | SELECT cast('10' as int) | struct | +| org.apache.spark.sql.catalyst.expressions.Cast | tinyint | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cast | double | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cast | smallint | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cast | date | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cast | decimal | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cast | boolean | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cast | float | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cast | binary | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cast | bigint | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cast | int | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cast | timestamp | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Cbrt | cbrt | SELECT cbrt(27.0) | struct | +| org.apache.spark.sql.catalyst.expressions.Ceil | ceil | SELECT ceil(-0.1) | struct | +| org.apache.spark.sql.catalyst.expressions.Ceil | ceiling | SELECT ceiling(-0.1) | struct | +| org.apache.spark.sql.catalyst.expressions.Chr | char | SELECT char(65) | struct | +| org.apache.spark.sql.catalyst.expressions.Chr | chr | SELECT chr(65) | struct | +| org.apache.spark.sql.catalyst.expressions.Coalesce | coalesce | SELECT coalesce(NULL, 1, NULL) | struct | +| org.apache.spark.sql.catalyst.expressions.Concat | concat | SELECT concat('Spark', 'SQL') | struct | +| org.apache.spark.sql.catalyst.expressions.ConcatWs | concat_ws | SELECT concat_ws(' ', 'Spark', 'SQL') | struct | +| org.apache.spark.sql.catalyst.expressions.Conv | conv | SELECT conv('100', 2, 10) | struct | +| org.apache.spark.sql.catalyst.expressions.Cos | cos | SELECT cos(0) | struct | +| org.apache.spark.sql.catalyst.expressions.Cosh | cosh | SELECT cosh(0) | struct | +| org.apache.spark.sql.catalyst.expressions.Cot | cot | SELECT cot(1) | struct | +| org.apache.spark.sql.catalyst.expressions.Crc32 | crc32 | SELECT crc32('Spark') | struct | +| org.apache.spark.sql.catalyst.expressions.CreateArray | array | SELECT array(1, 2, 3) | struct> | +| org.apache.spark.sql.catalyst.expressions.CreateMap | map | SELECT map(1.0, '2', 3.0, '4') | struct> | +| org.apache.spark.sql.catalyst.expressions.CreateNamedStruct | named_struct | SELECT named_struct("a", 1, "b", 2, "c", 3) | struct> | +| org.apache.spark.sql.catalyst.expressions.CsvToStructs | from_csv | SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') | struct> | +| org.apache.spark.sql.catalyst.expressions.Cube | cube | SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name, age) | struct | +| org.apache.spark.sql.catalyst.expressions.CumeDist | cume_dist | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct | +| org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | now | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct | +| org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | +| org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct | +| org.apache.spark.sql.catalyst.expressions.DatePart | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct | +| org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct | +| org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct | +| org.apache.spark.sql.catalyst.expressions.DayOfMonth | dayofmonth | SELECT dayofmonth('2009-07-30') | struct | +| org.apache.spark.sql.catalyst.expressions.DayOfWeek | dayofweek | SELECT dayofweek('2009-07-30') | struct | +| org.apache.spark.sql.catalyst.expressions.DayOfYear | dayofyear | SELECT dayofyear('2016-04-09') | struct | +| org.apache.spark.sql.catalyst.expressions.Decode | decode | SELECT decode(encode('abc', 'utf-8'), 'utf-8') | struct | +| org.apache.spark.sql.catalyst.expressions.DenseRank | dense_rank | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Divide | / | SELECT 3 / 2 | struct<(CAST(3 AS DOUBLE) / CAST(2 AS DOUBLE)):double> | +| org.apache.spark.sql.catalyst.expressions.ElementAt | element_at | SELECT element_at(array(1, 2, 3), 2) | struct | +| org.apache.spark.sql.catalyst.expressions.Elt | elt | SELECT elt(1, 'scala', 'java') | struct | +| org.apache.spark.sql.catalyst.expressions.Encode | encode | SELECT encode('abc', 'utf-8') | struct | +| org.apache.spark.sql.catalyst.expressions.EqualNullSafe | <=> | SELECT 2 <=> 2 | struct<(2 <=> 2):boolean> | +| org.apache.spark.sql.catalyst.expressions.EqualTo | = | SELECT 2 = 2 | struct<(2 = 2):boolean> | +| org.apache.spark.sql.catalyst.expressions.EqualTo | == | SELECT 2 == 2 | struct<(2 = 2):boolean> | +| org.apache.spark.sql.catalyst.expressions.EulerNumber | e | SELECT e() | struct | +| org.apache.spark.sql.catalyst.expressions.Exp | exp | SELECT exp(0) | struct | +| org.apache.spark.sql.catalyst.expressions.Explode | explode | SELECT explode(array(10, 20)) | struct | +| org.apache.spark.sql.catalyst.expressions.Explode | explode_outer | SELECT explode_outer(array(10, 20)) | struct | +| org.apache.spark.sql.catalyst.expressions.Expm1 | expm1 | SELECT expm1(0) | struct | +| org.apache.spark.sql.catalyst.expressions.Factorial | factorial | SELECT factorial(5) | struct | +| org.apache.spark.sql.catalyst.expressions.FindInSet | find_in_set | SELECT find_in_set('ab','abc,b,ab,c,def') | struct | +| org.apache.spark.sql.catalyst.expressions.Flatten | flatten | SELECT flatten(array(array(1, 2), array(3, 4))) | struct> | +| org.apache.spark.sql.catalyst.expressions.Floor | floor | SELECT floor(-0.1) | struct | +| org.apache.spark.sql.catalyst.expressions.FormatNumber | format_number | SELECT format_number(12332.123456, 4) | struct | +| org.apache.spark.sql.catalyst.expressions.FormatString | printf | SELECT printf("Hello World %d %s", 100, "days") | struct | +| org.apache.spark.sql.catalyst.expressions.FormatString | format_string | SELECT format_string("Hello World %d %s", 100, "days") | struct | +| org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp | from_utc_timestamp | SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct | +| org.apache.spark.sql.catalyst.expressions.FromUnixTime | from_unixtime | SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss') | struct | +| org.apache.spark.sql.catalyst.expressions.GetJsonObject | get_json_object | SELECT get_json_object('{"a":"b"}', '$.a') | struct | +| org.apache.spark.sql.catalyst.expressions.GreaterThan | > | SELECT 2 > 1 | struct<(2 > 1):boolean> | +| org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual | >= | SELECT 2 >= 1 | struct<(2 >= 1):boolean> | +| org.apache.spark.sql.catalyst.expressions.Greatest | greatest | SELECT greatest(10, 9, 2, 4, 3) | struct | +| org.apache.spark.sql.catalyst.expressions.Grouping | grouping | SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name) | struct | +| org.apache.spark.sql.catalyst.expressions.GroupingID | grouping_id | SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height) | struct | +| org.apache.spark.sql.catalyst.expressions.Hex | hex | SELECT hex(17) | struct | +| org.apache.spark.sql.catalyst.expressions.Hour | hour | SELECT hour('2009-07-30 12:58:59') | struct | +| org.apache.spark.sql.catalyst.expressions.Hypot | hypot | SELECT hypot(3, 4) | struct | +| org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> | +| org.apache.spark.sql.catalyst.expressions.IfNull | ifnull | SELECT ifnull(NULL, array('2')) | struct> | +| org.apache.spark.sql.catalyst.expressions.In | in | SELECT 1 in(1, 2, 3) | struct<(1 IN (1, 2, 3)):boolean> | +| org.apache.spark.sql.catalyst.expressions.InitCap | initcap | SELECT initcap('sPark sql') | struct | +| org.apache.spark.sql.catalyst.expressions.Inline | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct | +| org.apache.spark.sql.catalyst.expressions.Inline | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct | +| org.apache.spark.sql.catalyst.expressions.InputFileBlockLength | input_file_block_length | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.InputFileBlockStart | input_file_block_start | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.InputFileName | input_file_name | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.IntegralDivide | div | SELECT 3 div 2 | struct<(3 div 2):bigint> | +| org.apache.spark.sql.catalyst.expressions.IsNaN | isnan | SELECT isnan(cast('NaN' as double)) | struct | +| org.apache.spark.sql.catalyst.expressions.IsNotNull | isnotnull | SELECT isnotnull(1) | struct<(1 IS NOT NULL):boolean> | +| org.apache.spark.sql.catalyst.expressions.IsNull | isnull | SELECT isnull(1) | struct<(1 IS NULL):boolean> | +| org.apache.spark.sql.catalyst.expressions.JsonObjectKeys | json_object_keys | Select json_object_keys('{}') | struct> | +| org.apache.spark.sql.catalyst.expressions.JsonToStructs | from_json | SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') | struct> | +| org.apache.spark.sql.catalyst.expressions.JsonTuple | json_tuple | SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') | struct | +| org.apache.spark.sql.catalyst.expressions.Lag | lag | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.LastDay | last_day | SELECT last_day('2009-01-12') | struct | +| org.apache.spark.sql.catalyst.expressions.Lead | lead | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Least | least | SELECT least(10, 9, 2, 4, 3) | struct | +| org.apache.spark.sql.catalyst.expressions.Left | left | SELECT left('Spark SQL', 3) | struct | +| org.apache.spark.sql.catalyst.expressions.Length | character_length | SELECT character_length('Spark SQL ') | struct | +| org.apache.spark.sql.catalyst.expressions.Length | char_length | SELECT char_length('Spark SQL ') | struct | +| org.apache.spark.sql.catalyst.expressions.Length | length | SELECT length('Spark SQL ') | struct | +| org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray | json_array_length | SELECT json_array_length('[1,2,3,4]') | struct | +| org.apache.spark.sql.catalyst.expressions.LessThan | < | SELECT 1 < 2 | struct<(1 < 2):boolean> | +| org.apache.spark.sql.catalyst.expressions.LessThanOrEqual | <= | SELECT 2 <= 2 | struct<(2 <= 2):boolean> | +| org.apache.spark.sql.catalyst.expressions.Levenshtein | levenshtein | SELECT levenshtein('kitten', 'sitting') | struct | +| org.apache.spark.sql.catalyst.expressions.Like | like | SELECT like('Spark', '_park') | struct | +| org.apache.spark.sql.catalyst.expressions.Log | ln | SELECT ln(1) | struct | +| org.apache.spark.sql.catalyst.expressions.Log10 | log10 | SELECT log10(10) | struct | +| org.apache.spark.sql.catalyst.expressions.Log1p | log1p | SELECT log1p(0) | struct | +| org.apache.spark.sql.catalyst.expressions.Log2 | log2 | SELECT log2(2) | struct | +| org.apache.spark.sql.catalyst.expressions.Logarithm | log | SELECT log(10, 100) | struct | +| org.apache.spark.sql.catalyst.expressions.Lower | lcase | SELECT lcase('SparkSql') | struct | +| org.apache.spark.sql.catalyst.expressions.Lower | lower | SELECT lower('SparkSql') | struct | +| org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct | +| org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct | +| org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct | +| org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct> | +| org.apache.spark.sql.catalyst.expressions.MapEntries | map_entries | SELECT map_entries(map(1, 'a', 2, 'b')) | struct>> | +| org.apache.spark.sql.catalyst.expressions.MapFilter | map_filter | SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v) | struct namedlambdavariable()), namedlambdavariable(), namedlambdavariable())):map> | +| org.apache.spark.sql.catalyst.expressions.MapFromArrays | map_from_arrays | SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')) | struct> | +| org.apache.spark.sql.catalyst.expressions.MapFromEntries | map_from_entries | SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b'))) | struct> | +| org.apache.spark.sql.catalyst.expressions.MapKeys | map_keys | SELECT map_keys(map(1, 'a', 2, 'b')) | struct> | +| org.apache.spark.sql.catalyst.expressions.MapValues | map_values | SELECT map_values(map(1, 'a', 2, 'b')) | struct> | +| org.apache.spark.sql.catalyst.expressions.MapZipWith | map_zip_with | SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) | struct> | +| org.apache.spark.sql.catalyst.expressions.Md5 | md5 | SELECT md5('Spark') | struct | +| org.apache.spark.sql.catalyst.expressions.Minute | minute | SELECT minute('2009-07-30 12:58:59') | struct | +| org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID | monotonically_increasing_id | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Month | month | SELECT month('2016-07-30') | struct | +| org.apache.spark.sql.catalyst.expressions.MonthsBetween | months_between | SELECT months_between('1997-02-28 10:30:00', '1996-10-30') | struct | +| org.apache.spark.sql.catalyst.expressions.Multiply | * | SELECT 2 * 3 | struct<(2 * 3):int> | +| org.apache.spark.sql.catalyst.expressions.Murmur3Hash | hash | SELECT hash('Spark', array(123), 2) | struct | +| org.apache.spark.sql.catalyst.expressions.NTile | ntile | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.NaNvl | nanvl | SELECT nanvl(cast('NaN' as double), 123) | struct | +| org.apache.spark.sql.catalyst.expressions.NamedStruct | struct | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.NextDay | next_day | SELECT next_day('2015-01-14', 'TU') | struct | +| org.apache.spark.sql.catalyst.expressions.Not | ! | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Not | not | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct | +| org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct> | +| org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct | +| org.apache.spark.sql.catalyst.expressions.OctetLength | octet_length | SELECT octet_length('Spark SQL') | struct | +| org.apache.spark.sql.catalyst.expressions.Or | or | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct | +| org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct | +| org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct | +| org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct | +| org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Pi | pi | SELECT pi() | struct | +| org.apache.spark.sql.catalyst.expressions.Pmod | pmod | SELECT pmod(10, 3) | struct | +| org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode_outer | SELECT posexplode_outer(array(10,20)) | struct | +| org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode | SELECT posexplode(array(10,20)) | struct | +| org.apache.spark.sql.catalyst.expressions.Pow | pow | SELECT pow(2, 3) | struct | +| org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct | +| org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct | +| org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' | struct<%SystemDrive%UsersJohn RLIKE %SystemDrive%\Users.*:boolean> | +| org.apache.spark.sql.catalyst.expressions.Rank | rank | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.RegExpExtract | regexp_extract | SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) | struct | +| org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\\d+)', 'num') | struct | +| org.apache.spark.sql.catalyst.expressions.Remainder | % | SELECT 2 % 1.8 | struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> | +| org.apache.spark.sql.catalyst.expressions.Remainder | mod | SELECT 2 % 1.8 | struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> | +| org.apache.spark.sql.catalyst.expressions.Reverse | reverse | SELECT reverse('Spark SQL') | struct | +| org.apache.spark.sql.catalyst.expressions.Right | right | SELECT right('Spark SQL', 3) | struct | +| org.apache.spark.sql.catalyst.expressions.Rint | rint | SELECT rint(12.3456) | struct | +| org.apache.spark.sql.catalyst.expressions.Rollup | rollup | SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY rollup(name, age) | struct | +| org.apache.spark.sql.catalyst.expressions.Round | round | SELECT round(2.5, 0) | struct | +| org.apache.spark.sql.catalyst.expressions.RowNumber | row_number | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.SchemaOfCsv | schema_of_csv | SELECT schema_of_csv('1,abc') | struct | +| org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json | SELECT schema_of_json('[{"col":0}]') | struct | +| org.apache.spark.sql.catalyst.expressions.Second | second | SELECT second('2009-07-30 12:58:59') | struct | +| org.apache.spark.sql.catalyst.expressions.Sentences | sentences | SELECT sentences('Hi there! Good morning.') | struct>> | +| org.apache.spark.sql.catalyst.expressions.Sequence | sequence | SELECT sequence(1, 5) | struct> | +| org.apache.spark.sql.catalyst.expressions.Sha1 | sha1 | SELECT sha1('Spark') | struct | +| org.apache.spark.sql.catalyst.expressions.Sha1 | sha | SELECT sha('Spark') | struct | +| org.apache.spark.sql.catalyst.expressions.Sha2 | sha2 | SELECT sha2('Spark', 256) | struct | +| org.apache.spark.sql.catalyst.expressions.ShiftLeft | shiftleft | SELECT shiftleft(2, 1) | struct | +| org.apache.spark.sql.catalyst.expressions.ShiftRight | shiftright | SELECT shiftright(4, 1) | struct | +| org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned | shiftrightunsigned | SELECT shiftrightunsigned(4, 1) | struct | +| org.apache.spark.sql.catalyst.expressions.Signum | signum | SELECT signum(40) | struct | +| org.apache.spark.sql.catalyst.expressions.Signum | sign | SELECT sign(40) | struct | +| org.apache.spark.sql.catalyst.expressions.Sin | sin | SELECT sin(0) | struct | +| org.apache.spark.sql.catalyst.expressions.Sinh | sinh | SELECT sinh(0) | struct | +| org.apache.spark.sql.catalyst.expressions.Size | size | SELECT size(array('b', 'd', 'c', 'a')) | struct | +| org.apache.spark.sql.catalyst.expressions.Size | cardinality | SELECT cardinality(array('b', 'd', 'c', 'a')) | struct | +| org.apache.spark.sql.catalyst.expressions.Slice | slice | SELECT slice(array(1, 2, 3, 4), 2, 2) | struct> | +| org.apache.spark.sql.catalyst.expressions.SortArray | sort_array | SELECT sort_array(array('b', 'd', null, 'c', 'a'), true) | struct> | +| org.apache.spark.sql.catalyst.expressions.SoundEx | soundex | SELECT soundex('Miller') | struct | +| org.apache.spark.sql.catalyst.expressions.SparkPartitionID | spark_partition_id | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.SparkVersion | version | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Sqrt | sqrt | SELECT sqrt(4) | struct | +| org.apache.spark.sql.catalyst.expressions.Stack | stack | SELECT stack(2, 1, 2, 3) | struct | +| org.apache.spark.sql.catalyst.expressions.StringInstr | instr | SELECT instr('SparkSQL', 'SQL') | struct | +| org.apache.spark.sql.catalyst.expressions.StringLPad | lpad | SELECT lpad('hi', 5, '??') | struct | +| org.apache.spark.sql.catalyst.expressions.StringLocate | position | SELECT position('bar', 'foobarbar') | struct | +| org.apache.spark.sql.catalyst.expressions.StringLocate | locate | SELECT locate('bar', 'foobarbar') | struct | +| org.apache.spark.sql.catalyst.expressions.StringRPad | rpad | SELECT rpad('hi', 5, '??') | struct | +| org.apache.spark.sql.catalyst.expressions.StringRepeat | repeat | SELECT repeat('123', 2) | struct | +| org.apache.spark.sql.catalyst.expressions.StringReplace | replace | SELECT replace('ABCabc', 'abc', 'DEF') | struct | +| org.apache.spark.sql.catalyst.expressions.StringSpace | space | SELECT concat(space(2), '1') | struct | +| org.apache.spark.sql.catalyst.expressions.StringSplit | split | SELECT split('oneAtwoBthreeC', '[ABC]') | struct> | +| org.apache.spark.sql.catalyst.expressions.StringToMap | str_to_map | SELECT str_to_map('a:1,b:2,c:3', ',', ':') | struct> | +| org.apache.spark.sql.catalyst.expressions.StringTranslate | translate | SELECT translate('AaBbCc', 'abc', '123') | struct | +| org.apache.spark.sql.catalyst.expressions.StringTrim | trim | SELECT trim(' SparkSQL ') | struct | +| org.apache.spark.sql.catalyst.expressions.StringTrimLeft | ltrim | SELECT ltrim(' SparkSQL ') | struct | +| org.apache.spark.sql.catalyst.expressions.StringTrimRight | rtrim | SELECT rtrim(' SparkSQL ') | struct | +| org.apache.spark.sql.catalyst.expressions.StructsToCsv | to_csv | SELECT to_csv(named_struct('a', 1, 'b', 2)) | struct | +| org.apache.spark.sql.catalyst.expressions.StructsToJson | to_json | SELECT to_json(named_struct('a', 1, 'b', 2)) | struct | +| org.apache.spark.sql.catalyst.expressions.Substring | substr | SELECT substr('Spark SQL', 5) | struct | +| org.apache.spark.sql.catalyst.expressions.Substring | substring | SELECT substring('Spark SQL', 5) | struct | +| org.apache.spark.sql.catalyst.expressions.SubstringIndex | substring_index | SELECT substring_index('www.apache.org', '.', 2) | struct | +| org.apache.spark.sql.catalyst.expressions.Subtract | - | SELECT 2 - 1 | struct<(2 - 1):int> | +| org.apache.spark.sql.catalyst.expressions.Tan | tan | SELECT tan(0) | struct | +| org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct | +| org.apache.spark.sql.catalyst.expressions.TimeWindow | window | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct | +| org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT radians(180) | struct | +| org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp | to_utc_timestamp | SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct | +| org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp | to_unix_timestamp | SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') | struct | +| org.apache.spark.sql.catalyst.expressions.TransformKeys | transform_keys | SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) | struct> | +| org.apache.spark.sql.catalyst.expressions.TransformValues | transform_values | SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1) | struct> | +| org.apache.spark.sql.catalyst.expressions.TruncDate | trunc | SELECT trunc('2019-08-04', 'week') | struct | +| org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct | +| org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct | +| org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct | +| org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<(- 1):int> | +| org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct | +| org.apache.spark.sql.catalyst.expressions.Upper | ucase | SELECT ucase('SparkSql') | struct | +| org.apache.spark.sql.catalyst.expressions.Upper | upper | SELECT upper('SparkSql') | struct | +| org.apache.spark.sql.catalyst.expressions.WeekDay | weekday | SELECT weekday('2009-07-30') | struct | +| org.apache.spark.sql.catalyst.expressions.WeekOfYear | weekofyear | SELECT weekofyear('2008-02-20') | struct | +| org.apache.spark.sql.catalyst.expressions.XxHash64 | xxhash64 | SELECT xxhash64('Spark', array(123), 2) | struct | +| org.apache.spark.sql.catalyst.expressions.Year | year | SELECT year('2016-07-30') | struct | +| org.apache.spark.sql.catalyst.expressions.ZipWith | zip_with | SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x)) | struct>> | +| org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | approx_percentile | SELECT approx_percentile(10.0, array(0.5, 0.4, 0.1), 100) | struct> | +| org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile | percentile_approx | SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100) | struct> | +| org.apache.spark.sql.catalyst.expressions.aggregate.Average | avg | SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.Average | mean | SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg | bit_and | SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg | bit_or | SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg | bit_xor | SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd | every | SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd | bool_and | SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | bool_or | SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | some | SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | any | SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.CollectList | collect_list | SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | +| org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet | collect_set | SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | +| org.apache.spark.sql.catalyst.expressions.aggregate.Corr | corr | SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.Count | count | SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.CountIf | count_if | SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg | count_min_sketch | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation | covar_pop | SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.CovSample | covar_samp | SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.First | first_value | SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.First | first | SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus | approx_count_distinct | SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis | kurtosis | SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.Last | last_value | SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.Last | last | SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.Max | max | SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.Min | min | SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile | SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.Skewness | skewness | SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop | stddev_pop | SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev_samp | SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev | SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | std | SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.Sum | sum | SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop | var_pop | SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | var_samp | SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | variance | SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean | xpath_boolean | SELECT xpath_boolean('1','a/b') | struct1, a/b):boolean> | +| org.apache.spark.sql.catalyst.expressions.xml.XPathDouble | xpath_number | SELECT xpath_number('12', 'sum(a/b)') | struct12, sum(a/b)):double> | +| org.apache.spark.sql.catalyst.expressions.xml.XPathDouble | xpath_double | SELECT xpath_double('12', 'sum(a/b)') | struct12, sum(a/b)):double> | +| org.apache.spark.sql.catalyst.expressions.xml.XPathFloat | xpath_float | SELECT xpath_float('12', 'sum(a/b)') | struct12, sum(a/b)):float> | +| org.apache.spark.sql.catalyst.expressions.xml.XPathInt | xpath_int | SELECT xpath_int('12', 'sum(a/b)') | struct12, sum(a/b)):int> | +| org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('b1b2b3c1c2','a/b/text()') | structb1b2b3c1c2, a/b/text()):array> | +| org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('12', 'sum(a/b)') | struct12, sum(a/b)):bigint> | +| org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('12', 'sum(a/b)') | struct12, sum(a/b)):smallint> | +| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('bcc','a/c') | structbcc, a/c):string> | \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 7e8509c2b2665..6ed1b2dd7003d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -21,9 +21,7 @@ import java.io.File import scala.collection.mutable.ArrayBuffer -import org.apache.spark.sql.catalyst.expressions.ExpressionInfo import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} -import org.apache.spark.sql.execution.HiveResult.hiveResultString import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.tags.ExtendedSQLTest @@ -97,17 +95,14 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { // The example calls methods that return unstable results. "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection") - val MISSING_EXAMPLE = "Example is missing" - /** A single SQL query's SQL and schema. */ protected case class QueryOutput( - number: String = "0", className: String, funcName: String, - sql: String = MISSING_EXAMPLE, - schema: String = MISSING_EXAMPLE) { + sql: String = "N/A", + schema: String = "N/A") { override def toString: String = { - s"| $number | $className | $funcName | $sql | $schema |" + s"| $className | $funcName | $sql | $schema |" } } @@ -122,22 +117,17 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { val outputs = new ArrayBuffer[QueryOutput] val missingExamples = new ArrayBuffer[String] - var _curNumber = 0 - def curNumber: String = { - _curNumber += 1 - _curNumber.toString - } - classFunsMap.foreach { kv => val className = kv._1 if (!ignoreSet.contains(className)) { kv._2.foreach { funInfo => val example = funInfo.getExamples + val funcName = funInfo.getName.replaceAll("\\|", "|") if (example == "") { - val queryOutput = QueryOutput(curNumber, className, funInfo.getName) + val queryOutput = QueryOutput(className, funcName) outputBuffer += queryOutput.toString outputs += queryOutput - missingExamples += queryOutput.funcName + missingExamples += funcName } // If expression exists 'Examples' segment, the first element is 'Examples'. Because @@ -145,31 +135,34 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { // Therefore, we only need to output the first SQL and its corresponding schema. // Note: We need to filter out the commands that set the parameters, such as: // SET spark.sql.parser.escapedStringLiterals=true - example.split(" > ").tail - .filterNot(_.trim.startsWith("SET")).take(1).foreach(_ match { - case exampleRe(sql, expected) => + example.split(" > ").tail.filterNot(_.trim.startsWith("SET")).take(1).foreach { + _ match { + case exampleRe(sql, _) => val df = spark.sql(sql) - val schema = df.schema.catalogString - val queryOutput = QueryOutput(curNumber, className, funInfo.getName, sql, schema) + val escapedSql = sql.replaceAll("\\|", "|") + val schema = df.schema.catalogString.replaceAll("\\|", "|") + val queryOutput = QueryOutput(className, funcName, escapedSql, schema) outputBuffer += queryOutput.toString outputs += queryOutput case _ => - }) + } } + } } } if (regenerateGoldenFiles) { val missingExampleStr = missingExamples.mkString(",") val goldenOutput = { - "## Summary\n" + - s" - Number of queries: ${outputs.size}\n" + - s" - Number of expressions that missing example: ${missingExamples.size}\n" + - s" - Expressions missing examples: $missingExampleStr\n" + - "## Schema of Built-in Functions\n" + - "| No | Class name | Function name or alias | Query example | Output schema |\n" + - "| -- | ---------- | ---------------------- | ------------- | ------------- |\n" + - outputBuffer.mkString("\n") + s"""## Summary + | - Number of queries: ${outputs.size} + | - Number of expressions that missing example: ${missingExamples.size} + | - Expressions missing examples: $missingExampleStr + |## Schema of Built-in Functions + || Class name | Function name or alias | Query example | Output schema | + || ---------- | ---------------------- | ------------- | ------------- | + """.stripMargin + + outputBuffer.mkString("\n") } val parent = resultFile.getParentFile if (!parent.exists()) { @@ -189,25 +182,11 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { Seq.tabulate(outputs.size) { i => val segments = lines(i + 7).split('|') - if (segments(2).trim == "org.apache.spark.sql.catalyst.expressions.BitwiseOr") { - // scalastyle:off line.size.limit - // The name of `BitwiseOr` is '|', so the line in golden file looks like below. - // | 40 | org.apache.spark.sql.catalyst.expressions.BitwiseOr | | | SELECT 3 | 5 | struct<(3 | 5):int> | - QueryOutput( - className = segments(2).trim, - funcName = "|", - sql = (segments(5) + "|" + segments(6)).trim, - schema = (segments(7) + "|" + segments(8)).trim) - } else { - // The lines most expressions output to a file are in the following format - // | 1 | org.apache.spark.sql.catalyst.expressions.Abs | abs | SELECT abs(-1) | struct | - // scalastyle:on line.size.limit - QueryOutput( - className = segments(2).trim, - funcName = segments(3).trim, - sql = segments(4).trim, - schema = segments(5).trim) - } + QueryOutput( + className = segments(1).trim, + funcName = segments(2).trim, + sql = segments(3).trim, + schema = segments(4).trim) } } From e0dde7469616b35644d5c729ac3ded0e08c41bef Mon Sep 17 00:00:00 2001 From: beliefer Date: Mon, 27 Apr 2020 23:16:52 +0800 Subject: [PATCH 11/22] Revert some code --- .../sql-functions/sql-expression-schema.md | 6 +++--- .../spark/sql/ExpressionsSchemaSuite.scala | 17 ++++++++--------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 5e597e797528d..7a8ea8fa51c9b 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -3,9 +3,9 @@ - Number of expressions that missing example: 37 - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,current_date,current_timestamp,now,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions -| Class name | Function name or alias | Query example | Output schema | -| ---------- | ---------------------- | ------------- | ------------- | - | org.apache.spark.sql.catalyst.expressions.Abs | abs | SELECT abs(-1) | struct | +| No | Class name | Function name or alias | Query example | Output schema | +| -- | ---------- | ---------------------- | ------------- | ------------- | +| org.apache.spark.sql.catalyst.expressions.Abs | abs | SELECT abs(-1) | struct | | org.apache.spark.sql.catalyst.expressions.Acos | acos | SELECT acos(1) | struct | | org.apache.spark.sql.catalyst.expressions.Acosh | acosh | SELECT acosh(1) | struct | | org.apache.spark.sql.catalyst.expressions.Add | + | SELECT 1 + 2 | struct<(1 + 2):int> | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 6ed1b2dd7003d..7f309b9d488d9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -154,15 +154,14 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { if (regenerateGoldenFiles) { val missingExampleStr = missingExamples.mkString(",") val goldenOutput = { - s"""## Summary - | - Number of queries: ${outputs.size} - | - Number of expressions that missing example: ${missingExamples.size} - | - Expressions missing examples: $missingExampleStr - |## Schema of Built-in Functions - || Class name | Function name or alias | Query example | Output schema | - || ---------- | ---------------------- | ------------- | ------------- | - """.stripMargin + - outputBuffer.mkString("\n") + "## Summary\n" + + s" - Number of queries: ${outputs.size}\n" + + s" - Number of expressions that missing example: ${missingExamples.size}\n" + + s" - Expressions missing examples: $missingExampleStr\n" + + "## Schema of Built-in Functions\n" + + "| No | Class name | Function name or alias | Query example | Output schema |\n" + + "| -- | ---------- | ---------------------- | ------------- | ------------- |\n" + + outputBuffer.mkString("\n") } val parent = resultFile.getParentFile if (!parent.exists()) { From b6f32caf5bd2b25c9b94491a51ff5aae6805b4aa Mon Sep 17 00:00:00 2001 From: beliefer Date: Mon, 27 Apr 2020 23:30:31 +0800 Subject: [PATCH 12/22] Optimize code --- .../src/test/resources/sql-functions/sql-expression-schema.md | 2 +- .../scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 7a8ea8fa51c9b..6983109e5b91b 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -3,7 +3,7 @@ - Number of expressions that missing example: 37 - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,current_date,current_timestamp,now,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions -| No | Class name | Function name or alias | Query example | Output schema | +| Class name | Function name or alias | Query example | Output schema | | -- | ---------- | ---------------------- | ------------- | ------------- | | org.apache.spark.sql.catalyst.expressions.Abs | abs | SELECT abs(-1) | struct | | org.apache.spark.sql.catalyst.expressions.Acos | acos | SELECT acos(1) | struct | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 7f309b9d488d9..af57879df787c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -159,7 +159,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { s" - Number of expressions that missing example: ${missingExamples.size}\n" + s" - Expressions missing examples: $missingExampleStr\n" + "## Schema of Built-in Functions\n" + - "| No | Class name | Function name or alias | Query example | Output schema |\n" + + "| Class name | Function name or alias | Query example | Output schema |\n" + "| -- | ---------- | ---------------------- | ------------- | ------------- |\n" + outputBuffer.mkString("\n") } From 9803e4a2f1ebfd10c7054bd0a16f2521528d81c7 Mon Sep 17 00:00:00 2001 From: beliefer Date: Tue, 28 Apr 2020 09:50:28 +0800 Subject: [PATCH 13/22] Update golden file. --- .../sql-functions/sql-expression-schema.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 6983109e5b91b..7fb3a35ca0f06 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,7 +1,7 @@ ## Summary - - Number of queries: 324 - - Number of expressions that missing example: 37 - - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,current_date,current_timestamp,now,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch + - Number of queries: 325 + - Number of expressions that missing example: 34 + - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions | Class name | Function name or alias | Query example | Output schema | | -- | ---------- | ---------------------- | ------------- | ------------- | @@ -80,9 +80,8 @@ | org.apache.spark.sql.catalyst.expressions.Cube | cube | SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name, age) | struct | | org.apache.spark.sql.catalyst.expressions.CumeDist | cume_dist | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct | -| org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | N/A | N/A | -| org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | N/A | N/A | -| org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | now | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | SELECT current_date() | struct | +| org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | SELECT current_timestamp() | struct | | org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct | | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct | @@ -106,6 +105,7 @@ | org.apache.spark.sql.catalyst.expressions.Explode | explode | SELECT explode(array(10, 20)) | struct | | org.apache.spark.sql.catalyst.expressions.Explode | explode_outer | SELECT explode_outer(array(10, 20)) | struct | | org.apache.spark.sql.catalyst.expressions.Expm1 | expm1 | SELECT expm1(0) | struct | +| org.apache.spark.sql.catalyst.expressions.Extract | extract | SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456') | struct | | org.apache.spark.sql.catalyst.expressions.Factorial | factorial | SELECT factorial(5) | struct | | org.apache.spark.sql.catalyst.expressions.FindInSet | find_in_set | SELECT find_in_set('ab','abc,b,ab,c,def') | struct | | org.apache.spark.sql.catalyst.expressions.Flatten | flatten | SELECT flatten(array(array(1, 2), array(3, 4))) | struct> | @@ -137,7 +137,7 @@ | org.apache.spark.sql.catalyst.expressions.IsNaN | isnan | SELECT isnan(cast('NaN' as double)) | struct | | org.apache.spark.sql.catalyst.expressions.IsNotNull | isnotnull | SELECT isnotnull(1) | struct<(1 IS NOT NULL):boolean> | | org.apache.spark.sql.catalyst.expressions.IsNull | isnull | SELECT isnull(1) | struct<(1 IS NULL):boolean> | -| org.apache.spark.sql.catalyst.expressions.JsonObjectKeys | json_object_keys | Select json_object_keys('{}') | struct> | +| org.apache.spark.sql.catalyst.expressions.JsonObjectKeys | json_object_keys | SELECT json_object_keys('{}') | struct> | | org.apache.spark.sql.catalyst.expressions.JsonToStructs | from_json | SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') | struct> | | org.apache.spark.sql.catalyst.expressions.JsonTuple | json_tuple | SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') | struct | | org.apache.spark.sql.catalyst.expressions.Lag | lag | N/A | N/A | @@ -184,6 +184,7 @@ | org.apache.spark.sql.catalyst.expressions.NextDay | next_day | SELECT next_day('2015-01-14', 'TU') | struct | | org.apache.spark.sql.catalyst.expressions.Not | ! | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Not | not | N/A | N/A | +| org.apache.spark.sql.catalyst.expressions.Now | now | SELECT now() | struct | | org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct | | org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct> | | org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct | From a7bc72d8c8d6bc1990de5558765c3feab537858e Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 29 Apr 2020 00:16:06 +0800 Subject: [PATCH 14/22] Optimize code --- .../resources/sql-functions/sql-expression-schema.md | 7 ++++++- .../org/apache/spark/sql/ExpressionsSchemaSuite.scala | 10 ++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 7fb3a35ca0f06..3be001eb33173 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,5 +1,5 @@ ## Summary - - Number of queries: 325 + - Number of queries: 330 - Number of expressions that missing example: 34 - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions @@ -46,6 +46,8 @@ | org.apache.spark.sql.catalyst.expressions.BitwiseNot | ~ | SELECT ~ 0 | struct<~0:int> | | org.apache.spark.sql.catalyst.expressions.BitwiseOr | | | SELECT 3 | 5 | struct<(3 | 5):int> | | org.apache.spark.sql.catalyst.expressions.BitwiseXor | ^ | SELECT 3 ^ 5 | struct<(3 ^ 5):int> | +| org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection | java_method | SELECT java_method('java.util.UUID', 'randomUUID') | struct | +| org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection | reflect | SELECT reflect('java.util.UUID', 'randomUUID') | struct | | org.apache.spark.sql.catalyst.expressions.CaseWhen | when | SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END | struct 0) THEN CAST(1 AS DECIMAL(11,1)) WHEN (2 > 0) THEN CAST(2.0 AS DECIMAL(11,1)) ELSE CAST(1.2 AS DECIMAL(11,1)) END:decimal(11,1)> | | org.apache.spark.sql.catalyst.expressions.Cast | string | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Cast | cast | SELECT cast('10' as int) | struct | @@ -225,6 +227,7 @@ | org.apache.spark.sql.catalyst.expressions.ShiftLeft | shiftleft | SELECT shiftleft(2, 1) | struct | | org.apache.spark.sql.catalyst.expressions.ShiftRight | shiftright | SELECT shiftright(4, 1) | struct | | org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned | shiftrightunsigned | SELECT shiftrightunsigned(4, 1) | struct | +| org.apache.spark.sql.catalyst.expressions.Shuffle | shuffle | SELECT shuffle(array(1, 20, 3, 5)) | struct> | | org.apache.spark.sql.catalyst.expressions.Signum | signum | SELECT signum(40) | struct | | org.apache.spark.sql.catalyst.expressions.Signum | sign | SELECT sign(40) | struct | | org.apache.spark.sql.catalyst.expressions.Sin | sin | SELECT sin(0) | struct | @@ -274,8 +277,10 @@ | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<(- 1):int> | | org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct | +| org.apache.spark.sql.catalyst.expressions.UnixTimestamp | unix_timestamp | SELECT unix_timestamp() | struct | | org.apache.spark.sql.catalyst.expressions.Upper | ucase | SELECT ucase('SparkSql') | struct | | org.apache.spark.sql.catalyst.expressions.Upper | upper | SELECT upper('SparkSql') | struct | +| org.apache.spark.sql.catalyst.expressions.Uuid | uuid | SELECT uuid() | struct | | org.apache.spark.sql.catalyst.expressions.WeekDay | weekday | SELECT weekday('2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.WeekOfYear | weekofyear | SELECT weekofyear('2008-02-20') | struct | | org.apache.spark.sql.catalyst.expressions.XxHash64 | xxhash64 | SELECT xxhash64('Spark', array(123), 2) | struct | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index af57879df787c..c6082f68226c5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -58,7 +58,7 @@ import org.apache.spark.tags.ExtendedSQLTest * The format for golden result files look roughly like: * {{{ * ... - * | 238 | org.apache.spark.sql.catalyst.expressions.StringRepeat | repeat | SELECT repeat('123', 2) | struct | + * | org.apache.spark.sql.catalyst.expressions.StringRepeat | repeat | SELECT repeat('123', 2) | struct | * ... * }}} */ @@ -85,15 +85,9 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { private val resultFile = new File(baseResourcePath, "sql-expression-schema.md") val ignoreSet = Set( - // One of examples shows getting the current timestamp - "org.apache.spark.sql.catalyst.expressions.UnixTimestamp", // Random output without a seed "org.apache.spark.sql.catalyst.expressions.Rand", - "org.apache.spark.sql.catalyst.expressions.Randn", - "org.apache.spark.sql.catalyst.expressions.Shuffle", - "org.apache.spark.sql.catalyst.expressions.Uuid", - // The example calls methods that return unstable results. - "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection") + "org.apache.spark.sql.catalyst.expressions.Randn") /** A single SQL query's SQL and schema. */ protected case class QueryOutput( From f38f94270340d2b157998b629dfea44da5d812cb Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 29 Apr 2020 10:21:17 +0800 Subject: [PATCH 15/22] Optimize code --- .../spark/sql/ExpressionsSchemaSuite.scala | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index c6082f68226c5..1c49d3bcb1e4d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -130,16 +130,14 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { // Note: We need to filter out the commands that set the parameters, such as: // SET spark.sql.parser.escapedStringLiterals=true example.split(" > ").tail.filterNot(_.trim.startsWith("SET")).take(1).foreach { - _ match { - case exampleRe(sql, _) => - val df = spark.sql(sql) - val escapedSql = sql.replaceAll("\\|", "|") - val schema = df.schema.catalogString.replaceAll("\\|", "|") - val queryOutput = QueryOutput(className, funcName, escapedSql, schema) - outputBuffer += queryOutput.toString - outputs += queryOutput - case _ => - } + case exampleRe(sql, _) => + val df = spark.sql(sql) + val escapedSql = sql.replaceAll("\\|", "|") + val schema = df.schema.catalogString.replaceAll("\\|", "|") + val queryOutput = QueryOutput(className, funcName, escapedSql, schema) + outputBuffer += queryOutput.toString + outputs += queryOutput + case _ => } } } From c4d45ea9c5c9eed4a9ee2b1bce987e8a32ee2e13 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 29 Apr 2020 11:16:23 +0800 Subject: [PATCH 16/22] Add header of golden file --- .../resources/sql-functions/sql-expression-schema.md | 1 + .../org/apache/spark/sql/ExpressionsSchemaSuite.scala | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 3be001eb33173..cbba7101954e0 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,3 +1,4 @@ +Automatically generated by ExpressionsSchemaSuite ## Summary - Number of queries: 330 - Number of expressions that missing example: 34 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 1c49d3bcb1e4d..158c7e15d8b15 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -146,6 +146,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { if (regenerateGoldenFiles) { val missingExampleStr = missingExamples.mkString(",") val goldenOutput = { + "Automatically generated by ExpressionsSchemaSuite\n" + "## Summary\n" + s" - Number of queries: ${outputs.size}\n" + s" - Number of expressions that missing example: ${missingExamples.size}\n" + @@ -166,13 +167,14 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { val goldenOutput = fileToString(resultFile) val lines = goldenOutput.split("\n") - // The summary has 4 lines, plus the header of schema table has 3 lines - assert(lines.size == outputs.size + 7, - s"Expected ${outputs.size + 7} blocks in result file but got ${lines.size}. " + + // The header of golden file has one line, plus four lines of the summary and three + // lines of the header of schema table. + assert(lines.size == outputs.size + 8, + s"Expected ${outputs.size + 8} blocks in result file but got ${lines.size}. " + s"Try regenerate the result files.") Seq.tabulate(outputs.size) { i => - val segments = lines(i + 7).split('|') + val segments = lines(i + 8).split('|') QueryOutput( className = segments(1).trim, funcName = segments(2).trim, From 78115575ba2b24bc8b76eba4ea1a794b6adda093 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 29 Apr 2020 11:21:09 +0800 Subject: [PATCH 17/22] Update golden file --- .../scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 158c7e15d8b15..6a5fcd6845ca8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -153,7 +153,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { s" - Expressions missing examples: $missingExampleStr\n" + "## Schema of Built-in Functions\n" + "| Class name | Function name or alias | Query example | Output schema |\n" + - "| -- | ---------- | ---------------------- | ------------- | ------------- |\n" + + "| ---------- | ---------------------- | ------------- | ------------- |\n" + outputBuffer.mkString("\n") } val parent = resultFile.getParentFile From 2173536afe896910b1c5faa35de1efa34f01e0f3 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 29 Apr 2020 11:23:52 +0800 Subject: [PATCH 18/22] Update golden file --- .../src/test/resources/sql-functions/sql-expression-schema.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index cbba7101954e0..77b1013d69773 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -5,7 +5,7 @@ Automatically generated by ExpressionsSchemaSuite - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions | Class name | Function name or alias | Query example | Output schema | -| -- | ---------- | ---------------------- | ------------- | ------------- | +| ---------- | ---------------------- | ------------- | ------------- | | org.apache.spark.sql.catalyst.expressions.Abs | abs | SELECT abs(-1) | struct | | org.apache.spark.sql.catalyst.expressions.Acos | acos | SELECT acos(1) | struct | | org.apache.spark.sql.catalyst.expressions.Acosh | acosh | SELECT acosh(1) | struct | From 460da00a0f9045e2a4672459e20324a8e5c3a6fa Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 29 Apr 2020 15:11:38 +0800 Subject: [PATCH 19/22] Comment out header --- .../src/test/resources/sql-functions/sql-expression-schema.md | 2 +- .../scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 77b1013d69773..77b75444c0400 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,4 +1,4 @@ -Automatically generated by ExpressionsSchemaSuite + ## Summary - Number of queries: 330 - Number of expressions that missing example: 34 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 6a5fcd6845ca8..ff20b1ab8fb56 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -146,7 +146,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { if (regenerateGoldenFiles) { val missingExampleStr = missingExamples.mkString(",") val goldenOutput = { - "Automatically generated by ExpressionsSchemaSuite\n" + + s"\n" + "## Summary\n" + s" - Number of queries: ${outputs.size}\n" + s" - Number of expressions that missing example: ${missingExamples.size}\n" + From 133456d2dc809ea7cd03139556998955074dd288 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 29 Apr 2020 15:52:35 +0800 Subject: [PATCH 20/22] Not ignore expression. --- .../sql-functions/sql-expression-schema.md | 5 +- .../spark/sql/ExpressionsSchemaSuite.scala | 51 ++++++++----------- 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 77b75444c0400..1e22ae2eefeb2 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 330 + - Number of queries: 333 - Number of expressions that missing example: 34 - Expressions missing examples: and,string,tinyint,double,smallint,date,decimal,boolean,float,binary,bigint,int,timestamp,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,struct,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions @@ -206,6 +206,9 @@ | org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct | | org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' | struct<%SystemDrive%UsersJohn RLIKE %SystemDrive%\Users.*:boolean> | +| org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct | +| org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct | +| org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct | | org.apache.spark.sql.catalyst.expressions.Rank | rank | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.RegExpExtract | regexp_extract | SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) | struct | | org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\\d+)', 'num') | struct | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index ff20b1ab8fb56..1b4af76522298 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -84,11 +84,6 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { private val resultFile = new File(baseResourcePath, "sql-expression-schema.md") - val ignoreSet = Set( - // Random output without a seed - "org.apache.spark.sql.catalyst.expressions.Rand", - "org.apache.spark.sql.catalyst.expressions.Randn") - /** A single SQL query's SQL and schema. */ protected case class QueryOutput( className: String, @@ -113,32 +108,30 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { classFunsMap.foreach { kv => val className = kv._1 - if (!ignoreSet.contains(className)) { - kv._2.foreach { funInfo => - val example = funInfo.getExamples - val funcName = funInfo.getName.replaceAll("\\|", "|") - if (example == "") { - val queryOutput = QueryOutput(className, funcName) + kv._2.foreach { funInfo => + val example = funInfo.getExamples + val funcName = funInfo.getName.replaceAll("\\|", "|") + if (example == "") { + val queryOutput = QueryOutput(className, funcName) + outputBuffer += queryOutput.toString + outputs += queryOutput + missingExamples += funcName + } + + // If expression exists 'Examples' segment, the first element is 'Examples'. Because + // this test case is only used to print aliases of expressions for double checking. + // Therefore, we only need to output the first SQL and its corresponding schema. + // Note: We need to filter out the commands that set the parameters, such as: + // SET spark.sql.parser.escapedStringLiterals=true + example.split(" > ").tail.filterNot(_.trim.startsWith("SET")).take(1).foreach { + case exampleRe(sql, _) => + val df = spark.sql(sql) + val escapedSql = sql.replaceAll("\\|", "|") + val schema = df.schema.catalogString.replaceAll("\\|", "|") + val queryOutput = QueryOutput(className, funcName, escapedSql, schema) outputBuffer += queryOutput.toString outputs += queryOutput - missingExamples += funcName - } - - // If expression exists 'Examples' segment, the first element is 'Examples'. Because - // this test case is only used to print aliases of expressions for double checking. - // Therefore, we only need to output the first SQL and its corresponding schema. - // Note: We need to filter out the commands that set the parameters, such as: - // SET spark.sql.parser.escapedStringLiterals=true - example.split(" > ").tail.filterNot(_.trim.startsWith("SET")).take(1).foreach { - case exampleRe(sql, _) => - val df = spark.sql(sql) - val escapedSql = sql.replaceAll("\\|", "|") - val schema = df.schema.catalogString.replaceAll("\\|", "|") - val queryOutput = QueryOutput(className, funcName, escapedSql, schema) - outputBuffer += queryOutput.toString - outputs += queryOutput - case _ => - } + case _ => } } } From e57166790e5da48d7d31f609f4658605d43d4482 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 29 Apr 2020 20:28:46 +0800 Subject: [PATCH 21/22] Optimize code --- .../org/apache/spark/sql/ExpressionsSchemaSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 1b4af76522298..db741b5f5ecdf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -86,10 +86,10 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { /** A single SQL query's SQL and schema. */ protected case class QueryOutput( - className: String, - funcName: String, - sql: String = "N/A", - schema: String = "N/A") { + className: String, + funcName: String, + sql: String = "N/A", + schema: String = "N/A") { override def toString: String = { s"| $className | $funcName | $sql | $schema |" } From a4d4de9e472dbd55ffbbc13ae1c8ad615a7e3455 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 29 Apr 2020 20:43:24 +0800 Subject: [PATCH 22/22] Simplify check --- .../spark/sql/ExpressionsSchemaSuite.scala | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index db741b5f5ecdf..dd72473f0ea6c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -177,19 +177,11 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { } // Compare results. - assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") { - outputs.size - } + assert(expectedOutputs.size == outputs.size, s"Number of queries not equals") - outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) => - assertResult(expected.sql, - s"SQL query did not match for query #$i\n${expected.sql}") { - output.sql - } - assertResult(expected.schema, - s"Schema did not match for query #$i\n${expected.sql}: $output") { - output.schema - } + outputs.zip(expectedOutputs).foreach { case (output, expected) => + assert(expected.sql == output.sql, "SQL query did not match") + assert(expected.schema == output.schema, s"Schema did not match for query ${expected.sql}") } } }