diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index caa83bd2e1a57..09661c8c907b1 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -7983,8 +7983,8 @@ def current_date() -> Column:
 
     Examples
     --------
-    >>> df = spark.range(1)
-    >>> df.select(current_date()).show()  # doctest: +SKIP
+    >>> from pyspark.sql import functions as sf
+    >>> spark.range(1).select(sf.current_date()).show()  # doctest: +SKIP
     +--------------+
     |current_date()|
     +--------------+
@@ -8008,8 +8008,9 @@ def current_timezone() -> Column:
 
     Examples
     --------
+    >>> from pyspark.sql import functions as sf
     >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
-    >>> spark.range(1).select(current_timezone()).show()
+    >>> spark.range(1).select(sf.current_timezone()).show()
     +-------------------+
     | current_timezone()|
     +-------------------+
@@ -8038,8 +8039,8 @@ def current_timestamp() -> Column:
 
     Examples
     --------
-    >>> df = spark.range(1)
-    >>> df.select(current_timestamp()).show(truncate=False)  # doctest: +SKIP
+    >>> from pyspark.sql import functions as sf
+    >>> spark.range(1).select(sf.current_timestamp()).show(truncate=False)  # doctest: +SKIP
     +-----------------------+
     |current_timestamp()    |
     +-----------------------+
@@ -8064,8 +8065,7 @@ def now() -> Column:
     Examples
     --------
     >>> from pyspark.sql import functions as sf
-    >>> df = spark.range(1)
-    >>> df.select(sf.now()).show(truncate=False)  # doctest: +SKIP
+    >>> spark.range(1).select(sf.now()).show(truncate=False)  # doctest: +SKIP
     +--------------------------+
     |now()                     |
     +--------------------------+
@@ -8094,8 +8094,8 @@ def localtimestamp() -> Column:
 
     Examples
     --------
-    >>> df = spark.range(1)
-    >>> df.select(localtimestamp()).show(truncate=False)  # doctest: +SKIP
+    >>> from pyspark.sql import functions as sf
+    >>> spark.range(1).select(sf.localtimestamp()).show(truncate=False)  # doctest: +SKIP
     +-----------------------+
     |localtimestamp()       |
     +-----------------------+
@@ -8160,7 +8160,7 @@ def year(col: "ColumnOrName") -> Column:
 
     Parameters
     ----------
-    col : :class:`~pyspark.sql.Column` or str
+    col : :class:`~pyspark.sql.Column` or column name
         target date/timestamp column to work on.
 
     Returns
@@ -8170,9 +8170,59 @@ def year(col: "ColumnOrName") -> Column:
 
     Examples
     --------
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
-    >>> df.select(year('dt').alias('year')).collect()
-    [Row(year=2015)]
+    Example 1: Extract the year from a string column representing dates
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08',), ('2024-10-31',)], ['dt'])
+    >>> df.select("*", sf.typeof('dt'), sf.year('dt')).show()
+    +----------+----------+--------+
+    |        dt|typeof(dt)|year(dt)|
+    +----------+----------+--------+
+    |2015-04-08|    string|    2015|
+    |2024-10-31|    string|    2024|
+    +----------+----------+--------+
+
+    Example 2: Extract the year from a string column representing timestamps
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',), ('2024-10-31 10:09:16',)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.year('ts')).show()
+    +-------------------+----------+--------+
+    |                 ts|typeof(ts)|year(ts)|
+    +-------------------+----------+--------+
+    |2015-04-08 13:08:15|    string|    2015|
+    |2024-10-31 10:09:16|    string|    2024|
+    +-------------------+----------+--------+
+
+    Example 3: Extract the year from a date column
+
+    >>> import datetime
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([
+    ...     (datetime.date(2015, 4, 8),),
+    ...     (datetime.date(2024, 10, 31),)], ['dt'])
+    >>> df.select("*", sf.typeof('dt'), sf.year('dt')).show()
+    +----------+----------+--------+
+    |        dt|typeof(dt)|year(dt)|
+    +----------+----------+--------+
+    |2015-04-08|      date|    2015|
+    |2024-10-31|      date|    2024|
+    +----------+----------+--------+
+
+    Example 4: Extract the year from a timestamp column
+
+    >>> import datetime
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([
+    ...     (datetime.datetime(2015, 4, 8, 13, 8, 15),),
+    ...     (datetime.datetime(2024, 10, 31, 10, 9, 16),)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.year('ts')).show()
+    +-------------------+----------+--------+
+    |                 ts|typeof(ts)|year(ts)|
+    +-------------------+----------+--------+
+    |2015-04-08 13:08:15| timestamp|    2015|
+    |2024-10-31 10:09:16| timestamp|    2024|
+    +-------------------+----------+--------+
     """
     return _invoke_function_over_columns("year", col)
 
@@ -8189,7 +8239,7 @@ def quarter(col: "ColumnOrName") -> Column:
 
     Parameters
     ----------
-    col : :class:`~pyspark.sql.Column` or str
+    col : :class:`~pyspark.sql.Column` or column name
         target date/timestamp column to work on.
 
     Returns
@@ -8199,9 +8249,59 @@ def quarter(col: "ColumnOrName") -> Column:
 
     Examples
     --------
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
-    >>> df.select(quarter('dt').alias('quarter')).collect()
-    [Row(quarter=2)]
+    Example 1: Extract the quarter from a string column representing dates
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08',), ('2024-10-31',)], ['dt'])
+    >>> df.select("*", sf.typeof('dt'), sf.quarter('dt')).show()
+    +----------+----------+-----------+
+    |        dt|typeof(dt)|quarter(dt)|
+    +----------+----------+-----------+
+    |2015-04-08|    string|          2|
+    |2024-10-31|    string|          4|
+    +----------+----------+-----------+
+
+    Example 2: Extract the quarter from a string column representing timestamps
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',), ('2024-10-31 10:09:16',)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.quarter('ts')).show()
+    +-------------------+----------+-----------+
+    |                 ts|typeof(ts)|quarter(ts)|
+    +-------------------+----------+-----------+
+    |2015-04-08 13:08:15|    string|          2|
+    |2024-10-31 10:09:16|    string|          4|
+    +-------------------+----------+-----------+
+
+    Example 3: Extract the quarter from a date column
+
+    >>> import datetime
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([
+    ...     (datetime.date(2015, 4, 8),),
+    ...     (datetime.date(2024, 10, 31),)], ['dt'])
+    >>> df.select("*", sf.typeof('dt'), sf.quarter('dt')).show()
+    +----------+----------+-----------+
+    |        dt|typeof(dt)|quarter(dt)|
+    +----------+----------+-----------+
+    |2015-04-08|      date|          2|
+    |2024-10-31|      date|          4|
+    +----------+----------+-----------+
+
+    Example 4: Extract the quarter from a timestamp column
+
+    >>> import datetime
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([
+    ...     (datetime.datetime(2015, 4, 8, 13, 8, 15),),
+    ...     (datetime.datetime(2024, 10, 31, 10, 9, 16),)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.quarter('ts')).show()
+    +-------------------+----------+-----------+
+    |                 ts|typeof(ts)|quarter(ts)|
+    +-------------------+----------+-----------+
+    |2015-04-08 13:08:15| timestamp|          2|
+    |2024-10-31 10:09:16| timestamp|          4|
+    +-------------------+----------+-----------+
     """
     return _invoke_function_over_columns("quarter", col)
 
@@ -8218,7 +8318,7 @@ def month(col: "ColumnOrName") -> Column:
 
     Parameters
     ----------
-    col : :class:`~pyspark.sql.Column` or str
+    col : :class:`~pyspark.sql.Column` or column name
         target date/timestamp column to work on.
 
     Returns
@@ -8228,9 +8328,59 @@ def month(col: "ColumnOrName") -> Column:
 
     Examples
     --------
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
-    >>> df.select(month('dt').alias('month')).collect()
-    [Row(month=4)]
+    Example 1: Extract the month from a string column representing dates
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08',), ('2024-10-31',)], ['dt'])
+    >>> df.select("*", sf.typeof('dt'), sf.month('dt')).show()
+    +----------+----------+---------+
+    |        dt|typeof(dt)|month(dt)|
+    +----------+----------+---------+
+    |2015-04-08|    string|        4|
+    |2024-10-31|    string|       10|
+    +----------+----------+---------+
+
+    Example 2: Extract the month from a string column representing timestamps
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',), ('2024-10-31 10:09:16',)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.month('ts')).show()
+    +-------------------+----------+---------+
+    |                 ts|typeof(ts)|month(ts)|
+    +-------------------+----------+---------+
+    |2015-04-08 13:08:15|    string|        4|
+    |2024-10-31 10:09:16|    string|       10|
+    +-------------------+----------+---------+
+
+    Example 3: Extract the month from a date column
+
+    >>> import datetime
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([
+    ...     (datetime.date(2015, 4, 8),),
+    ...     (datetime.date(2024, 10, 31),)], ['dt'])
+    >>> df.select("*", sf.typeof('dt'), sf.month('dt')).show()
+    +----------+----------+---------+
+    |        dt|typeof(dt)|month(dt)|
+    +----------+----------+---------+
+    |2015-04-08|      date|        4|
+    |2024-10-31|      date|       10|
+    +----------+----------+---------+
+
+    Example 4: Extract the month from a timestamp column
+
+    >>> import datetime
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([
+    ...     (datetime.datetime(2015, 4, 8, 13, 8, 15),),
+    ...     (datetime.datetime(2024, 10, 31, 10, 9, 16),)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.month('ts')).show()
+    +-------------------+----------+---------+
+    |                 ts|typeof(ts)|month(ts)|
+    +-------------------+----------+---------+
+    |2015-04-08 13:08:15| timestamp|        4|
+    |2024-10-31 10:09:16| timestamp|       10|
+    +-------------------+----------+---------+
     """
     return _invoke_function_over_columns("month", col)
 
@@ -8303,7 +8453,7 @@ def day(col: "ColumnOrName") -> Column:
 
     Parameters
     ----------
-    col : :class:`~pyspark.sql.Column` or str
+    col : :class:`~pyspark.sql.Column` or column name
         target date/timestamp column to work on.
 
     Returns
@@ -8313,9 +8463,59 @@ def day(col: "ColumnOrName") -> Column:
 
     Examples
     --------
-    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
-    >>> df.select(day('dt').alias('day')).collect()
-    [Row(day=8)]
+    Example 1: Extract the day of the month from a string column representing dates
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08',), ('2024-10-31',)], ['dt'])
+    >>> df.select("*", sf.typeof('dt'), sf.day('dt')).show()
+    +----------+----------+-------+
+    |        dt|typeof(dt)|day(dt)|
+    +----------+----------+-------+
+    |2015-04-08|    string|      8|
+    |2024-10-31|    string|     31|
+    +----------+----------+-------+
+
+    Example 2: Extract the day of the month from a string column representing timestamps
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',), ('2024-10-31 10:09:16',)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.day('ts')).show()
+    +-------------------+----------+-------+
+    |                 ts|typeof(ts)|day(ts)|
+    +-------------------+----------+-------+
+    |2015-04-08 13:08:15|    string|      8|
+    |2024-10-31 10:09:16|    string|     31|
+    +-------------------+----------+-------+
+
+    Example 3: Extract the day of the month from a date column
+
+    >>> import datetime
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([
+    ...     (datetime.date(2015, 4, 8),),
+    ...     (datetime.date(2024, 10, 31),)], ['dt'])
+    >>> df.select("*", sf.typeof('dt'), sf.day('dt')).show()
+    +----------+----------+-------+
+    |        dt|typeof(dt)|day(dt)|
+    +----------+----------+-------+
+    |2015-04-08|      date|      8|
+    |2024-10-31|      date|     31|
+    +----------+----------+-------+
+
+    Example 4: Extract the day of the month from a timestamp column
+
+    >>> import datetime
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([
+    ...     (datetime.datetime(2015, 4, 8, 13, 8, 15),),
+    ...     (datetime.datetime(2024, 10, 31, 10, 9, 16),)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.day('ts')).show()
+    +-------------------+----------+-------+
+    |                 ts|typeof(ts)|day(ts)|
+    +-------------------+----------+-------+
+    |2015-04-08 13:08:15| timestamp|      8|
+    |2024-10-31 10:09:16| timestamp|     31|
+    +-------------------+----------+-------+
     """
     return _invoke_function_over_columns("day", col)
 
@@ -8361,7 +8561,7 @@ def hour(col: "ColumnOrName") -> Column:
 
     Parameters
     ----------
-    col : :class:`~pyspark.sql.Column` or str
+    col : :class:`~pyspark.sql.Column` or column name
         target date/timestamp column to work on.
 
     Returns
@@ -8371,10 +8571,32 @@ def hour(col: "ColumnOrName") -> Column:
 
     Examples
     --------
+    Example 1: Extract the hours from a string column representing timestamps
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',), ('2024-10-31 10:09:16',)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.hour('ts')).show()
+    +-------------------+----------+--------+
+    |                 ts|typeof(ts)|hour(ts)|
+    +-------------------+----------+--------+
+    |2015-04-08 13:08:15|    string|      13|
+    |2024-10-31 10:09:16|    string|      10|
+    +-------------------+----------+--------+
+
+    Example 2: Extract the hours from a timestamp column
+
     >>> import datetime
-    >>> df = spark.createDataFrame([(datetime.datetime(2015, 4, 8, 13, 8, 15),)], ['ts'])
-    >>> df.select(hour('ts').alias('hour')).collect()
-    [Row(hour=13)]
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([
+    ...     (datetime.datetime(2015, 4, 8, 13, 8, 15),),
+    ...     (datetime.datetime(2024, 10, 31, 10, 9, 16),)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.hour('ts')).show()
+    +-------------------+----------+--------+
+    |                 ts|typeof(ts)|hour(ts)|
+    +-------------------+----------+--------+
+    |2015-04-08 13:08:15| timestamp|      13|
+    |2024-10-31 10:09:16| timestamp|      10|
+    +-------------------+----------+--------+
     """
     return _invoke_function_over_columns("hour", col)
 
@@ -8391,7 +8613,7 @@ def minute(col: "ColumnOrName") -> Column:
 
     Parameters
    ----------
-    col : :class:`~pyspark.sql.Column` or str
+    col : :class:`~pyspark.sql.Column` or column name
         target date/timestamp column to work on.
 
     Returns
@@ -8401,10 +8623,32 @@ def minute(col: "ColumnOrName") -> Column:
 
     Examples
     --------
+    Example 1: Extract the minutes from a string column representing timestamps
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',), ('2024-10-31 10:09:16',)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.minute('ts')).show()
+    +-------------------+----------+----------+
+    |                 ts|typeof(ts)|minute(ts)|
+    +-------------------+----------+----------+
+    |2015-04-08 13:08:15|    string|         8|
+    |2024-10-31 10:09:16|    string|         9|
+    +-------------------+----------+----------+
+
+    Example 2: Extract the minutes from a timestamp column
+
     >>> import datetime
-    >>> df = spark.createDataFrame([(datetime.datetime(2015, 4, 8, 13, 8, 15),)], ['ts'])
-    >>> df.select(minute('ts').alias('minute')).collect()
-    [Row(minute=8)]
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([
+    ...     (datetime.datetime(2015, 4, 8, 13, 8, 15),),
+    ...     (datetime.datetime(2024, 10, 31, 10, 9, 16),)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.minute('ts')).show()
+    +-------------------+----------+----------+
+    |                 ts|typeof(ts)|minute(ts)|
+    +-------------------+----------+----------+
+    |2015-04-08 13:08:15| timestamp|         8|
+    |2024-10-31 10:09:16| timestamp|         9|
+    +-------------------+----------+----------+
     """
     return _invoke_function_over_columns("minute", col)
 
@@ -8421,7 +8665,7 @@ def second(col: "ColumnOrName") -> Column:
 
     Parameters
    ----------
-    col : :class:`~pyspark.sql.Column` or str
+    col : :class:`~pyspark.sql.Column` or column name
         target date/timestamp column to work on.
 
     Returns
@@ -8431,10 +8675,32 @@ def second(col: "ColumnOrName") -> Column:
 
     Examples
     --------
+    Example 1: Extract the seconds from a string column representing timestamps
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',), ('2024-10-31 10:09:16',)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.second('ts')).show()
+    +-------------------+----------+----------+
+    |                 ts|typeof(ts)|second(ts)|
+    +-------------------+----------+----------+
+    |2015-04-08 13:08:15|    string|        15|
+    |2024-10-31 10:09:16|    string|        16|
+    +-------------------+----------+----------+
+
+    Example 2: Extract the seconds from a timestamp column
+
     >>> import datetime
-    >>> df = spark.createDataFrame([(datetime.datetime(2015, 4, 8, 13, 8, 15),)], ['ts'])
-    >>> df.select(second('ts').alias('second')).collect()
-    [Row(second=15)]
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([
+    ...     (datetime.datetime(2015, 4, 8, 13, 8, 15),),
+    ...     (datetime.datetime(2024, 10, 31, 10, 9, 16),)], ['ts'])
+    >>> df.select("*", sf.typeof('ts'), sf.second('ts')).show()
+    +-------------------+----------+----------+
+    |                 ts|typeof(ts)|second(ts)|
+    +-------------------+----------+----------+
+    |2015-04-08 13:08:15| timestamp|        15|
+    |2024-10-31 10:09:16| timestamp|        16|
+    +-------------------+----------+----------+
     """
     return _invoke_function_over_columns("second", col)