Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion python/pyspark/sql/connect/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,17 @@ def __repr__(self) -> str:
ts = TimestampNTZType().fromInternal(self._value)
if ts is not None and isinstance(ts, datetime.datetime):
return ts.strftime("%Y-%m-%d %H:%M:%S.%f")
# TODO(SPARK-49693): Refine the string representation of timedelta
elif isinstance(self._dataType, DayTimeIntervalType):
delta = DayTimeIntervalType().fromInternal(self._value)
if delta is not None and isinstance(delta, datetime.timedelta):
import pandas as pd

# Note: timedelta itself does not provide isoformat method.
# Both Pandas and java.time.Duration provide it, but the format
# is slightly different:
# java.time.Duration only applies HOURS, MINUTES, SECONDS units,
# while Pandas applies all supported units.
return pd.Timedelta(delta).isoformat() # type: ignore[attr-defined]
return f"{self._value}"


Expand Down
23 changes: 22 additions & 1 deletion python/pyspark/sql/tests/test_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
from enum import Enum
from itertools import chain
import datetime
import unittest

from pyspark.sql import Column, Row
from pyspark.sql import functions as sf
from pyspark.sql.types import StructType, StructField, IntegerType, LongType
from pyspark.errors import AnalysisException, PySparkTypeError, PySparkValueError
from pyspark.testing.sqlutils import ReusedSQLTestCase
from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, pandas_requirement_message


class ColumnTestsMixin:
Expand Down Expand Up @@ -289,6 +290,26 @@ def test_lit_time_representation(self):
ts = datetime.datetime(2021, 3, 4, 12, 34, 56, 1234)
self.assertEqual(str(sf.lit(ts)), "Column<'2021-03-04 12:34:56.001234'>")

@unittest.skipIf(not have_pandas, pandas_requirement_message)
def test_lit_delta_representation(self):
for delta in [
datetime.timedelta(days=1),
datetime.timedelta(hours=2),
datetime.timedelta(minutes=3),
datetime.timedelta(seconds=4),
datetime.timedelta(microseconds=5),
datetime.timedelta(days=2, hours=21, microseconds=908),
datetime.timedelta(days=1, minutes=-3, microseconds=-1001),
datetime.timedelta(days=1, hours=2, minutes=3, seconds=4, microseconds=5),
]:
import pandas as pd

# Column<'PT69H0.000908S'> or Column<'P2DT21H0M0.000908S'>
s = str(sf.lit(delta))

# Parse the ISO string representation and compare
self.assertTrue(pd.Timedelta(s[8:-2]).to_pytimedelta() == delta)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it should be connect specific test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it also works for pyspark classic.

Classic also uses an ISO-8601 string, but the JVM side and Pandas apply different units.

A string representation from the JVM side can also be parsed by Pandas.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test will be run in both classic and connect


def test_enum_literals(self):
class IntEnum(Enum):
X = 1
Expand Down