@@ -3023,6 +3023,42 @@ def pandas_udf(f=None, returnType=None, functionType=None):
30233023 conversion on returned data. The conversion is not guaranteed to be correct and results
30243024 should be checked for accuracy by users.
30253025 """
3026+
3027+ # The following table shows most of the Pandas data and SQL type conversions in Pandas UDFs
3028+ # that are not yet visible to the user. Some of the behaviors are buggy and might be changed in
3029+ # the near future. The table might eventually have to be documented externally.
3030+ # Please see SPARK-25798's PR for the code used to generate the table below.
3031+ #
3032+ # +-----------------------------+----------------------+----------+-------+--------+--------------------+--------------------+--------+---------+---------+---------+------------+-----------------------------------+-----------------------------------------------------+-----------------+------------------+-----------------------------+-----------+--------------------------------+ # noqa
3033+ # |SQL Type \ Pandas Value(Type)|None(object(NoneType))|True(bool)|1(int8)|1(int16)| 1(int32)| 1(int64)|1(uint8)|1(uint16)|1(uint32)|1(uint64)|1.0(float64)|1970-01-01 00:00:00(datetime64[ns])|1970-01-01 00:00:00-05:00(datetime64[ns, US/Eastern])|a(object(string))|1(object(Decimal))|[1 2 3](object(array[int32]))|A(category)|1 days 00:00:00(timedelta64[ns])| # noqa
3034+ # +-----------------------------+----------------------+----------+-------+--------+--------------------+--------------------+--------+---------+---------+---------+------------+-----------------------------------+-----------------------------------------------------+-----------------+------------------+-----------------------------+-----------+--------------------------------+ # noqa
3035+ # | boolean| None| True| True| True| True| True| True| True| True| True| False| False| False| X| X| X| X| False| # noqa
3036+ # | tinyint| None| 1| 1| 1| 1| 1| X| X| X| X| 1| X| X| X| X| X| 0| X| # noqa
3037+ # | smallint| None| 1| 1| 1| 1| 1| 1| X| X| X| 1| X| X| X| X| X| X| X| # noqa
3038+ # | int| None| 1| 1| 1| 1| 1| 1| 1| X| X| 1| X| X| X| X| X| X| X| # noqa
3039+ # | bigint| None| 1| 1| 1| 1| 1| 1| 1| 1| X| 1| 0| 18000000000000| X| X| X| X| X| # noqa
3040+ # | float| None| 1.0| 1.0| 1.0| 1.0| 1.0| 1.0| 1.0| 1.0| 1.0| 1.0| X| X| X| X| X| X| X| # noqa
3041+ # | double| None| 1.0| 1.0| 1.0| 1.0| 1.0| 1.0| 1.0| 1.0| 1.0| 1.0| X| X| X| X| X| X| X| # noqa
3042+ # | date| None| X| X| X|datetime.date(197...| X| X| X| X| X| X| datetime.date(197...| X| X| X| X| X| X| # noqa
3043+ # | timestamp| None| X| X| X| X|datetime.datetime...| X| X| X| X| X| datetime.datetime...| datetime.datetime...| X| X| X| X| X| # noqa
3044+ # | string| None| u''|u'\x01'| u'\x01'| u'\x01'| u'\x01'| u'\x01'| u'\x01'| u'\x01'| u'\x01'| u''| X| X| u'a'| X| X| X| X| # noqa
3045+ # | decimal(10,0)| None| X| X| X| X| X| X| X| X| X| X| X| X| X| Decimal('1')| X| X| X| # noqa
3046+ # | array<int>| None| X| X| X| X| X| X| X| X| X| X| X| X| X| X| [1, 2, 3]| X| X| # noqa
3047+ # | map<string,int>| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| # noqa
3048+ # | struct<_1:int>| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| # noqa
3049+ # | binary| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| X| # noqa
3050+ # +-----------------------------+----------------------+----------+-------+--------+--------------------+--------------------+--------+---------+---------+---------+------------+-----------------------------------+-----------------------------------------------------+-----------------+------------------+-----------------------------+-----------+--------------------------------+ # noqa
3051+ #
3052+ # Note: A DDL-formatted string is used for 'SQL Type' for simplicity. This string can be
3053+ # used in `returnType`.
3054+ # Note: The values inside of the table are generated by `repr`.
3055+ # Note: Python 2 is used to generate this table since it is often used to check
3056+ # backward compatibility in practice.
3057+ # Note: Pandas 0.19.2 and PyArrow 0.9.0 are used.
3058+ # Note: The Singapore timezone is used.
3059+ # Note: 'X' means it throws an exception during the conversion.
3060+ # Note: 'binary' type is only supported with PyArrow 0.10.0+ (SPARK-23555).
3061+
30263062 # decorator @pandas_udf(returnType, functionType)
30273063 is_decorator = f is None or isinstance (f , (str , DataType ))
30283064
0 commit comments