Skip to content

Commit 032fef0

Browse files
committed
add python test
1 parent 5f325a4 commit 032fef0

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

python/pyspark/sql/tests.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5925,6 +5925,22 @@ def test_invalid_args(self):
59255925
'mixture.*aggregate function.*group aggregate pandas UDF'):
59265926
df.groupby(df.id).agg(mean_udf(df.v), mean(df.v)).collect()
59275927

5928+
def test_self_join_with_pandas(self):
5929+
import pyspark.sql.functions as F
5930+
5931+
@F.pandas_udf('key long, col string', F.PandasUDFType.GROUPED_MAP)
5932+
def dummy_pandas_udf(df):
5933+
return df[['key','col']]
5934+
5935+
df = self.spark.createDataFrame([Row(key=1, col='A'), Row(key=1, col='B'),
5936+
Row(key=2, col='C')])
5937+
dfWithPandas = df.groupBy('key').apply(dummy_pandas_udf)
5938+
5939+
# this was throwing an AnalysisException before SPARK-24208
5940+
res = dfWithPandas.alias('temp0').join(dfWithPandas.alias('temp1'),
5941+
F.col('temp0.key') == F.col('temp1.key'))
5942+
self.assertEquals(res.count(), 5)
5943+
59285944

59295945
@unittest.skipIf(
59305946
not _have_pandas or not _have_pyarrow,

0 commit comments

Comments
 (0)