Add Python test for self-join on a grouped-map pandas UDF result (SPARK-24208)

mgaido91 · mgaido91 · commit 032fef003e3f · 2018-07-09T18:15:34.000+02:00
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
@@ -5925,6 +5925,22 @@ def test_invalid_args(self):
                     'mixture.*aggregate function.*group aggregate pandas UDF'):
                 df.groupby(df.id).agg(mean_udf(df.v), mean(df.v)).collect()
 
+    def test_self_join_with_pandas(self):
+        # Self-joining the output of a GROUPED_MAP pandas UDF must pass analysis.
+        import pyspark.sql.functions as F
+
+        @F.pandas_udf('key long, col string', F.PandasUDFType.GROUPED_MAP)
+        def dummy_pandas_udf(df):
+            return df[['key', 'col']]
+
+        rows = [Row(key=1, col='A'), Row(key=1, col='B'), Row(key=2, col='C')]
+        df_with_pandas = self.spark.createDataFrame(rows).groupBy('key').apply(dummy_pandas_udf)
+
+        # this was throwing an AnalysisException before SPARK-24208
+        res = df_with_pandas.alias('temp0').join(
+            df_with_pandas.alias('temp1'), F.col('temp0.key') == F.col('temp1.key'))
+        self.assertEqual(res.count(), 5)  # key 1: 2x2 matches, key 2: 1x1
+
 
 @unittest.skipIf(
     not _have_pandas or not _have_pyarrow,