From 3cd53f39f23ebd1b9b4134a9ac22348b301f8bd4 Mon Sep 17 00:00:00 2001
From: "Michael (Stu) Stewart"
Date: Sat, 3 Mar 2018 13:54:53 -0800
Subject: [PATCH 1/2] [SPARK-23569][PYTHON] Allow pandas_udf to work with Python 3 style type-annotated functions

---
 python/pyspark/sql/udf.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py
index e5b35fc60e167..8f6111fe6d6a5 100644
--- a/python/pyspark/sql/udf.py
+++ b/python/pyspark/sql/udf.py
@@ -42,10 +42,15 @@ def _create_udf(f, returnType, evalType):
                     PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF):
 
         import inspect
+        import sys
         from pyspark.sql.utils import require_minimum_pyarrow_version
 
         require_minimum_pyarrow_version()
-        argspec = inspect.getargspec(f)
+
+        if sys.version_info[0] < 3:
+            argspec = inspect.getargspec(f)
+        else:
+            argspec = inspect.getfullargspec(f)
 
         if evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF and len(argspec.args) == 0 and \
                 argspec.varargs is None:

From 0395690d8d2c719d306c46a08a7a2faf8469ecb9 Mon Sep 17 00:00:00 2001
From: "Michael (Stu) Stewart"
Date: Sun, 4 Mar 2018 12:23:28 -0800
Subject: [PATCH 2/2] Add test

---
 python/pyspark/sql/tests.py | 18 ++++++++++++++++++
 python/pyspark/sql/udf.py   |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 19653072ea316..fa3b7203e10ac 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -4381,6 +4381,24 @@ def test_timestamp_dst(self):
         result = df.withColumn('time', foo_udf(df.time))
         self.assertEquals(df.collect(), result.collect())
 
+    @unittest.skipIf(sys.version_info[:2] < (3, 5), "Type hints are supported from Python 3.5.")
+    def test_type_annotation(self):
+        from pyspark.sql.functions import pandas_udf
+        # Regression test to check that type hints can be used. See SPARK-23569.
+        # Note that the annotated function would be a syntax error at compile time
+        # on lower Python versions if 'exec' were not used. Also, we explicitly pass
+        # a separate dictionary so that the current 'locals()' is not modified.
+        #
+        # Hyukjin: this is an ugly way to test syntax that is specific to higher
+        # Python versions, and not something we should encourage, but it was the
+        # last resort I could come up with at the time.
+        _locals = {}
+        exec(
+            "import pandas as pd\ndef noop(col: pd.Series) -> pd.Series: return col",
+            _locals)
+        df = self.spark.range(1).select(pandas_udf(f=_locals['noop'], returnType='bigint')('id'))
+        self.assertEqual(df.first()[0], 0)
+
 
 @unittest.skipIf(
     not _have_pandas or not _have_pyarrow,
diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py
index 8f6111fe6d6a5..b9b490874f4fb 100644
--- a/python/pyspark/sql/udf.py
+++ b/python/pyspark/sql/udf.py
@@ -48,6 +48,8 @@ def _create_udf(f, returnType, evalType):
         require_minimum_pyarrow_version()
 
         if sys.version_info[0] < 3:
+            # `getargspec` is deprecated since Python 3.0 and cannot handle
+            # function annotations; use `getfullargspec` on Python 3. See SPARK-23569.
            argspec = inspect.getargspec(f)
         else:
             argspec = inspect.getfullargspec(f)
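
Note for reviewers (not part of the patch): below is a minimal, standalone Python 3 sketch of the failure mode this series fixes. On the Python 3 releases current at the time of this patch, `inspect.getargspec` raises `ValueError` for any function carrying type annotations, while `inspect.getfullargspec` handles them; Python 2 has no `getfullargspec` at all, which is why the change gates on the interpreter version. The helper name `_get_argspec` is illustrative only; the patch inlines the equivalent logic in `_create_udf`.

import inspect
import sys

# Build an annotated function the same way the new test does: via exec, so
# that a module containing this code would still parse on Python 2, where
# annotations are a syntax error. (The snippet itself targets Python 3.)
_ns = {}
exec("def noop(col: int) -> int: return col", _ns)
noop = _ns['noop']

# Pre-patch code path: `getargspec` rejects annotated functions on
# Python 3.0-3.10, and the attribute was removed in 3.11, hence the guard.
if hasattr(inspect, 'getargspec'):
    try:
        inspect.getargspec(noop)
    except ValueError as e:
        print("getargspec failed: %s" % e)


def _get_argspec(f):
    # Patched code path, extracted as a standalone helper (the name
    # `_get_argspec` is illustrative; the patch inlines this logic).
    if sys.version_info[0] < 3:
        return inspect.getargspec(f)
    return inspect.getfullargspec(f)


print(_get_argspec(noop).args)  # -> ['col']

The explicit version gate, rather than a try/except around `getfullargspec`, keeps the Python 2 and Python 3 paths obvious at a glance.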