diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index dae5f3d122ee8..f146d06e3e3d2 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -223,6 +223,11 @@ def select(self, __cols: Union[List[Column], List[str]]) -> ParentDataFrame: def select(self, *cols: "ColumnOrName") -> ParentDataFrame: # type: ignore[misc] if len(cols) == 1 and isinstance(cols[0], list): cols = cols[0] + if any(not isinstance(c, (str, Column)) for c in cols): + raise PySparkTypeError( + error_class="NOT_LIST_OF_COLUMN_OR_STR", + message_parameters={"arg_name": "columns"}, + ) return DataFrame( plan.Project(self._plan, [F._to_col(c) for c in cols]), session=self._session, diff --git a/python/pyspark/sql/tests/connect/test_connect_error.py b/python/pyspark/sql/tests/connect/test_connect_error.py index 8c2dd71c0c6ab..4677f3b84d754 100644 --- a/python/pyspark/sql/tests/connect/test_connect_error.py +++ b/python/pyspark/sql/tests/connect/test_connect_error.py @@ -21,6 +21,7 @@ from pyspark.errors.exceptions.base import SessionNotSameException from pyspark.sql.types import Row from pyspark.testing.connectutils import should_test_connect +from pyspark.errors import PySparkTypeError from pyspark.errors.exceptions.connect import AnalysisException from pyspark.sql.tests.connect.test_connect_basic import SparkConnectSQLTestCase @@ -214,6 +215,16 @@ def test_column_cannot_be_constructed_from_string(self): with self.assertRaises(TypeError): Column("col") + def test_select_none(self): + with self.assertRaises(PySparkTypeError) as e1: + self.connect.range(1).select(None) + + self.check_error( + exception=e1.exception, + error_class="NOT_LIST_OF_COLUMN_OR_STR", + message_parameters={"arg_name": "columns"}, + ) + if __name__ == "__main__": from pyspark.sql.tests.connect.test_connect_error import * # noqa: F401