diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py index a933a5de5c618..c160419390932 100644 --- a/python/pyspark/errors/error_classes.py +++ b/python/pyspark/errors/error_classes.py @@ -286,6 +286,46 @@ "An error occurred while calling <func_name>: <error_msg>." ] }, + "FIELD_DATA_TYPE_UNACCEPTABLE": { + "message": [ + "<data_type> can not accept object <obj> in type <obj_type>." + ] + }, + "FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME": { + "message": [ + "<field_name>: <data_type> can not accept object <obj> in type <obj_type>." + ] + }, + "FIELD_NOT_NULLABLE": { + "message": [ + "Field is not nullable, but got None." + ] + }, + "FIELD_NOT_NULLABLE_WITH_NAME": { + "message": [ + "<field_name>: This field is not nullable, but got None." + ] + }, + "FIELD_STRUCT_LENGTH_MISMATCH": { + "message": [ + "Length of object (<object_length>) does not match with length of fields (<field_length>)." + ] + }, + "FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME": { + "message": [ + "<field_name>: Length of object (<object_length>) does not match with length of fields (<field_length>)." + ] + }, + "FIELD_TYPE_MISMATCH": { + "message": [ + "<obj> is not an instance of type <data_type>." + ] + }, + "FIELD_TYPE_MISMATCH_WITH_NAME": { + "message": [ + "<field_name>: <obj> is not an instance of type <data_type>." + ] + }, "HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN": { "message": [ "Function `<func_name>` should return Column, got <return_type>." ] }, @@ -612,11 +652,6 @@ "<feature> is not implemented." ] }, - "NOT_INSTANCE_OF": { - "message": [ - "<value> is not an instance of type <type>." - ] - }, "NOT_INT": { "message": [ "Argument `<arg_name>` should be an int, got <arg_type>." diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index 2134c1fe46153..6b790bc568dab 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -1271,13 +1271,13 @@ def test_toDF_with_schema_string(self): # number of fields must match.
self.assertRaisesRegex( - Exception, "LENGTH_SHOULD_BE_THE_SAME", lambda: rdd.toDF("key: int").collect() + Exception, "FIELD_STRUCT_LENGTH_MISMATCH", lambda: rdd.toDF("key: int").collect() ) # field types mismatch will cause exception at runtime. self.assertRaisesRegex( Exception, - "CANNOT_ACCEPT_OBJECT_IN_TYPE", + "FIELD_DATA_TYPE_UNACCEPTABLE", lambda: rdd.toDF("key: float, value: string").collect(), ) diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 4316e4962c9d1..b0242033b051c 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -1458,9 +1458,9 @@ def test_verify_type_exception_msg(self): self.check_error( exception=pe.exception, - error_class="CANNOT_BE_NONE", + error_class="FIELD_NOT_NULLABLE_WITH_NAME", message_parameters={ - "arg_name": "obj", + "field_name": "test_name", }, ) @@ -1470,11 +1470,12 @@ def test_verify_type_exception_msg(self): self.check_error( exception=pe.exception, - error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE", + error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME", message_parameters={ "data_type": "IntegerType()", - "obj_name": "data", - "obj_type": "str", + "field_name": "field b in field a", + "obj": "'data'", + "obj_type": "<class 'str'>", }, ) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index df254ac42379a..72c7b62bb2cef 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -2196,9 +2196,16 @@ def verify_nullability(obj: Any) -> bool: if nullable: return True else: + if name is not None: + raise PySparkValueError( + error_class="FIELD_NOT_NULLABLE_WITH_NAME", + message_parameters={ + "field_name": str(name), + }, + ) raise PySparkValueError( - error_class="CANNOT_BE_NONE", - message_parameters={"arg_name": "obj"}, + error_class="FIELD_NOT_NULLABLE", + message_parameters={}, ) else: return False @@ -2213,12 +2220,22 @@ def assert_acceptable_types(obj: Any) -> None: def verify_acceptable_types(obj: Any)
-> None: # subclass of them can not be fromInternal in JVM if type(obj) not in _acceptable_types[_type]: + if name is not None: + raise PySparkTypeError( + error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME", + message_parameters={ + "field_name": str(name), + "data_type": str(dataType), + "obj": repr(obj), + "obj_type": str(type(obj)), + }, + ) raise PySparkTypeError( - error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE", + error_class="FIELD_DATA_TYPE_UNACCEPTABLE", message_parameters={ "data_type": str(dataType), - "obj_name": str(obj), - "obj_type": type(obj).__name__, + "obj": repr(obj), + "obj_type": str(type(obj)), }, ) @@ -2232,11 +2249,20 @@ def verify_value(obj: Any) -> None: def verify_udf(obj: Any) -> None: if not (hasattr(obj, "__UDT__") and obj.__UDT__ == dataType): + if name is not None: + raise PySparkValueError( + error_class="FIELD_TYPE_MISMATCH_WITH_NAME", + message_parameters={ + "field_name": str(name), + "obj": str(obj), + "data_type": str(dataType), + }, + ) raise PySparkValueError( - error_class="NOT_INSTANCE_OF", + error_class="FIELD_TYPE_MISMATCH", message_parameters={ - "value": str(obj), - "type": str(dataType), + "obj": str(obj), + "data_type": str(dataType), }, ) verifier(dataType.toInternal(obj)) @@ -2365,13 +2391,20 @@ def verify_struct(obj: Any) -> None: verifier(obj.get(f)) elif isinstance(obj, (tuple, list)): if len(obj) != len(verifiers): + if name is not None: + raise PySparkValueError( + error_class="FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME", + message_parameters={ + "field_name": str(name), + "object_length": str(len(obj)), + "field_length": str(len(verifiers)), + }, + ) raise PySparkValueError( - error_class="LENGTH_SHOULD_BE_THE_SAME", + error_class="FIELD_STRUCT_LENGTH_MISMATCH", message_parameters={ - "arg1": "obj", - "arg2": "fields", - "arg1_length": str(len(obj)), - "arg2_length": str(len(verifiers)), + "object_length": str(len(obj)), + "field_length": str(len(verifiers)), }, ) for v, (_, verifier) in zip(obj, verifiers): @@ 
-2381,12 +2414,22 @@ def verify_struct(obj: Any) -> None: for f, verifier in verifiers: verifier(d.get(f)) else: + if name is not None: + raise PySparkTypeError( + error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME", + message_parameters={ + "field_name": str(name), + "data_type": str(dataType), + "obj": repr(obj), + "obj_type": str(type(obj)), + }, + ) raise PySparkTypeError( - error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE", + error_class="FIELD_DATA_TYPE_UNACCEPTABLE", message_parameters={ - "data_type": "StructType", - "obj_name": str(obj), - "obj_type": type(obj).__name__, + "data_type": str(dataType), + "obj": repr(obj), + "obj_type": str(type(obj)), }, )