From 0fa5a39ad1979886ae9c2f28bd594b9326a14422 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Wed, 24 Jan 2024 12:12:31 +0900
Subject: [PATCH 1/8] Improve error message when createDataFrame has illegal
 nullable

---
 python/pyspark/errors/error_classes.py | 5 +++++
 python/pyspark/sql/types.py            | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index a643615803c2e..e26769c821444 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -742,6 +742,11 @@
       "Only allows <arg_name> to be a path without scheme, and Spark Driver should use the default scheme to determine the destination file system."
     ]
   },
+  "NULLABILITY_CHECK_FAILED": {
+    "message": [
+      "The nullability check failed. Make sure the parameter does not contain None."
+    ]
+  },
   "ONLY_ALLOWED_FOR_SINGLE_COLUMN": {
     "message": [
       "Argument `<arg_name>` can only be provided for a single column."
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 9afeb651c1878..45b94b116f8de 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2197,8 +2197,8 @@ def verify_nullability(obj: Any) -> bool:
                 return True
             else:
                 raise PySparkValueError(
-                    error_class="CANNOT_BE_NONE",
-                    message_parameters={"arg_name": "obj"},
+                    error_class="NULLABILITY_CHECK_FAILED",
+                    message_parameters={},
                 )
         else:
             return False

From 57ffa2c6d312ac73a460c5fa877da0ef75ac56ff Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Wed, 24 Jan 2024 12:20:02 +0900
Subject: [PATCH 2/8] Trigger CI

From 18e83c39b09a7492b61adfdb4192963afd715364 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Wed, 24 Jan 2024 13:09:35 +0900
Subject: [PATCH 3/8] Fix error message regression

---
 python/pyspark/errors/error_classes.py | 15 ------
 python/pyspark/sql/types.py            | 72 ++++++--------------------
 2 files changed, 16 insertions(+), 71 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index e26769c821444..4051fdbc132c2 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -612,11 +612,6 @@
       "<feature> is not implemented."
     ]
   },
-  "NOT_INSTANCE_OF": {
-    "message": [
-      "<value> is not an instance of type <type>."
-    ]
-  },
   "NOT_INT": {
     "message": [
       "Argument `<arg_name>` should be an int, got <arg_type>."
@@ -742,11 +737,6 @@
       "Only allows <arg_name> to be a path without scheme, and Spark Driver should use the default scheme to determine the destination file system."
     ]
   },
-  "NULLABILITY_CHECK_FAILED": {
-    "message": [
-      "The nullability check failed. Make sure the parameter does not contain None."
-    ]
-  },
   "ONLY_ALLOWED_FOR_SINGLE_COLUMN": {
     "message": [
       "Argument `<arg_name>` can only be provided for a single column."
@@ -1108,11 +1098,6 @@
       "Value for `<arg_name>` must be True, got '<arg_value>'."
     ]
   },
-  "VALUE_OUT_OF_BOUND": {
-    "message": [
-      "Value for `<arg_name>` must be greater than <lower_bound> or less than <upper_bound>, got <actual>"
-    ]
-  },
   "WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION": {
     "message": [
       "Function `<func_name>` should take between 1 and 3 arguments, but the provided function takes <num_args>."
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 45b94b116f8de..d79257dfcb04a 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2196,10 +2196,7 @@ def verify_nullability(obj: Any) -> bool:
             if nullable:
                 return True
             else:
-                raise PySparkValueError(
-                    error_class="NULLABILITY_CHECK_FAILED",
-                    message_parameters={},
-                )
+                raise PySparkValueError(message=new_msg("This field is not nullable, but got None"))
         else:
             return False
 
@@ -2214,12 +2211,9 @@ def verify_acceptable_types(obj: Any) -> None:
         # subclass of them can not be fromInternal in JVM
         if type(obj) not in _acceptable_types[_type]:
             raise PySparkTypeError(
-                error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
-                message_parameters={
-                    "data_type": str(dataType),
-                    "obj_name": str(obj),
-                    "obj_type": type(obj).__name__,
-                },
+                message=new_msg(
+                    "%s can not accept object %r in type %s" % (dataType, obj, type(obj))
+                )
             )
 
     if isinstance(dataType, (StringType, CharType, VarcharType)):
@@ -2233,11 +2227,7 @@ def verify_value(obj: Any) -> None:
         def verify_udf(obj: Any) -> None:
             if not (hasattr(obj, "__UDT__") and obj.__UDT__ == dataType):
                 raise PySparkValueError(
-                    error_class="NOT_INSTANCE_OF",
-                    message_parameters={
-                        "value": str(obj),
-                        "type": str(dataType),
-                    },
+                    message=new_msg("%r is not an instance of type %r" % (obj, dataType))
                 )
             verifier(dataType.toInternal(obj))
 
@@ -2250,13 +2240,7 @@ def verify_byte(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -128 or obj > 127:
                 raise PySparkValueError(
-                    error_class="VALUE_OUT_OF_BOUND",
-                    message_parameters={
-                        "arg_name": "obj",
-                        "lower_bound": "127",
-                        "upper_bound": "-127",
-                        "actual": str(obj),
-                    },
+                    message=new_msg("object of ByteType out of range, got: %s" % obj)
                 )
 
         verify_value = verify_byte
@@ -2268,13 +2252,7 @@ def verify_short(obj: Any) -> None:
             verify_acceptable_types(obj)
            if obj < -32768 or obj > 32767:
                 raise PySparkValueError(
-                    error_class="VALUE_OUT_OF_BOUND",
-                    message_parameters={
-                        "arg_name": "obj",
-                        "lower_bound": "32767",
-                        "upper_bound": "-32768",
-                        "actual": str(obj),
-                    },
+                    message=new_msg("object of ShortType out of range, got: %s" % obj)
                 )
 
         verify_value = verify_short
@@ -2286,13 +2264,7 @@ def verify_integer(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -2147483648 or obj > 2147483647:
                 raise PySparkValueError(
-                    error_class="VALUE_OUT_OF_BOUND",
-                    message_parameters={
-                        "arg_name": "obj",
-                        "lower_bound": "2147483647",
-                        "upper_bound": "-2147483648",
-                        "actual": str(obj),
-                    },
+                    message=new_msg("object of IntegerType out of range, got: %s" % obj)
                 )
 
         verify_value = verify_integer
@@ -2304,13 +2276,7 @@ def verify_long(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -9223372036854775808 or obj > 9223372036854775807:
                 raise PySparkValueError(
-                    error_class="VALUE_OUT_OF_BOUND",
-                    message_parameters={
-                        "arg_name": "obj",
-                        "lower_bound": "9223372036854775807",
-                        "upper_bound": "-9223372036854775808",
-                        "actual": str(obj),
-                    },
+                    message=new_msg("object of LongType out of range, got: %s" % obj)
                 )
 
         verify_value = verify_long
@@ -2358,13 +2324,10 @@ def verify_struct(obj: Any) -> None:
         elif isinstance(obj, (tuple, list)):
             if len(obj) != len(verifiers):
                 raise PySparkValueError(
-                    error_class="LENGTH_SHOULD_BE_THE_SAME",
-                    message_parameters={
-                        "arg1": "obj",
-                        "arg2": "fields",
-                        "arg1_length": str(len(obj)),
-                        "arg2_length": str(len(verifiers)),
-                    },
+                    message=new_msg(
+                        "Length of object (%d) does not match with "
+                        "length of fields (%d)" % (len(obj), len(verifiers))
+                    )
                 )
             for v, (_, verifier) in zip(obj, verifiers):
                 verifier(v)
@@ -2374,12 +2337,9 @@ def verify_struct(obj: Any) -> None:
                 verifier(d.get(f))
             else:
                 raise PySparkTypeError(
-                    error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
-                    message_parameters={
-                        "data_type": "StructType",
-                        "obj_name": str(obj),
-                        "obj_type": type(obj).__name__,
-                    },
+                    message=new_msg(
+                        "StructType can not accept object %r in type %s" % (obj, type(obj))
+                    )
                 )
 
         verify_value = verify_struct

From fcb6c5c53a1d996b886bddb760d475dd0674d627 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Tue, 30 Jan 2024 10:17:02 +0900
Subject: [PATCH 4/8] Use error class

---
 python/pyspark/errors/error_classes.py | 20 ++++++++++
 python/pyspark/sql/types.py            | 52 +++++++++++++++++++-----
 2 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index 4051fdbc132c2..c0b52d8c6ac0c 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -286,6 +286,26 @@
       "An error occurred while calling <func_name>: <error_msg>."
     ]
   },
+  "FIELD_NOT_NULLABLE": {
+    "message": [
+      "<field_name>: This field is not nullable, but got None."
+    ]
+  },
+  "FIELD_STRUCT_LENGTH_MISMATCH": {
+    "message": [
+      "<field_name>: Length of object (<object_length>) does not match with length of fields (<field_length>)."
+    ]
+  },
+  "FIELD_TYPE_MISMATCH": {
+    "message": [
+      "<field_name>: <obj> is not an instance of type <data_type>."
+    ]
+  },
+  "FIELD_VALUE_OUT_OF_RANGE": {
+    "message": [
+      "<field_name>: object of <data_type> out of range, got: <obj>."
+    ]
+  },
   "HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN": {
     "message": [
       "Function `<func_name>` should return Column, got <return_type>."
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index d79257dfcb04a..52052bc4f679c 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2196,7 +2196,12 @@ def verify_nullability(obj: Any) -> bool:
             if nullable:
                 return True
             else:
-                raise PySparkValueError(message=new_msg("This field is not nullable, but got None"))
+                raise PySparkValueError(
+                    error_class="FIELD_NOT_NULLABLE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                    },
+                )
         else:
             return False
 
@@ -2227,7 +2232,12 @@ def verify_value(obj: Any) -> None:
         def verify_udf(obj: Any) -> None:
             if not (hasattr(obj, "__UDT__") and obj.__UDT__ == dataType):
                 raise PySparkValueError(
-                    message=new_msg("%r is not an instance of type %r" % (obj, dataType))
+                    error_class="FIELD_TYPE_MISMATCH",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "obj": str(obj),
+                        "data_type": str(dataType),
+                    },
                 )
             verifier(dataType.toInternal(obj))
 
@@ -2241,7 +2251,12 @@ def verify_byte(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -128 or obj > 127:
                 raise PySparkValueError(
-                    message=new_msg("object of ByteType out of range, got: %s" % obj)
+                    error_class="FIELD_VALUE_OUT_OF_RANGE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "data_type": "ByteType",
+                        "obj": str(obj),
+                    },
                 )
 
         verify_value = verify_byte
@@ -2253,7 +2268,12 @@ def verify_short(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -32768 or obj > 32767:
                 raise PySparkValueError(
-                    message=new_msg("object of ShortType out of range, got: %s" % obj)
+                    error_class="FIELD_VALUE_OUT_OF_RANGE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "data_type": "ShortType",
+                        "obj": str(obj),
+                    },
                 )
 
         verify_value = verify_short
@@ -2265,7 +2285,12 @@ def verify_integer(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -2147483648 or obj > 2147483647:
                 raise PySparkValueError(
-                    message=new_msg("object of IntegerType out of range, got: %s" % obj)
+                    error_class="FIELD_VALUE_OUT_OF_RANGE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "data_type": "IntegerType",
+                        "obj": str(obj),
+                    },
                 )
 
         verify_value = verify_integer
@@ -2277,7 +2302,12 @@ def verify_long(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -9223372036854775808 or obj > 9223372036854775807:
                 raise PySparkValueError(
-                    message=new_msg("object of LongType out of range, got: %s" % obj)
+                    error_class="FIELD_VALUE_OUT_OF_RANGE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "data_type": "LongType",
+                        "obj": str(obj),
+                    },
                 )
 
         verify_value = verify_long
@@ -2324,10 +2354,12 @@ def verify_struct(obj: Any) -> None:
         elif isinstance(obj, (tuple, list)):
             if len(obj) != len(verifiers):
                 raise PySparkValueError(
-                    message=new_msg(
-                        "Length of object (%d) does not match with "
-                        "length of fields (%d)" % (len(obj), len(verifiers))
-                    )
+                    error_class="STRUCT_LENGTH_MISMATCH",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "object_length": str(len(obj)),
+                        "field_length": str(len(verifiers)),
+                    },
                 )
             for v, (_, verifier) in zip(obj, verifiers):
                 verifier(v)

From d49e74c515f98ca0d8dd180f8dfd699f187ce63f Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Tue, 30 Jan 2024 10:17:46 +0900
Subject: [PATCH 5/8] fix typo

---
 python/pyspark/sql/types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 52052bc4f679c..c46a15065bc31 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2354,7 +2354,7 @@ def verify_struct(obj: Any) -> None:
         elif isinstance(obj, (tuple, list)):
             if len(obj) != len(verifiers):
                 raise PySparkValueError(
-                    error_class="STRUCT_LENGTH_MISMATCH",
+                    error_class="FIELD_STRUCT_LENGTH_MISMATCH",
                     message_parameters={
                         "field_name": name if name is not None else "",
                         "object_length": str(len(obj)),

From f67953d3755571ae29046abb6239515c35db4221 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Tue, 30 Jan 2024 14:00:11 +0900
Subject: [PATCH 6/8] Fix test & error class

---
 python/pyspark/errors/error_classes.py     |  5 +++++
 python/pyspark/sql/tests/test_dataframe.py |  7 +++++--
 python/pyspark/sql/types.py                | 20 ++++++++++++------
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index c0b52d8c6ac0c..b3a7e449d9fd2 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -286,6 +286,11 @@
       "An error occurred while calling <func_name>: <error_msg>."
     ]
   },
+  "FIELD_DATA_TYPE_UNACCEPTABLE": {
+    "message": [
+      "<field_name>: <data_type> can not accept object <obj> in type <obj_type>."
+    ]
+  },
   "FIELD_NOT_NULLABLE": {
     "message": [
       "<field_name>: This field is not nullable, but got None."
diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index 2134c1fe46153..af3fe0df72314 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -1271,13 +1271,13 @@ def test_toDF_with_schema_string(self):
 
         # number of fields must match.
         self.assertRaisesRegex(
-            Exception, "LENGTH_SHOULD_BE_THE_SAME", lambda: rdd.toDF("key: int").collect()
+            Exception, "FIELD_STRUCT_LENGTH_MISMATCH", lambda: rdd.toDF("key: int").collect()
         )
 
         # field types mismatch will cause exception at runtime.
         self.assertRaisesRegex(
             Exception,
-            "CANNOT_ACCEPT_OBJECT_IN_TYPE",
+            "FIELD_DATA_TYPE_UNACCEPTABLE",
             lambda: rdd.toDF("key: float, value: string").collect(),
         )
@@ -2123,6 +2123,9 @@ def test_query_execution_listener_on_collect_with_arrow(self):
 
 
 class DataFrameTests(DataFrameTestsMixin, ReusedSQLTestCase):
+    def test_toDF_with_schema_string(self):
+        super().test_toDF_with_schema_string()
+
     pass
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index c46a15065bc31..f2d1d8deb1155 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2216,9 +2216,13 @@ def verify_acceptable_types(obj: Any) -> None:
         # subclass of them can not be fromInternal in JVM
         if type(obj) not in _acceptable_types[_type]:
             raise PySparkTypeError(
-                message=new_msg(
-                    "%s can not accept object %r in type %s" % (dataType, obj, type(obj))
-                )
+                error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
+                message_parameters={
+                    "field_name": name if name is not None else "",
+                    "data_type": str(dataType),
+                    "obj": repr(obj),
+                    "obj_type": str(type(obj)),
+                },
             )
 
     if isinstance(dataType, (StringType, CharType, VarcharType)):
@@ -2369,9 +2373,13 @@ def verify_struct(obj: Any) -> None:
                 verifier(d.get(f))
             else:
                 raise PySparkTypeError(
-                    message=new_msg(
-                        "StructType can not accept object %r in type %s" % (obj, type(obj))
-                    )
+                    error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "data_type": str(dataType),
+                        "obj": repr(obj),
+                        "obj_type": str(type(obj)),
+                    },
                 )
 
         verify_value = verify_struct

From 04a07ea9365119616b1a3fa9faaee1b33a345128 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Tue, 13 Feb 2024 19:52:40 +0900
Subject: [PATCH 7/8] fix test

---
 python/pyspark/sql/tests/test_types.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index 4316e4962c9d1..eafd22db2eb03 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -1458,9 +1458,9 @@ def test_verify_type_exception_msg(self):
 
         self.check_error(
             exception=pe.exception,
-            error_class="CANNOT_BE_NONE",
+            error_class="FIELD_NOT_NULLABLE",
             message_parameters={
-                "arg_name": "obj",
+                "field_name": "test_name",
             },
         )
@@ -1470,11 +1470,12 @@ def test_verify_type_exception_msg(self):
 
         self.check_error(
             exception=pe.exception,
-            error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
+            error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
             message_parameters={
                 "data_type": "IntegerType()",
-                "obj_name": "data",
-                "obj_type": "str",
+                "field_name": "field b in field a",
+                "obj": "'data'",
+                "obj_type": "<class 'str'>",
             },
         )

From f8ab11ae5ebea44494382a246d2a39c6060c9fb9 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Fri, 16 Feb 2024 17:40:37 +0900
Subject: [PATCH 8/8] Add error classes to separate the field_name case

---
 python/pyspark/errors/error_classes.py     | 25 ++++++
 python/pyspark/sql/tests/test_dataframe.py |  3 ---
 python/pyspark/sql/types.py                | 93 +++++++++++++++++++---
 3 files changed, 107 insertions(+), 14 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index b3a7e449d9fd2..6b2bcd7eee5c2 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -287,21 +287,41 @@
     ]
   },
   "FIELD_DATA_TYPE_UNACCEPTABLE": {
+    "message": [
+      "<data_type> can not accept object <obj> in type <obj_type>."
+    ]
+  },
+  "FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME": {
     "message": [
       "<field_name>: <data_type> can not accept object <obj> in type <obj_type>."
     ]
   },
   "FIELD_NOT_NULLABLE": {
+    "message": [
+      "Field is not nullable, but got None."
+    ]
+  },
+  "FIELD_NOT_NULLABLE_WITH_NAME": {
     "message": [
       "<field_name>: This field is not nullable, but got None."
     ]
   },
   "FIELD_STRUCT_LENGTH_MISMATCH": {
+    "message": [
+      "Length of object (<object_length>) does not match with length of fields (<field_length>)."
+    ]
+  },
+  "FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME": {
     "message": [
       "<field_name>: Length of object (<object_length>) does not match with length of fields (<field_length>)."
     ]
   },
   "FIELD_TYPE_MISMATCH": {
+    "message": [
+      "<obj> is not an instance of type <data_type>."
+    ]
+  },
+  "FIELD_TYPE_MISMATCH_WITH_NAME": {
     "message": [
       "<field_name>: <obj> is not an instance of type <data_type>."
     ]
@@ -311,6 +331,11 @@
       "<field_name>: object of <data_type> out of range, got: <obj>."
     ]
   },
+  "FIELD_VALUE_OUT_OF_RANGE_WITH_NAME": {
+    "message": [
+      "Object of <data_type> out of range, got: <obj>."
+    ]
+  },
   "HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN": {
     "message": [
       "Function `<func_name>` should return Column, got <return_type>."
diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index af3fe0df72314..6b790bc568dab 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -2123,9 +2123,6 @@ def test_query_execution_listener_on_collect_with_arrow(self):
 
 
 class DataFrameTests(DataFrameTestsMixin, ReusedSQLTestCase):
-    def test_toDF_with_schema_string(self):
-        super().test_toDF_with_schema_string()
-
     pass
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index f2d1d8deb1155..a36c5cac20551 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2196,11 +2196,16 @@ def verify_nullability(obj: Any) -> bool:
             if nullable:
                 return True
             else:
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_NOT_NULLABLE_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_NOT_NULLABLE",
-                    message_parameters={
-                        "field_name": name if name is not None else "",
-                    },
+                    message_parameters={},
                 )
         else:
             return False
@@ -2215,10 +2220,19 @@ def assert_acceptable_types(obj: Any) -> None:
     def verify_acceptable_types(obj: Any) -> None:
         # subclass of them can not be fromInternal in JVM
         if type(obj) not in _acceptable_types[_type]:
+            if name is not None:
+                raise PySparkTypeError(
+                    error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME",
+                    message_parameters={
+                        "field_name": str(name),
+                        "data_type": str(dataType),
+                        "obj": repr(obj),
+                        "obj_type": str(type(obj)),
+                    },
+                )
             raise PySparkTypeError(
                 error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
                 message_parameters={
-                    "field_name": name if name is not None else "",
                     "data_type": str(dataType),
                     "obj": repr(obj),
                     "obj_type": str(type(obj)),
@@ -2235,10 +2249,18 @@ def verify_value(obj: Any) -> None:
         def verify_udf(obj: Any) -> None:
             if not (hasattr(obj, "__UDT__") and obj.__UDT__ == dataType):
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_TYPE_MISMATCH_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "obj": str(obj),
+                            "data_type": str(dataType),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_TYPE_MISMATCH",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "obj": str(obj),
                         "data_type": str(dataType),
                     },
@@ -2253,10 +2275,18 @@ def verify_byte(obj: Any) -> None:
             assert_acceptable_types(obj)
             verify_acceptable_types(obj)
             if obj < -128 or obj > 127:
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_VALUE_OUT_OF_RANGE_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "data_type": "ByteType",
+                            "obj": str(obj),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_VALUE_OUT_OF_RANGE",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "data_type": "ByteType",
                         "obj": str(obj),
                     },
@@ -2270,10 +2300,18 @@ def verify_short(obj: Any) -> None:
             assert_acceptable_types(obj)
             verify_acceptable_types(obj)
             if obj < -32768 or obj > 32767:
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_VALUE_OUT_OF_RANGE_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "data_type": "ShortType",
+                            "obj": str(obj),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_VALUE_OUT_OF_RANGE",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "data_type": "ShortType",
                         "obj": str(obj),
                     },
@@ -2287,10 +2325,18 @@ def verify_integer(obj: Any) -> None:
             assert_acceptable_types(obj)
             verify_acceptable_types(obj)
             if obj < -2147483648 or obj > 2147483647:
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_VALUE_OUT_OF_RANGE_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "data_type": "IntegerType",
+                            "obj": str(obj),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_VALUE_OUT_OF_RANGE",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "data_type": "IntegerType",
                         "obj": str(obj),
                     },
@@ -2304,10 +2350,18 @@ def verify_long(obj: Any) -> None:
             assert_acceptable_types(obj)
             verify_acceptable_types(obj)
             if obj < -9223372036854775808 or obj > 9223372036854775807:
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_VALUE_OUT_OF_RANGE_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "data_type": "LongType",
+                            "obj": str(obj),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_VALUE_OUT_OF_RANGE",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "data_type": "LongType",
                         "obj": str(obj),
                     },
@@ -2357,10 +2411,18 @@ def verify_struct(obj: Any) -> None:
                     verifier(obj.get(f))
         elif isinstance(obj, (tuple, list)):
             if len(obj) != len(verifiers):
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "object_length": str(len(obj)),
+                            "field_length": str(len(verifiers)),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_STRUCT_LENGTH_MISMATCH",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "object_length": str(len(obj)),
                         "field_length": str(len(verifiers)),
                     },
@@ -2372,10 +2434,19 @@ def verify_struct(obj: Any) -> None:
             for f, verifier in verifiers:
                 verifier(d.get(f))
         else:
+            if name is not None:
+                raise PySparkTypeError(
+                    error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME",
+                    message_parameters={
+                        "field_name": str(name),
+                        "data_type": str(dataType),
+                        "obj": repr(obj),
+                        "obj_type": str(type(obj)),
+                    },
+                )
             raise PySparkTypeError(
                 error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
                 message_parameters={
-                    "field_name": name if name is not None else "",
                     "data_type": str(dataType),
                     "obj": repr(obj),
                     "obj_type": str(type(obj)),