Skip to content
Closed
45 changes: 40 additions & 5 deletions python/pyspark/errors/error_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,46 @@
"An error occurred while calling <func_name>: <error_msg>."
]
},
"FIELD_DATA_TYPE_UNACCEPTABLE": {
"message": [
"<data_type> can not accept object <obj> in type <obj_type>."
]
},
"FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME": {
"message": [
"<field_name>: <data_type> can not accept object <obj> in type <obj_type>."
]
},
"FIELD_NOT_NULLABLE": {
"message": [
"Field is not nullable, but got None."
]
},
"FIELD_NOT_NULLABLE_WITH_NAME": {
"message": [
"<field_name>: This field is not nullable, but got None."
]
},
"FIELD_STRUCT_LENGTH_MISMATCH": {
"message": [
"Length of object (<object_length>) does not match with length of fields (<field_length>)."
]
},
"FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME": {
"message": [
"<field_name>: Length of object (<object_length>) does not match with length of fields (<field_length>)."
]
},
"FIELD_TYPE_MISMATCH": {
"message": [
"<obj> is not an instance of type <data_type>."
]
},
"FIELD_TYPE_MISMATCH_WITH_NAME": {
"message": [
"<field_name>: <obj> is not an instance of type <data_type>."
]
},
"HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN": {
"message": [
"Function `<func_name>` should return Column, got <return_type>."
Expand Down Expand Up @@ -612,11 +652,6 @@
"<feature> is not implemented."
]
},
"NOT_INSTANCE_OF": {
"message": [
"<value> is not an instance of type <type>."
]
},
"NOT_INT": {
"message": [
"Argument `<arg_name>` should be an int, got <arg_type>."
Expand Down
4 changes: 2 additions & 2 deletions python/pyspark/sql/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1271,13 +1271,13 @@ def test_toDF_with_schema_string(self):

# number of fields must match.
self.assertRaisesRegex(
Exception, "LENGTH_SHOULD_BE_THE_SAME", lambda: rdd.toDF("key: int").collect()
Exception, "FIELD_STRUCT_LENGTH_MISMATCH", lambda: rdd.toDF("key: int").collect()
)

# field types mismatch will cause exception at runtime.
self.assertRaisesRegex(
Exception,
"CANNOT_ACCEPT_OBJECT_IN_TYPE",
"FIELD_DATA_TYPE_UNACCEPTABLE",
lambda: rdd.toDF("key: float, value: string").collect(),
)

Expand Down
11 changes: 6 additions & 5 deletions python/pyspark/sql/tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -1458,9 +1458,9 @@ def test_verify_type_exception_msg(self):

self.check_error(
exception=pe.exception,
error_class="CANNOT_BE_NONE",
error_class="FIELD_NOT_NULLABLE_WITH_NAME",
message_parameters={
"arg_name": "obj",
"field_name": "test_name",
},
)

Expand All @@ -1470,11 +1470,12 @@ def test_verify_type_exception_msg(self):

self.check_error(
exception=pe.exception,
error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME",
message_parameters={
"data_type": "IntegerType()",
"obj_name": "data",
"obj_type": "str",
"field_name": "field b in field a",
"obj": "'data'",
"obj_type": "<class 'str'>",
},
)

Expand Down
77 changes: 60 additions & 17 deletions python/pyspark/sql/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2196,9 +2196,16 @@ def verify_nullability(obj: Any) -> bool:
if nullable:
return True
else:
if name is not None:
raise PySparkValueError(
error_class="FIELD_NOT_NULLABLE_WITH_NAME",
message_parameters={
"field_name": str(name),
},
)
raise PySparkValueError(
error_class="CANNOT_BE_NONE",
message_parameters={"arg_name": "obj"},
error_class="FIELD_NOT_NULLABLE",
message_parameters={},
)
else:
return False
Expand All @@ -2213,12 +2220,22 @@ def assert_acceptable_types(obj: Any) -> None:
def verify_acceptable_types(obj: Any) -> None:
# subclass of them can not be fromInternal in JVM
if type(obj) not in _acceptable_types[_type]:
if name is not None:
raise PySparkTypeError(
error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME",
message_parameters={
"field_name": str(name),
"data_type": str(dataType),
"obj": repr(obj),
"obj_type": str(type(obj)),
},
)
raise PySparkTypeError(
error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
message_parameters={
"data_type": str(dataType),
"obj_name": str(obj),
"obj_type": type(obj).__name__,
"obj": repr(obj),
"obj_type": str(type(obj)),
},
)

Expand All @@ -2232,11 +2249,20 @@ def verify_value(obj: Any) -> None:

def verify_udf(obj: Any) -> None:
if not (hasattr(obj, "__UDT__") and obj.__UDT__ == dataType):
if name is not None:
raise PySparkValueError(
error_class="FIELD_TYPE_MISMATCH_WITH_NAME",
message_parameters={
"field_name": str(name),
"obj": str(obj),
"data_type": str(dataType),
},
)
raise PySparkValueError(
error_class="NOT_INSTANCE_OF",
error_class="FIELD_TYPE_MISMATCH",
message_parameters={
"value": str(obj),
"type": str(dataType),
"obj": str(obj),
"data_type": str(dataType),
},
)
verifier(dataType.toInternal(obj))
Expand Down Expand Up @@ -2365,13 +2391,20 @@ def verify_struct(obj: Any) -> None:
verifier(obj.get(f))
elif isinstance(obj, (tuple, list)):
if len(obj) != len(verifiers):
if name is not None:
raise PySparkValueError(
error_class="FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME",
message_parameters={
"field_name": str(name),
"object_length": str(len(obj)),
"field_length": str(len(verifiers)),
},
)
raise PySparkValueError(
error_class="LENGTH_SHOULD_BE_THE_SAME",
error_class="FIELD_STRUCT_LENGTH_MISMATCH",
message_parameters={
"arg1": "obj",
"arg2": "fields",
"arg1_length": str(len(obj)),
"arg2_length": str(len(verifiers)),
"object_length": str(len(obj)),
"field_length": str(len(verifiers)),
},
)
for v, (_, verifier) in zip(obj, verifiers):
Expand All @@ -2381,12 +2414,22 @@ def verify_struct(obj: Any) -> None:
for f, verifier in verifiers:
verifier(d.get(f))
else:
if name is not None:
raise PySparkTypeError(
error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME",
message_parameters={
"field_name": str(name),
"data_type": str(dataType),
"obj": repr(obj),
"obj_type": str(type(obj)),
},
)
raise PySparkTypeError(
error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
message_parameters={
"data_type": "StructType",
"obj_name": str(obj),
"obj_type": type(obj).__name__,
"data_type": str(dataType),
"obj": repr(obj),
"obj_type": str(type(obj)),
},
)

Expand Down