From 0fa5a39ad1979886ae9c2f28bd594b9326a14422 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Wed, 24 Jan 2024 12:12:31 +0900
Subject: [PATCH 1/8] Improve error message when createDataFrame has illegal
 nullable

---
 python/pyspark/errors/error_classes.py | 5 +++++
 python/pyspark/sql/types.py            | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index a643615803c2e..e26769c821444 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -742,6 +742,11 @@
       "Only allows <arg_name> to be a path without scheme, and Spark Driver should use the default scheme to determine the destination file system."
     ]
   },
+  "NULLABILITY_CHECK_FAILED": {
+    "message": [
+      "The nullability check failed. Make sure the parameter does not contain None."
+    ]
+  },
   "ONLY_ALLOWED_FOR_SINGLE_COLUMN": {
     "message": [
       "Argument `<arg_name>` can only be provided for a single column."
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 9afeb651c1878..45b94b116f8de 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2197,8 +2197,8 @@ def verify_nullability(obj: Any) -> bool:
                 return True
             else:
                 raise PySparkValueError(
-                    error_class="CANNOT_BE_NONE",
-                    message_parameters={"arg_name": "obj"},
+                    error_class="NULLABILITY_CHECK_FAILED",
+                    message_parameters={},
                 )
         else:
             return False

From 57ffa2c6d312ac73a460c5fa877da0ef75ac56ff Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Wed, 24 Jan 2024 12:20:02 +0900
Subject: [PATCH 2/8] Trigger CI

From 18e83c39b09a7492b61adfdb4192963afd715364 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Wed, 24 Jan 2024 13:09:35 +0900
Subject: [PATCH 3/8] Fix error message regression

---
 python/pyspark/errors/error_classes.py | 15 ------
 python/pyspark/sql/types.py            | 72 ++++++--------------------
 2 files changed, 16 insertions(+), 71 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index e26769c821444..4051fdbc132c2 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -612,11 +612,6 @@
       "<feature> is not implemented."
     ]
   },
-  "NOT_INSTANCE_OF": {
-    "message": [
-      "<value> is not an instance of type <type>."
-    ]
-  },
   "NOT_INT": {
     "message": [
       "Argument `<arg_name>` should be an int, got <arg_type>."
@@ -742,11 +737,6 @@
       "Only allows <arg_name> to be a path without scheme, and Spark Driver should use the default scheme to determine the destination file system."
     ]
   },
-  "NULLABILITY_CHECK_FAILED": {
-    "message": [
-      "The nullability check failed. Make sure the parameter does not contain None."
-    ]
-  },
   "ONLY_ALLOWED_FOR_SINGLE_COLUMN": {
     "message": [
       "Argument `<arg_name>` can only be provided for a single column."
@@ -1108,11 +1098,6 @@
       "Value for `<arg_name>` must be True, got '<arg_value>'."
     ]
   },
-  "VALUE_OUT_OF_BOUND": {
-    "message": [
-      "Value for `<arg_name>` must be greater than <lower_bound> or less than <upper_bound>, got <actual>"
-    ]
-  },
   "WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION": {
     "message": [
       "Function `<func_name>` should take between 1 and 3 arguments, but the provided function takes <num_args>."
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 45b94b116f8de..d79257dfcb04a 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2196,10 +2196,7 @@ def verify_nullability(obj: Any) -> bool:
             if nullable:
                 return True
             else:
-                raise PySparkValueError(
-                    error_class="NULLABILITY_CHECK_FAILED",
-                    message_parameters={},
-                )
+                raise PySparkValueError(message=new_msg("This field is not nullable, but got None"))
         else:
             return False
 
@@ -2214,12 +2211,9 @@ def verify_acceptable_types(obj: Any) -> None:
         # subclass of them can not be fromInternal in JVM
         if type(obj) not in _acceptable_types[_type]:
             raise PySparkTypeError(
-                error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
-                message_parameters={
-                    "data_type": str(dataType),
-                    "obj_name": str(obj),
-                    "obj_type": type(obj).__name__,
-                },
+                message=new_msg(
+                    "%s can not accept object %r in type %s" % (dataType, obj, type(obj))
+                )
             )
 
     if isinstance(dataType, (StringType, CharType, VarcharType)):
@@ -2233,11 +2227,7 @@ def verify_value(obj: Any) -> None:
         def verify_udf(obj: Any) -> None:
             if not (hasattr(obj, "__UDT__") and obj.__UDT__ == dataType):
                 raise PySparkValueError(
-                    error_class="NOT_INSTANCE_OF",
-                    message_parameters={
-                        "value": str(obj),
-                        "type": str(dataType),
-                    },
+                    message=new_msg("%r is not an instance of type %r" % (obj, dataType))
                 )
             verifier(dataType.toInternal(obj))
 
@@ -2250,13 +2240,7 @@ def verify_byte(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -128 or obj > 127:
                 raise PySparkValueError(
-                    error_class="VALUE_OUT_OF_BOUND",
-                    message_parameters={
-                        "arg_name": "obj",
-                        "lower_bound": "127",
-                        "upper_bound": "-127",
-                        "actual": str(obj),
-                    },
+                    message=new_msg("object of ByteType out of range, got: %s" % obj)
                 )
 
         verify_value = verify_byte
@@ -2268,13 +2252,7 @@ def verify_short(obj: Any) -> None:
             verify_acceptable_types(obj)
            if obj < -32768 or obj > 32767:
                 raise PySparkValueError(
-                    error_class="VALUE_OUT_OF_BOUND",
-                    message_parameters={
-                        "arg_name": "obj",
-                        "lower_bound": "32767",
-                        "upper_bound": "-32768",
-                        "actual": str(obj),
-                    },
+                    message=new_msg("object of ShortType out of range, got: %s" % obj)
                 )
 
         verify_value = verify_short
@@ -2286,13 +2264,7 @@ def verify_integer(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -2147483648 or obj > 2147483647:
                 raise PySparkValueError(
-                    error_class="VALUE_OUT_OF_BOUND",
-                    message_parameters={
-                        "arg_name": "obj",
-                        "lower_bound": "2147483647",
-                        "upper_bound": "-2147483648",
-                        "actual": str(obj),
-                    },
+                    message=new_msg("object of IntegerType out of range, got: %s" % obj)
                 )
 
         verify_value = verify_integer
@@ -2304,13 +2276,7 @@ def verify_long(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -9223372036854775808 or obj > 9223372036854775807:
                 raise PySparkValueError(
-                    error_class="VALUE_OUT_OF_BOUND",
-                    message_parameters={
-                        "arg_name": "obj",
-                        "lower_bound": "9223372036854775807",
-                        "upper_bound": "-9223372036854775808",
-                        "actual": str(obj),
-                    },
+                    message=new_msg("object of LongType out of range, got: %s" % obj)
                 )
 
         verify_value = verify_long
@@ -2358,13 +2324,10 @@ def verify_struct(obj: Any) -> None:
         elif isinstance(obj, (tuple, list)):
             if len(obj) != len(verifiers):
                 raise PySparkValueError(
-                    error_class="LENGTH_SHOULD_BE_THE_SAME",
-                    message_parameters={
-                        "arg1": "obj",
-                        "arg2": "fields",
-                        "arg1_length": str(len(obj)),
-                        "arg2_length": str(len(verifiers)),
-                    },
+                    message=new_msg(
+                        "Length of object (%d) does not match with "
+                        "length of fields (%d)" % (len(obj), len(verifiers))
+                    )
                 )
             for v, (_, verifier) in zip(obj, verifiers):
                 verifier(v)
@@ -2374,12 +2337,9 @@ def verify_struct(obj: Any) -> None:
                 verifier(d.get(f))
             else:
                 raise PySparkTypeError(
-                    error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
-                    message_parameters={
-                        "data_type": "StructType",
-                        "obj_name": str(obj),
-                        "obj_type": type(obj).__name__,
-                    },
+                    message=new_msg(
+                        "StructType can not accept object %r in type %s" % (obj, type(obj))
+                    )
                 )
 
         verify_value = verify_struct

From fcb6c5c53a1d996b886bddb760d475dd0674d627 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Tue, 30 Jan 2024 10:17:02 +0900
Subject: [PATCH 4/8] Use error class

---
 python/pyspark/errors/error_classes.py | 20 ++++++++++
 python/pyspark/sql/types.py            | 52 +++++++++++++++++++-----
 2 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index 4051fdbc132c2..c0b52d8c6ac0c 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -286,6 +286,26 @@
       "An error occurred while calling <func_name>: <error_msg>."
     ]
   },
+  "FIELD_NOT_NULLABLE": {
+    "message": [
+      "<field_name>: This field is not nullable, but got None."
+    ]
+  },
+  "FIELD_STRUCT_LENGTH_MISMATCH": {
+    "message": [
+      "<field_name>: Length of object (<object_length>) does not match with length of fields (<field_length>)."
+    ]
+  },
+  "FIELD_TYPE_MISMATCH": {
+    "message": [
+      "<field_name>: <obj> is not an instance of type <data_type>."
+    ]
+  },
+  "FIELD_VALUE_OUT_OF_RANGE": {
+    "message": [
+      "<field_name>: object of <data_type> out of range, got: <obj>."
+    ]
+  },
   "HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN": {
     "message": [
       "Function `<func_name>` should return Column, got <return_type>."
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index d79257dfcb04a..52052bc4f679c 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2196,7 +2196,12 @@ def verify_nullability(obj: Any) -> bool:
             if nullable:
                 return True
             else:
-                raise PySparkValueError(message=new_msg("This field is not nullable, but got None"))
+                raise PySparkValueError(
+                    error_class="FIELD_NOT_NULLABLE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                    },
+                )
         else:
             return False
 
@@ -2227,7 +2232,12 @@ def verify_value(obj: Any) -> None:
         def verify_udf(obj: Any) -> None:
             if not (hasattr(obj, "__UDT__") and obj.__UDT__ == dataType):
                 raise PySparkValueError(
-                    message=new_msg("%r is not an instance of type %r" % (obj, dataType))
+                    error_class="FIELD_TYPE_MISMATCH",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "obj": str(obj),
+                        "data_type": str(dataType),
+                    },
                 )
             verifier(dataType.toInternal(obj))
 
@@ -2241,7 +2251,12 @@ def verify_byte(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -128 or obj > 127:
                 raise PySparkValueError(
-                    message=new_msg("object of ByteType out of range, got: %s" % obj)
+                    error_class="FIELD_VALUE_OUT_OF_RANGE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "data_type": "ByteType",
+                        "obj": str(obj),
+                    },
                 )
 
         verify_value = verify_byte
@@ -2253,7 +2268,12 @@ def verify_short(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -32768 or obj > 32767:
                 raise PySparkValueError(
-                    message=new_msg("object of ShortType out of range, got: %s" % obj)
+                    error_class="FIELD_VALUE_OUT_OF_RANGE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "data_type": "ShortType",
+                        "obj": str(obj),
+                    },
                 )
 
         verify_value = verify_short
@@ -2265,7 +2285,12 @@ def verify_integer(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -2147483648 or obj > 2147483647:
                 raise PySparkValueError(
-                    message=new_msg("object of IntegerType out of range, got: %s" % obj)
+                    error_class="FIELD_VALUE_OUT_OF_RANGE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "data_type": "IntegerType",
+                        "obj": str(obj),
+                    },
                 )
 
         verify_value = verify_integer
@@ -2277,7 +2302,12 @@ def verify_long(obj: Any) -> None:
             verify_acceptable_types(obj)
             if obj < -9223372036854775808 or obj > 9223372036854775807:
                 raise PySparkValueError(
-                    message=new_msg("object of LongType out of range, got: %s" % obj)
+                    error_class="FIELD_VALUE_OUT_OF_RANGE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "data_type": "LongType",
+                        "obj": str(obj),
+                    },
                 )
 
         verify_value = verify_long
@@ -2324,10 +2354,12 @@ def verify_struct(obj: Any) -> None:
         elif isinstance(obj, (tuple, list)):
             if len(obj) != len(verifiers):
                 raise PySparkValueError(
-                    message=new_msg(
-                        "Length of object (%d) does not match with "
-                        "length of fields (%d)" % (len(obj), len(verifiers))
-                    )
+                    error_class="STRUCT_LENGTH_MISMATCH",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "object_length": str(len(obj)),
+                        "field_length": str(len(verifiers)),
+                    },
                 )
             for v, (_, verifier) in zip(obj, verifiers):
                 verifier(v)

From d49e74c515f98ca0d8dd180f8dfd699f187ce63f Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Tue, 30 Jan 2024 10:17:46 +0900
Subject: [PATCH 5/8] fix typo

---
 python/pyspark/sql/types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 52052bc4f679c..c46a15065bc31 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2354,7 +2354,7 @@ def verify_struct(obj: Any) -> None:
         elif isinstance(obj, (tuple, list)):
             if len(obj) != len(verifiers):
                 raise PySparkValueError(
-                    error_class="STRUCT_LENGTH_MISMATCH",
+                    error_class="FIELD_STRUCT_LENGTH_MISMATCH",
                     message_parameters={
                         "field_name": name if name is not None else "",
                         "object_length": str(len(obj)),

From f67953d3755571ae29046abb6239515c35db4221 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Tue, 30 Jan 2024 14:00:11 +0900
Subject: [PATCH 6/8] Fix test & error class

---
 python/pyspark/errors/error_classes.py     |  5 +++++
 python/pyspark/sql/tests/test_dataframe.py |  7 +++++--
 python/pyspark/sql/types.py                | 20 ++++++++++++------
 3 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index c0b52d8c6ac0c..b3a7e449d9fd2 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -286,6 +286,11 @@
       "An error occurred while calling <func_name>: <error_msg>."
     ]
   },
+  "FIELD_DATA_TYPE_UNACCEPTABLE": {
+    "message": [
+      "<field_name>: <data_type> can not accept object <obj> in type <obj_type>."
+    ]
+  },
   "FIELD_NOT_NULLABLE": {
     "message": [
       "<field_name>: This field is not nullable, but got None."
diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index 2134c1fe46153..af3fe0df72314 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -1271,13 +1271,13 @@ def test_toDF_with_schema_string(self):
 
         # number of fields must match.
         self.assertRaisesRegex(
-            Exception, "LENGTH_SHOULD_BE_THE_SAME", lambda: rdd.toDF("key: int").collect()
+            Exception, "FIELD_STRUCT_LENGTH_MISMATCH", lambda: rdd.toDF("key: int").collect()
         )
 
         # field types mismatch will cause exception at runtime.
         self.assertRaisesRegex(
             Exception,
-            "CANNOT_ACCEPT_OBJECT_IN_TYPE",
+            "FIELD_DATA_TYPE_UNACCEPTABLE",
             lambda: rdd.toDF("key: float, value: string").collect(),
         )
@@ -2123,6 +2123,9 @@ def test_query_execution_listener_on_collect_with_arrow(self):
 
 
 class DataFrameTests(DataFrameTestsMixin, ReusedSQLTestCase):
+    def test_toDF_with_schema_string(self):
+        super().test_toDF_with_schema_string()
+
     pass
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index c46a15065bc31..f2d1d8deb1155 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2216,9 +2216,13 @@ def verify_acceptable_types(obj: Any) -> None:
         # subclass of them can not be fromInternal in JVM
         if type(obj) not in _acceptable_types[_type]:
             raise PySparkTypeError(
-                message=new_msg(
-                    "%s can not accept object %r in type %s" % (dataType, obj, type(obj))
-                )
+                error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
+                message_parameters={
+                    "field_name": name if name is not None else "",
+                    "data_type": str(dataType),
+                    "obj": repr(obj),
+                    "obj_type": str(type(obj)),
+                },
             )
 
     if isinstance(dataType, (StringType, CharType, VarcharType)):
@@ -2369,9 +2373,13 @@ def verify_struct(obj: Any) -> None:
                 verifier(d.get(f))
             else:
                 raise PySparkTypeError(
-                    message=new_msg(
-                        "StructType can not accept object %r in type %s" % (obj, type(obj))
-                    )
+                    error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
+                    message_parameters={
+                        "field_name": name if name is not None else "",
+                        "data_type": str(dataType),
+                        "obj": repr(obj),
+                        "obj_type": str(type(obj)),
+                    },
                 )
 
         verify_value = verify_struct

From 04a07ea9365119616b1a3fa9faaee1b33a345128 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Tue, 13 Feb 2024 19:52:40 +0900
Subject: [PATCH 7/8] fix test

---
 python/pyspark/sql/tests/test_types.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index 4316e4962c9d1..eafd22db2eb03 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -1458,9 +1458,9 @@ def test_verify_type_exception_msg(self):
 
         self.check_error(
             exception=pe.exception,
-            error_class="CANNOT_BE_NONE",
+            error_class="FIELD_NOT_NULLABLE",
             message_parameters={
-                "arg_name": "obj",
+                "field_name": "test_name",
             },
         )
@@ -1470,11 +1470,12 @@ def test_verify_type_exception_msg(self):
 
         self.check_error(
             exception=pe.exception,
-            error_class="CANNOT_ACCEPT_OBJECT_IN_TYPE",
+            error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
             message_parameters={
                 "data_type": "IntegerType()",
-                "obj_name": "data",
-                "obj_type": "str",
+                "field_name": "field b in field a",
+                "obj": "'data'",
+                "obj_type": "<class 'str'>",
             },
         )

From f8ab11ae5ebea44494382a246d2a39c6060c9fb9 Mon Sep 17 00:00:00 2001
From: Haejoon Lee
Date: Fri, 16 Feb 2024 17:40:37 +0900
Subject: [PATCH 8/8] Add error classes to separate the field_name case

---
 python/pyspark/errors/error_classes.py     | 25 ++++++
 python/pyspark/sql/tests/test_dataframe.py |  3 ---
 python/pyspark/sql/types.py                | 93 +++++++++++++++++++---
 3 files changed, 107 insertions(+), 14 deletions(-)

diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py
index b3a7e449d9fd2..6b2bcd7eee5c2 100644
--- a/python/pyspark/errors/error_classes.py
+++ b/python/pyspark/errors/error_classes.py
@@ -287,21 +287,41 @@
     ]
   },
   "FIELD_DATA_TYPE_UNACCEPTABLE": {
+    "message": [
+      "<data_type> can not accept object <obj> in type <obj_type>."
+    ]
+  },
+  "FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME": {
     "message": [
       "<field_name>: <data_type> can not accept object <obj> in type <obj_type>."
     ]
   },
   "FIELD_NOT_NULLABLE": {
+    "message": [
+      "Field is not nullable, but got None."
+    ]
+  },
+  "FIELD_NOT_NULLABLE_WITH_NAME": {
     "message": [
       "<field_name>: This field is not nullable, but got None."
     ]
   },
   "FIELD_STRUCT_LENGTH_MISMATCH": {
+    "message": [
+      "Length of object (<object_length>) does not match with length of fields (<field_length>)."
+    ]
+  },
+  "FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME": {
     "message": [
       "<field_name>: Length of object (<object_length>) does not match with length of fields (<field_length>)."
     ]
   },
   "FIELD_TYPE_MISMATCH": {
+    "message": [
+      "<obj> is not an instance of type <data_type>."
+    ]
+  },
+  "FIELD_TYPE_MISMATCH_WITH_NAME": {
     "message": [
       "<field_name>: <obj> is not an instance of type <data_type>."
     ]
@@ -311,6 +331,11 @@
       "<field_name>: object of <data_type> out of range, got: <obj>."
     ]
   },
+  "FIELD_VALUE_OUT_OF_RANGE_WITH_NAME": {
+    "message": [
+      "Object of <data_type> out of range, got: <obj>."
+    ]
+  },
   "HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN": {
     "message": [
       "Function `<func_name>` should return Column, got <return_type>."
diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index af3fe0df72314..6b790bc568dab 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -2123,9 +2123,6 @@ def test_query_execution_listener_on_collect_with_arrow(self):
 
 
 class DataFrameTests(DataFrameTestsMixin, ReusedSQLTestCase):
-    def test_toDF_with_schema_string(self):
-        super().test_toDF_with_schema_string()
-
     pass
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index f2d1d8deb1155..a36c5cac20551 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -2196,11 +2196,16 @@ def verify_nullability(obj: Any) -> bool:
             if nullable:
                 return True
             else:
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_NOT_NULLABLE_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_NOT_NULLABLE",
-                    message_parameters={
-                        "field_name": name if name is not None else "",
-                    },
+                    message_parameters={},
                 )
         else:
             return False
@@ -2215,10 +2220,19 @@ def assert_acceptable_types(obj: Any) -> None:
     def verify_acceptable_types(obj: Any) -> None:
         # subclass of them can not be fromInternal in JVM
         if type(obj) not in _acceptable_types[_type]:
+            if name is not None:
+                raise PySparkTypeError(
+                    error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME",
+                    message_parameters={
+                        "field_name": str(name),
+                        "data_type": str(dataType),
+                        "obj": repr(obj),
+                        "obj_type": str(type(obj)),
+                    },
+                )
             raise PySparkTypeError(
                 error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
                 message_parameters={
-                    "field_name": name if name is not None else "",
                     "data_type": str(dataType),
                     "obj": repr(obj),
                     "obj_type": str(type(obj)),
@@ -2235,10 +2249,18 @@ def verify_value(obj: Any) -> None:
         def verify_udf(obj: Any) -> None:
             if not (hasattr(obj, "__UDT__") and obj.__UDT__ == dataType):
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_TYPE_MISMATCH_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "obj": str(obj),
+                            "data_type": str(dataType),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_TYPE_MISMATCH",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "obj": str(obj),
                         "data_type": str(dataType),
                     },
@@ -2253,10 +2275,18 @@ def verify_byte(obj: Any) -> None:
             assert_acceptable_types(obj)
             verify_acceptable_types(obj)
             if obj < -128 or obj > 127:
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_VALUE_OUT_OF_RANGE_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "data_type": "ByteType",
+                            "obj": str(obj),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_VALUE_OUT_OF_RANGE",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "data_type": "ByteType",
                         "obj": str(obj),
                     },
@@ -2270,10 +2300,18 @@ def verify_short(obj: Any) -> None:
             assert_acceptable_types(obj)
             verify_acceptable_types(obj)
             if obj < -32768 or obj > 32767:
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_VALUE_OUT_OF_RANGE_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "data_type": "ShortType",
+                            "obj": str(obj),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_VALUE_OUT_OF_RANGE",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "data_type": "ShortType",
                         "obj": str(obj),
                     },
@@ -2287,10 +2325,18 @@ def verify_integer(obj: Any) -> None:
             assert_acceptable_types(obj)
             verify_acceptable_types(obj)
             if obj < -2147483648 or obj > 2147483647:
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_VALUE_OUT_OF_RANGE_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "data_type": "IntegerType",
+                            "obj": str(obj),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_VALUE_OUT_OF_RANGE",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "data_type": "IntegerType",
                         "obj": str(obj),
                     },
@@ -2304,10 +2350,18 @@ def verify_long(obj: Any) -> None:
             assert_acceptable_types(obj)
             verify_acceptable_types(obj)
             if obj < -9223372036854775808 or obj > 9223372036854775807:
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_VALUE_OUT_OF_RANGE_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "data_type": "LongType",
+                            "obj": str(obj),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_VALUE_OUT_OF_RANGE",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "data_type": "LongType",
                         "obj": str(obj),
                     },
@@ -2357,10 +2411,18 @@ def verify_struct(obj: Any) -> None:
                     verifier(obj.get(f))
         elif isinstance(obj, (tuple, list)):
             if len(obj) != len(verifiers):
+                if name is not None:
+                    raise PySparkValueError(
+                        error_class="FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME",
+                        message_parameters={
+                            "field_name": str(name),
+                            "object_length": str(len(obj)),
+                            "field_length": str(len(verifiers)),
+                        },
+                    )
                 raise PySparkValueError(
                     error_class="FIELD_STRUCT_LENGTH_MISMATCH",
                     message_parameters={
-                        "field_name": name if name is not None else "",
                         "object_length": str(len(obj)),
                         "field_length": str(len(verifiers)),
                     },
@@ -2372,10 +2434,19 @@ def verify_struct(obj: Any) -> None:
             for f, verifier in verifiers:
                 verifier(d.get(f))
         else:
+            if name is not None:
+                raise PySparkTypeError(
+                    error_class="FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME",
+                    message_parameters={
+                        "field_name": str(name),
+                        "data_type": str(dataType),
+                        "obj": repr(obj),
+                        "obj_type": str(type(obj)),
+                    },
+                )
             raise PySparkTypeError(
                 error_class="FIELD_DATA_TYPE_UNACCEPTABLE",
                 message_parameters={
-                    "field_name": name if name is not None else "",
                     "data_type": str(dataType),
                     "obj": repr(obj),
                     "obj_type": str(type(obj)),