@@ -2036,24 +2036,29 @@ def _check_schema_compatible(table_schema: Schema, other_schema: pa.Schema, down
20362036 """
20372037 Check if the `table_schema` is compatible with `other_schema`.
20382038
2039- Two schemas are considered compatible when they are equal in terms of the Iceberg Schema type.
2039+ The schemas are compatible if:
2040+ - All fields in `other_schema` are present in `table_schema`. (other_schema <= table_schema)
2041+ - All required fields in `table_schema` are present in `other_schema`.
20402042
20412043 Raises:
20422044 ValueError: If the schemas are not compatible.
20432045 """
2046+ from pyiceberg .io .pyarrow import _pyarrow_to_schema_without_ids , pyarrow_to_schema
2047+
20442048 name_mapping = table_schema .name_mapping
20452049 try :
2046- task_schema = pyarrow_to_schema (
2047- other_schema , name_mapping = name_mapping , downcast_ns_timestamp_to_us = downcast_ns_timestamp_to_us
2048- )
2050+ other_schema = pyarrow_to_schema (other_schema , name_mapping = name_mapping )
20492051 except ValueError as e :
2050- other_schema = _pyarrow_to_schema_without_ids (other_schema , downcast_ns_timestamp_to_us = downcast_ns_timestamp_to_us )
2052+ other_schema = _pyarrow_to_schema_without_ids (other_schema )
20512053 additional_names = set (other_schema .column_names ) - set (table_schema .column_names )
20522054 raise ValueError (
20532055 f"PyArrow table contains more columns: { ', ' .join (sorted (additional_names ))} . Update the schema first (hint, use union_by_name)."
20542056 ) from e
20552057
2056- if table_schema .as_struct () != task_schema .as_struct ():
2058+ missing_table_schema_fields = {field for field in other_schema .fields if field not in table_schema .fields }
2059+ required_table_schema_fields = {field for field in table_schema .fields if field .required }
2060+ missing_required_fields = {field for field in required_table_schema_fields if field not in other_schema .fields }
2061+ if missing_table_schema_fields or missing_required_fields :
20572062 from rich .console import Console
20582063 from rich .table import Table as RichTable
20592064
@@ -2066,7 +2071,7 @@ def _check_schema_compatible(table_schema: Schema, other_schema: pa.Schema, down
20662071
20672072 for lhs in table_schema .fields :
20682073 try :
2069- rhs = task_schema .find_field (lhs .field_id )
2074+ rhs = other_schema .find_field (lhs .field_id )
20702075 rich_table .add_row ("✅" if lhs == rhs else "❌" , str (lhs ), str (rhs ))
20712076 except ValueError :
20722077 rich_table .add_row ("❌" , str (lhs ), "Missing" )
0 commit comments