@@ -459,9 +459,8 @@ def to_v2(self) -> TableMetadataV2:
459459 return TableMetadataV2 .model_validate (metadata )
460460
461461 format_version : Literal [1 ] = Field (alias = "format-version" , default = 1 )
462- """An integer version number for the format. Currently, this can be 1 or 2
463- based on the spec. Implementations must throw an exception if a table’s
464- version is higher than the supported version."""
462+ """An integer version number for the format. Implementations must throw
463+ an exception if a table’s version is higher than the supported version."""
465464
466465 schema_ : Schema = Field (alias = "schema" )
467466 """The table’s current schema. (Deprecated: use schemas and
@@ -507,16 +506,75 @@ def construct_refs(cls, table_metadata: TableMetadata) -> TableMetadata:
507506 return construct_refs (table_metadata )
508507
509508 format_version : Literal [2 ] = Field (alias = "format-version" , default = 2 )
510- """An integer version number for the format. Currently, this can be 1 or 2
511- based on the spec. Implementations must throw an exception if a table’s
512- version is higher than the supported version."""
509+ """An integer version number for the format. Implementations must throw
510+ an exception if a table’s version is higher than the supported version."""
513511
514512 last_sequence_number : int = Field (alias = "last-sequence-number" , default = INITIAL_SEQUENCE_NUMBER )
515513 """The table’s highest assigned sequence number, a monotonically
516514 increasing long that tracks the order of snapshots in a table."""
517515
518516
519- TableMetadata = Annotated [Union [TableMetadataV1 , TableMetadataV2 ], Field (discriminator = "format_version" )]
class TableMetadataV3(TableMetadataCommonFields, IcebergBaseModel):
    """Table metadata model for format version 3 of the Iceberg spec.

    Version 3 of the Iceberg spec extends data types and existing metadata
    structures to add new capabilities:

    - New data types: nanosecond timestamp(tz), unknown
    - Default value support for columns
    - Multi-argument transforms for partitioning and sorting
    - Row Lineage tracking
    - Binary deletion vectors

    Serializing this model through ``model_dump_json`` is not supported yet
    and raises ``NotImplementedError``.

    For more information:
    https://iceberg.apache.org/spec/#version-3-extended-types-and-capabilities
    """

    @model_validator(mode="before")
    def cleanup_snapshot_id(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        """Pass the raw input through the module-level ``cleanup_snapshot_id`` helper."""
        cleaned = cleanup_snapshot_id(data)
        return cleaned

    @model_validator(mode="after")
    def check_schemas(cls, table_metadata: TableMetadata) -> TableMetadata:
        """Delegate to the module-level ``check_schemas`` helper."""
        checked = check_schemas(table_metadata)
        return checked

    @model_validator(mode="after")
    def check_partition_specs(cls, table_metadata: TableMetadata) -> TableMetadata:
        """Delegate to the module-level ``check_partition_specs`` helper."""
        checked = check_partition_specs(table_metadata)
        return checked

    @model_validator(mode="after")
    def check_sort_orders(cls, table_metadata: TableMetadata) -> TableMetadata:
        """Delegate to the module-level ``check_sort_orders`` helper."""
        checked = check_sort_orders(table_metadata)
        return checked

    @model_validator(mode="after")
    def construct_refs(cls, table_metadata: TableMetadata) -> TableMetadata:
        """Delegate to the module-level ``construct_refs`` helper."""
        with_refs = construct_refs(table_metadata)
        return with_refs

    format_version: Literal[3] = Field(alias="format-version", default=3)
    """An integer version number for the format. Implementations must throw
    an exception if a table’s version is higher than the supported version."""

    last_sequence_number: int = Field(alias="last-sequence-number", default=INITIAL_SEQUENCE_NUMBER)
    """The table’s highest assigned sequence number, a monotonically
    increasing long that tracks the order of snapshots in a table."""

    row_lineage: bool = Field(alias="row-lineage", default=False)
    """Indicates that row-lineage is enabled on the table

    For more information:
    https://iceberg.apache.org/spec/?column-projection#row-lineage
    """

    next_row_id: Optional[int] = Field(alias="next-row-id", default=None)
    """A long higher than all assigned row IDs; the next snapshot's `first-row-id`."""

    def model_dump_json(
        self, exclude_none: bool = True, exclude: Optional[Any] = None, by_alias: bool = True, **kwargs: Any
    ) -> str:
        """Always raise ``NotImplementedError``; the arguments are accepted but ignored.

        Raises:
            NotImplementedError: On every call, because writing V3 metadata is not supported yet.
        """
        message = "Writing V3 is not yet supported, see: https://github.com/apache/iceberg-python/issues/1551"
        raise NotImplementedError(message)
576+
# Union of every table metadata model defined here (V1, V2, V3), discriminated
# on each model's ``format_version`` field.
TableMetadata = Annotated[
    Union[TableMetadataV1, TableMetadataV2, TableMetadataV3],
    Field(discriminator="format_version"),
]
520578
521579
522580def new_table_metadata (
@@ -553,20 +611,36 @@ def new_table_metadata(
553611 last_partition_id = fresh_partition_spec .last_assigned_field_id ,
554612 table_uuid = table_uuid ,
555613 )
556-
557- return TableMetadataV2 (
558- location = location ,
559- schemas = [fresh_schema ],
560- last_column_id = fresh_schema .highest_field_id ,
561- current_schema_id = fresh_schema .schema_id ,
562- partition_specs = [fresh_partition_spec ],
563- default_spec_id = fresh_partition_spec .spec_id ,
564- sort_orders = [fresh_sort_order ],
565- default_sort_order_id = fresh_sort_order .order_id ,
566- properties = properties ,
567- last_partition_id = fresh_partition_spec .last_assigned_field_id ,
568- table_uuid = table_uuid ,
569- )
614+ elif format_version == 2 :
615+ return TableMetadataV2 (
616+ location = location ,
617+ schemas = [fresh_schema ],
618+ last_column_id = fresh_schema .highest_field_id ,
619+ current_schema_id = fresh_schema .schema_id ,
620+ partition_specs = [fresh_partition_spec ],
621+ default_spec_id = fresh_partition_spec .spec_id ,
622+ sort_orders = [fresh_sort_order ],
623+ default_sort_order_id = fresh_sort_order .order_id ,
624+ properties = properties ,
625+ last_partition_id = fresh_partition_spec .last_assigned_field_id ,
626+ table_uuid = table_uuid ,
627+ )
628+ elif format_version == 3 :
629+ return TableMetadataV3 (
630+ location = location ,
631+ schemas = [fresh_schema ],
632+ last_column_id = fresh_schema .highest_field_id ,
633+ current_schema_id = fresh_schema .schema_id ,
634+ partition_specs = [fresh_partition_spec ],
635+ default_spec_id = fresh_partition_spec .spec_id ,
636+ sort_orders = [fresh_sort_order ],
637+ default_sort_order_id = fresh_sort_order .order_id ,
638+ properties = properties ,
639+ last_partition_id = fresh_partition_spec .last_assigned_field_id ,
640+ table_uuid = table_uuid ,
641+ )
642+ else :
643+ raise ValidationError (f"Unknown format version: { format_version } " )
570644
571645
572646class TableMetadataWrapper (IcebergRootModel [TableMetadata ]):
@@ -593,6 +667,8 @@ def parse_obj(data: Dict[str, Any]) -> TableMetadata:
593667 return TableMetadataV1 (** data )
594668 elif format_version == 2 :
595669 return TableMetadataV2 (** data )
670+ elif format_version == 3 :
671+ return TableMetadataV3 (** data )
596672 else :
597673 raise ValidationError (f"Unknown format version: { format_version } " )
598674
@@ -609,6 +685,8 @@ def _construct_without_validation(table_metadata: TableMetadata) -> TableMetadat
609685 return TableMetadataV1 .model_construct (** dict (table_metadata ))
610686 elif table_metadata .format_version == 2 :
611687 return TableMetadataV2 .model_construct (** dict (table_metadata ))
688+ elif table_metadata .format_version == 3 :
689+ return TableMetadataV3 .model_construct (** dict (table_metadata ))
612690 else :
613691 raise ValidationError (f"Unknown format version: { table_metadata .format_version } " )
614692
0 commit comments