From e1451ac6edcd0d7c97f20fd6d1d69c5cf2eb239a Mon Sep 17 00:00:00 2001 From: Christian Thiel Date: Wed, 4 Dec 2024 10:59:34 +0100 Subject: [PATCH 1/2] Fix current snapshot id --- crates/iceberg/src/spec/table_metadata.rs | 121 +++++++++++++++++- .../src/spec/table_metadata_builder.rs | 2 + 2 files changed, 118 insertions(+), 5 deletions(-) diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index cd242f4b5a..fcd9d27398 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -1010,10 +1010,14 @@ pub(super) mod _serde { .collect(), default_spec_id: v.default_spec.spec_id(), last_partition_id: v.last_partition_id, - properties: Some(v.properties), - current_snapshot_id: v.current_snapshot_id.or(Some(-1)), + properties: if v.properties.is_empty() { + None + } else { + Some(v.properties) + }, + current_snapshot_id: v.current_snapshot_id, snapshots: if v.snapshots.is_empty() { - Some(vec![]) + None } else { Some( v.snapshots @@ -1042,7 +1046,11 @@ pub(super) mod _serde { .map(|x| Arc::try_unwrap(x).unwrap_or_else(|s| s.as_ref().clone())) .collect(), default_sort_order_id: v.default_sort_order_id, - refs: Some(v.refs), + refs: if v.refs.is_empty() { + None + } else { + Some(v.refs) + }, } } } @@ -1091,7 +1099,7 @@ pub(super) mod _serde { } else { Some(v.properties) }, - current_snapshot_id: v.current_snapshot_id.or(Some(-1)), + current_snapshot_id: v.current_snapshot_id, snapshots: if v.snapshots.is_empty() { None } else { @@ -1349,7 +1357,11 @@ mod tests { refs: HashMap::new(), }; + let expected_json_value = serde_json::to_value(&expected).unwrap(); check_table_metadata_serde(data, expected); + + let json_value = serde_json::from_str::(data).unwrap(); + assert_eq!(json_value, expected_json_value); } #[test] @@ -1519,6 +1531,105 @@ mod tests { check_table_metadata_serde(data, expected); } + #[test] + fn test_table_data_v2_no_snapshots() { + let data = r#" + { + "format-version" : 2, + "table-uuid": "fb072c92-a02b-11e9-ae9c-1bb7bc9eca94", + "location": "s3://b/wh/data.db/table", + "last-sequence-number" : 1, + "last-updated-ms": 1515100955770, + "last-column-id": 1, + "schemas": [ + { + "schema-id" : 1, + "type" : "struct", + "fields" :[ + { + "id": 1, + "name": "struct_name", + "required": true, + "type": "fixed[1]" + } + ] + } + ], + "current-schema-id" : 1, + "partition-specs": [ + { + "spec-id": 0, + "fields": [] + } + ], + "default-spec-id": 0, + "last-partition-id": 1000, + "metadata-log": [ + { + "metadata-file": "s3://bucket/.../v1.json", + "timestamp-ms": 1515100 + } + ], + "sort-orders": [ + { + "order-id": 0, + "fields": [] + } + ], + "default-sort-order-id": 0 + } + "#; + + let schema = Schema::builder() + .with_schema_id(1) + .with_fields(vec![Arc::new(NestedField::required( + 1, + "struct_name", + Type::Primitive(PrimitiveType::Fixed(1)), + ))]) + .build() + .unwrap(); + + let partition_spec = BoundPartitionSpec::builder(schema.clone()) + .with_spec_id(0) + .build() + .unwrap(); + + let expected = TableMetadata { + format_version: FormatVersion::V2, + table_uuid: Uuid::parse_str("fb072c92-a02b-11e9-ae9c-1bb7bc9eca94").unwrap(), + location: "s3://b/wh/data.db/table".to_string(), + last_updated_ms: 1515100955770, + last_column_id: 1, + schemas: HashMap::from_iter(vec![(1, Arc::new(schema))]), + current_schema_id: 1, + partition_specs: HashMap::from_iter(vec![( + 0, + partition_spec.clone().into_schemaless().into(), + )]), + default_spec: partition_spec.into(), + last_partition_id: 1000, + default_sort_order_id: 0, + sort_orders: HashMap::from_iter(vec![(0, SortOrder::unsorted_order().into())]), + snapshots: HashMap::default(), + current_snapshot_id: None, + last_sequence_number: 1, + properties: HashMap::new(), + snapshot_log: Vec::new(), + metadata_log: vec![MetadataLog { + metadata_file: "s3://bucket/.../v1.json".to_string(), + timestamp_ms: 1515100, + }], + refs: HashMap::new(), + }; + + let expected_json_value = serde_json::to_value(&expected).unwrap(); + check_table_metadata_serde(data, expected); + + let json_value = serde_json::from_str::(data).unwrap(); + assert_eq!(json_value, expected_json_value); + } + #[test] fn test_current_snapshot_id_must_match_main_branch() { let data = r#" diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index bae2948034..966abb156f 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -1220,6 +1220,7 @@ mod tests { assert_eq!(metadata.last_partition_id, 1000); assert_eq!(metadata.last_column_id, 3); assert_eq!(metadata.snapshots.len(), 0); + assert_eq!(metadata.current_snapshot_id, None); assert_eq!(metadata.refs.len(), 0); assert_eq!(metadata.properties.len(), 0); assert_eq!(metadata.metadata_log.len(), 0); @@ -1268,6 +1269,7 @@ mod tests { assert_eq!(metadata.last_partition_id, UNPARTITIONED_LAST_ASSIGNED_ID); assert_eq!(metadata.last_column_id, 0); assert_eq!(metadata.snapshots.len(), 0); + assert_eq!(metadata.current_snapshot_id, None); assert_eq!(metadata.refs.len(), 0); assert_eq!(metadata.properties.len(), 0); assert_eq!(metadata.metadata_log.len(), 0); From e508bcdfaf0afdc95a216f0b591a73a9b23b817d Mon Sep 17 00:00:00 2001 From: Christian Thiel Date: Wed, 4 Dec 2024 11:03:24 +0100 Subject: [PATCH 2/2] keep empty refs object for V2 --- crates/iceberg/src/spec/table_metadata.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index fcd9d27398..bc1fe17c11 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -1046,11 +1046,7 @@ pub(super) mod _serde { .map(|x| Arc::try_unwrap(x).unwrap_or_else(|s| s.as_ref().clone())) .collect(), default_sort_order_id: v.default_sort_order_id, - refs: if v.refs.is_empty() { - None - } else { - Some(v.refs) - }, + refs: Some(v.refs), } } } @@ -1287,6 +1283,7 @@ mod tests { "timestamp-ms": 1515100 } ], + "refs": {}, "sort-orders": [ { "order-id": 0, @@ -1562,6 +1559,7 @@ mod tests { "fields": [] } ], + "refs": {}, "default-spec-id": 0, "last-partition-id": 1000, "metadata-log": [