From 35e00bbed46350c35c41d733b74c9ef7714f5eed Mon Sep 17 00:00:00 2001 From: dentiny Date: Thu, 15 May 2025 17:41:43 +0000 Subject: [PATCH 1/4] Add snapshot summary properties --- crates/iceberg/src/catalog/mod.rs | 29 +++++++++ .../src/spec/table_metadata_builder.rs | 63 +++++++++++++++++++ crates/iceberg/src/transaction/mod.rs | 12 ++++ 3 files changed, 104 insertions(+) diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 9521cde208..c49560be10 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -495,6 +495,12 @@ pub enum TableUpdate { /// Schema IDs to remove. schema_ids: Vec, }, + /// Add snapshot summary properties. + #[serde(rename_all = "kebab-case")] + AddSnapshotSummaryProperties { + /// Additional properties to add. + properties: HashMap, + }, } impl TableUpdate { @@ -539,6 +545,9 @@ impl TableUpdate { Ok(builder.remove_partition_statistics(snapshot_id)) } TableUpdate::RemoveSchemas { schema_ids } => builder.remove_schemas(&schema_ids), + TableUpdate::AddSnapshotSummaryProperties { properties } => { + builder.add_snapshot_summary_properties(properties) + } } } } @@ -2097,4 +2106,24 @@ mod tests { }, ); } + + #[test] + fn test_add_snapshot_summary_properties() { + let mut expected_properties = HashMap::new(); + expected_properties.insert("prop-key".to_string(), "prop-value".to_string()); + + test_serde_json( + r#" + { + "action": "add-snapshot-summary-properties", + "properties": { + "prop-key": "prop-value" + } + } + "#, + TableUpdate::AddSnapshotSummaryProperties { + properties: expected_properties, + }, + ); + } } diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index 7bee6ddd31..fad1afeaba 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -1241,6 +1241,21 @@ impl TableMetadataBuilder { Ok(self) } + + /// Add snapshot summary properties for the table metadata. + pub fn add_snapshot_summary_properties( + mut self, + properties: HashMap, + ) -> Result { + if properties.is_empty() { + return Ok(self); + } + + self.changes + .push(TableUpdate::AddSnapshotSummaryProperties { properties }); + + Ok(self) + } } impl From for TableMetadata { @@ -2493,4 +2508,52 @@ mod tests { }; assert_eq!(remove_schema_ids, &[0]); } + + #[test] + fn test_add_snapshot_summary_properties() { + let file = File::open(format!( + "{}/testdata/table_metadata/{}", + env!("CARGO_MANIFEST_DIR"), + "TableMetadataV2Valid.json" + )) + .unwrap(); + let reader = BufReader::new(file); + let resp = serde_json::from_reader::<_, TableMetadata>(reader).unwrap(); + + let table = Table::builder() + .metadata(resp) + .metadata_location("s3://bucket/test/location/metadata/v1.json".to_string()) + .identifier(TableIdent::from_strs(["ns1", "test1"]).unwrap()) + .file_io(FileIOBuilder::new("memory").build().unwrap()) + .build() + .unwrap(); + assert_eq!( + 0, + table + .metadata() + .current_snapshot() + .unwrap() + .summary() + .additional_properties + .len() + ); + + let mut new_properties = HashMap::new(); + new_properties.insert("prop-key".to_string(), "prop-value".to_string()); + + let mut meta_data_builder = table.metadata().clone().into_builder(None); + meta_data_builder = meta_data_builder + .add_snapshot_summary_properties(new_properties.clone()) + .unwrap(); + let build_result = meta_data_builder.build().unwrap(); + assert_eq!( + build_result + .metadata + .current_snapshot() + .unwrap() + .summary() + .additional_properties, + new_properties + ); + } } diff --git a/crates/iceberg/src/transaction/mod.rs b/crates/iceberg/src/transaction/mod.rs index 108ad10595..cbd62fa703 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -128,6 +128,18 @@ impl<'a> Transaction<'a> { Ok(self) } + /// Add snapshot summary properties. + pub fn add_snapshot_summary_properties( + mut self, + props: HashMap, + ) -> Result { + self.apply( + vec![TableUpdate::AddSnapshotSummaryProperties { properties: props }], + vec![], + )?; + Ok(self) + } + fn generate_unique_snapshot_id(&self) -> i64 { let generate_random_id = || -> i64 { let (lhs, rhs) = Uuid::new_v4().as_u64_pair(); From bb95952920415a6905c22426e655a885c0db4069 Mon Sep 17 00:00:00 2001 From: dentiny Date: Thu, 15 May 2025 18:19:07 +0000 Subject: [PATCH 2/4] fix test --- crates/iceberg/src/spec/snapshot.rs | 5 +++ .../src/spec/table_metadata_builder.rs | 37 ++++++++++++------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/crates/iceberg/src/spec/snapshot.rs b/crates/iceberg/src/spec/snapshot.rs index 922e7bab95..6fd362097d 100644 --- a/crates/iceberg/src/spec/snapshot.rs +++ b/crates/iceberg/src/spec/snapshot.rs @@ -214,6 +214,11 @@ impl Snapshot { snapshot_id: self.snapshot_id, } } + + /// Add the given properties map to snapshot summary. + pub(crate) fn add_summary_properties(&mut self, props: HashMap) { + self.summary.additional_properties.extend(props); + } } pub(super) mod _serde { diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index fad1afeaba..9fde54ea78 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -1242,7 +1242,7 @@ impl TableMetadataBuilder { Ok(self) } - /// Add snapshot summary properties for the table metadata. + /// Add summary properties to the latest snapshot for the table metadata. pub fn add_snapshot_summary_properties( mut self, properties: HashMap, @@ -1251,8 +1251,22 @@ impl TableMetadataBuilder { return Ok(self); } - self.changes - .push(TableUpdate::AddSnapshotSummaryProperties { properties }); + if self.metadata.current_snapshot_id.is_some() { + let snapshot_id = self.metadata.current_snapshot_id.unwrap(); + let mut cur_snapshot = self + .metadata + .snapshots + .remove(&snapshot_id) + .unwrap() + .as_ref() + .clone(); + cur_snapshot.add_summary_properties(properties.clone()); + self.metadata + .snapshots + .insert(snapshot_id, Arc::new(cur_snapshot)); + self.changes + .push(TableUpdate::AddSnapshotSummaryProperties { properties }); + } Ok(self) } @@ -2527,16 +2541,13 @@ mod tests { .file_io(FileIOBuilder::new("memory").build().unwrap()) .build() .unwrap(); - assert_eq!( - 0, - table - .metadata() - .current_snapshot() - .unwrap() - .summary() - .additional_properties - .len() - ); + assert!(table + .metadata() + .current_snapshot() + .unwrap() + .summary() + .additional_properties + .is_empty()); let mut new_properties = HashMap::new(); new_properties.insert("prop-key".to_string(), "prop-value".to_string()); From e268be776b5ab7c0271480ee416e93c3db52d144 Mon Sep 17 00:00:00 2001 From: dentiny Date: Thu, 15 May 2025 18:21:10 +0000 Subject: [PATCH 3/4] get current snapshot id --- .../src/spec/table_metadata_builder.rs | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index 9fde54ea78..d617e10e92 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -1251,22 +1251,20 @@ impl TableMetadataBuilder { return Ok(self); } - if self.metadata.current_snapshot_id.is_some() { - let snapshot_id = self.metadata.current_snapshot_id.unwrap(); - let mut cur_snapshot = self - .metadata - .snapshots - .remove(&snapshot_id) - .unwrap() - .as_ref() - .clone(); - cur_snapshot.add_summary_properties(properties.clone()); - self.metadata - .snapshots - .insert(snapshot_id, Arc::new(cur_snapshot)); - self.changes - .push(TableUpdate::AddSnapshotSummaryProperties { properties }); - } + let snapshot_id = self.metadata.current_snapshot_id.unwrap(); + let mut cur_snapshot = self + .metadata + .snapshots + .remove(&snapshot_id) + .unwrap() + .as_ref() + .clone(); + cur_snapshot.add_summary_properties(properties.clone()); + self.metadata + .snapshots + .insert(snapshot_id, Arc::new(cur_snapshot)); + self.changes + .push(TableUpdate::AddSnapshotSummaryProperties { properties }); Ok(self) } From 7f9301df2fc1418423fa3a9a314fdd6fd205a672 Mon Sep 17 00:00:00 2001 From: dentiny Date: Fri, 23 May 2025 17:22:04 +0000 Subject: [PATCH 4/4] cargo fmt --- Cargo.lock | 38 +++++-------------- .../src/spec/table_metadata_builder.rs | 16 ++++---- 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0a2987a89b..8ec4146bc7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -637,7 +637,7 @@ dependencies = [ "aws-sdk-ssooidc", "aws-sdk-sts", "aws-smithy-async", - "aws-smithy-http 0.62.1", + "aws-smithy-http", "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -699,7 +699,7 @@ dependencies = [ "aws-credential-types", "aws-sigv4", "aws-smithy-async", - "aws-smithy-http 0.62.1", + "aws-smithy-http", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -723,7 +723,7 @@ dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http 0.62.1", + "aws-smithy-http", "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -745,7 +745,7 @@ dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http 0.62.1", + "aws-smithy-http", "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -767,7 +767,7 @@ dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http 0.62.1", + "aws-smithy-http", "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -790,7 +790,7 @@ dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http 0.62.1", + "aws-smithy-http", "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -813,7 +813,7 @@ dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http 0.62.1", + "aws-smithy-http", "aws-smithy-json", "aws-smithy-query", "aws-smithy-runtime", @@ -835,7 +835,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3503af839bd8751d0bdc5a46b9cac93a003a353e635b0c12cf2376b5b53e41ea" dependencies = [ "aws-credential-types", - "aws-smithy-http 0.62.1", + "aws-smithy-http", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", @@ -861,26 +861,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "aws-smithy-http" -version = "0.60.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7809c27ad8da6a6a68c454e651d4962479e81472aa19ae99e59f9aba1f9713cc" -dependencies = [ - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "bytes-utils", - "futures-core", - "http 0.2.12", - "http-body 0.4.6", - "once_cell", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tracing", -] - [[package]] name = "aws-smithy-http" version = "0.62.1" @@ -964,7 +944,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14302f06d1d5b7d333fd819943075b13d27c7700b414f574c3c35859bfb55d5e" dependencies = [ "aws-smithy-async", - "aws-smithy-http 0.62.1", + "aws-smithy-http", "aws-smithy-http-client", "aws-smithy-observability", "aws-smithy-runtime-api", diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index 048a97e485..2408e57d69 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -2542,13 +2542,15 @@ mod tests { .file_io(FileIOBuilder::new("memory").build().unwrap()) .build() .unwrap(); - assert!(table - .metadata() - .current_snapshot() - .unwrap() - .summary() - .additional_properties - .is_empty()); + assert!( + table + .metadata() + .current_snapshot() + .unwrap() + .summary() + .additional_properties + .is_empty() + ); let mut new_properties = HashMap::new(); new_properties.insert("prop-key".to_string(), "prop-value".to_string());