From fd25829b1ac1c616565bbebede8296fd67746295 Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Thu, 9 Oct 2025 08:40:02 -0400 Subject: [PATCH 1/9] add table properties class --- Cargo.lock | 3 +- crates/iceberg/Cargo.toml | 1 + crates/iceberg/src/spec/mod.rs | 2 + crates/iceberg/src/spec/table_properties.rs | 194 ++++++++++++++++++++ 4 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 crates/iceberg/src/spec/table_properties.rs diff --git a/Cargo.lock b/Cargo.lock index 00d73288f7..a519aa2b6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3628,6 +3628,7 @@ dependencies = [ "opendal", "ordered-float 4.6.0", "parquet", + "paste", "pretty_assertions", "rand 0.8.5", "regex", @@ -3805,7 +3806,7 @@ dependencies = [ "iceberg-catalog-rest", "iceberg-datafusion", "iceberg_test_utils", - "ordered-float 4.6.0", + "ordered-float 2.10.1", "parquet", "tokio", "uuid", diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index d592700b73..77ae8bc2b3 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -71,6 +71,7 @@ murmur3 = { workspace = true } num-bigint = { workspace = true } once_cell = { workspace = true } opendal = { workspace = true } +paste = "1.0" ordered-float = { workspace = true } parquet = { workspace = true, features = ["async"] } rand = { workspace = true } diff --git a/crates/iceberg/src/spec/mod.rs b/crates/iceberg/src/spec/mod.rs index 90dafcc110..44b35e5a6b 100644 --- a/crates/iceberg/src/spec/mod.rs +++ b/crates/iceberg/src/spec/mod.rs @@ -30,6 +30,7 @@ mod sort; mod statistic_file; mod table_metadata; mod table_metadata_builder; +mod table_properties; mod transform; mod values; mod view_metadata; @@ -48,6 +49,7 @@ pub use snapshot_summary::*; pub use sort::*; pub use statistic_file::*; pub use table_metadata::*; +pub use table_properties::*; pub use transform::*; pub use values::*; pub use view_metadata::*; diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs new file mode 100644 index 0000000000..4329bae6ca --- /dev/null +++ b/crates/iceberg/src/spec/table_properties.rs @@ -0,0 +1,194 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::HashMap; + +/// Macro to define table properties with type-safe access and validation. +/// +/// # Example +/// ```ignore +/// define_table_properties! { +/// TableProperties { +/// commit_num_retries: usize = "commit.retry.num-retries" => 4, +/// commit_min_retry_wait_ms: u64 = "commit.retry.min-wait-ms" => 100, +/// write_format_default: String = "write.format.default" => "parquet", +/// } +/// } +/// ``` +macro_rules! define_table_properties { + ( + $struct_name:ident { + $( + $(#[$field_doc:meta])* + $field_name:ident: $field_type:ty = $key:literal => $default:expr + ),* $(,)? + } + ) => { + /// Table properties with type-safe access and validation + #[derive(Clone)] + pub struct $struct_name { + $( + $(#[$field_doc])* + pub $field_name: $field_type, + )* + } + + impl $struct_name { + $( + paste::paste! { + #[doc = "Property key for " $key] + pub const []: &'static str = $key; + } + )* + + /// Create a new instance with default values + pub fn new() -> Self { + Self { + $($field_name: parse_default!($default, $field_type),)* + } + } + } + + impl Default for $struct_name { + fn default() -> Self { + Self::new() + } + } + + impl TryFrom> for $struct_name { + type Error = anyhow::Error; + + fn try_from(properties: HashMap) -> Result { + let mut result = Self::new(); + + $( + paste::paste! { + if let Some(value_str) = properties.get(Self::[]) { + result.$field_name = parse_value!( + value_str, + $field_type, + Self::[] + )?; + } + } + )* + + Ok(result) + } + } + + impl From<$struct_name> for HashMap { + fn from(properties: $struct_name) -> Self { + let mut map = HashMap::new(); + + $( + paste::paste! { + map.insert( + $struct_name::[].to_string(), + format_value!(properties.$field_name) + ); + } + )* + + map + } + } + }; +} + +/// Helper macro to parse default values based on type +#[macro_export] +macro_rules! parse_default { + ($value:expr, String) => { + $value.to_string() + }; + ($value:expr, $type:ty) => { + $value + }; +} + +/// Helper macro to parse values from strings based on type +#[macro_export] +macro_rules! parse_value { + ($value:expr, String, $key:expr) => { + Ok::($value.clone()) + }; + ($value:expr, $type:ty, $key:expr) => { + $value + .parse::<$type>() + .map_err(|e| anyhow::anyhow!("Invalid value for {}: {}", $key, e)) + }; +} + +/// Helper macro to format values for storage +#[macro_export] +macro_rules! format_value { + ($value:expr) => { + $value.to_string() + }; +} + +// Define the actual TableProperties struct using the macro +define_table_properties! { + TableProperties { + /// Number of commit retries + commit_num_retries: usize = "commit.retry.num-retries" => 4, + /// Minimum wait time (ms) between retries + commit_min_retry_wait_ms: u64 = "commit.retry.min-wait-ms" => 100, + /// Maximum wait time (ms) between retries + commit_max_retry_wait_ms: u64 = "commit.retry.max-wait-ms" => 60000, + /// Total maximum retry time (ms) + commit_total_retry_timeout_ms: u64 = "commit.retry.total-timeout-ms" => 1800000, + /// Default file format for data files + write_format_default: String = "write.format.default" => "parquet".to_string(), + /// Target file size in bytes + write_target_file_size_bytes: u64 = "write.target-file-size-bytes" => 536870912, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{Error, ErrorKind}; + use anyhow::anyhow; + + #[test] + fn test_table_properties_default() { + let properties = TableProperties::new(); + assert_eq!(properties.commit_num_retries, 4); + assert_eq!(properties.commit_min_retry_wait_ms, 100); + assert_eq!(properties.commit_max_retry_wait_ms, 60000); + assert_eq!(properties.commit_total_retry_timeout_ms, 1800000); + assert_eq!(properties.write_format_default, "parquet"); + assert_eq!(properties.write_target_file_size_bytes, 536870912); + } + + #[test] + fn test_table_properties_from_map() { + let properties = TableProperties::try_from(HashMap::from([ + ("commit.retry.num-retries".to_string(), "5".to_string()), + ("commit.retry.min-wait-ms".to_string(), "10".to_string()), + ("write.format.default".to_string(), "avro".to_string()), + ])).unwrap(); + assert_eq!(properties.commit_num_retries, 5); + assert_eq!(properties.commit_min_retry_wait_ms, 10); + assert_eq!(properties.commit_max_retry_wait_ms, 60000); + assert_eq!(properties.commit_max_retry_wait_ms, 1800000); + assert_eq!(properties.write_format_default, "avro"); + assert_eq!(properties.write_target_file_size_bytes, 536870912); + } +} From 529dbfa40833c8ff956c654ce28e130609ab9ddc Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Thu, 9 Oct 2025 14:31:22 +0000 Subject: [PATCH 2/9] lint: fixes to tests --- crates/iceberg/src/spec/table_properties.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 4329bae6ca..6637247e7e 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -162,9 +162,10 @@ define_table_properties! { #[cfg(test)] mod tests { + use anyhow::anyhow; + use super::*; use crate::{Error, ErrorKind}; - use anyhow::anyhow; #[test] fn test_table_properties_default() { @@ -183,7 +184,8 @@ mod tests { ("commit.retry.num-retries".to_string(), "5".to_string()), ("commit.retry.min-wait-ms".to_string(), "10".to_string()), ("write.format.default".to_string(), "avro".to_string()), - ])).unwrap(); + ])) + .unwrap(); assert_eq!(properties.commit_num_retries, 5); assert_eq!(properties.commit_min_retry_wait_ms, 10); assert_eq!(properties.commit_max_retry_wait_ms, 60000); From 629f1bed1123668fba2aa0792021d42a91166300 Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Thu, 9 Oct 2025 14:51:21 +0000 Subject: [PATCH 3/9] move paste crate to upper level workspace --- Cargo.toml | 5 +++-- crates/iceberg/Cargo.toml | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 999b911753..60e79c1517 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -76,10 +76,10 @@ futures = "0.3" hive_metastore = "0.2.0" http = "1.2" iceberg = { version = "0.7.0", path = "./crates/iceberg" } -iceberg-catalog-rest = { version = "0.7.0", path = "./crates/catalog/rest" } iceberg-catalog-glue = { version = "0.7.0", path = "./crates/catalog/glue" } -iceberg-catalog-s3tables = { version = "0.7.0", path = "./crates/catalog/s3tables" } iceberg-catalog-hms = { version = "0.7.0", path = "./crates/catalog/hms" } +iceberg-catalog-rest = { version = "0.7.0", path = "./crates/catalog/rest" } +iceberg-catalog-s3tables = { version = "0.7.0", path = "./crates/catalog/s3tables" } iceberg-datafusion = { version = "0.7.0", path = "./crates/integrations/datafusion" } indicatif = "0.17" itertools = "0.13" @@ -95,6 +95,7 @@ once_cell = "1.20" opendal = "0.54.0" ordered-float = "4" parquet = "55.1" +paste = "1.0.15" pilota = "0.11.10" port_scanner = "0.1.5" pretty_assertions = "1.4" diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 77ae8bc2b3..3fe0651a06 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -71,12 +71,12 @@ murmur3 = { workspace = true } num-bigint = { workspace = true } once_cell = { workspace = true } opendal = { workspace = true } -paste = "1.0" ordered-float = { workspace = true } parquet = { workspace = true, features = ["async"] } +paste = { workspace = true } rand = { workspace = true } -reqwest = { workspace = true } reqsign = { version = "0.16.3", optional = true, default-features = false } +reqwest = { workspace = true } roaring = { workspace = true } rust_decimal = { workspace = true } serde = { workspace = true } From 1ae1374467fd3600d94ddfdc079ebe8dcf4d9a93 Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Thu, 9 Oct 2025 22:09:03 +0000 Subject: [PATCH 4/9] remove unused import for tests --- crates/iceberg/src/spec/table_properties.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 6637247e7e..2c4d28c19a 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -162,10 +162,8 @@ define_table_properties! { #[cfg(test)] mod tests { - use anyhow::anyhow; use super::*; - use crate::{Error, ErrorKind}; #[test] fn test_table_properties_default() { From a7fb8b08ba74a62b14489916bc1f4bd5ebdd2053 Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Sat, 11 Oct 2025 13:57:29 -0400 Subject: [PATCH 5/9] refactor: table properties uses static values --- crates/iceberg/src/spec/table_properties.rs | 265 ++++++++------------ 1 file changed, 98 insertions(+), 167 deletions(-) diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 2c4d28c19a..ab72496be7 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -17,178 +17,109 @@ use std::collections::HashMap; -/// Macro to define table properties with type-safe access and validation. -/// -/// # Example -/// ```ignore -/// define_table_properties! { -/// TableProperties { -/// commit_num_retries: usize = "commit.retry.num-retries" => 4, -/// commit_min_retry_wait_ms: u64 = "commit.retry.min-wait-ms" => 100, -/// write_format_default: String = "write.format.default" => "parquet", -/// } -/// } -/// ``` -macro_rules! define_table_properties { - ( - $struct_name:ident { - $( - $(#[$field_doc:meta])* - $field_name:ident: $field_type:ty = $key:literal => $default:expr - ),* $(,)? - } - ) => { - /// Table properties with type-safe access and validation - #[derive(Clone)] - pub struct $struct_name { - $( - $(#[$field_doc])* - pub $field_name: $field_type, - )* - } - - impl $struct_name { - $( - paste::paste! { - #[doc = "Property key for " $key] - pub const []: &'static str = $key; - } - )* - - /// Create a new instance with default values - pub fn new() -> Self { - Self { - $($field_name: parse_default!($default, $field_type),)* - } - } - } - - impl Default for $struct_name { - fn default() -> Self { - Self::new() - } - } - - impl TryFrom> for $struct_name { - type Error = anyhow::Error; - - fn try_from(properties: HashMap) -> Result { - let mut result = Self::new(); - - $( - paste::paste! { - if let Some(value_str) = properties.get(Self::[]) { - result.$field_name = parse_value!( - value_str, - $field_type, - Self::[] - )?; - } - } - )* - - Ok(result) - } - } - - impl From<$struct_name> for HashMap { - fn from(properties: $struct_name) -> Self { - let mut map = HashMap::new(); - - $( - paste::paste! { - map.insert( - $struct_name::[].to_string(), - format_value!(properties.$field_name) - ); - } - )* - - map - } - } - }; +// Helper function to parse a property from a HashMap +// If the property is not found, use the default value +fn parse_property( + properties: &HashMap, + key: &str, + default: T, +) -> Result +where + ::Err: std::fmt::Display, +{ + properties.get(key).map_or(Ok(default), |value| { + value + .parse::() + .map_err(|e| anyhow::anyhow!("Invalid value for {}: {}", key, e)) + }) } -/// Helper macro to parse default values based on type -#[macro_export] -macro_rules! parse_default { - ($value:expr, String) => { - $value.to_string() - }; - ($value:expr, $type:ty) => { - $value - }; +/// TableProperties that contains the properties of a table. +pub struct TableProperties { + /// The number of times to retry a commit. + pub commit_num_retries: usize, + /// The minimum wait time between retries. + pub commit_min_retry_wait_ms: u64, + /// The maximum wait time between retries. + pub commit_max_retry_wait_ms: u64, + /// The total timeout for commit retries. + pub commit_total_retry_timeout_ms: u64, + /// The default format for files. + pub write_format_default: String, + /// The target file size for files. + pub write_target_file_size_bytes: usize, } -/// Helper macro to parse values from strings based on type -#[macro_export] -macro_rules! parse_value { - ($value:expr, String, $key:expr) => { - Ok::($value.clone()) - }; - ($value:expr, $type:ty, $key:expr) => { - $value - .parse::<$type>() - .map_err(|e| anyhow::anyhow!("Invalid value for {}: {}", $key, e)) - }; +impl TableProperties { + /// Property key for number of commit retries. + pub const PROPERTY_COMMIT_NUM_RETRIES: &str = "commit.retry.num-retries"; + /// Default value for number of commit retries. + pub const PROPERTY_COMMIT_NUM_RETRIES_DEFAULT: usize = 4; + + /// Property key for minimum wait time (ms) between retries. + pub const PROPERTY_COMMIT_MIN_RETRY_WAIT_MS: &str = "commit.retry.min-wait-ms"; + /// Default value for minimum wait time (ms) between retries. + pub const PROPERTY_COMMIT_MIN_RETRY_WAIT_MS_DEFAULT: u64 = 100; + + /// Property key for maximum wait time (ms) between retries. + pub const PROPERTY_COMMIT_MAX_RETRY_WAIT_MS: &str = "commit.retry.max-wait-ms"; + /// Default value for maximum wait time (ms) between retries. + pub const PROPERTY_COMMIT_MAX_RETRY_WAIT_MS_DEFAULT: u64 = 60 * 1000; // 1 minute + + /// Property key for total maximum retry time (ms). + pub const PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS: &str = "commit.retry.total-timeout-ms"; + /// Default value for total maximum retry time (ms). + pub const PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT: u64 = 30 * 60 * 1000; // 30 minutes + + /// Default file format for data files + pub const PROPERTY_DEFAULT_FILE_FORMAT: &str = "write.format.default"; + /// Default file format for delete files + pub const PROPERTY_DELETE_DEFAULT_FILE_FORMAT: &str = "write.delete.format.default"; + /// Default value for data file format + pub const PROPERTY_DEFAULT_FILE_FORMAT_DEFAULT: &str = "parquet"; + + /// Target file size for newly written files. + pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES: &str = "write.target-file-size-bytes"; + /// Default target file size + pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT: usize = 512 * 1024 * 1024; // 512 MB } -/// Helper macro to format values for storage -#[macro_export] -macro_rules! format_value { - ($value:expr) => { - $value.to_string() - }; -} - -// Define the actual TableProperties struct using the macro -define_table_properties! { - TableProperties { - /// Number of commit retries - commit_num_retries: usize = "commit.retry.num-retries" => 4, - /// Minimum wait time (ms) between retries - commit_min_retry_wait_ms: u64 = "commit.retry.min-wait-ms" => 100, - /// Maximum wait time (ms) between retries - commit_max_retry_wait_ms: u64 = "commit.retry.max-wait-ms" => 60000, - /// Total maximum retry time (ms) - commit_total_retry_timeout_ms: u64 = "commit.retry.total-timeout-ms" => 1800000, - /// Default file format for data files - write_format_default: String = "write.format.default" => "parquet".to_string(), - /// Target file size in bytes - write_target_file_size_bytes: u64 = "write.target-file-size-bytes" => 536870912, - } -} - -#[cfg(test)] -mod tests { - - use super::*; - - #[test] - fn test_table_properties_default() { - let properties = TableProperties::new(); - assert_eq!(properties.commit_num_retries, 4); - assert_eq!(properties.commit_min_retry_wait_ms, 100); - assert_eq!(properties.commit_max_retry_wait_ms, 60000); - assert_eq!(properties.commit_total_retry_timeout_ms, 1800000); - assert_eq!(properties.write_format_default, "parquet"); - assert_eq!(properties.write_target_file_size_bytes, 536870912); - } - - #[test] - fn test_table_properties_from_map() { - let properties = TableProperties::try_from(HashMap::from([ - ("commit.retry.num-retries".to_string(), "5".to_string()), - ("commit.retry.min-wait-ms".to_string(), "10".to_string()), - ("write.format.default".to_string(), "avro".to_string()), - ])) - .unwrap(); - assert_eq!(properties.commit_num_retries, 5); - assert_eq!(properties.commit_min_retry_wait_ms, 10); - assert_eq!(properties.commit_max_retry_wait_ms, 60000); - assert_eq!(properties.commit_max_retry_wait_ms, 1800000); - assert_eq!(properties.write_format_default, "avro"); - assert_eq!(properties.write_target_file_size_bytes, 536870912); +impl TryFrom<&HashMap> for TableProperties { + // parse by entry key or use default value + type Error = anyhow::Error; + + fn try_from(props: &HashMap) -> Result { + Ok(TableProperties { + commit_num_retries: parse_property( + props, + TableProperties::PROPERTY_COMMIT_NUM_RETRIES, + TableProperties::PROPERTY_COMMIT_NUM_RETRIES_DEFAULT, + )?, + commit_min_retry_wait_ms: parse_property( + props, + TableProperties::PROPERTY_COMMIT_MIN_RETRY_WAIT_MS, + TableProperties::PROPERTY_COMMIT_MIN_RETRY_WAIT_MS_DEFAULT, + )?, + commit_max_retry_wait_ms: parse_property( + props, + TableProperties::PROPERTY_COMMIT_MAX_RETRY_WAIT_MS, + TableProperties::PROPERTY_COMMIT_MAX_RETRY_WAIT_MS_DEFAULT, + )?, + commit_total_retry_timeout_ms: parse_property( + props, + TableProperties::PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS, + TableProperties::PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT, + )?, + write_format_default: parse_property( + props, + TableProperties::PROPERTY_DEFAULT_FILE_FORMAT, + TableProperties::PROPERTY_DEFAULT_FILE_FORMAT_DEFAULT.to_string(), + )?, + write_target_file_size_bytes: parse_property( + props, + TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES, + TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, + )?, + }) } } From 47f0f2da22114c89c23f6d81f0163259656fabea Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Sat, 11 Oct 2025 13:58:47 -0400 Subject: [PATCH 6/9] refactor: refactor to properties from `TableProperties` --- crates/iceberg/src/spec/table_metadata.rs | 32 --------- crates/iceberg/src/transaction/mod.rs | 69 ++++--------------- .../src/writer/file_writer/rolling_writer.rs | 4 +- .../datafusion/src/physical_plan/write.rs | 14 ++-- 4 files changed, 21 insertions(+), 98 deletions(-) diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index d7347d50ee..0cfc68b754 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -99,38 +99,6 @@ pub const RESERVED_PROPERTIES: [&str; 9] = [ PROPERTY_DEFAULT_SORT_ORDER, ]; -/// Property key for number of commit retries. -pub const PROPERTY_COMMIT_NUM_RETRIES: &str = "commit.retry.num-retries"; -/// Default value for number of commit retries. -pub const PROPERTY_COMMIT_NUM_RETRIES_DEFAULT: usize = 4; - -/// Property key for minimum wait time (ms) between retries. -pub const PROPERTY_COMMIT_MIN_RETRY_WAIT_MS: &str = "commit.retry.min-wait-ms"; -/// Default value for minimum wait time (ms) between retries. -pub const PROPERTY_COMMIT_MIN_RETRY_WAIT_MS_DEFAULT: u64 = 100; - -/// Property key for maximum wait time (ms) between retries. -pub const PROPERTY_COMMIT_MAX_RETRY_WAIT_MS: &str = "commit.retry.max-wait-ms"; -/// Default value for maximum wait time (ms) between retries. -pub const PROPERTY_COMMIT_MAX_RETRY_WAIT_MS_DEFAULT: u64 = 60 * 1000; // 1 minute - -/// Property key for total maximum retry time (ms). -pub const PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS: &str = "commit.retry.total-timeout-ms"; -/// Default value for total maximum retry time (ms). -pub const PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT: u64 = 30 * 60 * 1000; // 30 minutes - -/// Default file format for data files -pub const PROPERTY_DEFAULT_FILE_FORMAT: &str = "write.format.default"; -/// Default file format for delete files -pub const PROPERTY_DELETE_DEFAULT_FILE_FORMAT: &str = "write.delete.format.default"; -/// Default value for data file format -pub const PROPERTY_DEFAULT_FILE_FORMAT_DEFAULT: &str = "parquet"; - -/// Target file size for newly written files. -pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES: &str = "write.target-file-size-bytes"; -/// Default target file size -pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT: usize = 512 * 1024 * 1024; // 512 MB - /// Reference to [`TableMetadata`]. pub type TableMetadataRef = Arc; diff --git a/crates/iceberg/src/transaction/mod.rs b/crates/iceberg/src/transaction/mod.rs index 06549a95c5..26bd652226 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -52,8 +52,6 @@ /// that allows users to apply a transaction action to a `Transaction`. mod action; -use std::collections::HashMap; - pub use action::*; mod append; mod snapshot; @@ -69,12 +67,7 @@ use std::time::Duration; use backon::{BackoffBuilder, ExponentialBackoff, ExponentialBuilder, RetryableWithContext}; use crate::error::Result; -use crate::spec::{ - PROPERTY_COMMIT_MAX_RETRY_WAIT_MS, PROPERTY_COMMIT_MAX_RETRY_WAIT_MS_DEFAULT, - PROPERTY_COMMIT_MIN_RETRY_WAIT_MS, PROPERTY_COMMIT_MIN_RETRY_WAIT_MS_DEFAULT, - PROPERTY_COMMIT_NUM_RETRIES, PROPERTY_COMMIT_NUM_RETRIES_DEFAULT, - PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS, PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT, -}; +use crate::spec::TableProperties; use crate::table::Table; use crate::transaction::action::BoxedTransactionAction; use crate::transaction::append::FastAppendAction; @@ -170,7 +163,12 @@ impl Transaction { return Ok(self.table); } - let backoff = Self::build_backoff(self.table.metadata().properties())?; + let table_props = + TableProperties::try_from(self.table.metadata().properties()).map_err(|e| { + Error::new(ErrorKind::DataInvalid, "Invalid table properties").with_source(e) + })?; + + let backoff = Self::build_backoff(table_props)?; let tx = self; (|mut tx: Transaction| async { @@ -185,53 +183,14 @@ impl Transaction { .1 } - fn build_backoff(props: &HashMap) -> Result { - let min_delay = match props.get(PROPERTY_COMMIT_MIN_RETRY_WAIT_MS) { - Some(value_str) => value_str.parse::().map_err(|e| { - Error::new( - ErrorKind::DataInvalid, - "Invalid value for commit.retry.min-wait-ms", - ) - .with_source(e) - })?, - None => PROPERTY_COMMIT_MIN_RETRY_WAIT_MS_DEFAULT, - }; - let max_delay = match props.get(PROPERTY_COMMIT_MAX_RETRY_WAIT_MS) { - Some(value_str) => value_str.parse::().map_err(|e| { - Error::new( - ErrorKind::DataInvalid, - "Invalid value for commit.retry.max-wait-ms", - ) - .with_source(e) - })?, - None => PROPERTY_COMMIT_MAX_RETRY_WAIT_MS_DEFAULT, - }; - let total_delay = match props.get(PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS) { - Some(value_str) => value_str.parse::().map_err(|e| { - Error::new( - ErrorKind::DataInvalid, - "Invalid value for commit.retry.total-timeout-ms", - ) - .with_source(e) - })?, - None => PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT, - }; - let max_times = match props.get(PROPERTY_COMMIT_NUM_RETRIES) { - Some(value_str) => value_str.parse::().map_err(|e| { - Error::new( - ErrorKind::DataInvalid, - "Invalid value for commit.retry.num-retries", - ) - .with_source(e) - })?, - None => PROPERTY_COMMIT_NUM_RETRIES_DEFAULT, - }; - + fn build_backoff(props: TableProperties) -> Result { Ok(ExponentialBuilder::new() - .with_min_delay(Duration::from_millis(min_delay)) - .with_max_delay(Duration::from_millis(max_delay)) - .with_total_delay(Some(Duration::from_millis(total_delay))) - .with_max_times(max_times) + .with_min_delay(Duration::from_millis(props.commit_min_retry_wait_ms)) + .with_max_delay(Duration::from_millis(props.commit_max_retry_wait_ms)) + .with_total_delay(Some(Duration::from_millis( + props.commit_total_retry_timeout_ms, + ))) + .with_max_times(props.commit_num_retries) .with_factor(2.0) .build()) } diff --git a/crates/iceberg/src/writer/file_writer/rolling_writer.rs b/crates/iceberg/src/writer/file_writer/rolling_writer.rs index 0b9b105c5e..0003617043 100644 --- a/crates/iceberg/src/writer/file_writer/rolling_writer.rs +++ b/crates/iceberg/src/writer/file_writer/rolling_writer.rs @@ -20,7 +20,7 @@ use std::fmt::{Debug, Formatter}; use arrow_array::RecordBatch; use crate::io::{FileIO, OutputFile}; -use crate::spec::{DataFileBuilder, PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, PartitionKey}; +use crate::spec::{DataFileBuilder, PartitionKey, TableProperties}; use crate::writer::CurrentFileStatus; use crate::writer::file_writer::location_generator::{FileNameGenerator, LocationGenerator}; use crate::writer::file_writer::{FileWriter, FileWriterBuilder}; @@ -95,7 +95,7 @@ where ) -> Self { Self { inner_builder, - target_file_size: PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, + target_file_size: TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, file_io, location_generator, file_name_generator, diff --git a/crates/integrations/datafusion/src/physical_plan/write.rs b/crates/integrations/datafusion/src/physical_plan/write.rs index 759f1a8de2..7bff239222 100644 --- a/crates/integrations/datafusion/src/physical_plan/write.rs +++ b/crates/integrations/datafusion/src/physical_plan/write.rs @@ -36,11 +36,7 @@ use datafusion::physical_plan::{ }; use futures::StreamExt; use iceberg::arrow::{FieldMatchMode, schema_to_arrow_schema}; -use iceberg::spec::{ - DataFileFormat, PROPERTY_DEFAULT_FILE_FORMAT, PROPERTY_DEFAULT_FILE_FORMAT_DEFAULT, - PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES, PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, - serialize_data_file_to_json, -}; +use iceberg::spec::{DataFileFormat, TableProperties, serialize_data_file_to_json}; use iceberg::table::Table; use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; use iceberg::writer::file_writer::ParquetWriterBuilder; @@ -226,8 +222,8 @@ impl ExecutionPlan for IcebergWriteExec { self.table .metadata() .properties() - .get(PROPERTY_DEFAULT_FILE_FORMAT) - .unwrap_or(&PROPERTY_DEFAULT_FILE_FORMAT_DEFAULT.to_string()), + .get(TableProperties::PROPERTY_DEFAULT_FILE_FORMAT) + .unwrap_or(&TableProperties::PROPERTY_DEFAULT_FILE_FORMAT_DEFAULT.to_string()), ) .map_err(to_datafusion_error)?; if file_format != DataFileFormat::Parquet { @@ -250,7 +246,7 @@ impl ExecutionPlan for IcebergWriteExec { .table .metadata() .properties() - .get(PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES) + .get(TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES) { Some(value_str) => value_str .parse::() @@ -262,7 +258,7 @@ impl ExecutionPlan for IcebergWriteExec { .with_source(e) }) .map_err(to_datafusion_error)?, - None => PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, + None => TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT, }; let file_io = self.table.file_io().clone(); From daee3873f1e8279abc2fd309ff5c1c8fd79a8342 Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Sat, 11 Oct 2025 13:59:56 -0400 Subject: [PATCH 7/9] remove paste crate --- Cargo.lock | 1 - Cargo.toml | 1 - crates/iceberg/Cargo.toml | 1 - 3 files changed, 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b3d01a9855..596e943bc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3628,7 +3628,6 @@ dependencies = [ "opendal", "ordered-float 4.6.0", "parquet", - "paste", "pretty_assertions", "rand 0.8.5", "regex", diff --git a/Cargo.toml b/Cargo.toml index 60e79c1517..46c99cc3d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,7 +95,6 @@ once_cell = "1.20" opendal = "0.54.0" ordered-float = "4" parquet = "55.1" -paste = "1.0.15" pilota = "0.11.10" port_scanner = "0.1.5" pretty_assertions = "1.4" diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 3fe0651a06..b831607aab 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -73,7 +73,6 @@ once_cell = { workspace = true } opendal = { workspace = true } ordered-float = { workspace = true } parquet = { workspace = true, features = ["async"] } -paste = { workspace = true } rand = { workspace = true } reqsign = { version = "0.16.3", optional = true, default-features = false } reqwest = { workspace = true } From 2e780950bb8ce8365d0b9b5ae0c759acec349eca Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Sat, 11 Oct 2025 22:20:54 -0400 Subject: [PATCH 8/9] move more properties to module --- crates/iceberg/src/spec/table_metadata.rs | 53 ------------------ .../src/spec/table_metadata_builder.rs | 23 ++++---- crates/iceberg/src/spec/table_properties.rs | 54 +++++++++++++++++++ crates/iceberg/src/transaction/snapshot.rs | 13 +++-- 4 files changed, 71 insertions(+), 72 deletions(-) diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index 0cfc68b754..ca298f308d 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -46,59 +46,6 @@ pub(crate) static ONE_MINUTE_MS: i64 = 60_000; pub(crate) static EMPTY_SNAPSHOT_ID: i64 = -1; pub(crate) static INITIAL_SEQUENCE_NUMBER: i64 = 0; -/// Reserved table property for table format version. -/// -/// Iceberg will default a new table's format version to the latest stable and recommended -/// version. This reserved property keyword allows users to override the Iceberg format version of -/// the table metadata. -/// -/// If this table property exists when creating a table, the table will use the specified format -/// version. If a table updates this property, it will try to upgrade to the specified format -/// version. -pub const PROPERTY_FORMAT_VERSION: &str = "format-version"; -/// Reserved table property for table UUID. -pub const PROPERTY_UUID: &str = "uuid"; -/// Reserved table property for the total number of snapshots. -pub const PROPERTY_SNAPSHOT_COUNT: &str = "snapshot-count"; -/// Reserved table property for current snapshot summary. -pub const PROPERTY_CURRENT_SNAPSHOT_SUMMARY: &str = "current-snapshot-summary"; -/// Reserved table property for current snapshot id. -pub const PROPERTY_CURRENT_SNAPSHOT_ID: &str = "current-snapshot-id"; -/// Reserved table property for current snapshot timestamp. -pub const PROPERTY_CURRENT_SNAPSHOT_TIMESTAMP: &str = "current-snapshot-timestamp-ms"; -/// Reserved table property for the JSON representation of current schema. -pub const PROPERTY_CURRENT_SCHEMA: &str = "current-schema"; -/// Reserved table property for the JSON representation of current(default) partition spec. -pub const PROPERTY_DEFAULT_PARTITION_SPEC: &str = "default-partition-spec"; -/// Reserved table property for the JSON representation of current(default) sort order. -pub const PROPERTY_DEFAULT_SORT_ORDER: &str = "default-sort-order"; - -/// Property key for max number of previous versions to keep. -pub const PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX: &str = "write.metadata.previous-versions-max"; -/// Default value for max number of previous versions to keep. -pub const PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT: usize = 100; - -/// Property key for max number of partitions to keep summary stats for. -pub const PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT: &str = "write.summary.partition-limit"; -/// Default value for the max number of partitions to keep summary stats for. -pub const PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT: u64 = 0; - -/// Reserved Iceberg table properties list. -/// -/// Reserved table properties are only used to control behaviors when creating or updating a -/// table. The value of these properties are not persisted as a part of the table metadata. -pub const RESERVED_PROPERTIES: [&str; 9] = [ - PROPERTY_FORMAT_VERSION, - PROPERTY_UUID, - PROPERTY_SNAPSHOT_COUNT, - PROPERTY_CURRENT_SNAPSHOT_ID, - PROPERTY_CURRENT_SNAPSHOT_SUMMARY, - PROPERTY_CURRENT_SNAPSHOT_TIMESTAMP, - PROPERTY_CURRENT_SCHEMA, - PROPERTY_DEFAULT_PARTITION_SPEC, - PROPERTY_DEFAULT_SORT_ORDER, -]; - /// Reference to [`TableMetadata`]. pub type TableMetadataRef = Arc; diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index 068f2002f9..7881ebea40 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -22,11 +22,10 @@ use uuid::Uuid; use super::{ DEFAULT_PARTITION_SPEC_ID, DEFAULT_SCHEMA_ID, FormatVersion, MAIN_BRANCH, MetadataLog, - ONE_MINUTE_MS, PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX, - PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT, PartitionSpec, PartitionSpecBuilder, - PartitionStatisticsFile, RESERVED_PROPERTIES, Schema, SchemaRef, Snapshot, SnapshotLog, - SnapshotReference, SnapshotRetention, SortOrder, SortOrderRef, StatisticsFile, StructType, - TableMetadata, UNPARTITIONED_LAST_ASSIGNED_ID, UnboundPartitionSpec, + ONE_MINUTE_MS, PartitionSpec, PartitionSpecBuilder, PartitionStatisticsFile, Schema, SchemaRef, + Snapshot, SnapshotLog, SnapshotReference, SnapshotRetention, SortOrder, SortOrderRef, + StatisticsFile, StructType, TableMetadata, TableProperties, UNPARTITIONED_LAST_ASSIGNED_ID, + UnboundPartitionSpec, }; use crate::error::{Error, ErrorKind, Result}; use crate::{TableCreation, TableUpdate}; @@ -247,7 +246,7 @@ impl TableMetadataBuilder { // List of specified properties that are RESERVED and should not be persisted. let reserved_properties = properties .keys() - .filter(|key| RESERVED_PROPERTIES.contains(&key.as_str())) + .filter(|key| TableProperties::RESERVED_PROPERTIES.contains(&key.as_str())) .map(ToString::to_string) .collect::>(); @@ -285,7 +284,7 @@ impl TableMetadataBuilder { // disallow removal of reserved properties let reserved_properties = properties .iter() - .filter(|key| RESERVED_PROPERTIES.contains(&key.as_str())) + .filter(|key| TableProperties::RESERVED_PROPERTIES.contains(&key.as_str())) .map(ToString::to_string) .collect::>(); @@ -1061,9 +1060,9 @@ impl TableMetadataBuilder { let max_size = self .metadata .properties - .get(PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX) + .get(TableProperties::PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX) .and_then(|v| v.parse::().ok()) - .unwrap_or(PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT) + .unwrap_or(TableProperties::PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT) .max(1); if self.metadata.metadata_log.len() > max_size { @@ -1360,8 +1359,8 @@ mod tests { use crate::io::FileIOBuilder; use crate::spec::{ BlobMetadata, NestedField, NullOrder, Operation, PartitionSpec, PrimitiveType, Schema, - SnapshotRetention, SortDirection, SortField, StructType, Summary, Transform, Type, - UnboundPartitionField, + SnapshotRetention, SortDirection, SortField, StructType, Summary, TableProperties, + Transform, Type, UnboundPartitionField, }; use crate::table::Table; @@ -2299,7 +2298,7 @@ mod tests { let builder = builder_without_changes(FormatVersion::V2); let metadata = builder .set_properties(HashMap::from_iter(vec![( - PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX.to_string(), + TableProperties::PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX.to_string(), "2".to_string(), )])) .unwrap() diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index ab72496be7..899b709190 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -51,6 +51,60 @@ pub struct TableProperties { } impl TableProperties { + /// Reserved table property for table format version. + /// + /// Iceberg will default a new table's format version to the latest stable and recommended + /// version. This reserved property keyword allows users to override the Iceberg format version of + /// the table metadata. + /// + /// If this table property exists when creating a table, the table will use the specified format + /// version. If a table updates this property, it will try to upgrade to the specified format + /// version. + pub const PROPERTY_FORMAT_VERSION: &str = "format-version"; + /// Reserved table property for table UUID. + pub const PROPERTY_UUID: &str = "uuid"; + /// Reserved table property for the total number of snapshots. + pub const PROPERTY_SNAPSHOT_COUNT: &str = "snapshot-count"; + /// Reserved table property for current snapshot summary. + pub const PROPERTY_CURRENT_SNAPSHOT_SUMMARY: &str = "current-snapshot-summary"; + /// Reserved table property for current snapshot id. + pub const PROPERTY_CURRENT_SNAPSHOT_ID: &str = "current-snapshot-id"; + /// Reserved table property for current snapshot timestamp. + pub const PROPERTY_CURRENT_SNAPSHOT_TIMESTAMP: &str = "current-snapshot-timestamp-ms"; + /// Reserved table property for the JSON representation of current schema. + pub const PROPERTY_CURRENT_SCHEMA: &str = "current-schema"; + /// Reserved table property for the JSON representation of current(default) partition spec. + pub const PROPERTY_DEFAULT_PARTITION_SPEC: &str = "default-partition-spec"; + /// Reserved table property for the JSON representation of current(default) sort order. + pub const PROPERTY_DEFAULT_SORT_ORDER: &str = "default-sort-order"; + + /// Property key for max number of previous versions to keep. + pub const PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX: &str = + "write.metadata.previous-versions-max"; + /// Default value for max number of previous versions to keep. + pub const PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT: usize = 100; + + /// Property key for max number of partitions to keep summary stats for. + pub const PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT: &str = "write.summary.partition-limit"; + /// Default value for the max number of partitions to keep summary stats for. + pub const PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT: u64 = 0; + + /// Reserved Iceberg table properties list. + /// + /// Reserved table properties are only used to control behaviors when creating or updating a + /// table. The value of these properties are not persisted as a part of the table metadata. + pub const RESERVED_PROPERTIES: [&str; 9] = [ + Self::PROPERTY_FORMAT_VERSION, + Self::PROPERTY_UUID, + Self::PROPERTY_SNAPSHOT_COUNT, + Self::PROPERTY_CURRENT_SNAPSHOT_ID, + Self::PROPERTY_CURRENT_SNAPSHOT_SUMMARY, + Self::PROPERTY_CURRENT_SNAPSHOT_TIMESTAMP, + Self::PROPERTY_CURRENT_SCHEMA, + Self::PROPERTY_DEFAULT_PARTITION_SPEC, + Self::PROPERTY_DEFAULT_SORT_ORDER, + ]; + /// Property key for number of commit retries. pub const PROPERTY_COMMIT_NUM_RETRIES: &str = "commit.retry.num-retries"; /// Default value for number of commit retries. diff --git a/crates/iceberg/src/transaction/snapshot.rs b/crates/iceberg/src/transaction/snapshot.rs index 48dc2b5b90..a03b8dc490 100644 --- a/crates/iceberg/src/transaction/snapshot.rs +++ b/crates/iceberg/src/transaction/snapshot.rs @@ -24,10 +24,9 @@ use uuid::Uuid; use crate::error::Result; use crate::spec::{ DataFile, DataFileFormat, FormatVersion, MAIN_BRANCH, ManifestContentType, ManifestEntry, - ManifestFile, ManifestListWriter, ManifestWriter, ManifestWriterBuilder, Operation, - PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT, PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT, - Snapshot, SnapshotReference, SnapshotRetention, SnapshotSummaryCollector, Struct, StructType, - Summary, update_snapshot_summaries, + ManifestFile, ManifestListWriter, ManifestWriter, ManifestWriterBuilder, Operation, Snapshot, + SnapshotReference, SnapshotRetention, SnapshotSummaryCollector, Struct, StructType, Summary, + TableProperties, update_snapshot_summaries, }; use crate::table::Table; use crate::transaction::ActionCommit; @@ -322,15 +321,15 @@ impl<'a> SnapshotProducer<'a> { let partition_summary_limit = if let Some(limit) = table_metadata .properties() - .get(PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT) + .get(TableProperties::PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT) { if let Ok(limit) = limit.parse::() { limit } else { - PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT + TableProperties::PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT } } else { - PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT + TableProperties::PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT }; summary_collector.set_partition_summary_limit(partition_summary_limit); From dd23d7f10b9169f0e17f1f0d41247f3b6b6484ef Mon Sep 17 00:00:00 2001 From: Kaushik Srinivasan Date: Sat, 11 Oct 2025 22:21:17 -0400 Subject: [PATCH 9/9] tests: add tests for table_properties --- crates/iceberg/src/spec/table_properties.rs | 105 ++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/crates/iceberg/src/spec/table_properties.rs b/crates/iceberg/src/spec/table_properties.rs index 899b709190..9aa789fed8 100644 --- a/crates/iceberg/src/spec/table_properties.rs +++ b/crates/iceberg/src/spec/table_properties.rs @@ -35,6 +35,7 @@ where } /// TableProperties that contains the properties of a table. +#[derive(Debug)] pub struct TableProperties { /// The number of times to retry a commit. pub commit_num_retries: usize, @@ -177,3 +178,107 @@ impl TryFrom<&HashMap> for TableProperties { }) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_table_properties_default() { + let props = HashMap::new(); + let table_properties = TableProperties::try_from(&props).unwrap(); + assert_eq!( + table_properties.commit_num_retries, + TableProperties::PROPERTY_COMMIT_NUM_RETRIES_DEFAULT + ); + assert_eq!( + table_properties.commit_min_retry_wait_ms, + TableProperties::PROPERTY_COMMIT_MIN_RETRY_WAIT_MS_DEFAULT + ); + assert_eq!( + table_properties.commit_max_retry_wait_ms, + TableProperties::PROPERTY_COMMIT_MAX_RETRY_WAIT_MS_DEFAULT + ); + assert_eq!( + table_properties.write_format_default, + TableProperties::PROPERTY_DEFAULT_FILE_FORMAT_DEFAULT.to_string() + ); + assert_eq!( + table_properties.write_target_file_size_bytes, + TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT + ); + } + + #[test] + fn test_table_properties_valid() { + let props = HashMap::from([ + ( + TableProperties::PROPERTY_COMMIT_NUM_RETRIES.to_string(), + "10".to_string(), + ), + ( + TableProperties::PROPERTY_COMMIT_MAX_RETRY_WAIT_MS.to_string(), + "20".to_string(), + ), + ( + TableProperties::PROPERTY_DEFAULT_FILE_FORMAT.to_string(), + "avro".to_string(), + ), + ( + TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES.to_string(), + "512".to_string(), + ), + ]); + let table_properties = TableProperties::try_from(&props).unwrap(); + assert_eq!(table_properties.commit_num_retries, 10); + assert_eq!(table_properties.commit_max_retry_wait_ms, 20); + assert_eq!(table_properties.write_format_default, "avro".to_string()); + assert_eq!(table_properties.write_target_file_size_bytes, 512); + } + + #[test] + fn test_table_properties_invalid() { + let invalid_retries = HashMap::from([( + TableProperties::PROPERTY_COMMIT_NUM_RETRIES.to_string(), + "abc".to_string(), + )]); + + let table_properties = TableProperties::try_from(&invalid_retries).unwrap_err(); + assert!( + table_properties.to_string().contains( + "Invalid value for commit.retry.num-retries: invalid digit found in string" + ) + ); + + let invalid_min_wait = HashMap::from([( + TableProperties::PROPERTY_COMMIT_MIN_RETRY_WAIT_MS.to_string(), + "abc".to_string(), + )]); + let table_properties = TableProperties::try_from(&invalid_min_wait).unwrap_err(); + assert!( + table_properties.to_string().contains( + "Invalid value for commit.retry.min-wait-ms: invalid digit found in string" + ) + ); + + let invalid_max_wait = HashMap::from([( + TableProperties::PROPERTY_COMMIT_MAX_RETRY_WAIT_MS.to_string(), + "abc".to_string(), + )]); + let table_properties = TableProperties::try_from(&invalid_max_wait).unwrap_err(); + assert!( + table_properties.to_string().contains( + "Invalid value for commit.retry.max-wait-ms: invalid digit found in string" + ) + ); + + let invalid_target_size = HashMap::from([( + TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES.to_string(), + "abc".to_string(), + )]); + let table_properties = TableProperties::try_from(&invalid_target_size).unwrap_err(); + assert!(table_properties.to_string().contains( + "Invalid value for write.target-file-size-bytes: invalid digit found in string" + )); + } +}