From b332ffe612a370127997ceea9ef175b11c78248b Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Thu, 17 Jul 2025 22:53:00 -0700 Subject: [PATCH 01/23] big squash Signed-off-by: CTTY DerGut --- crates/catalog/glue/src/utils.rs | 56 +---- crates/catalog/s3tables/src/utils.rs | 29 --- crates/iceberg/src/catalog/memory/catalog.rs | 217 ++++++++++++++++-- .../src/catalog/memory/namespace_state.rs | 37 ++- crates/iceberg/src/catalog/mod.rs | 23 +- crates/iceberg/src/error.rs | 6 + crates/iceberg/src/spec/table_metadata.rs | 6 +- crates/iceberg/src/table.rs | 6 + crates/iceberg/src/transaction/mod.rs | 2 +- 9 files changed, 274 insertions(+), 108 deletions(-) diff --git a/crates/catalog/glue/src/utils.rs b/crates/catalog/glue/src/utils.rs index c51dd6249f..2895e4b239 100644 --- a/crates/catalog/glue/src/utils.rs +++ b/crates/catalog/glue/src/utils.rs @@ -22,7 +22,6 @@ use aws_sdk_glue::config::Credentials; use aws_sdk_glue::types::{Database, DatabaseInput, StorageDescriptor, TableInput}; use iceberg::spec::TableMetadata; use iceberg::{Error, ErrorKind, Namespace, NamespaceIdent, Result}; -use uuid::Uuid; use crate::error::from_aws_build_error; use crate::schema::GlueSchemaBuilder; @@ -228,30 +227,6 @@ pub(crate) fn get_default_table_location( } } -/// Create metadata location from `location` and `version` -pub(crate) fn create_metadata_location(location: impl AsRef, version: i32) -> Result { - if version < 0 { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Table metadata version: '{}' must be a non-negative integer", - version - ), - )); - }; - - let version = format!("{:0>5}", version); - let id = Uuid::new_v4(); - let metadata_location = format!( - "{}/metadata/{}-{}.metadata.json", - location.as_ref(), - version, - id - ); - - Ok(metadata_location) -} - /// Get metadata location from `GlueTable` parameters pub(crate) fn get_metadata_location( parameters: &Option>, @@ -287,7 +262,9 @@ macro_rules! with_catalog_id { mod tests { use aws_sdk_glue::config::ProvideCredentials; use aws_sdk_glue::types::Column; - use iceberg::spec::{NestedField, PrimitiveType, Schema, TableMetadataBuilder, Type}; + use iceberg::spec::{ + MetadataLocation, NestedField, PrimitiveType, Schema, TableMetadataBuilder, Type, + }; use iceberg::{Namespace, Result, TableCreation}; use super::*; @@ -332,7 +309,7 @@ mod tests { fn test_convert_to_glue_table() -> Result<()> { let table_name = "my_table".to_string(); let location = "s3a://warehouse/hive".to_string(); - let metadata_location = create_metadata_location(location.clone(), 0)?; + let metadata_location = MetadataLocation::new_with_prefix(&location); let properties = HashMap::new(); let schema = Schema::builder() .with_schema_id(1) @@ -362,8 +339,13 @@ mod tests { .location(metadata.location()) .build(); - let result = - convert_to_glue_table(&table_name, metadata_location, &metadata, &properties, None)?; + let result = convert_to_glue_table( + &table_name, + metadata_location.to_string(), + &metadata, + &properties, + None, + )?; assert_eq!(result.name(), &table_name); assert_eq!(result.description(), None); @@ -372,22 +354,6 @@ mod tests { Ok(()) } - #[test] - fn test_create_metadata_location() -> Result<()> { - let location = "my_base_location"; - let valid_version = 0; - let invalid_version = -1; - - let valid_result = create_metadata_location(location, valid_version)?; - let invalid_result = create_metadata_location(location, invalid_version); - - assert!(valid_result.starts_with("my_base_location/metadata/00000-")); - assert!(valid_result.ends_with(".metadata.json")); - assert!(invalid_result.is_err()); - - Ok(()) - } - #[test] fn test_get_default_table_location() -> Result<()> { let properties = HashMap::from([(LOCATION.to_string(), "db_location".to_string())]); diff --git a/crates/catalog/s3tables/src/utils.rs b/crates/catalog/s3tables/src/utils.rs index d0195dccfd..2f3e16a937 100644 --- a/crates/catalog/s3tables/src/utils.rs +++ b/crates/catalog/s3tables/src/utils.rs @@ -19,8 +19,6 @@ use std::collections::HashMap; use aws_config::{BehaviorVersion, Region, SdkConfig}; use aws_sdk_s3tables::config::Credentials; -use iceberg::{Error, ErrorKind, Result}; -use uuid::Uuid; /// Property aws profile name pub const AWS_PROFILE_NAME: &str = "profile_name"; @@ -71,30 +69,3 @@ pub(crate) async fn create_sdk_config( config.load().await } - -/// Create metadata location from `location` and `version` -pub(crate) fn create_metadata_location( - warehouse_location: impl AsRef, - version: i32, -) -> Result { - if version < 0 { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Table metadata version: '{}' must be a non-negative integer", - version - ), - )); - }; - - let version = format!("{:0>5}", version); - let id = Uuid::new_v4(); - let metadata_location = format!( - "{}/metadata/{}-{}.metadata.json", - warehouse_location.as_ref(), - version, - id - ); - - Ok(metadata_location) -} diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index d1d361c7a1..0d5fd02632 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -20,7 +20,7 @@ use std::collections::HashMap; use async_trait::async_trait; -use futures::lock::Mutex; +use futures::lock::{Mutex, MutexGuard}; use itertools::Itertools; use uuid::Uuid; @@ -45,7 +45,7 @@ pub struct MemoryCatalog { } impl MemoryCatalog { - /// Creates an memory catalog. + /// Creates a memory catalog. pub fn new(file_io: FileIO, warehouse_location: Option) -> Self { Self { root_namespace_state: Mutex::new(NamespaceState::default()), @@ -53,6 +53,23 @@ impl MemoryCatalog { warehouse_location, } } + + /// Loads a table from the locked namespace state. + async fn load_table_from_locked_state( + &self, + table_ident: &TableIdent, + root_namespace_state: &MutexGuard<'_, NamespaceState>, + ) -> Result { + let metadata_location = root_namespace_state.get_existing_table_location(table_ident)?; + let metadata = TableMetadata::read_from(&self.file_io, metadata_location).await?; + + Table::builder() + .identifier(table_ident.clone()) + .metadata(metadata) + .metadata_location(metadata_location.to_string()) + .file_io(self.file_io.clone()) + .build() + } } #[async_trait] @@ -226,15 +243,8 @@ impl Catalog for MemoryCatalog { async fn load_table(&self, table_ident: &TableIdent) -> Result
{ let root_namespace_state = self.root_namespace_state.lock().await; - let metadata_location = root_namespace_state.get_existing_table_location(table_ident)?; - let metadata = TableMetadata::read_from(&self.file_io, metadata_location).await?; - - Table::builder() - .file_io(self.file_io.clone()) - .metadata_location(metadata_location.clone()) - .metadata(metadata) - .identifier(table_ident.clone()) - .build() + self.load_table_from_locked_state(table_ident, &root_namespace_state) + .await } /// Drop a table from the catalog. @@ -289,12 +299,29 @@ impl Catalog for MemoryCatalog { .build() } - /// Update a table to the catalog. - async fn update_table(&self, _commit: TableCommit) -> Result
{ - Err(Error::new( - ErrorKind::FeatureUnsupported, - "MemoryCatalog does not currently support updating tables.", - )) + /// Update a table in the catalog. + async fn update_table(&self, commit: TableCommit) -> Result
{ + let mut root_namespace_state = self.root_namespace_state.lock().await; + + // Updates the current table version and writes a new metadata file. + let current_table = self + .load_table_from_locked_state(commit.identifier(), &root_namespace_state) + .await?; + + // Apply TableCommit to get staged table + let staged_table = commit.apply(current_table)?; + + // Write table metadata to the new location + TableMetadata::write( + staged_table.file_io(), + staged_table.metadata(), + &MetadataLocation::from_str(staged_table.metadata_location().unwrap())?, + ).await?; + + // Flip the pointer to reference the new metadata file. + let updated_table = root_namespace_state.commit_table_update(staged_table)?; + + Ok(updated_table) } } @@ -303,13 +330,18 @@ mod tests { use std::collections::HashSet; use std::hash::Hash; use std::iter::FromIterator; + use std::vec; use regex::Regex; use tempfile::TempDir; use super::*; use crate::io::FileIOBuilder; - use crate::spec::{NestedField, PartitionSpec, PrimitiveType, Schema, SortOrder, Type}; + use crate::spec::{ + NestedField, NullOrder, PROPERTY_COMMIT_NUM_RETRIES, PartitionSpec, PrimitiveType, Schema, + SortOrder, Type, + }; + use crate::transaction::{ApplyTransactionAction, Transaction}; fn temp_path() -> String { let temp_dir = TempDir::new().unwrap(); @@ -348,8 +380,8 @@ mod tests { .unwrap() } - async fn create_table(catalog: &C, table_ident: &TableIdent) { - let _ = catalog + async fn create_table(catalog: &C, table_ident: &TableIdent) -> Table { + catalog .create_table( &table_ident.namespace, TableCreation::builder() @@ -358,7 +390,7 @@ mod tests { .build(), ) .await - .unwrap(); + .unwrap() } async fn create_tables(catalog: &C, table_idents: Vec<&TableIdent>) { @@ -367,6 +399,14 @@ mod tests { } } + async fn create_table_with_namespace(catalog: &C) -> Table { + let namespace_ident = NamespaceIdent::new("abc".into()); + create_namespace(catalog, &namespace_ident).await; + + let table_ident = TableIdent::new(namespace_ident, "test".to_string()); + create_table(catalog, &table_ident).await + } + fn assert_table_eq(table: &Table, expected_table_ident: &TableIdent, expected_schema: &Schema) { assert_eq!(table.identifier(), expected_table_ident); @@ -1705,7 +1745,7 @@ mod tests { .unwrap_err() .to_string(), format!( - "TableAlreadyExists => Cannot create table {:? }. Table already exists.", + "TableAlreadyExists => Cannot create table {:?}. Table already exists.", &dst_table_ident ), ); @@ -1754,4 +1794,137 @@ mod tests { metadata_location ); } + + #[tokio::test] + async fn test_update_table() { + let catalog = new_memory_catalog(); + + let table = create_table_with_namespace(&catalog).await; + + // Assert the table doesn't contain the update yet + assert!(!table.metadata().properties().contains_key("key")); + + // Update table metadata + let tx = Transaction::new(&table); + let updated_table = tx + .update_table_properties() + .set("key".to_string(), "value".to_string()) + .apply(tx) + .unwrap() + .commit(&catalog) + .await + .unwrap(); + + assert_eq!( + updated_table.metadata().properties().get("key").unwrap(), + "value" + ); + + assert_eq!(table.identifier(), updated_table.identifier()); + assert_eq!(table.metadata().uuid(), updated_table.metadata().uuid()); + assert!(table.metadata().last_updated_ms() < updated_table.metadata().last_updated_ms()); + assert_ne!(table.metadata_location(), updated_table.metadata_location()); + + println!("left: {:?}", table.metadata().metadata_log()); + println!("right: {:?}", updated_table.metadata().metadata_log()); + assert!( + table.metadata().metadata_log().len() < updated_table.metadata().metadata_log().len() + ); + } + + #[tokio::test] + async fn test_update_table_fails_if_commit_conflicts() { + let catalog = new_memory_catalog(); + let base_table = create_table_with_namespace(&catalog).await; + + // Turn off retry to test conflict + let tx = Transaction::new(&base_table); + let base_table = tx + .update_table_properties() + .set(PROPERTY_COMMIT_NUM_RETRIES.to_string(), "0".to_string()) + .apply(tx) + .unwrap() + .commit(&catalog) + .await + .unwrap(); + + // Update the table by adding a new sort order. + let tx = Transaction::new(&base_table); + let _sort_table = tx + .replace_sort_order() + .asc("foo", NullOrder::First) + .apply(tx) + .unwrap() + .commit(&catalog) + .await + .unwrap(); + + // Try to update the "now old" table again with a different sort order. + let tx = Transaction::new(&base_table); + let err = tx + .replace_sort_order() + .desc("foo", NullOrder::Last) + .apply(tx) + .unwrap() + .commit(&catalog) + .await + .unwrap_err(); + + // The second transaction should fail because it didn't take the new update + // into account. + println!("{}", err.message()); // todo remove this + assert_eq!(err.kind(), ErrorKind::CatalogCommitConflicts); + assert!( + err.message() + .contains("Default sort order id does not match") + ); + } + + #[tokio::test] + async fn test_update_table_fails_if_table_doesnt_exist() { + let catalog = new_memory_catalog(); + + let namespace_ident = NamespaceIdent::new("a".into()); + create_namespace(&catalog, &namespace_ident).await; + + // This table is not known to the catalog. + let table_ident = TableIdent::new(namespace_ident, "test".to_string()); + let table = build_table(table_ident); + + let tx = Transaction::new(&table); + let err = tx + .update_table_properties() + .set("key".to_string(), "value".to_string()) + .apply(tx) + .unwrap() + .commit(&catalog) + .await + .unwrap_err(); + assert_eq!(err.kind(), ErrorKind::TableNotFound); + } + + fn build_table(ident: TableIdent) -> Table { + let file_io = FileIOBuilder::new_fs_io().build().unwrap(); + + let temp_dir = TempDir::new().unwrap(); + let location = temp_dir.path().to_str().unwrap().to_string(); + + let table_creation = TableCreation::builder() + .name(ident.name().to_string()) + .schema(simple_table_schema()) + .location(location) + .build(); + let metadata = TableMetadataBuilder::from_table_creation(table_creation) + .unwrap() + .build() + .unwrap() + .metadata; + + Table::builder() + .identifier(ident) + .metadata(metadata) + .file_io(file_io) + .build() + .unwrap() + } } diff --git a/crates/iceberg/src/catalog/memory/namespace_state.rs b/crates/iceberg/src/catalog/memory/namespace_state.rs index 2ab00e710a..5ad13ffecd 100644 --- a/crates/iceberg/src/catalog/memory/namespace_state.rs +++ b/crates/iceberg/src/catalog/memory/namespace_state.rs @@ -16,9 +16,12 @@ // under the License. use std::collections::{HashMap, hash_map}; +use std::str::FromStr; use itertools::Itertools; +use crate::spec::MetadataLocation; +use crate::table::Table; use crate::{Error, ErrorKind, NamespaceIdent, Result, TableIdent}; // Represents the state of a namespace @@ -29,7 +32,7 @@ pub(crate) struct NamespaceState { // Namespaces nested inside this namespace namespaces: HashMap, // Mapping of tables to metadata locations in this namespace - table_metadata_locations: HashMap, + table_metadata_locations: HashMap, } fn no_such_namespace_err(namespace_ident: &NamespaceIdent) -> Result { @@ -254,7 +257,10 @@ impl NamespaceState { } // Returns the metadata location of the given table or an error if doesn't exist - pub(crate) fn get_existing_table_location(&self, table_ident: &TableIdent) -> Result<&String> { + pub(crate) fn get_existing_table_location( + &self, + table_ident: &TableIdent, + ) -> Result<&MetadataLocation> { let namespace = self.get_namespace(table_ident.namespace())?; match namespace.table_metadata_locations.get(table_ident.name()) { @@ -267,7 +273,7 @@ impl NamespaceState { pub(crate) fn insert_new_table( &mut self, table_ident: &TableIdent, - metadata_location: String, + location: MetadataLocation, ) -> Result<()> { let namespace = self.get_mut_namespace(table_ident.namespace())?; @@ -277,7 +283,7 @@ impl NamespaceState { { hash_map::Entry::Occupied(_) => table_already_exists_err(table_ident), hash_map::Entry::Vacant(entry) => { - let _ = entry.insert(metadata_location); + let _ = entry.insert(location); Ok(()) } @@ -285,7 +291,10 @@ impl NamespaceState { } // Removes the given table or returns an error if doesn't exist - pub(crate) fn remove_existing_table(&mut self, table_ident: &TableIdent) -> Result { + pub(crate) fn remove_existing_table( + &mut self, + table_ident: &TableIdent, + ) -> Result { let namespace = self.get_mut_namespace(table_ident.namespace())?; match namespace @@ -296,4 +305,22 @@ impl NamespaceState { Some(metadata_location) => Ok(metadata_location), } } + + /// Updates the metadata location of the given table or returns an error if doesn't exist + pub(crate) fn commit_table_update(&mut self, staged_table: Table) -> Result
{ + let namespace = self.get_mut_namespace(staged_table.identifier().namespace())?; + + let _ = namespace + .table_metadata_locations + .insert( + staged_table.identifier().name().to_string(), + MetadataLocation::from_str(staged_table.metadata_location().unwrap())?, + ) + .ok_or(Error::new( + ErrorKind::TableNotFound, + format!("No such table: {:?}", staged_table.identifier()), + ))?; + + Ok(staged_table) + } } diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 73c6c10b74..f75311c2a2 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -335,14 +335,31 @@ impl TableCommit { requirement.check(Some(table.metadata()))?; } - // apply updates to metadata builder - let mut metadata_builder = table.metadata().clone().into_builder(None); + // get current metadata location + let current_metadata_location = + table.metadata_location().ok_or(Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to apply commit, table metadata location is not set for table: {}", + table.identifier() + ), + ))?; + // apply updates to metadata builder + let mut metadata_builder = table + .metadata() + .clone() + .into_builder(Some(current_metadata_location)); for update in self.updates { metadata_builder = update.apply(metadata_builder)?; } - Ok(table.with_metadata(Arc::new(metadata_builder.build()?.metadata))) + // Bump the version of metadata + let new_metadata_location = current_metadata_location.with_next_version(); + + Ok(table + .with_metadata(Arc::new(metadata_builder.build()?.metadata)) + .with_metadata_location(new_metadata_location.to_string())) } } diff --git a/crates/iceberg/src/error.rs b/crates/iceberg/src/error.rs index 2781cf6788..7ae01f1b51 100644 --- a/crates/iceberg/src/error.rs +++ b/crates/iceberg/src/error.rs @@ -348,6 +348,12 @@ define_from_err!( "handling invalid utf-8 characters" ); +define_from_err!( + core::num::ParseIntError, + ErrorKind::Unexpected, + "parsing integer from string" +); + define_from_err!( std::array::TryFromSliceError, ErrorKind::DataInvalid, diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index 0f0854f7fc..3b89f54674 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -177,7 +177,7 @@ pub struct TableMetadata { /// that encodes changes to the previous metadata files for the table. /// Each time a new metadata file is created, a new entry of the /// previous metadata file location should be added to the list. - /// Tables can be configured to remove oldest metadata log entries and + /// Tables can be configured to remove the oldest metadata log entries and /// keep a fixed-size log of the most recent entries after a commit. pub(crate) metadata_log: Vec, @@ -3078,7 +3078,7 @@ mod tests { } #[tokio::test] - async fn test_table_metadata_io_read_write() { + async fn test_table_metadata_read_write() { // Create a temporary directory for our test let temp_dir = TempDir::new().unwrap(); let temp_path = temp_dir.path().to_str().unwrap(); @@ -3111,7 +3111,7 @@ mod tests { } #[tokio::test] - async fn test_table_metadata_io_read_nonexistent_file() { + async fn test_table_metadata_read_nonexistent_file() { // Create a FileIO instance let file_io = FileIOBuilder::new_fs_io().build().unwrap(); diff --git a/crates/iceberg/src/table.rs b/crates/iceberg/src/table.rs index 7534143c26..e094292ce6 100644 --- a/crates/iceberg/src/table.rs +++ b/crates/iceberg/src/table.rs @@ -168,6 +168,12 @@ impl Table { self } + /// Sets the [`Table`] metadata location and returns an updated instance. + pub(crate) fn with_metadata_location(mut self, metadata_location: String) -> Self { + self.metadata_location = Some(metadata_location); + self + } + /// Returns a TableBuilder to build a table pub fn builder() -> TableBuilder { TableBuilder::new() diff --git a/crates/iceberg/src/transaction/mod.rs b/crates/iceberg/src/transaction/mod.rs index 06549a95c5..f38b29262b 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -238,7 +238,7 @@ impl Transaction { async fn do_commit(&mut self, catalog: &dyn Catalog) -> Result
{ let refreshed = catalog.load_table(self.table.identifier()).await?; - + if self.table.metadata() != refreshed.metadata() || self.table.metadata_location() != refreshed.metadata_location() { From dd29076cedaba6ade522b4fce3f1bf53af387b79 Mon Sep 17 00:00:00 2001 From: DerGut Date: Wed, 23 Jul 2025 18:28:41 -0700 Subject: [PATCH 02/23] Add MetadataLocationParser --- crates/catalog/glue/src/catalog.rs | 11 +- crates/catalog/s3tables/src/catalog.rs | 8 +- crates/iceberg/src/catalog/memory/catalog.rs | 14 +- .../src/catalog/memory/namespace_state.rs | 20 +- crates/iceberg/src/catalog/mod.rs | 26 +- crates/iceberg/src/catalog/util.rs | 239 ++++++++++++++++++ crates/iceberg/src/transaction/mod.rs | 2 +- 7 files changed, 279 insertions(+), 41 deletions(-) create mode 100644 crates/iceberg/src/catalog/util.rs diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index f4b4a01f9a..36c17453db 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -26,15 +26,15 @@ use iceberg::io::{ use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ - Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, - TableIdent, + Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, + TableCommit, TableCreation, TableIdent, }; use typed_builder::TypedBuilder; use crate::error::{from_aws_build_error, from_aws_sdk_error}; use crate::utils::{ - convert_to_database, convert_to_glue_table, convert_to_namespace, create_metadata_location, - create_sdk_config, get_default_table_location, get_metadata_location, validate_namespace, + convert_to_database, convert_to_glue_table, convert_to_namespace, create_sdk_config, + get_default_table_location, get_metadata_location, validate_namespace, }; use crate::{ AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, with_catalog_id, @@ -393,7 +393,8 @@ impl Catalog for GlueCatalog { let metadata = TableMetadataBuilder::from_table_creation(creation)? .build()? .metadata; - let metadata_location = create_metadata_location(&location, 0)?; + let metadata_location = + MetadataLocationParser::new_location_with_prefix(location).to_string(); metadata.write_to(&self.file_io, &metadata_location).await?; diff --git a/crates/catalog/s3tables/src/catalog.rs b/crates/catalog/s3tables/src/catalog.rs index 191356d711..27ec3c3c30 100644 --- a/crates/catalog/s3tables/src/catalog.rs +++ b/crates/catalog/s3tables/src/catalog.rs @@ -28,12 +28,12 @@ use iceberg::io::{FileIO, FileIOBuilder}; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ - Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, - TableIdent, + Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, + TableCommit, TableCreation, TableIdent, }; use typed_builder::TypedBuilder; -use crate::utils::{create_metadata_location, create_sdk_config}; +use crate::utils::create_sdk_config; /// S3Tables catalog configuration. #[derive(Debug, TypedBuilder)] @@ -325,7 +325,7 @@ impl Catalog for S3TablesCatalog { .await .map_err(from_aws_sdk_error)?; let warehouse_location = get_resp.warehouse_location().to_string(); - create_metadata_location(warehouse_location, 0)? + MetadataLocationParser::new_location_with_prefix(warehouse_location).to_string() } }; diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index 0d5fd02632..c9cd953e1c 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -312,11 +312,13 @@ impl Catalog for MemoryCatalog { let staged_table = commit.apply(current_table)?; // Write table metadata to the new location - TableMetadata::write( - staged_table.file_io(), - staged_table.metadata(), - &MetadataLocation::from_str(staged_table.metadata_location().unwrap())?, - ).await?; + staged_table + .metadata() + .write_to( + staged_table.file_io(), + staged_table.metadata_location().unwrap(), + ) + .await?; // Flip the pointer to reference the new metadata file. let updated_table = root_namespace_state.commit_table_update(staged_table)?; @@ -367,7 +369,7 @@ mod tests { } } - fn to_set(vec: Vec) -> HashSet { + fn to_set(vec: Vec) -> HashSet { HashSet::from_iter(vec) } diff --git a/crates/iceberg/src/catalog/memory/namespace_state.rs b/crates/iceberg/src/catalog/memory/namespace_state.rs index 5ad13ffecd..17e20d7639 100644 --- a/crates/iceberg/src/catalog/memory/namespace_state.rs +++ b/crates/iceberg/src/catalog/memory/namespace_state.rs @@ -16,11 +16,9 @@ // under the License. use std::collections::{HashMap, hash_map}; -use std::str::FromStr; use itertools::Itertools; -use crate::spec::MetadataLocation; use crate::table::Table; use crate::{Error, ErrorKind, NamespaceIdent, Result, TableIdent}; @@ -32,7 +30,7 @@ pub(crate) struct NamespaceState { // Namespaces nested inside this namespace namespaces: HashMap, // Mapping of tables to metadata locations in this namespace - table_metadata_locations: HashMap, + table_metadata_locations: HashMap, } fn no_such_namespace_err(namespace_ident: &NamespaceIdent) -> Result { @@ -257,15 +255,12 @@ impl NamespaceState { } // Returns the metadata location of the given table or an error if doesn't exist - pub(crate) fn get_existing_table_location( - &self, - table_ident: &TableIdent, - ) -> Result<&MetadataLocation> { + pub(crate) fn get_existing_table_location(&self, table_ident: &TableIdent) -> Result<&String> { let namespace = self.get_namespace(table_ident.namespace())?; match namespace.table_metadata_locations.get(table_ident.name()) { None => no_such_table_err(table_ident), - Some(table_metadadata_location) => Ok(table_metadadata_location), + Some(table_metadata_location) => Ok(table_metadata_location), } } @@ -273,7 +268,7 @@ impl NamespaceState { pub(crate) fn insert_new_table( &mut self, table_ident: &TableIdent, - location: MetadataLocation, + location: String, ) -> Result<()> { let namespace = self.get_mut_namespace(table_ident.namespace())?; @@ -291,10 +286,7 @@ impl NamespaceState { } // Removes the given table or returns an error if doesn't exist - pub(crate) fn remove_existing_table( - &mut self, - table_ident: &TableIdent, - ) -> Result { + pub(crate) fn remove_existing_table(&mut self, table_ident: &TableIdent) -> Result { let namespace = self.get_mut_namespace(table_ident.namespace())?; match namespace @@ -314,7 +306,7 @@ impl NamespaceState { .table_metadata_locations .insert( staged_table.identifier().name().to_string(), - MetadataLocation::from_str(staged_table.metadata_location().unwrap())?, + staged_table.metadata_location().unwrap().to_string(), ) .ok_or(Error::new( ErrorKind::TableNotFound, diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index f75311c2a2..d08f509a87 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -18,12 +18,14 @@ //! Catalog API for Apache Iceberg pub mod memory; +mod util; use std::collections::HashMap; use std::fmt::{Debug, Display}; use std::future::Future; use std::mem::take; use std::ops::Deref; +use std::str::FromStr; use std::sync::Arc; use _serde::deserialize_snapshot; @@ -33,6 +35,7 @@ pub use memory::MemoryCatalog; use mockall::automock; use serde_derive::{Deserialize, Serialize}; use typed_builder::TypedBuilder; +pub use util::*; use uuid::Uuid; use crate::spec::{ @@ -336,30 +339,31 @@ impl TableCommit { } // get current metadata location - let current_metadata_location = - table.metadata_location().ok_or(Error::new( - ErrorKind::DataInvalid, - format!( - "Failed to apply commit, table metadata location is not set for table: {}", - table.identifier() - ), - ))?; + let current_metadata_location = table.metadata_location().ok_or(Error::new( + ErrorKind::DataInvalid, + format!( + "Failed to apply commit, table metadata location is not set for table: {}", + table.identifier() + ), + ))?; // apply updates to metadata builder let mut metadata_builder = table .metadata() .clone() - .into_builder(Some(current_metadata_location)); + .into_builder(Some(current_metadata_location.to_string())); for update in self.updates { metadata_builder = update.apply(metadata_builder)?; } // Bump the version of metadata - let new_metadata_location = current_metadata_location.with_next_version(); + let new_metadata_location = MetadataLocationParser::from_str(current_metadata_location)? + .with_next_version() + .to_string(); Ok(table .with_metadata(Arc::new(metadata_builder.build()?.metadata)) - .with_metadata_location(new_metadata_location.to_string())) + .with_metadata_location(new_metadata_location)) } } diff --git a/crates/iceberg/src/catalog/util.rs b/crates/iceberg/src/catalog/util.rs new file mode 100644 index 0000000000..75e37e7d90 --- /dev/null +++ b/crates/iceberg/src/catalog/util.rs @@ -0,0 +1,239 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fmt::Display; +use std::str::FromStr; + +use uuid::Uuid; + +use crate::{Error, ErrorKind, Result}; + +/// Helper for parsing a location of the format: `/metadata/-.metadata.json` +#[derive(Clone, Debug, PartialEq)] +pub struct MetadataLocationParser { + prefix: String, + version: i32, + id: Uuid, +} + +impl MetadataLocationParser { + /// Creates a completely new metadata location starting at version 0. + /// Only used for creating a new table. For updates, see `with_next_version`. + pub fn new_location_with_prefix(prefix: impl ToString) -> Self { + Self { + prefix: prefix.to_string(), + version: 0, + id: Uuid::new_v4(), + } + } + + /// Creates a new metadata location for an updated metadata file. + pub fn with_next_version(&self) -> Self { + Self { + prefix: self.prefix.clone(), + version: self.version + 1, + id: Uuid::new_v4(), + } + } + + fn parse_metadata_path_prefix(path: &str) -> Result { + let prefix = path.strip_suffix("/metadata").ok_or(Error::new( + ErrorKind::Unexpected, + format!( + "Metadata location not under \"/metadata\" subdirectory: {}", + path + ), + ))?; + + Ok(prefix.to_string()) + } + + /// Parses a file name of the format `-.metadata.json`. + fn parse_file_name(file_name: &str) -> Result<(i32, Uuid)> { + let (version, id) = file_name + .strip_suffix(".metadata.json") + .ok_or(Error::new( + ErrorKind::Unexpected, + format!("Invalid metadata file ending: {}", file_name), + ))? + .split_once('-') + .ok_or(Error::new( + ErrorKind::Unexpected, + format!("Invalid metadata file name format: {}", file_name), + ))?; + + Ok((version.parse::()?, Uuid::parse_str(id)?)) + } +} + +impl Display for MetadataLocationParser { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}/metadata/{}-{}.metadata.json", + self.prefix, self.version, self.id + ) + } +} + +impl FromStr for MetadataLocationParser { + type Err = Error; + + fn from_str(s: &str) -> Result { + let (path, file_name) = s.rsplit_once('/').ok_or(Error::new( + ErrorKind::Unexpected, + format!("Invalid metadata location: {}", s), + ))?; + + let prefix = Self::parse_metadata_path_prefix(path)?; + let (version, id) = Self::parse_file_name(file_name)?; + + Ok(MetadataLocationParser { + prefix, + version, + id, + }) + } +} + +#[cfg(test)] +mod test { + use std::str::FromStr; + + use uuid::Uuid; + + use crate::MetadataLocationParser; + + #[test] + fn test_metadata_location_from_string() { + let test_cases = vec![ + // No prefix + ( + "/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + Ok(MetadataLocationParser { + prefix: "".to_string(), + version: 1234567, + id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), + }), + ), + // Some prefix + ( + "/abc/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + Ok(MetadataLocationParser { + prefix: "/abc".to_string(), + version: 1234567, + id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), + }), + ), + // Longer prefix + ( + "/abc/def/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + Ok(MetadataLocationParser { + prefix: "/abc/def".to_string(), + version: 1234567, + id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), + }), + ), + // Prefix with special characters + ( + "https://127.0.0.1/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + Ok(MetadataLocationParser { + prefix: "https://127.0.0.1".to_string(), + version: 1234567, + id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), + }), + ), + // Another id + ( + "/abc/metadata/1234567-81056704-ce5b-41c4-bb83-eb6408081af6.metadata.json", + Ok(MetadataLocationParser { + prefix: "/abc".to_string(), + version: 1234567, + id: Uuid::from_str("81056704-ce5b-41c4-bb83-eb6408081af6").unwrap(), + }), + ), + // Version 0 + ( + "/abc/metadata/0-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + Ok(MetadataLocationParser { + prefix: "/abc".to_string(), + version: 0, + id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), + }), + ), + // Negative version + ( + "/metadata/-123-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + Err("".to_string()), + ), + // Invalid uuid + ( + "/metadata/1234567-no-valid-id.metadata.json", + Err("".to_string()), + ), + // Non-numeric version + ( + "/metadata/noversion-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + Err("".to_string()), + ), + // No /metadata subdirectory + ( + "/wrongsubdir/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + Err("".to_string()), + ), + // No .metadata.json suffix + ( + "/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata", + Err("".to_string()), + ), + ( + "/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.wrong.file", + Err("".to_string()), + ), + ]; + + for (input, expected) in test_cases { + match MetadataLocationParser::from_str(input) { + Ok(metadata_location) => { + assert!(expected.is_ok()); + assert_eq!(metadata_location, expected.unwrap()); + } + Err(_) => assert!(expected.is_err()), + } + } + } + + #[test] + fn test_metadata_location_with_next_version() { + let test_cases = vec![ + MetadataLocationParser::new_location_with_prefix("/abc"), + MetadataLocationParser::from_str( + "/abc/def/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + ) + .unwrap(), + ]; + + for input in test_cases { + let next = MetadataLocationParser::from_str(&input.to_string()) + .unwrap() + .with_next_version(); + assert_eq!(next.prefix, input.prefix); + assert_eq!(next.version, input.version + 1); + assert_ne!(next.id, input.id); + } + } +} diff --git a/crates/iceberg/src/transaction/mod.rs b/crates/iceberg/src/transaction/mod.rs index f38b29262b..06549a95c5 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -238,7 +238,7 @@ impl Transaction { async fn do_commit(&mut self, catalog: &dyn Catalog) -> Result
{ let refreshed = catalog.load_table(self.table.identifier()).await?; - + if self.table.metadata() != refreshed.metadata() || self.table.metadata_location() != refreshed.metadata_location() { From 9bf38006f46ece5689f4e5627ec1b305259a542f Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 23 Jul 2025 18:34:42 -0700 Subject: [PATCH 03/23] clean up --- crates/catalog/glue/src/catalog.rs | 3 +- crates/catalog/glue/src/utils.rs | 8 ++--- crates/catalog/hms/src/catalog.rs | 14 ++++---- crates/catalog/hms/src/utils.rs | 47 ++------------------------ crates/catalog/s3tables/src/catalog.rs | 2 +- crates/iceberg/src/catalog/util.rs | 4 +-- 6 files changed, 18 insertions(+), 60 deletions(-) diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index 36c17453db..e3c119e30c 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -393,8 +393,7 @@ impl Catalog for GlueCatalog { let metadata = TableMetadataBuilder::from_table_creation(creation)? .build()? .metadata; - let metadata_location = - MetadataLocationParser::new_location_with_prefix(location).to_string(); + let metadata_location = MetadataLocationParser::new_with_prefix(location).to_string(); metadata.write_to(&self.file_io, &metadata_location).await?; diff --git a/crates/catalog/glue/src/utils.rs b/crates/catalog/glue/src/utils.rs index 2895e4b239..0abc63d931 100644 --- a/crates/catalog/glue/src/utils.rs +++ b/crates/catalog/glue/src/utils.rs @@ -262,10 +262,8 @@ macro_rules! with_catalog_id { mod tests { use aws_sdk_glue::config::ProvideCredentials; use aws_sdk_glue::types::Column; - use iceberg::spec::{ - MetadataLocation, NestedField, PrimitiveType, Schema, TableMetadataBuilder, Type, - }; - use iceberg::{Namespace, Result, TableCreation}; + use iceberg::spec::{NestedField, PrimitiveType, Schema, TableMetadataBuilder, Type}; + use iceberg::{MetadataLocationParser, Namespace, Result, TableCreation}; use super::*; use crate::schema::{ICEBERG_FIELD_CURRENT, ICEBERG_FIELD_ID, ICEBERG_FIELD_OPTIONAL}; @@ -309,7 +307,7 @@ mod tests { fn test_convert_to_glue_table() -> Result<()> { let table_name = "my_table".to_string(); let location = "s3a://warehouse/hive".to_string(); - let metadata_location = MetadataLocation::new_with_prefix(&location); + let metadata_location = MetadataLocationParser::new_with_prefix(&location).to_string(); let properties = HashMap::new(); let schema = Schema::builder() .with_schema_id(1) diff --git a/crates/catalog/hms/src/catalog.rs b/crates/catalog/hms/src/catalog.rs index 72fb8c6b33..76f7e6ffc3 100644 --- a/crates/catalog/hms/src/catalog.rs +++ b/crates/catalog/hms/src/catalog.rs @@ -29,8 +29,8 @@ use iceberg::io::FileIO; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ - Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, - TableIdent, + Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, + TableCommit, TableCreation, TableIdent, }; use typed_builder::TypedBuilder; use volo_thrift::MaybeException; @@ -351,16 +351,18 @@ impl Catalog for HmsCatalog { .build()? .metadata; - let metadata_location = create_metadata_location(&location, 0)?; + let metadata_location = MetadataLocationParser::new_with_prefix(&location); - metadata.write_to(&self.file_io, &metadata_location).await?; + metadata + .write_to(&self.file_io, &metadata_location.to_string()) + .await?; let hive_table = convert_to_hive_table( db_name.clone(), metadata.current_schema(), table_name.clone(), location, - metadata_location.clone(), + metadata_location.to_string(), metadata.properties(), )?; @@ -372,7 +374,7 @@ impl Catalog for HmsCatalog { Table::builder() .file_io(self.file_io()) - .metadata_location(metadata_location) + .metadata_location(metadata_location.to_string()) .metadata(metadata) .identifier(TableIdent::new(NamespaceIdent::new(db_name), table_name)) .build() diff --git a/crates/catalog/hms/src/utils.rs b/crates/catalog/hms/src/utils.rs index 432ceac833..95aaba7046 100644 --- a/crates/catalog/hms/src/utils.rs +++ b/crates/catalog/hms/src/utils.rs @@ -22,7 +22,6 @@ use hive_metastore::{Database, PrincipalType, SerDeInfo, StorageDescriptor}; use iceberg::spec::Schema; use iceberg::{Error, ErrorKind, Namespace, NamespaceIdent, Result}; use pilota::{AHashMap, FastStr}; -use uuid::Uuid; use crate::schema::HiveSchemaBuilder; @@ -249,30 +248,6 @@ pub(crate) fn get_default_table_location( format!("{}/{}", location, table_name.as_ref()) } -/// Create metadata location from `location` and `version` -pub(crate) fn create_metadata_location(location: impl AsRef, version: i32) -> Result { - if version < 0 { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Table metadata version: '{}' must be a non-negative integer", - version - ), - )); - }; - - let version = format!("{:0>5}", version); - let id = Uuid::new_v4(); - let metadata_location = format!( - "{}/metadata/{}-{}.metadata.json", - location.as_ref(), - version, - id - ); - - Ok(metadata_location) -} - /// Get metadata location from `HiveTable` parameters pub(crate) fn get_metadata_location( parameters: &Option>, @@ -339,7 +314,7 @@ fn get_current_time() -> Result { #[cfg(test)] mod tests { use iceberg::spec::{NestedField, PrimitiveType, Type}; - use iceberg::{Namespace, NamespaceIdent}; + use iceberg::{MetadataLocationParser, Namespace, NamespaceIdent}; use super::*; @@ -370,7 +345,7 @@ mod tests { let db_name = "my_db".to_string(); let table_name = "my_table".to_string(); let location = "s3a://warehouse/hms".to_string(); - let metadata_location = create_metadata_location(location.clone(), 0)?; + let metadata_location = MetadataLocationParser::new_with_prefix(location.clone()); let properties = HashMap::new(); let schema = Schema::builder() .with_schema_id(1) @@ -385,7 +360,7 @@ mod tests { &schema, table_name.clone(), location.clone(), - metadata_location, + metadata_location.to_string(), &properties, )?; @@ -414,22 +389,6 @@ mod tests { Ok(()) } - #[test] - fn test_create_metadata_location() -> Result<()> { - let location = "my_base_location"; - let valid_version = 0; - let invalid_version = -1; - - let valid_result = create_metadata_location(location, valid_version)?; - let invalid_result = create_metadata_location(location, invalid_version); - - assert!(valid_result.starts_with("my_base_location/metadata/00000-")); - assert!(valid_result.ends_with(".metadata.json")); - assert!(invalid_result.is_err()); - - Ok(()) - } - #[test] fn test_get_default_table_location() -> Result<()> { let properties = HashMap::from([(LOCATION.to_string(), "db_location".to_string())]); diff --git a/crates/catalog/s3tables/src/catalog.rs b/crates/catalog/s3tables/src/catalog.rs index 27ec3c3c30..3c3df35a1a 100644 --- a/crates/catalog/s3tables/src/catalog.rs +++ b/crates/catalog/s3tables/src/catalog.rs @@ -325,7 +325,7 @@ impl Catalog for S3TablesCatalog { .await .map_err(from_aws_sdk_error)?; let warehouse_location = get_resp.warehouse_location().to_string(); - MetadataLocationParser::new_location_with_prefix(warehouse_location).to_string() + MetadataLocationParser::new_with_prefix(warehouse_location).to_string() } }; diff --git a/crates/iceberg/src/catalog/util.rs b/crates/iceberg/src/catalog/util.rs index 75e37e7d90..e2337d2aab 100644 --- a/crates/iceberg/src/catalog/util.rs +++ b/crates/iceberg/src/catalog/util.rs @@ -33,7 +33,7 @@ pub struct MetadataLocationParser { impl MetadataLocationParser { /// Creates a completely new metadata location starting at version 0. /// Only used for creating a new table. For updates, see `with_next_version`. - pub fn new_location_with_prefix(prefix: impl ToString) -> Self { + pub fn new_with_prefix(prefix: impl ToString) -> Self { Self { prefix: prefix.to_string(), version: 0, @@ -220,7 +220,7 @@ mod test { #[test] fn test_metadata_location_with_next_version() { let test_cases = vec![ - MetadataLocationParser::new_location_with_prefix("/abc"), + MetadataLocationParser::new_with_prefix("/abc"), MetadataLocationParser::from_str( "/abc/def/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", ) From 00fe1bcec7615148e9278459b9782538283ce64c Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 23 Jul 2025 20:09:38 -0700 Subject: [PATCH 04/23] machete and padding --- Cargo.lock | 2 -- crates/catalog/glue/Cargo.toml | 1 - crates/catalog/glue/tests/glue_catalog_test.rs | 2 +- crates/catalog/s3tables/Cargo.toml | 1 - crates/iceberg/src/catalog/mod.rs | 4 ++-- crates/iceberg/src/catalog/{util.rs => utils.rs} | 2 +- 6 files changed, 4 insertions(+), 8 deletions(-) rename crates/iceberg/src/catalog/{util.rs => utils.rs} (99%) diff --git a/Cargo.lock b/Cargo.lock index c3e581ccb0..f33a5c3f59 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3580,7 +3580,6 @@ dependencies = [ "tokio", "tracing", "typed-builder 0.20.1", - "uuid", ] [[package]] @@ -3645,7 +3644,6 @@ dependencies = [ "itertools 0.13.0", "tokio", "typed-builder 0.20.1", - "uuid", ] [[package]] diff --git a/crates/catalog/glue/Cargo.toml b/crates/catalog/glue/Cargo.toml index a71b51f8d9..613160e468 100644 --- a/crates/catalog/glue/Cargo.toml +++ b/crates/catalog/glue/Cargo.toml @@ -38,7 +38,6 @@ serde_json = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } typed-builder = { workspace = true } -uuid = { workspace = true } [dev-dependencies] ctor = { workspace = true } diff --git a/crates/catalog/glue/tests/glue_catalog_test.rs b/crates/catalog/glue/tests/glue_catalog_test.rs index bec9494fe9..46711d096e 100644 --- a/crates/catalog/glue/tests/glue_catalog_test.rs +++ b/crates/catalog/glue/tests/glue_catalog_test.rs @@ -234,7 +234,7 @@ async fn test_create_table() -> Result<()> { assert!( result .metadata_location() - .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/00000-")) + .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/0-")) ); assert!( catalog diff --git a/crates/catalog/s3tables/Cargo.toml b/crates/catalog/s3tables/Cargo.toml index 0ec1f55f62..cdcd96b6b1 100644 --- a/crates/catalog/s3tables/Cargo.toml +++ b/crates/catalog/s3tables/Cargo.toml @@ -35,7 +35,6 @@ aws-config = { workspace = true } aws-sdk-s3tables = "1.10.0" iceberg = { workspace = true } typed-builder = { workspace = true } -uuid = { workspace = true, features = ["v4"] } [dev-dependencies] iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index d08f509a87..d4632e39c9 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -18,7 +18,7 @@ //! Catalog API for Apache Iceberg pub mod memory; -mod util; +mod utils; use std::collections::HashMap; use std::fmt::{Debug, Display}; @@ -35,7 +35,7 @@ pub use memory::MemoryCatalog; use mockall::automock; use serde_derive::{Deserialize, Serialize}; use typed_builder::TypedBuilder; -pub use util::*; +pub use utils::*; use uuid::Uuid; use crate::spec::{ diff --git a/crates/iceberg/src/catalog/util.rs b/crates/iceberg/src/catalog/utils.rs similarity index 99% rename from crates/iceberg/src/catalog/util.rs rename to crates/iceberg/src/catalog/utils.rs index e2337d2aab..a276c62970 100644 --- a/crates/iceberg/src/catalog/util.rs +++ b/crates/iceberg/src/catalog/utils.rs @@ -84,7 +84,7 @@ impl Display for MetadataLocationParser { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "{}/metadata/{}-{}.metadata.json", + "{}/metadata/{:0>5}-{}.metadata.json", self.prefix, self.version, self.id ) } From 5b902661d9f82e5ec828482a5b1f03f85ca7ab96 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 23 Jul 2025 20:11:59 -0700 Subject: [PATCH 05/23] who let the test out --- crates/catalog/glue/tests/glue_catalog_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/catalog/glue/tests/glue_catalog_test.rs b/crates/catalog/glue/tests/glue_catalog_test.rs index 46711d096e..bec9494fe9 100644 --- a/crates/catalog/glue/tests/glue_catalog_test.rs +++ b/crates/catalog/glue/tests/glue_catalog_test.rs @@ -234,7 +234,7 @@ async fn test_create_table() -> Result<()> { assert!( result .metadata_location() - .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/0-")) + .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/00000-")) ); assert!( catalog From ece2b8bf441bfce6f87ff6f3dfa41c578e828a65 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 23 Jul 2025 20:21:05 -0700 Subject: [PATCH 06/23] clean up sql catalog, remove unneeded changes --- Cargo.lock | 2 -- crates/catalog/hms/Cargo.toml | 1 - crates/catalog/sql/Cargo.toml | 1 - crates/catalog/sql/src/catalog.rs | 12 ++---------- crates/iceberg/src/catalog/memory/namespace_state.rs | 6 +++--- 5 files changed, 5 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f33a5c3f59..d72e47aa28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3603,7 +3603,6 @@ dependencies = [ "tokio", "tracing", "typed-builder 0.20.1", - "uuid", "volo", "volo-thrift", ] @@ -3659,7 +3658,6 @@ dependencies = [ "tempfile", "tokio", "typed-builder 0.20.1", - "uuid", ] [[package]] diff --git a/crates/catalog/hms/Cargo.toml b/crates/catalog/hms/Cargo.toml index 5f8956f40d..707f3ed6a4 100644 --- a/crates/catalog/hms/Cargo.toml +++ b/crates/catalog/hms/Cargo.toml @@ -39,7 +39,6 @@ serde_json = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } typed-builder = { workspace = true } -uuid = { workspace = true } volo-thrift = { workspace = true } # Transitive dependencies below diff --git a/crates/catalog/sql/Cargo.toml b/crates/catalog/sql/Cargo.toml index b767b68775..33ca700bf7 100644 --- a/crates/catalog/sql/Cargo.toml +++ b/crates/catalog/sql/Cargo.toml @@ -33,7 +33,6 @@ async-trait = { workspace = true } iceberg = { workspace = true } sqlx = { version = "0.8.1", features = ["any"], default-features = false } typed-builder = { workspace = true } -uuid = { workspace = true, features = ["v4"] } [dev-dependencies] iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs index 56c6fadcf1..43f138c182 100644 --- a/crates/catalog/sql/src/catalog.rs +++ b/crates/catalog/sql/src/catalog.rs @@ -22,14 +22,10 @@ use async_trait::async_trait; use iceberg::io::FileIO; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; -use iceberg::{ - Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, - TableIdent, -}; +use iceberg::{Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, TableIdent}; use sqlx::any::{AnyPoolOptions, AnyQueryResult, AnyRow, install_default_drivers}; use sqlx::{Any, AnyPool, Row, Transaction}; use typed_builder::TypedBuilder; -use uuid::Uuid; use crate::error::{ from_sqlx_error, no_such_namespace_err, no_such_table_err, table_already_exists_err, @@ -700,11 +696,7 @@ impl Catalog for SqlCatalog { let tbl_metadata = TableMetadataBuilder::from_table_creation(tbl_creation)? .build()? .metadata; - let tbl_metadata_location = format!( - "{}/metadata/0-{}.metadata.json", - location.clone(), - Uuid::new_v4() - ); + let tbl_metadata_location = MetadataLocationParser::new_with_prefix(location).to_string(); tbl_metadata .write_to(&self.fileio, &tbl_metadata_location) diff --git a/crates/iceberg/src/catalog/memory/namespace_state.rs b/crates/iceberg/src/catalog/memory/namespace_state.rs index 17e20d7639..f5df207d1f 100644 --- a/crates/iceberg/src/catalog/memory/namespace_state.rs +++ b/crates/iceberg/src/catalog/memory/namespace_state.rs @@ -268,7 +268,7 @@ impl NamespaceState { pub(crate) fn insert_new_table( &mut self, table_ident: &TableIdent, - location: String, + metadata_location: String, ) -> Result<()> { let namespace = self.get_mut_namespace(table_ident.namespace())?; @@ -278,7 +278,7 @@ impl NamespaceState { { hash_map::Entry::Occupied(_) => table_already_exists_err(table_ident), hash_map::Entry::Vacant(entry) => { - let _ = entry.insert(location); + let _ = entry.insert(metadata_location); Ok(()) } @@ -298,7 +298,7 @@ impl NamespaceState { } } - /// Updates the metadata location of the given table or returns an error if doesn't exist + // Updates the metadata location of the given table or returns an error if it doesn't exist pub(crate) fn commit_table_update(&mut self, staged_table: Table) -> Result
{ let namespace = self.get_mut_namespace(staged_table.identifier().namespace())?; From 7010df6b5a4720a7eea3f0f62935e58a210e6116 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 23 Jul 2025 20:22:00 -0700 Subject: [PATCH 07/23] remove test that does not make sense --- crates/iceberg/src/catalog/memory/catalog.rs | 48 -------------------- 1 file changed, 48 deletions(-) diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index c9cd953e1c..4ddec86920 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -1834,54 +1834,6 @@ mod tests { ); } - #[tokio::test] - async fn test_update_table_fails_if_commit_conflicts() { - let catalog = new_memory_catalog(); - let base_table = create_table_with_namespace(&catalog).await; - - // Turn off retry to test conflict - let tx = Transaction::new(&base_table); - let base_table = tx - .update_table_properties() - .set(PROPERTY_COMMIT_NUM_RETRIES.to_string(), "0".to_string()) - .apply(tx) - .unwrap() - .commit(&catalog) - .await - .unwrap(); - - // Update the table by adding a new sort order. - let tx = Transaction::new(&base_table); - let _sort_table = tx - .replace_sort_order() - .asc("foo", NullOrder::First) - .apply(tx) - .unwrap() - .commit(&catalog) - .await - .unwrap(); - - // Try to update the "now old" table again with a different sort order. - let tx = Transaction::new(&base_table); - let err = tx - .replace_sort_order() - .desc("foo", NullOrder::Last) - .apply(tx) - .unwrap() - .commit(&catalog) - .await - .unwrap_err(); - - // The second transaction should fail because it didn't take the new update - // into account. - println!("{}", err.message()); // todo remove this - assert_eq!(err.kind(), ErrorKind::CatalogCommitConflicts); - assert!( - err.message() - .contains("Default sort order id does not match") - ); - } - #[tokio::test] async fn test_update_table_fails_if_table_doesnt_exist() { let catalog = new_memory_catalog(); From 5e97888eff629fa6ca8de589baa371b8e84876c9 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 23 Jul 2025 20:27:10 -0700 Subject: [PATCH 08/23] fmt ofc --- crates/catalog/sql/src/catalog.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs index 43f138c182..3ad873546d 100644 --- a/crates/catalog/sql/src/catalog.rs +++ b/crates/catalog/sql/src/catalog.rs @@ -22,7 +22,10 @@ use async_trait::async_trait; use iceberg::io::FileIO; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; -use iceberg::{Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, TableIdent}; +use iceberg::{ + Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, + TableCommit, TableCreation, TableIdent, +}; use sqlx::any::{AnyPoolOptions, AnyQueryResult, AnyRow, install_default_drivers}; use sqlx::{Any, AnyPool, Row, Transaction}; use typed_builder::TypedBuilder; From a3648e264d1c588827ef21733552d25a2f1e1fb5 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 23 Jul 2025 20:44:54 -0700 Subject: [PATCH 09/23] fix and improve unit tests --- crates/iceberg/src/catalog/memory/catalog.rs | 31 +++++++++----------- crates/iceberg/src/catalog/mod.rs | 16 +++++++--- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index 4ddec86920..3a08eadb37 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -29,8 +29,8 @@ use crate::io::FileIO; use crate::spec::{TableMetadata, TableMetadataBuilder}; use crate::table::Table; use crate::{ - Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, - TableIdent, + Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, + TableCommit, TableCreation, TableIdent, }; /// namespace `location` property @@ -220,12 +220,7 @@ impl Catalog for MemoryCatalog { let metadata = TableMetadataBuilder::from_table_creation(table_creation)? .build()? .metadata; - let metadata_location = format!( - "{}/metadata/{}-{}.metadata.json", - &location, - 0, - Uuid::new_v4() - ); + let metadata_location = MetadataLocationParser::new_with_prefix(location).to_string(); metadata.write_to(&self.file_io, &metadata_location).await?; @@ -339,10 +334,7 @@ mod tests { use super::*; use crate::io::FileIOBuilder; - use crate::spec::{ - NestedField, NullOrder, PROPERTY_COMMIT_NUM_RETRIES, PartitionSpec, PrimitiveType, Schema, - SortOrder, Type, - }; + use crate::spec::{NestedField, PartitionSpec, PrimitiveType, Schema, SortOrder, Type}; use crate::transaction::{ApplyTransactionAction, Transaction}; fn temp_path() -> String { @@ -453,7 +445,12 @@ mod tests { fn assert_table_metadata_location_matches(table: &Table, regex_str: &str) { let actual = table.metadata_location().unwrap().to_string(); let regex = Regex::new(regex_str).unwrap(); - assert!(regex.is_match(&actual)) + assert!( + regex.is_match(&actual), + "Expected metadata location to match regex, but got location: {} and regex: {}", + actual, + regex + ) } #[tokio::test] @@ -1105,7 +1102,7 @@ mod tests { let table_name = "tbl1"; let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/tbl1/metadata/0-{}.metadata.json$", + "^{}/tbl1/metadata/00000-{}.metadata.json$", namespace_location, UUID_REGEX_STR, ); @@ -1158,7 +1155,7 @@ mod tests { let expected_table_ident = TableIdent::new(nested_namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/tbl1/metadata/0-{}.metadata.json$", + "^{}/tbl1/metadata/00000-{}.metadata.json$", nested_namespace_location, UUID_REGEX_STR, ); @@ -1199,7 +1196,7 @@ mod tests { let table_name = "tbl1"; let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/a/tbl1/metadata/0-{}.metadata.json$", + "^{}/a/tbl1/metadata/00000-{}.metadata.json$", warehouse_location, UUID_REGEX_STR ); @@ -1247,7 +1244,7 @@ mod tests { let expected_table_ident = TableIdent::new(nested_namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/a/b/tbl1/metadata/0-{}.metadata.json$", + "^{}/a/b/tbl1/metadata/00000-{}.metadata.json$", warehouse_location, UUID_REGEX_STR ); diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index d4632e39c9..51973ef4d7 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -2193,7 +2193,7 @@ mod tests { Table::builder() .metadata(resp) - .metadata_location("s3://bucket/test/location/metadata/v2.json".to_string()) + .metadata_location("s3://bucket/test/location/metadata/00000-8a62c37d-4573-4021-952a-c0baef7d21d0.metadata.json".to_string()) .identifier(TableIdent::from_strs(["ns1", "test1"]).unwrap()) .file_io(FileIOBuilder::new("memory").build().unwrap()) .build() @@ -2202,7 +2202,7 @@ mod tests { let updates = vec![ TableUpdate::SetLocation { - location: "s3://bucket/test/new_location/metadata/v2.json".to_string(), + location: "s3://bucket/test/new_location/data".to_string(), }, TableUpdate::SetProperties { updates: vec![ @@ -2235,9 +2235,17 @@ mod tests { "v2" ); + // metadata version should be bumped + assert!( + updated_table + .metadata_location() + .unwrap() + .starts_with("s3://bucket/test/location/metadata/00001-") + ); + assert_eq!( updated_table.metadata().location, - "s3://bucket/test/new_location/metadata/v2.json".to_string() - ) + "s3://bucket/test/new_location/data", + ); } } From 547b8a85a1cdfcfe24a58c71b15443e66caa9e2b Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 23 Jul 2025 20:49:03 -0700 Subject: [PATCH 10/23] unused import --- crates/iceberg/src/catalog/memory/catalog.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index 3a08eadb37..2326ecd4a2 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -22,7 +22,6 @@ use std::collections::HashMap; use async_trait::async_trait; use futures::lock::{Mutex, MutexGuard}; use itertools::Itertools; -use uuid::Uuid; use super::namespace_state::NamespaceState; use crate::io::FileIO; From 793007ca0a1377f2d942756420280c14a49a77bd Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Wed, 23 Jul 2025 20:55:18 -0700 Subject: [PATCH 11/23] fix and improve more tests --- crates/catalog/sql/src/catalog.rs | 16 ++++++++-------- crates/iceberg/src/catalog/utils.rs | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs index 3ad873546d..14800fc1ea 100644 --- a/crates/catalog/sql/src/catalog.rs +++ b/crates/catalog/sql/src/catalog.rs @@ -60,7 +60,7 @@ static TEST_BEFORE_ACQUIRE: bool = true; // Default the health-check of each con /// such as the database URI, warehouse location, and file I/O settings. /// You are required to provide a `SqlBindStyle`, which determines how SQL statements will be bound to values in the catalog. /// The options available for this parameter include: -/// - `SqlBindStyle::DollarNumeric`: Binds SQL statements using `$1`, `$2`, etc., as placeholders. This is for PostgreSQL databases. +/// - `SqlBindStyle::DollarNumeric`: Binds SQL statements using `$1`, `$2`, etc., as placeholders. This is for PostgresSQL databases. /// - `SqlBindStyle::QuestionMark`: Binds SQL statements using `?` as a placeholder. This is for MySQL and SQLite databases. #[derive(Debug, TypedBuilder)] pub struct SqlCatalogConfig { @@ -283,7 +283,7 @@ impl Catalog for SqlCatalog { if exists { return Err(Error::new( - iceberg::ErrorKind::Unexpected, + ErrorKind::Unexpected, format!("Namespace {:?} already exists", namespace), )); } @@ -486,7 +486,7 @@ impl Catalog for SqlCatalog { let tables = self.list_tables(namespace).await?; if !tables.is_empty() { return Err(Error::new( - iceberg::ErrorKind::Unexpected, + ErrorKind::Unexpected, format!( "Namespace {:?} is not empty. {} tables exist.", namespace, @@ -805,7 +805,7 @@ mod tests { temp_dir.path().to_str().unwrap().to_string() } - fn to_set(vec: Vec) -> HashSet { + fn to_set(vec: Vec) -> HashSet { HashSet::from_iter(vec) } @@ -1505,7 +1505,7 @@ mod tests { let table_name = "tbl1"; let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/tbl1/metadata/0-{}.metadata.json$", + "^{}/tbl1/metadata/00000-{}.metadata.json$", namespace_location, UUID_REGEX_STR, ); @@ -1562,7 +1562,7 @@ mod tests { let expected_table_ident = TableIdent::new(nested_namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/tbl1/metadata/0-{}.metadata.json$", + "^{}/tbl1/metadata/00000-{}.metadata.json$", nested_namespace_location, UUID_REGEX_STR, ); @@ -1602,7 +1602,7 @@ mod tests { let table_name = "tbl1"; let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/a/tbl1/metadata/0-{}.metadata.json$", + "^{}/a/tbl1/metadata/00000-{}.metadata.json$", warehouse_loc, UUID_REGEX_STR ); @@ -1641,7 +1641,7 @@ mod tests { let expected_table_ident = TableIdent::new(nested_namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/a/b/tbl1/metadata/0-{}.metadata.json$", + "^{}/a/b/tbl1/metadata/00000-{}.metadata.json$", warehouse_loc, UUID_REGEX_STR ); diff --git a/crates/iceberg/src/catalog/utils.rs b/crates/iceberg/src/catalog/utils.rs index a276c62970..1741c793c4 100644 --- a/crates/iceberg/src/catalog/utils.rs +++ b/crates/iceberg/src/catalog/utils.rs @@ -168,7 +168,7 @@ mod test { ), // Version 0 ( - "/abc/metadata/0-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", + "/abc/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", Ok(MetadataLocationParser { prefix: "/abc".to_string(), version: 0, From 37c10604bfa7c30e1b23ca4a467e5ff084c533dc Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Thu, 24 Jul 2025 09:30:09 -0700 Subject: [PATCH 12/23] Update crates/iceberg/src/catalog/utils.rs Co-authored-by: Renjie Liu --- crates/iceberg/src/catalog/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/catalog/utils.rs b/crates/iceberg/src/catalog/utils.rs index 1741c793c4..564a77f2c8 100644 --- a/crates/iceberg/src/catalog/utils.rs +++ b/crates/iceberg/src/catalog/utils.rs @@ -24,7 +24,7 @@ use crate::{Error, ErrorKind, Result}; /// Helper for parsing a location of the format: `/metadata/-.metadata.json` #[derive(Clone, Debug, PartialEq)] -pub struct MetadataLocationParser { +pub struct MetadataLocation { prefix: String, version: i32, id: Uuid, From 1eaa51e341480a5ce68e83ed7a8f147e2300f009 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Thu, 24 Jul 2025 09:30:20 -0700 Subject: [PATCH 13/23] Update crates/iceberg/src/catalog/utils.rs Co-authored-by: Renjie Liu --- crates/iceberg/src/catalog/utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/catalog/utils.rs b/crates/iceberg/src/catalog/utils.rs index 564a77f2c8..3ff9f2c6f4 100644 --- a/crates/iceberg/src/catalog/utils.rs +++ b/crates/iceberg/src/catalog/utils.rs @@ -25,7 +25,7 @@ use crate::{Error, ErrorKind, Result}; /// Helper for parsing a location of the format: `/metadata/-.metadata.json` #[derive(Clone, Debug, PartialEq)] pub struct MetadataLocation { - prefix: String, + table_location: String, version: i32, id: Uuid, } From 2fda4fd1f6bfde66e38ea67917a32ce48f933a6c Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Thu, 24 Jul 2025 09:41:59 -0700 Subject: [PATCH 14/23] better naming --- crates/catalog/glue/src/catalog.rs | 6 +- crates/catalog/glue/src/utils.rs | 4 +- crates/catalog/hms/src/catalog.rs | 6 +- crates/catalog/hms/src/utils.rs | 4 +- crates/catalog/s3tables/src/catalog.rs | 6 +- crates/catalog/sql/src/catalog.rs | 6 +- crates/iceberg/src/catalog/memory/catalog.rs | 6 +- .../{utils.rs => metadata_location.rs} | 56 +++++++++---------- crates/iceberg/src/catalog/mod.rs | 6 +- 9 files changed, 50 insertions(+), 50 deletions(-) rename crates/iceberg/src/catalog/{utils.rs => metadata_location.rs} (83%) diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index e3c119e30c..c71ea9b71c 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -26,8 +26,8 @@ use iceberg::io::{ use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ - Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, - TableCommit, TableCreation, TableIdent, + Catalog, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, + TableCreation, TableIdent, }; use typed_builder::TypedBuilder; @@ -393,7 +393,7 @@ impl Catalog for GlueCatalog { let metadata = TableMetadataBuilder::from_table_creation(creation)? .build()? .metadata; - let metadata_location = MetadataLocationParser::new_with_prefix(location).to_string(); + let metadata_location = MetadataLocation::new_with_location(location).to_string(); metadata.write_to(&self.file_io, &metadata_location).await?; diff --git a/crates/catalog/glue/src/utils.rs b/crates/catalog/glue/src/utils.rs index 0abc63d931..f3b1eb0cb7 100644 --- a/crates/catalog/glue/src/utils.rs +++ b/crates/catalog/glue/src/utils.rs @@ -263,7 +263,7 @@ mod tests { use aws_sdk_glue::config::ProvideCredentials; use aws_sdk_glue::types::Column; use iceberg::spec::{NestedField, PrimitiveType, Schema, TableMetadataBuilder, Type}; - use iceberg::{MetadataLocationParser, Namespace, Result, TableCreation}; + use iceberg::{MetadataLocation, Namespace, Result, TableCreation}; use super::*; use crate::schema::{ICEBERG_FIELD_CURRENT, ICEBERG_FIELD_ID, ICEBERG_FIELD_OPTIONAL}; @@ -307,7 +307,7 @@ mod tests { fn test_convert_to_glue_table() -> Result<()> { let table_name = "my_table".to_string(); let location = "s3a://warehouse/hive".to_string(); - let metadata_location = MetadataLocationParser::new_with_prefix(&location).to_string(); + let metadata_location = MetadataLocation::new_with_location(&location).to_string(); let properties = HashMap::new(); let schema = Schema::builder() .with_schema_id(1) diff --git a/crates/catalog/hms/src/catalog.rs b/crates/catalog/hms/src/catalog.rs index 76f7e6ffc3..97b755396c 100644 --- a/crates/catalog/hms/src/catalog.rs +++ b/crates/catalog/hms/src/catalog.rs @@ -29,8 +29,8 @@ use iceberg::io::FileIO; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ - Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, - TableCommit, TableCreation, TableIdent, + Catalog, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, + TableCreation, TableIdent, }; use typed_builder::TypedBuilder; use volo_thrift::MaybeException; @@ -351,7 +351,7 @@ impl Catalog for HmsCatalog { .build()? .metadata; - let metadata_location = MetadataLocationParser::new_with_prefix(&location); + let metadata_location = MetadataLocation::new_with_location(&location); metadata .write_to(&self.file_io, &metadata_location.to_string()) diff --git a/crates/catalog/hms/src/utils.rs b/crates/catalog/hms/src/utils.rs index 95aaba7046..2397fc20f6 100644 --- a/crates/catalog/hms/src/utils.rs +++ b/crates/catalog/hms/src/utils.rs @@ -314,7 +314,7 @@ fn get_current_time() -> Result { #[cfg(test)] mod tests { use iceberg::spec::{NestedField, PrimitiveType, Type}; - use iceberg::{MetadataLocationParser, Namespace, NamespaceIdent}; + use iceberg::{MetadataLocation, Namespace, NamespaceIdent}; use super::*; @@ -345,7 +345,7 @@ mod tests { let db_name = "my_db".to_string(); let table_name = "my_table".to_string(); let location = "s3a://warehouse/hms".to_string(); - let metadata_location = MetadataLocationParser::new_with_prefix(location.clone()); + let metadata_location = MetadataLocation::new_with_location(location.clone()); let properties = HashMap::new(); let schema = Schema::builder() .with_schema_id(1) diff --git a/crates/catalog/s3tables/src/catalog.rs b/crates/catalog/s3tables/src/catalog.rs index 3c3df35a1a..f245d00a27 100644 --- a/crates/catalog/s3tables/src/catalog.rs +++ b/crates/catalog/s3tables/src/catalog.rs @@ -28,8 +28,8 @@ use iceberg::io::{FileIO, FileIOBuilder}; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ - Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, - TableCommit, TableCreation, TableIdent, + Catalog, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, + TableCreation, TableIdent, }; use typed_builder::TypedBuilder; @@ -325,7 +325,7 @@ impl Catalog for S3TablesCatalog { .await .map_err(from_aws_sdk_error)?; let warehouse_location = get_resp.warehouse_location().to_string(); - MetadataLocationParser::new_with_prefix(warehouse_location).to_string() + MetadataLocation::new_with_location(warehouse_location).to_string() } }; diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs index 14800fc1ea..6d40e4ebb1 100644 --- a/crates/catalog/sql/src/catalog.rs +++ b/crates/catalog/sql/src/catalog.rs @@ -23,8 +23,8 @@ use iceberg::io::FileIO; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ - Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, - TableCommit, TableCreation, TableIdent, + Catalog, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, + TableCreation, TableIdent, }; use sqlx::any::{AnyPoolOptions, AnyQueryResult, AnyRow, install_default_drivers}; use sqlx::{Any, AnyPool, Row, Transaction}; @@ -699,7 +699,7 @@ impl Catalog for SqlCatalog { let tbl_metadata = TableMetadataBuilder::from_table_creation(tbl_creation)? .build()? .metadata; - let tbl_metadata_location = MetadataLocationParser::new_with_prefix(location).to_string(); + let tbl_metadata_location = MetadataLocation::new_with_location(location).to_string(); tbl_metadata .write_to(&self.fileio, &tbl_metadata_location) diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index 2326ecd4a2..d6b144d88d 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -28,8 +28,8 @@ use crate::io::FileIO; use crate::spec::{TableMetadata, TableMetadataBuilder}; use crate::table::Table; use crate::{ - Catalog, Error, ErrorKind, MetadataLocationParser, Namespace, NamespaceIdent, Result, - TableCommit, TableCreation, TableIdent, + Catalog, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, + TableCreation, TableIdent, }; /// namespace `location` property @@ -219,7 +219,7 @@ impl Catalog for MemoryCatalog { let metadata = TableMetadataBuilder::from_table_creation(table_creation)? .build()? .metadata; - let metadata_location = MetadataLocationParser::new_with_prefix(location).to_string(); + let metadata_location = MetadataLocation::new_with_location(location).to_string(); metadata.write_to(&self.file_io, &metadata_location).await?; diff --git a/crates/iceberg/src/catalog/utils.rs b/crates/iceberg/src/catalog/metadata_location.rs similarity index 83% rename from crates/iceberg/src/catalog/utils.rs rename to crates/iceberg/src/catalog/metadata_location.rs index 3ff9f2c6f4..797c243077 100644 --- a/crates/iceberg/src/catalog/utils.rs +++ b/crates/iceberg/src/catalog/metadata_location.rs @@ -22,7 +22,7 @@ use uuid::Uuid; use crate::{Error, ErrorKind, Result}; -/// Helper for parsing a location of the format: `/metadata/-.metadata.json` +/// Helper for parsing a location of the format: `/metadata/-.metadata.json` #[derive(Clone, Debug, PartialEq)] pub struct MetadataLocation { table_location: String, @@ -30,12 +30,12 @@ pub struct MetadataLocation { id: Uuid, } -impl MetadataLocationParser { +impl MetadataLocation { /// Creates a completely new metadata location starting at version 0. /// Only used for creating a new table. For updates, see `with_next_version`. - pub fn new_with_prefix(prefix: impl ToString) -> Self { + pub fn new_with_location(table_location: impl ToString) -> Self { Self { - prefix: prefix.to_string(), + table_location: table_location.to_string(), version: 0, id: Uuid::new_v4(), } @@ -44,7 +44,7 @@ impl MetadataLocationParser { /// Creates a new metadata location for an updated metadata file. pub fn with_next_version(&self) -> Self { Self { - prefix: self.prefix.clone(), + table_location: self.table_location.clone(), version: self.version + 1, id: Uuid::new_v4(), } @@ -80,17 +80,17 @@ impl MetadataLocationParser { } } -impl Display for MetadataLocationParser { +impl Display for MetadataLocation { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "{}/metadata/{:0>5}-{}.metadata.json", - self.prefix, self.version, self.id + self.table_location, self.version, self.id ) } } -impl FromStr for MetadataLocationParser { +impl FromStr for MetadataLocation { type Err = Error; fn from_str(s: &str) -> Result { @@ -102,8 +102,8 @@ impl FromStr for MetadataLocationParser { let prefix = Self::parse_metadata_path_prefix(path)?; let (version, id) = Self::parse_file_name(file_name)?; - Ok(MetadataLocationParser { - prefix, + Ok(MetadataLocation { + table_location: prefix, version, id, }) @@ -116,7 +116,7 @@ mod test { use uuid::Uuid; - use crate::MetadataLocationParser; + use crate::MetadataLocation; #[test] fn test_metadata_location_from_string() { @@ -124,8 +124,8 @@ mod test { // No prefix ( "/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", - Ok(MetadataLocationParser { - prefix: "".to_string(), + Ok(MetadataLocation { + table_location: "".to_string(), version: 1234567, id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), }), @@ -133,8 +133,8 @@ mod test { // Some prefix ( "/abc/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", - Ok(MetadataLocationParser { - prefix: "/abc".to_string(), + Ok(MetadataLocation { + table_location: "/abc".to_string(), version: 1234567, id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), }), @@ -142,8 +142,8 @@ mod test { // Longer prefix ( "/abc/def/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", - Ok(MetadataLocationParser { - prefix: "/abc/def".to_string(), + Ok(MetadataLocation { + table_location: "/abc/def".to_string(), version: 1234567, id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), }), @@ -151,8 +151,8 @@ mod test { // Prefix with special characters ( "https://127.0.0.1/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", - Ok(MetadataLocationParser { - prefix: "https://127.0.0.1".to_string(), + Ok(MetadataLocation { + table_location: "https://127.0.0.1".to_string(), version: 1234567, id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), }), @@ -160,8 +160,8 @@ mod test { // Another id ( "/abc/metadata/1234567-81056704-ce5b-41c4-bb83-eb6408081af6.metadata.json", - Ok(MetadataLocationParser { - prefix: "/abc".to_string(), + Ok(MetadataLocation { + table_location: "/abc".to_string(), version: 1234567, id: Uuid::from_str("81056704-ce5b-41c4-bb83-eb6408081af6").unwrap(), }), @@ -169,8 +169,8 @@ mod test { // Version 0 ( "/abc/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", - Ok(MetadataLocationParser { - prefix: "/abc".to_string(), + Ok(MetadataLocation { + table_location: "/abc".to_string(), version: 0, id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(), }), @@ -207,7 +207,7 @@ mod test { ]; for (input, expected) in test_cases { - match MetadataLocationParser::from_str(input) { + match MetadataLocation::from_str(input) { Ok(metadata_location) => { assert!(expected.is_ok()); assert_eq!(metadata_location, expected.unwrap()); @@ -220,18 +220,18 @@ mod test { #[test] fn test_metadata_location_with_next_version() { let test_cases = vec![ - MetadataLocationParser::new_with_prefix("/abc"), - MetadataLocationParser::from_str( + MetadataLocation::new_with_location("/abc"), + MetadataLocation::from_str( "/abc/def/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", ) .unwrap(), ]; for input in test_cases { - let next = MetadataLocationParser::from_str(&input.to_string()) + let next = MetadataLocation::from_str(&input.to_string()) .unwrap() .with_next_version(); - assert_eq!(next.prefix, input.prefix); + assert_eq!(next.table_location, input.table_location); assert_eq!(next.version, input.version + 1); assert_ne!(next.id, input.id); } diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 51973ef4d7..5dfabb9a9e 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -18,7 +18,7 @@ //! Catalog API for Apache Iceberg pub mod memory; -mod utils; +mod metadata_location; use std::collections::HashMap; use std::fmt::{Debug, Display}; @@ -35,7 +35,7 @@ pub use memory::MemoryCatalog; use mockall::automock; use serde_derive::{Deserialize, Serialize}; use typed_builder::TypedBuilder; -pub use utils::*; +pub use metadata_location::*; use uuid::Uuid; use crate::spec::{ @@ -357,7 +357,7 @@ impl TableCommit { } // Bump the version of metadata - let new_metadata_location = MetadataLocationParser::from_str(current_metadata_location)? + let new_metadata_location = MetadataLocation::from_str(current_metadata_location)? .with_next_version() .to_string(); From b7c3e3f859805ec8a6cf9f4556044d0df53d2970 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Thu, 24 Jul 2025 09:52:29 -0700 Subject: [PATCH 15/23] fmt --- crates/iceberg/src/catalog/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 5dfabb9a9e..1747811667 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -31,11 +31,11 @@ use std::sync::Arc; use _serde::deserialize_snapshot; use async_trait::async_trait; pub use memory::MemoryCatalog; +pub use metadata_location::*; #[cfg(test)] use mockall::automock; use serde_derive::{Deserialize, Serialize}; use typed_builder::TypedBuilder; -pub use metadata_location::*; use uuid::Uuid; use crate::spec::{ From a140c4798e89b9b6d075934032b91ca8689db6b2 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Thu, 24 Jul 2025 10:09:18 -0700 Subject: [PATCH 16/23] add md loc res --- crates/iceberg/src/catalog/memory/catalog.rs | 2 +- crates/iceberg/src/catalog/memory/namespace_state.rs | 2 +- crates/iceberg/src/table.rs | 11 +++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index d6b144d88d..f698488520 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -310,7 +310,7 @@ impl Catalog for MemoryCatalog { .metadata() .write_to( staged_table.file_io(), - staged_table.metadata_location().unwrap(), + staged_table.metadata_location_result()?, ) .await?; diff --git a/crates/iceberg/src/catalog/memory/namespace_state.rs b/crates/iceberg/src/catalog/memory/namespace_state.rs index f5df207d1f..c8c2dda391 100644 --- a/crates/iceberg/src/catalog/memory/namespace_state.rs +++ b/crates/iceberg/src/catalog/memory/namespace_state.rs @@ -306,7 +306,7 @@ impl NamespaceState { .table_metadata_locations .insert( staged_table.identifier().name().to_string(), - staged_table.metadata_location().unwrap().to_string(), + staged_table.metadata_location_result()?.to_string(), ) .ok_or(Error::new( ErrorKind::TableNotFound, diff --git a/crates/iceberg/src/table.rs b/crates/iceberg/src/table.rs index e094292ce6..d4e696ce84 100644 --- a/crates/iceberg/src/table.rs +++ b/crates/iceberg/src/table.rs @@ -198,6 +198,17 @@ impl Table { self.metadata_location.as_deref() } + /// Returns current metadata location in a result. + pub fn metadata_location_result(&self) -> Result<&str> { + self.metadata_location.as_deref().ok_or(Error::new( + ErrorKind::DataInvalid, + format!( + "Metadata location does not exist for table: {}", + self.identifier + ), + )) + } + /// Returns file io used in this table. pub fn file_io(&self) -> &FileIO { &self.file_io From 61de3eda9a4fa4605afbabf3a90769622185c604 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Thu, 24 Jul 2025 10:23:12 -0700 Subject: [PATCH 17/23] repairo --- Cargo.lock | 4 ++ crates/catalog/glue/Cargo.toml | 1 + crates/catalog/glue/src/catalog.rs | 10 ++--- crates/catalog/glue/src/utils.rs | 54 +++++++++++++++++++++----- crates/catalog/hms/Cargo.toml | 1 + crates/catalog/hms/src/catalog.rs | 14 +++---- crates/catalog/hms/src/utils.rs | 47 ++++++++++++++++++++-- crates/catalog/s3tables/Cargo.toml | 1 + crates/catalog/s3tables/src/catalog.rs | 8 ++-- crates/catalog/s3tables/src/utils.rs | 29 ++++++++++++++ crates/catalog/sql/Cargo.toml | 1 + crates/catalog/sql/src/catalog.rs | 27 +++++++------ 12 files changed, 157 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d72e47aa28..c3e581ccb0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3580,6 +3580,7 @@ dependencies = [ "tokio", "tracing", "typed-builder 0.20.1", + "uuid", ] [[package]] @@ -3603,6 +3604,7 @@ dependencies = [ "tokio", "tracing", "typed-builder 0.20.1", + "uuid", "volo", "volo-thrift", ] @@ -3643,6 +3645,7 @@ dependencies = [ "itertools 0.13.0", "tokio", "typed-builder 0.20.1", + "uuid", ] [[package]] @@ -3658,6 +3661,7 @@ dependencies = [ "tempfile", "tokio", "typed-builder 0.20.1", + "uuid", ] [[package]] diff --git a/crates/catalog/glue/Cargo.toml b/crates/catalog/glue/Cargo.toml index 613160e468..a71b51f8d9 100644 --- a/crates/catalog/glue/Cargo.toml +++ b/crates/catalog/glue/Cargo.toml @@ -38,6 +38,7 @@ serde_json = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } typed-builder = { workspace = true } +uuid = { workspace = true } [dev-dependencies] ctor = { workspace = true } diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index c71ea9b71c..f4b4a01f9a 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -26,15 +26,15 @@ use iceberg::io::{ use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ - Catalog, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, - TableCreation, TableIdent, + Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, + TableIdent, }; use typed_builder::TypedBuilder; use crate::error::{from_aws_build_error, from_aws_sdk_error}; use crate::utils::{ - convert_to_database, convert_to_glue_table, convert_to_namespace, create_sdk_config, - get_default_table_location, get_metadata_location, validate_namespace, + convert_to_database, convert_to_glue_table, convert_to_namespace, create_metadata_location, + create_sdk_config, get_default_table_location, get_metadata_location, validate_namespace, }; use crate::{ AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, with_catalog_id, @@ -393,7 +393,7 @@ impl Catalog for GlueCatalog { let metadata = TableMetadataBuilder::from_table_creation(creation)? .build()? .metadata; - let metadata_location = MetadataLocation::new_with_location(location).to_string(); + let metadata_location = create_metadata_location(&location, 0)?; metadata.write_to(&self.file_io, &metadata_location).await?; diff --git a/crates/catalog/glue/src/utils.rs b/crates/catalog/glue/src/utils.rs index f3b1eb0cb7..c51dd6249f 100644 --- a/crates/catalog/glue/src/utils.rs +++ b/crates/catalog/glue/src/utils.rs @@ -22,6 +22,7 @@ use aws_sdk_glue::config::Credentials; use aws_sdk_glue::types::{Database, DatabaseInput, StorageDescriptor, TableInput}; use iceberg::spec::TableMetadata; use iceberg::{Error, ErrorKind, Namespace, NamespaceIdent, Result}; +use uuid::Uuid; use crate::error::from_aws_build_error; use crate::schema::GlueSchemaBuilder; @@ -227,6 +228,30 @@ pub(crate) fn get_default_table_location( } } +/// Create metadata location from `location` and `version` +pub(crate) fn create_metadata_location(location: impl AsRef, version: i32) -> Result { + if version < 0 { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Table metadata version: '{}' must be a non-negative integer", + version + ), + )); + }; + + let version = format!("{:0>5}", version); + let id = Uuid::new_v4(); + let metadata_location = format!( + "{}/metadata/{}-{}.metadata.json", + location.as_ref(), + version, + id + ); + + Ok(metadata_location) +} + /// Get metadata location from `GlueTable` parameters pub(crate) fn get_metadata_location( parameters: &Option>, @@ -263,7 +288,7 @@ mod tests { use aws_sdk_glue::config::ProvideCredentials; use aws_sdk_glue::types::Column; use iceberg::spec::{NestedField, PrimitiveType, Schema, TableMetadataBuilder, Type}; - use iceberg::{MetadataLocation, Namespace, Result, TableCreation}; + use iceberg::{Namespace, Result, TableCreation}; use super::*; use crate::schema::{ICEBERG_FIELD_CURRENT, ICEBERG_FIELD_ID, ICEBERG_FIELD_OPTIONAL}; @@ -307,7 +332,7 @@ mod tests { fn test_convert_to_glue_table() -> Result<()> { let table_name = "my_table".to_string(); let location = "s3a://warehouse/hive".to_string(); - let metadata_location = MetadataLocation::new_with_location(&location).to_string(); + let metadata_location = create_metadata_location(location.clone(), 0)?; let properties = HashMap::new(); let schema = Schema::builder() .with_schema_id(1) @@ -337,13 +362,8 @@ mod tests { .location(metadata.location()) .build(); - let result = convert_to_glue_table( - &table_name, - metadata_location.to_string(), - &metadata, - &properties, - None, - )?; + let result = + convert_to_glue_table(&table_name, metadata_location, &metadata, &properties, None)?; assert_eq!(result.name(), &table_name); assert_eq!(result.description(), None); @@ -352,6 +372,22 @@ mod tests { Ok(()) } + #[test] + fn test_create_metadata_location() -> Result<()> { + let location = "my_base_location"; + let valid_version = 0; + let invalid_version = -1; + + let valid_result = create_metadata_location(location, valid_version)?; + let invalid_result = create_metadata_location(location, invalid_version); + + assert!(valid_result.starts_with("my_base_location/metadata/00000-")); + assert!(valid_result.ends_with(".metadata.json")); + assert!(invalid_result.is_err()); + + Ok(()) + } + #[test] fn test_get_default_table_location() -> Result<()> { let properties = HashMap::from([(LOCATION.to_string(), "db_location".to_string())]); diff --git a/crates/catalog/hms/Cargo.toml b/crates/catalog/hms/Cargo.toml index 707f3ed6a4..5f8956f40d 100644 --- a/crates/catalog/hms/Cargo.toml +++ b/crates/catalog/hms/Cargo.toml @@ -39,6 +39,7 @@ serde_json = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } typed-builder = { workspace = true } +uuid = { workspace = true } volo-thrift = { workspace = true } # Transitive dependencies below diff --git a/crates/catalog/hms/src/catalog.rs b/crates/catalog/hms/src/catalog.rs index 97b755396c..72fb8c6b33 100644 --- a/crates/catalog/hms/src/catalog.rs +++ b/crates/catalog/hms/src/catalog.rs @@ -29,8 +29,8 @@ use iceberg::io::FileIO; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ - Catalog, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, - TableCreation, TableIdent, + Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, + TableIdent, }; use typed_builder::TypedBuilder; use volo_thrift::MaybeException; @@ -351,18 +351,16 @@ impl Catalog for HmsCatalog { .build()? .metadata; - let metadata_location = MetadataLocation::new_with_location(&location); + let metadata_location = create_metadata_location(&location, 0)?; - metadata - .write_to(&self.file_io, &metadata_location.to_string()) - .await?; + metadata.write_to(&self.file_io, &metadata_location).await?; let hive_table = convert_to_hive_table( db_name.clone(), metadata.current_schema(), table_name.clone(), location, - metadata_location.to_string(), + metadata_location.clone(), metadata.properties(), )?; @@ -374,7 +372,7 @@ impl Catalog for HmsCatalog { Table::builder() .file_io(self.file_io()) - .metadata_location(metadata_location.to_string()) + .metadata_location(metadata_location) .metadata(metadata) .identifier(TableIdent::new(NamespaceIdent::new(db_name), table_name)) .build() diff --git a/crates/catalog/hms/src/utils.rs b/crates/catalog/hms/src/utils.rs index 2397fc20f6..432ceac833 100644 --- a/crates/catalog/hms/src/utils.rs +++ b/crates/catalog/hms/src/utils.rs @@ -22,6 +22,7 @@ use hive_metastore::{Database, PrincipalType, SerDeInfo, StorageDescriptor}; use iceberg::spec::Schema; use iceberg::{Error, ErrorKind, Namespace, NamespaceIdent, Result}; use pilota::{AHashMap, FastStr}; +use uuid::Uuid; use crate::schema::HiveSchemaBuilder; @@ -248,6 +249,30 @@ pub(crate) fn get_default_table_location( format!("{}/{}", location, table_name.as_ref()) } +/// Create metadata location from `location` and `version` +pub(crate) fn create_metadata_location(location: impl AsRef, version: i32) -> Result { + if version < 0 { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Table metadata version: '{}' must be a non-negative integer", + version + ), + )); + }; + + let version = format!("{:0>5}", version); + let id = Uuid::new_v4(); + let metadata_location = format!( + "{}/metadata/{}-{}.metadata.json", + location.as_ref(), + version, + id + ); + + Ok(metadata_location) +} + /// Get metadata location from `HiveTable` parameters pub(crate) fn get_metadata_location( parameters: &Option>, @@ -314,7 +339,7 @@ fn get_current_time() -> Result { #[cfg(test)] mod tests { use iceberg::spec::{NestedField, PrimitiveType, Type}; - use iceberg::{MetadataLocation, Namespace, NamespaceIdent}; + use iceberg::{Namespace, NamespaceIdent}; use super::*; @@ -345,7 +370,7 @@ mod tests { let db_name = "my_db".to_string(); let table_name = "my_table".to_string(); let location = "s3a://warehouse/hms".to_string(); - let metadata_location = MetadataLocation::new_with_location(location.clone()); + let metadata_location = create_metadata_location(location.clone(), 0)?; let properties = HashMap::new(); let schema = Schema::builder() .with_schema_id(1) @@ -360,7 +385,7 @@ mod tests { &schema, table_name.clone(), location.clone(), - metadata_location.to_string(), + metadata_location, &properties, )?; @@ -389,6 +414,22 @@ mod tests { Ok(()) } + #[test] + fn test_create_metadata_location() -> Result<()> { + let location = "my_base_location"; + let valid_version = 0; + let invalid_version = -1; + + let valid_result = create_metadata_location(location, valid_version)?; + let invalid_result = create_metadata_location(location, invalid_version); + + assert!(valid_result.starts_with("my_base_location/metadata/00000-")); + assert!(valid_result.ends_with(".metadata.json")); + assert!(invalid_result.is_err()); + + Ok(()) + } + #[test] fn test_get_default_table_location() -> Result<()> { let properties = HashMap::from([(LOCATION.to_string(), "db_location".to_string())]); diff --git a/crates/catalog/s3tables/Cargo.toml b/crates/catalog/s3tables/Cargo.toml index cdcd96b6b1..0ec1f55f62 100644 --- a/crates/catalog/s3tables/Cargo.toml +++ b/crates/catalog/s3tables/Cargo.toml @@ -35,6 +35,7 @@ aws-config = { workspace = true } aws-sdk-s3tables = "1.10.0" iceberg = { workspace = true } typed-builder = { workspace = true } +uuid = { workspace = true, features = ["v4"] } [dev-dependencies] iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } diff --git a/crates/catalog/s3tables/src/catalog.rs b/crates/catalog/s3tables/src/catalog.rs index f245d00a27..191356d711 100644 --- a/crates/catalog/s3tables/src/catalog.rs +++ b/crates/catalog/s3tables/src/catalog.rs @@ -28,12 +28,12 @@ use iceberg::io::{FileIO, FileIOBuilder}; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ - Catalog, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, - TableCreation, TableIdent, + Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, + TableIdent, }; use typed_builder::TypedBuilder; -use crate::utils::create_sdk_config; +use crate::utils::{create_metadata_location, create_sdk_config}; /// S3Tables catalog configuration. #[derive(Debug, TypedBuilder)] @@ -325,7 +325,7 @@ impl Catalog for S3TablesCatalog { .await .map_err(from_aws_sdk_error)?; let warehouse_location = get_resp.warehouse_location().to_string(); - MetadataLocation::new_with_location(warehouse_location).to_string() + create_metadata_location(warehouse_location, 0)? } }; diff --git a/crates/catalog/s3tables/src/utils.rs b/crates/catalog/s3tables/src/utils.rs index 2f3e16a937..d0195dccfd 100644 --- a/crates/catalog/s3tables/src/utils.rs +++ b/crates/catalog/s3tables/src/utils.rs @@ -19,6 +19,8 @@ use std::collections::HashMap; use aws_config::{BehaviorVersion, Region, SdkConfig}; use aws_sdk_s3tables::config::Credentials; +use iceberg::{Error, ErrorKind, Result}; +use uuid::Uuid; /// Property aws profile name pub const AWS_PROFILE_NAME: &str = "profile_name"; @@ -69,3 +71,30 @@ pub(crate) async fn create_sdk_config( config.load().await } + +/// Create metadata location from `location` and `version` +pub(crate) fn create_metadata_location( + warehouse_location: impl AsRef, + version: i32, +) -> Result { + if version < 0 { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Table metadata version: '{}' must be a non-negative integer", + version + ), + )); + }; + + let version = format!("{:0>5}", version); + let id = Uuid::new_v4(); + let metadata_location = format!( + "{}/metadata/{}-{}.metadata.json", + warehouse_location.as_ref(), + version, + id + ); + + Ok(metadata_location) +} diff --git a/crates/catalog/sql/Cargo.toml b/crates/catalog/sql/Cargo.toml index 33ca700bf7..b767b68775 100644 --- a/crates/catalog/sql/Cargo.toml +++ b/crates/catalog/sql/Cargo.toml @@ -33,6 +33,7 @@ async-trait = { workspace = true } iceberg = { workspace = true } sqlx = { version = "0.8.1", features = ["any"], default-features = false } typed-builder = { workspace = true } +uuid = { workspace = true, features = ["v4"] } [dev-dependencies] iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs index 6d40e4ebb1..56c6fadcf1 100644 --- a/crates/catalog/sql/src/catalog.rs +++ b/crates/catalog/sql/src/catalog.rs @@ -23,12 +23,13 @@ use iceberg::io::FileIO; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; use iceberg::{ - Catalog, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, - TableCreation, TableIdent, + Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, + TableIdent, }; use sqlx::any::{AnyPoolOptions, AnyQueryResult, AnyRow, install_default_drivers}; use sqlx::{Any, AnyPool, Row, Transaction}; use typed_builder::TypedBuilder; +use uuid::Uuid; use crate::error::{ from_sqlx_error, no_such_namespace_err, no_such_table_err, table_already_exists_err, @@ -60,7 +61,7 @@ static TEST_BEFORE_ACQUIRE: bool = true; // Default the health-check of each con /// such as the database URI, warehouse location, and file I/O settings. /// You are required to provide a `SqlBindStyle`, which determines how SQL statements will be bound to values in the catalog. /// The options available for this parameter include: -/// - `SqlBindStyle::DollarNumeric`: Binds SQL statements using `$1`, `$2`, etc., as placeholders. This is for PostgresSQL databases. +/// - `SqlBindStyle::DollarNumeric`: Binds SQL statements using `$1`, `$2`, etc., as placeholders. This is for PostgreSQL databases. /// - `SqlBindStyle::QuestionMark`: Binds SQL statements using `?` as a placeholder. This is for MySQL and SQLite databases. #[derive(Debug, TypedBuilder)] pub struct SqlCatalogConfig { @@ -283,7 +284,7 @@ impl Catalog for SqlCatalog { if exists { return Err(Error::new( - ErrorKind::Unexpected, + iceberg::ErrorKind::Unexpected, format!("Namespace {:?} already exists", namespace), )); } @@ -486,7 +487,7 @@ impl Catalog for SqlCatalog { let tables = self.list_tables(namespace).await?; if !tables.is_empty() { return Err(Error::new( - ErrorKind::Unexpected, + iceberg::ErrorKind::Unexpected, format!( "Namespace {:?} is not empty. {} tables exist.", namespace, @@ -699,7 +700,11 @@ impl Catalog for SqlCatalog { let tbl_metadata = TableMetadataBuilder::from_table_creation(tbl_creation)? .build()? .metadata; - let tbl_metadata_location = MetadataLocation::new_with_location(location).to_string(); + let tbl_metadata_location = format!( + "{}/metadata/0-{}.metadata.json", + location.clone(), + Uuid::new_v4() + ); tbl_metadata .write_to(&self.fileio, &tbl_metadata_location) @@ -805,7 +810,7 @@ mod tests { temp_dir.path().to_str().unwrap().to_string() } - fn to_set(vec: Vec) -> HashSet { + fn to_set(vec: Vec) -> HashSet { HashSet::from_iter(vec) } @@ -1505,7 +1510,7 @@ mod tests { let table_name = "tbl1"; let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/tbl1/metadata/00000-{}.metadata.json$", + "^{}/tbl1/metadata/0-{}.metadata.json$", namespace_location, UUID_REGEX_STR, ); @@ -1562,7 +1567,7 @@ mod tests { let expected_table_ident = TableIdent::new(nested_namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/tbl1/metadata/00000-{}.metadata.json$", + "^{}/tbl1/metadata/0-{}.metadata.json$", nested_namespace_location, UUID_REGEX_STR, ); @@ -1602,7 +1607,7 @@ mod tests { let table_name = "tbl1"; let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/a/tbl1/metadata/00000-{}.metadata.json$", + "^{}/a/tbl1/metadata/0-{}.metadata.json$", warehouse_loc, UUID_REGEX_STR ); @@ -1641,7 +1646,7 @@ mod tests { let expected_table_ident = TableIdent::new(nested_namespace_ident.clone(), table_name.into()); let expected_table_metadata_location_regex = format!( - "^{}/a/b/tbl1/metadata/00000-{}.metadata.json$", + "^{}/a/b/tbl1/metadata/0-{}.metadata.json$", warehouse_loc, UUID_REGEX_STR ); From 3044a757019609bfe9a40bac9efb825c0c5e0f7e Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Thu, 24 Jul 2025 10:28:25 -0700 Subject: [PATCH 18/23] minor optimization --- crates/iceberg/src/catalog/mod.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 1747811667..a468edc475 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -339,13 +339,7 @@ impl TableCommit { } // get current metadata location - let current_metadata_location = table.metadata_location().ok_or(Error::new( - ErrorKind::DataInvalid, - format!( - "Failed to apply commit, table metadata location is not set for table: {}", - table.identifier() - ), - ))?; + let current_metadata_location = table.metadata_location_result()?; // apply updates to metadata builder let mut metadata_builder = table From a7b1b3803de3685615d144aa10c84a24d744a29e Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Fri, 25 Jul 2025 11:00:43 -0700 Subject: [PATCH 19/23] Update crates/iceberg/src/catalog/memory/catalog.rs Co-authored-by: Jannik Steinmann --- crates/iceberg/src/catalog/memory/catalog.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index f698488520..216837fdd0 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -297,7 +297,6 @@ impl Catalog for MemoryCatalog { async fn update_table(&self, commit: TableCommit) -> Result
{ let mut root_namespace_state = self.root_namespace_state.lock().await; - // Updates the current table version and writes a new metadata file. let current_table = self .load_table_from_locked_state(commit.identifier(), &root_namespace_state) .await?; From d70959d04d9368991dda2e6a46b70e72de545bb3 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Fri, 25 Jul 2025 11:02:14 -0700 Subject: [PATCH 20/23] Update crates/iceberg/src/catalog/memory/namespace_state.rs Co-authored-by: Jannik Steinmann --- crates/iceberg/src/catalog/memory/namespace_state.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/catalog/memory/namespace_state.rs b/crates/iceberg/src/catalog/memory/namespace_state.rs index c8c2dda391..2fc481b767 100644 --- a/crates/iceberg/src/catalog/memory/namespace_state.rs +++ b/crates/iceberg/src/catalog/memory/namespace_state.rs @@ -298,7 +298,7 @@ impl NamespaceState { } } - // Updates the metadata location of the given table or returns an error if it doesn't exist + /// Updates the metadata location of the given table or returns an error if it doesn't exist pub(crate) fn commit_table_update(&mut self, staged_table: Table) -> Result
{ let namespace = self.get_mut_namespace(staged_table.identifier().namespace())?; From 65d806f2cbd238ce0e9751454ba1d25b413640a7 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Fri, 25 Jul 2025 11:03:03 -0700 Subject: [PATCH 21/23] Update crates/iceberg/src/catalog/metadata_location.rs Co-authored-by: Renjie Liu --- crates/iceberg/src/catalog/metadata_location.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/iceberg/src/catalog/metadata_location.rs b/crates/iceberg/src/catalog/metadata_location.rs index 797c243077..6458c7fafa 100644 --- a/crates/iceberg/src/catalog/metadata_location.rs +++ b/crates/iceberg/src/catalog/metadata_location.rs @@ -33,7 +33,7 @@ pub struct MetadataLocation { impl MetadataLocation { /// Creates a completely new metadata location starting at version 0. /// Only used for creating a new table. For updates, see `with_next_version`. - pub fn new_with_location(table_location: impl ToString) -> Self { + pub fn new_with_table_location(table_location: impl ToString) -> Self { Self { table_location: table_location.to_string(), version: 0, From ede1b85616e7acd0152965220e2eb6da711fab8c Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Fri, 25 Jul 2025 11:05:21 -0700 Subject: [PATCH 22/23] fix fix fix --- crates/iceberg/src/catalog/memory/catalog.rs | 4 +- .../iceberg/src/catalog/metadata_location.rs | 2 +- crates/sqllogictest/bin/sqllogictest.rs | 41 +++++++++++++++++++ 3 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 crates/sqllogictest/bin/sqllogictest.rs diff --git a/crates/iceberg/src/catalog/memory/catalog.rs b/crates/iceberg/src/catalog/memory/catalog.rs index 216837fdd0..12d18b9f36 100644 --- a/crates/iceberg/src/catalog/memory/catalog.rs +++ b/crates/iceberg/src/catalog/memory/catalog.rs @@ -219,7 +219,7 @@ impl Catalog for MemoryCatalog { let metadata = TableMetadataBuilder::from_table_creation(table_creation)? .build()? .metadata; - let metadata_location = MetadataLocation::new_with_location(location).to_string(); + let metadata_location = MetadataLocation::new_with_table_location(location).to_string(); metadata.write_to(&self.file_io, &metadata_location).await?; @@ -1822,8 +1822,6 @@ mod tests { assert!(table.metadata().last_updated_ms() < updated_table.metadata().last_updated_ms()); assert_ne!(table.metadata_location(), updated_table.metadata_location()); - println!("left: {:?}", table.metadata().metadata_log()); - println!("right: {:?}", updated_table.metadata().metadata_log()); assert!( table.metadata().metadata_log().len() < updated_table.metadata().metadata_log().len() ); diff --git a/crates/iceberg/src/catalog/metadata_location.rs b/crates/iceberg/src/catalog/metadata_location.rs index 6458c7fafa..8cb5cb11d2 100644 --- a/crates/iceberg/src/catalog/metadata_location.rs +++ b/crates/iceberg/src/catalog/metadata_location.rs @@ -220,7 +220,7 @@ mod test { #[test] fn test_metadata_location_with_next_version() { let test_cases = vec![ - MetadataLocation::new_with_location("/abc"), + MetadataLocation::new_with_table_location("/abc"), MetadataLocation::from_str( "/abc/def/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json", ) diff --git a/crates/sqllogictest/bin/sqllogictest.rs b/crates/sqllogictest/bin/sqllogictest.rs new file mode 100644 index 0000000000..acb0007c3b --- /dev/null +++ b/crates/sqllogictest/bin/sqllogictest.rs @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +const TEST_DIRECTORY: &str = "test_files/"; + +pub fn main() -> Result<()> { + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()? + .block_on(run_tests()) +} + +async fn run_tests() -> Result<()> { + // Enable logging (e.g. set RUST_LOG=debug to see debug logs) + env_logger::init(); + + let options: Options = Parser::parse(); + if options.list { + // nextest parses stdout, so print messages to stderr + eprintln!("NOTICE: --list option unsupported, quitting"); + // return Ok, not error so that tools like nextest which are listing all + // workspace tests (by running `cargo test ... --list --format terse`) + // do not fail when they encounter this binary. Instead, print nothing + // to stdout and return OK so they can continue listing other tests. + return Ok(()); + } +} From 00ac0205b048d77ce530e2483dfad8cdb8be0885 Mon Sep 17 00:00:00 2001 From: Shawn Chang Date: Fri, 25 Jul 2025 11:06:27 -0700 Subject: [PATCH 23/23] something slipped in --- crates/sqllogictest/bin/sqllogictest.rs | 41 ------------------------- 1 file changed, 41 deletions(-) delete mode 100644 crates/sqllogictest/bin/sqllogictest.rs diff --git a/crates/sqllogictest/bin/sqllogictest.rs b/crates/sqllogictest/bin/sqllogictest.rs deleted file mode 100644 index acb0007c3b..0000000000 --- a/crates/sqllogictest/bin/sqllogictest.rs +++ /dev/null @@ -1,41 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -const TEST_DIRECTORY: &str = "test_files/"; - -pub fn main() -> Result<()> { - tokio::runtime::Builder::new_multi_thread() - .enable_all() - .build()? - .block_on(run_tests()) -} - -async fn run_tests() -> Result<()> { - // Enable logging (e.g. set RUST_LOG=debug to see debug logs) - env_logger::init(); - - let options: Options = Parser::parse(); - if options.list { - // nextest parses stdout, so print messages to stderr - eprintln!("NOTICE: --list option unsupported, quitting"); - // return Ok, not error so that tools like nextest which are listing all - // workspace tests (by running `cargo test ... --list --format terse`) - // do not fail when they encounter this binary. Instead, print nothing - // to stdout and return OK so they can continue listing other tests. - return Ok(()); - } -}