From 6ce7a8812404fde2ee584f751d1f4147fc9d22d5 Mon Sep 17 00:00:00 2001 From: geruh Date: Mon, 9 Jun 2025 19:01:43 -0700 Subject: [PATCH 1/2] Fix glue storage descriptor on table creation --- crates/catalog/glue/src/schema.rs | 2 +- crates/catalog/glue/src/utils.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs index 28093c5d74..bd354ec5d0 100644 --- a/crates/catalog/glue/src/schema.rs +++ b/crates/catalog/glue/src/schema.rs @@ -118,7 +118,7 @@ impl SchemaVisitor for GlueSchemaBuilder { (ICEBERG_FIELD_ID.to_string(), format!("{}", field.id)), ( ICEBERG_FIELD_OPTIONAL.to_string(), - format!("{}", field.required).to_lowercase(), + format!("{}", !field.required).to_lowercase(), ), ( ICEBERG_FIELD_CURRENT.to_string(), diff --git a/crates/catalog/glue/src/utils.rs b/crates/catalog/glue/src/utils.rs index 0384e15a20..c51dd6249f 100644 --- a/crates/catalog/glue/src/utils.rs +++ b/crates/catalog/glue/src/utils.rs @@ -151,7 +151,7 @@ pub(crate) fn convert_to_glue_table( let storage_descriptor = StorageDescriptor::builder() .set_columns(Some(glue_schema)) - .location(&metadata_location) + .location(metadata.location().to_string()) .build(); let mut parameters = HashMap::from([ @@ -345,7 +345,7 @@ mod tests { let parameters = HashMap::from([ (ICEBERG_FIELD_ID.to_string(), "1".to_string()), - (ICEBERG_FIELD_OPTIONAL.to_string(), "true".to_string()), + (ICEBERG_FIELD_OPTIONAL.to_string(), "false".to_string()), (ICEBERG_FIELD_CURRENT.to_string(), "true".to_string()), ]); @@ -359,7 +359,7 @@ mod tests { let storage_descriptor = StorageDescriptor::builder() .set_columns(Some(vec![column])) - .location(&metadata_location) + .location(metadata.location()) .build(); let result = From 40dbc1850840e8b050c8ab576244c45abb2adc76 Mon Sep 17 00:00:00 2001 From: geruh Date: Mon, 9 Jun 2025 19:52:57 -0700 Subject: [PATCH 2/2] Add glue/schema tests --- crates/catalog/glue/src/schema.rs | 73 ++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 15 deletions(-) diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs index bd354ec5d0..43918484d5 100644 --- a/crates/catalog/glue/src/schema.rs +++ b/crates/catalog/glue/src/schema.rs @@ -209,10 +209,11 @@ mod tests { name: impl Into, r#type: impl Into, id: impl Into, + optional: bool, ) -> Result { let parameters = HashMap::from([ (ICEBERG_FIELD_ID.to_string(), id.into()), - (ICEBERG_FIELD_OPTIONAL.to_string(), "true".to_string()), + (ICEBERG_FIELD_OPTIONAL.to_string(), optional.to_string()), (ICEBERG_FIELD_CURRENT.to_string(), "true".to_string()), ]); @@ -318,19 +319,19 @@ mod tests { let result = GlueSchemaBuilder::from_iceberg(&metadata)?.build(); let expected = vec![ - create_column("c1", "boolean", "1")?, - create_column("c2", "int", "2")?, - create_column("c3", "bigint", "3")?, - create_column("c4", "float", "4")?, - create_column("c5", "double", "5")?, - create_column("c6", "decimal(2,2)", "6")?, - create_column("c7", "date", "7")?, - create_column("c8", "string", "8")?, - create_column("c9", "timestamp", "9")?, - create_column("c10", "string", "10")?, - create_column("c11", "string", "11")?, - create_column("c12", "binary", "12")?, - create_column("c13", "binary", "13")?, + create_column("c1", "boolean", "1", false)?, + create_column("c2", "int", "2", false)?, + create_column("c3", "bigint", "3", false)?, + create_column("c4", "float", "4", false)?, + create_column("c5", "double", "5", false)?, + create_column("c6", "decimal(2,2)", "6", false)?, + create_column("c7", "date", "7", false)?, + create_column("c8", "string", "8", false)?, + create_column("c9", "timestamp", "9", false)?, + create_column("c10", "string", "10", false)?, + create_column("c11", "string", "11", false)?, + create_column("c12", "binary", "12", false)?, + create_column("c13", "binary", "13", false)?, ]; assert_eq!(result, expected); @@ -378,6 +379,7 @@ mod tests { "person", "struct", "1", + false, )?]; assert_eq!(result, expected); @@ -432,6 +434,7 @@ mod tests { "location", "array>", "1", + false, )?]; assert_eq!(result, expected); @@ -475,10 +478,50 @@ mod tests { let result = GlueSchemaBuilder::from_iceberg(&metadata)?.build(); - let expected = vec![create_column("quux", "map>", "1")?]; + let expected = vec![create_column( + "quux", + "map>", + "1", + false, + )?]; assert_eq!(result, expected); Ok(()) } + + #[test] + fn test_schema_with_optional_fields() -> Result<()> { + let record = r#"{ + "type": "struct", + "schema-id": 1, + "fields": [ + { + "id": 1, + "name": "required_field", + "required": true, + "type": "string" + }, + { + "id": 2, + "name": "optional_field", + "required": false, + "type": "int" + } + ] + }"#; + + let schema = serde_json::from_str::(record)?; + let metadata = create_metadata(schema)?; + + let result = GlueSchemaBuilder::from_iceberg(&metadata)?.build(); + + let expected = vec![ + create_column("required_field", "string", "1", false)?, + create_column("optional_field", "int", "2", true)?, + ]; + + assert_eq!(result, expected); + Ok(()) + } }