Skip to content

Commit 3eee6e3

Browse files
committed
Simplify partition structures
This PR removes `SchemalessPartitionSpec` and `UnboundPartitionSpecField`. From the spec: > The field-id property was added for each partition field in v2. > In v1, the reference implementation assigned field ids sequentially > in each spec starting at 1,000. See Partition Evolution for more details. > In v1, partition field IDs were not tracked, but were assigned sequentially > starting at 1000 in the reference implementation. This assignment caused > problems when reading metadata tables based on manifest files from multiple > specs because partition fields with the same ID may contain different data types. > For compatibility with old versions, the following rules are recommended for partition evolution in v1 tables: > - Do not reorder partition fields > - Do not drop partition fields; instead replace the field's transform with the void transform > - Only add partition fields at the end of the previous partition spec I think for simplicity, we should assign the field-IDs starting from 1000, and this will greatly simplify the objects that we need. Next to that, I also believe that users shouldn't have to worry about the field-IDs and that it should be kept internal to Iceberg-Rust.
1 parent 3e7d47a commit 3eee6e3

File tree

11 files changed

+257
-506
lines changed

11 files changed

+257
-506
lines changed

crates/catalog/memory/src/catalog.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ mod tests {
367367
.partition_specs_iter()
368368
.map(|p| p.as_ref())
369369
.collect_vec(),
370-
vec![&expected_partition_spec.into_schemaless()]
370+
vec![&expected_partition_spec.into_unbound()]
371371
);
372372

373373
let expected_sorted_order = SortOrder::builder()

crates/catalog/rest/src/catalog.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -700,9 +700,9 @@ mod tests {
700700

701701
use chrono::{TimeZone, Utc};
702702
use iceberg::spec::{
703-
FormatVersion, NestedField, NullOrder, Operation, PrimitiveType, Schema, Snapshot,
704-
SnapshotLog, SortDirection, SortField, SortOrder, Summary, Transform, Type,
705-
UnboundPartitionField, UnboundPartitionSpec,
703+
FormatVersion, NestedField, NullOrder, Operation, PartitionField, PrimitiveType, Schema,
704+
Snapshot, SnapshotLog, SortDirection, SortField, SortOrder, Summary, Transform, Type,
705+
UnboundPartitionSpec,
706706
};
707707
use iceberg::transaction::Transaction;
708708
use mockito::{Mock, Server, ServerGuard};
@@ -1489,9 +1489,10 @@ mod tests {
14891489
.properties(HashMap::from([("owner".to_string(), "testx".to_string())]))
14901490
.partition_spec(
14911491
UnboundPartitionSpec::builder()
1492-
.add_partition_fields(vec![UnboundPartitionField::builder()
1492+
.add_partition_fields(vec![PartitionField::builder()
14931493
.source_id(1)
14941494
.transform(Transform::Truncate(3))
1495+
.field_id(1000)
14951496
.name("id".to_string())
14961497
.build()])
14971498
.unwrap()

crates/catalog/sql/src/catalog.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -880,7 +880,7 @@ mod tests {
880880
.with_spec_id(0)
881881
.build()
882882
.unwrap()
883-
.into_schemaless();
883+
.into_unbound();
884884

885885
assert_eq!(
886886
metadata

crates/iceberg/src/catalog/mod.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,6 +1317,49 @@ mod tests {
13171317
);
13181318
}
13191319

1320+
#[test]
1321+
fn test_add_spec_with_field_id() {
1322+
test_serde_json(
1323+
r#"
1324+
{
1325+
"action": "add-spec",
1326+
"spec": {
1327+
"fields": [
1328+
{
1329+
"source-id": 4,
1330+
"name": "ts_day",
1331+
"transform": "day",
1332+
"field-id": 1000
1333+
},
1334+
{
1335+
"source-id": 1,
1336+
"name": "id_bucket",
1337+
"transform": "bucket[16]",
1338+
"field-id": 1001
1339+
},
1340+
{
1341+
"source-id": 2,
1342+
"name": "id_truncate",
1343+
"transform": "truncate[4]",
1344+
"field-id": 1002
1345+
}
1346+
]
1347+
}
1348+
}
1349+
"#,
1350+
TableUpdate::AddSpec {
1351+
spec: UnboundPartitionSpec::builder()
1352+
.add_partition_field(4, "ts_day".to_string(), Transform::Day)
1353+
.unwrap()
1354+
.add_partition_field(1, "id_bucket".to_string(), Transform::Bucket(16))
1355+
.unwrap()
1356+
.add_partition_field(2, "id_truncate".to_string(), Transform::Truncate(4))
1357+
.unwrap()
1358+
.build(),
1359+
},
1360+
);
1361+
}
1362+
13201363
#[test]
13211364
fn test_set_default_spec() {
13221365
test_serde_json(

crates/iceberg/src/expr/visitors/expression_evaluator.rs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,6 @@ mod tests {
260260
use crate::spec::{
261261
BoundPartitionSpec, BoundPartitionSpecRef, DataContentType, DataFile, DataFileFormat,
262262
Datum, Literal, NestedField, PrimitiveType, Schema, Struct, Transform, Type,
263-
UnboundPartitionField,
264263
};
265264
use crate::Result;
266265

@@ -275,12 +274,7 @@ mod tests {
275274

276275
let spec = BoundPartitionSpec::builder(schema.clone())
277276
.with_spec_id(1)
278-
.add_unbound_fields(vec![UnboundPartitionField::builder()
279-
.source_id(1)
280-
.name("a".to_string())
281-
.field_id(1)
282-
.transform(Transform::Identity)
283-
.build()])
277+
.add_partition_field("a", "a", Transform::Identity)
284278
.unwrap()
285279
.build()
286280
.unwrap();
@@ -302,7 +296,7 @@ mod tests {
302296
.build()?;
303297

304298
let mut inclusive_projection =
305-
InclusiveProjection::new((*partition_spec).clone().into_schemaless().into());
299+
InclusiveProjection::new((*partition_spec).clone().into_unbound().into());
306300

307301
let partition_filter = inclusive_projection
308302
.project(predicate)?

crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ mod test {
496496
};
497497
use crate::spec::{
498498
BoundPartitionSpec, DataContentType, DataFile, DataFileFormat, Datum, NestedField,
499-
PrimitiveType, Schema, Struct, Transform, Type, UnboundPartitionField,
499+
PrimitiveType, Schema, Struct, Transform, Type,
500500
};
501501

502502
const INT_MIN_VALUE: i32 = 30;
@@ -1658,12 +1658,7 @@ mod test {
16581658

16591659
let partition_spec = BoundPartitionSpec::builder(table_schema_ref.clone())
16601660
.with_spec_id(1)
1661-
.add_unbound_fields(vec![UnboundPartitionField::builder()
1662-
.source_id(1)
1663-
.name("a".to_string())
1664-
.field_id(1)
1665-
.transform(Transform::Identity)
1666-
.build()])
1661+
.add_partition_field("a", "a", Transform::Identity)
16671662
.unwrap()
16681663
.build()
16691664
.unwrap();

crates/iceberg/src/expr/visitors/inclusive_projection.rs

Lines changed: 18 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,16 @@ use fnv::FnvHashSet;
2121

2222
use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor};
2323
use crate::expr::{BoundPredicate, BoundReference, Predicate};
24-
use crate::spec::{Datum, PartitionField, SchemalessPartitionSpecRef};
24+
use crate::spec::{Datum, PartitionField, UnboundPartitionSpecRef};
2525
use crate::Error;
2626

2727
pub(crate) struct InclusiveProjection {
28-
partition_spec: SchemalessPartitionSpecRef,
28+
partition_spec: UnboundPartitionSpecRef,
2929
cached_parts: HashMap<i32, Vec<PartitionField>>,
3030
}
3131

3232
impl InclusiveProjection {
33-
pub(crate) fn new(partition_spec: SchemalessPartitionSpecRef) -> Self {
33+
pub(crate) fn new(partition_spec: UnboundPartitionSpecRef) -> Self {
3434
Self {
3535
partition_spec,
3636
cached_parts: HashMap::new(),
@@ -235,8 +235,8 @@ mod tests {
235235
use crate::expr::visitors::inclusive_projection::InclusiveProjection;
236236
use crate::expr::{Bind, Predicate, Reference};
237237
use crate::spec::{
238-
BoundPartitionSpec, Datum, NestedField, PrimitiveType, Schema, Transform, Type,
239-
UnboundPartitionField,
238+
BoundPartitionSpec, Datum, NestedField, PartitionField, PrimitiveType, Schema, Transform,
239+
Type,
240240
};
241241

242242
fn build_test_schema() -> Schema {
@@ -271,7 +271,7 @@ mod tests {
271271
.with_spec_id(1)
272272
.build()
273273
.unwrap()
274-
.into_schemaless();
274+
.into_unbound();
275275

276276
let arc_partition_spec = Arc::new(partition_spec);
277277

@@ -301,7 +301,7 @@ mod tests {
301301
let partition_spec = BoundPartitionSpec::builder(arc_schema.clone())
302302
.with_spec_id(1)
303303
.add_unbound_field(
304-
UnboundPartitionField::builder()
304+
PartitionField::builder()
305305
.source_id(1)
306306
.name("a".to_string())
307307
.field_id(1)
@@ -311,7 +311,7 @@ mod tests {
311311
.unwrap()
312312
.build()
313313
.unwrap()
314-
.into_schemaless();
314+
.into_unbound();
315315

316316
let arc_partition_spec = Arc::new(partition_spec);
317317

@@ -338,16 +338,11 @@ mod tests {
338338

339339
let partition_spec = BoundPartitionSpec::builder(arc_schema.clone())
340340
.with_spec_id(1)
341-
.add_unbound_fields(vec![UnboundPartitionField {
342-
source_id: 2,
343-
name: "year".to_string(),
344-
field_id: Some(1000),
345-
transform: Transform::Year,
346-
}])
341+
.add_partition_field("date", "year", Transform::Year)
347342
.unwrap()
348343
.build()
349344
.unwrap()
350-
.into_schemaless();
345+
.into_unbound();
351346

352347
let arc_partition_spec = Arc::new(partition_spec);
353348

@@ -374,16 +369,11 @@ mod tests {
374369

375370
let partition_spec = BoundPartitionSpec::builder(arc_schema.clone())
376371
.with_spec_id(1)
377-
.add_unbound_fields(vec![UnboundPartitionField {
378-
source_id: 2,
379-
name: "month".to_string(),
380-
field_id: Some(1000),
381-
transform: Transform::Month,
382-
}])
372+
.add_partition_field("date", "month", Transform::Month)
383373
.unwrap()
384374
.build()
385375
.unwrap()
386-
.into_schemaless();
376+
.into_unbound();
387377

388378
let arc_partition_spec = Arc::new(partition_spec);
389379

@@ -410,16 +400,11 @@ mod tests {
410400

411401
let partition_spec = BoundPartitionSpec::builder(arc_schema.clone())
412402
.with_spec_id(1)
413-
.add_unbound_fields(vec![UnboundPartitionField {
414-
source_id: 2,
415-
name: "day".to_string(),
416-
field_id: Some(1000),
417-
transform: Transform::Day,
418-
}])
403+
.add_partition_field("date", "day", Transform::Day)
419404
.unwrap()
420405
.build()
421406
.unwrap()
422-
.into_schemaless();
407+
.into_unbound();
423408

424409
let arc_partition_spec = Arc::new(partition_spec);
425410

@@ -447,7 +432,7 @@ mod tests {
447432
let partition_spec = BoundPartitionSpec::builder(arc_schema.clone())
448433
.with_spec_id(1)
449434
.add_unbound_field(
450-
UnboundPartitionField::builder()
435+
PartitionField::builder()
451436
.source_id(3)
452437
.name("name_truncate".to_string())
453438
.field_id(3)
@@ -457,7 +442,7 @@ mod tests {
457442
.unwrap()
458443
.build()
459444
.unwrap()
460-
.into_schemaless();
445+
.into_unbound();
461446

462447
let arc_partition_spec = Arc::new(partition_spec);
463448

@@ -488,7 +473,7 @@ mod tests {
488473
let partition_spec = BoundPartitionSpec::builder(arc_schema.clone())
489474
.with_spec_id(1)
490475
.add_unbound_field(
491-
UnboundPartitionField::builder()
476+
PartitionField::builder()
492477
.source_id(1)
493478
.name("a_bucket[7]".to_string())
494479
.field_id(1)
@@ -498,7 +483,7 @@ mod tests {
498483
.unwrap()
499484
.build()
500485
.unwrap()
501-
.into_schemaless();
486+
.into_unbound();
502487

503488
let arc_partition_spec = Arc::new(partition_spec);
504489

crates/iceberg/src/spec/manifest.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,7 @@ impl ManifestMetadata {
775775
.unwrap_or(0);
776776
BoundPartitionSpec::builder(schema.clone())
777777
.with_spec_id(spec_id)
778-
.add_unbound_fields(fields.into_iter().map(|f| f.into_unbound()))?
778+
.add_unbound_fields(fields)?
779779
.build()?
780780
};
781781
let format_version = if let Some(bs) = meta.get("format-version") {

0 commit comments

Comments
 (0)