Skip to content

Commit 0cc7049

Browse files
Merge branch 'apache:main' into add_equality_delete_writer
2 parents eefc4ae + 1aa05e0 commit 0cc7049

File tree

10 files changed

+508
-244
lines changed

10 files changed

+508
-244
lines changed

crates/iceberg/src/catalog/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ pub enum TableUpdate {
382382
#[serde(rename_all = "kebab-case")]
383383
SetDefaultSortOrder {
384384
/// Sort order ID to set as the default, or -1 to set last added sort order
385-
sort_order_id: i32,
385+
sort_order_id: i64,
386386
},
387387
/// Add snapshot to table.
388388
#[serde(rename_all = "kebab-case")]

crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs

Lines changed: 28 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor};
1919
use crate::expr::{BoundPredicate, BoundReference};
20-
use crate::spec::{DataFile, Datum, Literal, PrimitiveLiteral};
20+
use crate::spec::{DataFile, Datum, PrimitiveLiteral};
2121
use crate::{Error, ErrorKind};
2222
use fnv::FnvHashSet;
2323

@@ -63,11 +63,11 @@ impl<'a> InclusiveMetricsEvaluator<'a> {
6363
self.data_file.value_counts.get(&field_id)
6464
}
6565

66-
fn lower_bound(&self, field_id: i32) -> Option<&Literal> {
66+
fn lower_bound(&self, field_id: i32) -> Option<&Datum> {
6767
self.data_file.lower_bounds.get(&field_id)
6868
}
6969

70-
fn upper_bound(&self, field_id: i32) -> Option<&Literal> {
70+
fn upper_bound(&self, field_id: i32) -> Option<&Datum> {
7171
self.data_file.upper_bounds.get(&field_id)
7272
}
7373

@@ -97,7 +97,7 @@ impl<'a> InclusiveMetricsEvaluator<'a> {
9797
&mut self,
9898
reference: &BoundReference,
9999
datum: &Datum,
100-
cmp_fn: fn(&PrimitiveLiteral, &PrimitiveLiteral) -> bool,
100+
cmp_fn: fn(&Datum, &Datum) -> bool,
101101
use_lower_bound: bool,
102102
) -> crate::Result<bool> {
103103
let field_id = reference.field().id;
@@ -119,14 +119,7 @@ impl<'a> InclusiveMetricsEvaluator<'a> {
119119
};
120120

121121
if let Some(bound) = bound {
122-
let Literal::Primitive(bound) = bound else {
123-
return Err(Error::new(
124-
ErrorKind::Unexpected,
125-
"Inequality Predicates can only compare against a Primitive Literal",
126-
));
127-
};
128-
129-
if cmp_fn(bound, datum.literal()) {
122+
if cmp_fn(bound, datum) {
130123
return ROWS_MIGHT_MATCH;
131124
}
132125

@@ -265,33 +258,21 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
265258
}
266259

267260
if let Some(lower_bound) = self.lower_bound(field_id) {
268-
let Literal::Primitive(lower_bound) = lower_bound else {
269-
return Err(Error::new(
270-
ErrorKind::Unexpected,
271-
"Eq Predicate can only compare against a Primitive Literal",
272-
));
273-
};
274261
if lower_bound.is_nan() {
275262
// NaN indicates unreliable bounds.
276263
// See the InclusiveMetricsEvaluator docs for more.
277264
return ROWS_MIGHT_MATCH;
278-
} else if lower_bound.gt(datum.literal()) {
265+
} else if lower_bound.gt(datum) {
279266
return ROWS_CANNOT_MATCH;
280267
}
281268
}
282269

283270
if let Some(upper_bound) = self.upper_bound(field_id) {
284-
let Literal::Primitive(upper_bound) = upper_bound else {
285-
return Err(Error::new(
286-
ErrorKind::Unexpected,
287-
"Eq Predicate can only compare against a Primitive Literal",
288-
));
289-
};
290271
if upper_bound.is_nan() {
291272
// NaN indicates unreliable bounds.
292273
// See the InclusiveMetricsEvaluator docs for more.
293274
return ROWS_MIGHT_MATCH;
294-
} else if upper_bound.lt(datum.literal()) {
275+
} else if upper_bound.lt(datum) {
295276
return ROWS_CANNOT_MATCH;
296277
}
297278
}
@@ -331,7 +312,7 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
331312
};
332313

333314
if let Some(lower_bound) = self.lower_bound(field_id) {
334-
let Literal::Primitive(PrimitiveLiteral::String(lower_bound)) = lower_bound else {
315+
let PrimitiveLiteral::String(lower_bound) = lower_bound.literal() else {
335316
return Err(Error::new(
336317
ErrorKind::Unexpected,
337318
"Cannot use StartsWith operator on non-string lower_bound value",
@@ -349,7 +330,7 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
349330
}
350331

351332
if let Some(upper_bound) = self.upper_bound(field_id) {
352-
let Literal::Primitive(PrimitiveLiteral::String(upper_bound)) = upper_bound else {
333+
let PrimitiveLiteral::String(upper_bound) = upper_bound.literal() else {
353334
return Err(Error::new(
354335
ErrorKind::Unexpected,
355336
"Cannot use StartsWith operator on non-string upper_bound value",
@@ -395,7 +376,7 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
395376
return ROWS_MIGHT_MATCH;
396377
};
397378

398-
let Literal::Primitive(PrimitiveLiteral::String(lower_bound_str)) = lower_bound else {
379+
let PrimitiveLiteral::String(lower_bound_str) = lower_bound.literal() else {
399380
return Err(Error::new(
400381
ErrorKind::Unexpected,
401382
"Cannot use NotStartsWith operator on non-string lower_bound value",
@@ -416,7 +397,7 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
416397
return ROWS_MIGHT_MATCH;
417398
};
418399

419-
let Literal::Primitive(PrimitiveLiteral::String(upper_bound)) = upper_bound else {
400+
let PrimitiveLiteral::String(upper_bound) = upper_bound.literal() else {
420401
return Err(Error::new(
421402
ErrorKind::Unexpected,
422403
"Cannot use NotStartsWith operator on non-string upper_bound value",
@@ -456,36 +437,24 @@ impl BoundPredicateVisitor for InclusiveMetricsEvaluator<'_> {
456437
}
457438

458439
if let Some(lower_bound) = self.lower_bound(field_id) {
459-
let Literal::Primitive(lower_bound) = lower_bound else {
460-
return Err(Error::new(
461-
ErrorKind::Unexpected,
462-
"Eq Predicate can only compare against a Primitive Literal",
463-
));
464-
};
465440
if lower_bound.is_nan() {
466441
// NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
467442
return ROWS_MIGHT_MATCH;
468443
}
469444

470-
if !literals.iter().any(|datum| datum.literal().ge(lower_bound)) {
445+
if !literals.iter().any(|datum| datum.ge(lower_bound)) {
471446
// if all values are less than lower bound, rows cannot match.
472447
return ROWS_CANNOT_MATCH;
473448
}
474449
}
475450

476451
if let Some(upper_bound) = self.upper_bound(field_id) {
477-
let Literal::Primitive(upper_bound) = upper_bound else {
478-
return Err(Error::new(
479-
ErrorKind::Unexpected,
480-
"Eq Predicate can only compare against a Primitive Literal",
481-
));
482-
};
483452
if upper_bound.is_nan() {
484453
// NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
485454
return ROWS_MIGHT_MATCH;
486455
}
487456

488-
if !literals.iter().any(|datum| datum.literal().le(upper_bound)) {
457+
if !literals.iter().any(|datum| datum.le(upper_bound)) {
489458
// if all values are greater than upper bound, rows cannot match.
490459
return ROWS_CANNOT_MATCH;
491460
}
@@ -519,7 +488,7 @@ mod test {
519488
UnaryExpression,
520489
};
521490
use crate::spec::{
522-
DataContentType, DataFile, DataFileFormat, Datum, Literal, NestedField, PartitionField,
491+
DataContentType, DataFile, DataFileFormat, Datum, NestedField, PartitionField,
523492
PartitionSpec, PrimitiveType, Schema, Struct, Transform, Type,
524493
};
525494
use fnv::FnvHashSet;
@@ -2152,17 +2121,17 @@ mod test {
21522121
nan_value_counts: HashMap::from([(7, 50), (8, 10), (9, 0)]),
21532122

21542123
lower_bounds: HashMap::from([
2155-
(1, Literal::int(INT_MIN_VALUE)),
2156-
(11, Literal::float(f32::NAN)),
2157-
(12, Literal::double(f64::NAN)),
2158-
(14, Literal::string("")),
2124+
(1, Datum::int(INT_MIN_VALUE)),
2125+
(11, Datum::float(f32::NAN)),
2126+
(12, Datum::double(f64::NAN)),
2127+
(14, Datum::string("")),
21592128
]),
21602129

21612130
upper_bounds: HashMap::from([
2162-
(1, Literal::int(INT_MAX_VALUE)),
2163-
(11, Literal::float(f32::NAN)),
2164-
(12, Literal::double(f64::NAN)),
2165-
(14, Literal::string("房东整租霍营小区二层两居室")),
2131+
(1, Datum::int(INT_MAX_VALUE)),
2132+
(11, Datum::float(f32::NAN)),
2133+
(12, Datum::double(f64::NAN)),
2134+
(14, Datum::string("房东整租霍营小区二层两居室")),
21662135
]),
21672136

21682137
column_sizes: Default::default(),
@@ -2187,9 +2156,9 @@ mod test {
21872156

21882157
nan_value_counts: HashMap::default(),
21892158

2190-
lower_bounds: HashMap::from([(3, Literal::string("aa"))]),
2159+
lower_bounds: HashMap::from([(3, Datum::string("aa"))]),
21912160

2192-
upper_bounds: HashMap::from([(3, Literal::string("dC"))]),
2161+
upper_bounds: HashMap::from([(3, Datum::string("dC"))]),
21932162

21942163
column_sizes: Default::default(),
21952164
key_metadata: vec![],
@@ -2214,9 +2183,9 @@ mod test {
22142183

22152184
nan_value_counts: HashMap::default(),
22162185

2217-
lower_bounds: HashMap::from([(3, Literal::string("1str1"))]),
2186+
lower_bounds: HashMap::from([(3, Datum::string("1str1"))]),
22182187

2219-
upper_bounds: HashMap::from([(3, Literal::string("3str3"))]),
2188+
upper_bounds: HashMap::from([(3, Datum::string("3str3"))]),
22202189

22212190
column_sizes: Default::default(),
22222191
key_metadata: vec![],
@@ -2241,9 +2210,9 @@ mod test {
22412210

22422211
nan_value_counts: HashMap::default(),
22432212

2244-
lower_bounds: HashMap::from([(3, Literal::string("abc"))]),
2213+
lower_bounds: HashMap::from([(3, Datum::string("abc"))]),
22452214

2246-
upper_bounds: HashMap::from([(3, Literal::string("イロハニホヘト"))]),
2215+
upper_bounds: HashMap::from([(3, Datum::string("イロハニホヘト"))]),
22472216

22482217
column_sizes: Default::default(),
22492218
key_metadata: vec![],

crates/iceberg/src/spec/datatypes.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,17 @@ impl Type {
118118
matches!(self, Type::Struct(_) | Type::List(_) | Type::Map(_))
119119
}
120120

121+
/// Returns a reference to the inner `PrimitiveType` if this `Type` is primitive, otherwise `None`.
122+
pub fn as_primitive_type(&self) -> Option<&PrimitiveType> {
123+
if let Type::Primitive(primitive_type) = self {
124+
Some(primitive_type)
125+
} else {
126+
None
127+
}
128+
}
129+
121130
/// Consumes this `Type` and returns the inner `StructType` if it is a struct type.
122-
pub fn as_struct_type(self) -> Option<StructType> {
131+
pub fn to_struct_type(self) -> Option<StructType> {
123132
if let Type::Struct(struct_type) = self {
124133
Some(struct_type)
125134
} else {

0 commit comments

Comments
 (0)