Skip to content

Commit ca9de89

Browse files
authored
feat: add Struct Accessors to BoundReferences (#317)
1 parent 4e89ac7 commit ca9de89

File tree

6 files changed

+322
-7
lines changed

6 files changed

+322
-7
lines changed
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::spec::{Datum, Literal, PrimitiveType, Struct};
19+
use crate::{Error, ErrorKind};
20+
use serde_derive::{Deserialize, Serialize};
21+
use std::sync::Arc;
22+
23+
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
24+
pub struct StructAccessor {
25+
position: usize,
26+
r#type: PrimitiveType,
27+
inner: Option<Box<StructAccessor>>,
28+
}
29+
30+
/// Shared, reference-counted (`Arc`) handle to a [`StructAccessor`].
pub(crate) type StructAccessorRef = Arc<StructAccessor>;
31+
32+
impl StructAccessor {
33+
pub(crate) fn new(position: usize, r#type: PrimitiveType) -> Self {
34+
StructAccessor {
35+
position,
36+
r#type,
37+
inner: None,
38+
}
39+
}
40+
41+
pub(crate) fn wrap(position: usize, inner: Box<StructAccessor>) -> Self {
42+
StructAccessor {
43+
position,
44+
r#type: inner.r#type().clone(),
45+
inner: Some(inner),
46+
}
47+
}
48+
49+
pub(crate) fn position(&self) -> usize {
50+
self.position
51+
}
52+
53+
pub(crate) fn r#type(&self) -> &PrimitiveType {
54+
&self.r#type
55+
}
56+
57+
pub(crate) fn get<'a>(&'a self, container: &'a Struct) -> crate::Result<Datum> {
58+
match &self.inner {
59+
None => {
60+
if let Literal::Primitive(literal) = &container[self.position] {
61+
Ok(Datum::new(self.r#type().clone(), literal.clone()))
62+
} else {
63+
Err(Error::new(
64+
ErrorKind::Unexpected,
65+
"Expected Literal to be Primitive",
66+
))
67+
}
68+
}
69+
Some(inner) => {
70+
if let Literal::Struct(wrapped) = &container[self.position] {
71+
inner.get(wrapped)
72+
} else {
73+
Err(Error::new(
74+
ErrorKind::Unexpected,
75+
"Nested accessor should only be wrapping a Struct",
76+
))
77+
}
78+
}
79+
}
80+
}
81+
}
82+
83+
#[cfg(test)]
mod tests {
    use crate::expr::accessor::StructAccessor;
    use crate::spec::{Datum, Literal, PrimitiveType, Struct};

    /// A leaf accessor reads the primitive stored at its own position.
    #[test]
    fn test_single_level_accessor() {
        let accessor = StructAccessor::new(1, PrimitiveType::Boolean);

        assert_eq!(accessor.r#type(), &PrimitiveType::Boolean);
        assert_eq!(accessor.position(), 1);

        let test_struct =
            Struct::from_iter(vec![Some(Literal::bool(false)), Some(Literal::bool(true))]);

        assert_eq!(accessor.get(&test_struct).unwrap(), Datum::bool(true));
    }

    /// A wrapping accessor steps into the nested struct at its position and
    /// delegates to the inner accessor.
    #[test]
    fn test_nested_accessor() {
        let nested_accessor = StructAccessor::new(1, PrimitiveType::Boolean);
        let accessor = StructAccessor::wrap(2, Box::new(nested_accessor));

        // The wrapper reports the inner accessor's type but its own position.
        assert_eq!(accessor.r#type(), &PrimitiveType::Boolean);
        assert_eq!(accessor.position(), 2);

        let nested_test_struct =
            Struct::from_iter(vec![Some(Literal::bool(false)), Some(Literal::bool(true))]);

        let test_struct = Struct::from_iter(vec![
            Some(Literal::bool(false)),
            Some(Literal::bool(false)),
            Some(Literal::Struct(nested_test_struct)),
        ]);

        assert_eq!(accessor.get(&test_struct).unwrap(), Datum::bool(true));
    }
}

crates/iceberg/src/expr/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ mod term;
2222
use std::fmt::{Display, Formatter};
2323

2424
pub use term::*;
25+
pub(crate) mod accessor;
2526
mod predicate;
2627

2728
use crate::spec::SchemaRef;

crates/iceberg/src/expr/term.rs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use std::fmt::{Display, Formatter};
2121

2222
use fnv::FnvHashSet;
2323

24+
use crate::expr::accessor::{StructAccessor, StructAccessorRef};
2425
use crate::expr::Bind;
2526
use crate::expr::{BinaryExpression, Predicate, PredicateOperator, SetExpression, UnaryExpression};
2627
use crate::spec::{Datum, NestedField, NestedFieldRef, SchemaRef};
@@ -333,7 +334,19 @@ impl Bind for Reference {
333334
format!("Field {} not found in schema", self.name),
334335
)
335336
})?;
336-
Ok(BoundReference::new(self.name.clone(), field.clone()))
337+
338+
let accessor = schema.accessor_by_field_id(field.id).ok_or_else(|| {
339+
Error::new(
340+
ErrorKind::DataInvalid,
341+
format!("Accessor for Field {} not found", self.name),
342+
)
343+
})?;
344+
345+
Ok(BoundReference::new(
346+
self.name.clone(),
347+
field.clone(),
348+
accessor.clone(),
349+
))
337350
}
338351
}
339352

@@ -344,21 +357,32 @@ pub struct BoundReference {
344357
// For example, if the field is `a.b.c`, then `field.name` is `c`, but `original_name` is `a.b.c`.
345358
column_name: String,
346359
field: NestedFieldRef,
360+
accessor: StructAccessorRef,
347361
}
348362

349363
impl BoundReference {
350364
/// Creates a new bound reference.
351-
pub fn new(name: impl Into<String>, field: NestedFieldRef) -> Self {
365+
pub fn new(
366+
name: impl Into<String>,
367+
field: NestedFieldRef,
368+
accessor: StructAccessorRef,
369+
) -> Self {
352370
Self {
353371
column_name: name.into(),
354372
field,
373+
accessor,
355374
}
356375
}
357376

358377
/// Return the field of this reference.
359378
pub fn field(&self) -> &NestedField {
360379
&self.field
361380
}
381+
382+
/// Get this BoundReference's Accessor
383+
pub fn accessor(&self) -> &StructAccessor {
384+
&self.accessor
385+
}
362386
}
363387

364388
impl Display for BoundReference {
@@ -374,6 +398,7 @@ pub type BoundTerm = BoundReference;
374398
mod tests {
375399
use std::sync::Arc;
376400

401+
use crate::expr::accessor::StructAccessor;
377402
use crate::expr::{Bind, BoundReference, Reference};
378403
use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type};
379404

@@ -397,9 +422,11 @@ mod tests {
397422
let schema = table_schema_simple();
398423
let reference = Reference::new("bar").bind(schema, true).unwrap();
399424

425+
let accessor_ref = Arc::new(StructAccessor::new(1, PrimitiveType::Int));
400426
let expected_ref = BoundReference::new(
401427
"bar",
402428
NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
429+
accessor_ref.clone(),
403430
);
404431

405432
assert_eq!(expected_ref, reference);
@@ -410,9 +437,11 @@ mod tests {
410437
let schema = table_schema_simple();
411438
let reference = Reference::new("BAR").bind(schema, false).unwrap();
412439

440+
let accessor_ref = Arc::new(StructAccessor::new(1, PrimitiveType::Int));
413441
let expected_ref = BoundReference::new(
414442
"BAR",
415443
NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
444+
accessor_ref.clone(),
416445
);
417446

418447
assert_eq!(expected_ref, reference);

0 commit comments

Comments
 (0)