Skip to content

Commit 3ababc3

Browse files
committed
add test case for deeply nested struct
1 parent 80db872 commit 3ababc3

File tree

1 file changed

+62
-17
lines changed

1 file changed

+62
-17
lines changed

crates/iceberg/src/writer/base_writer/equality_delete_writer.rs

Lines changed: 62 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -167,15 +167,17 @@ impl<B: FileWriterBuilder> IcebergWriter for EqualityDeleteFileWriter<B> {
167167

168168
#[cfg(test)]
169169
mod test {
170+
use std::collections::HashMap;
170171
use std::sync::Arc;
171172

172173
use arrow_array::types::Int32Type;
173174
use arrow_array::{ArrayRef, BooleanArray, Int32Array, Int64Array, RecordBatch, StructArray};
174175
use arrow_buffer::NullBuffer;
175-
use arrow_schema::DataType;
176+
use arrow_schema::{DataType, Field, Fields};
176177
use arrow_select::concat::concat_batches;
177178
use itertools::Itertools;
178179
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
180+
use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
179181
use parquet::file::properties::WriterProperties;
180182
use tempfile::TempDir;
181183
use uuid::Uuid;
@@ -657,7 +659,7 @@ mod test {
657659
#[tokio::test]
658660
async fn test_equality_delete_with_nullable_field() -> Result<(), anyhow::Error> {
659661
// prepare data
660-
// Int, Struct(Int)
662+
// Int, Struct(Int), Struct(Struct(Int))
661663
let schema = Schema::builder()
662664
.with_schema_id(1)
663665
.with_fields(vec![
@@ -673,28 +675,70 @@ mod test {
673675
.into()])),
674676
)
675677
.into(),
678+
NestedField::optional(
679+
3,
680+
"col2",
681+
Type::Struct(StructType::new(vec![NestedField::optional(
682+
4,
683+
"sub_struct_col",
684+
Type::Struct(StructType::new(vec![NestedField::optional(
685+
5,
686+
"sub_sub_col",
687+
Type::Primitive(PrimitiveType::Int),
688+
)
689+
.into()])),
690+
)
691+
.into()])),
692+
)
693+
.into(),
676694
])
677695
.build()
678696
.unwrap();
679697
let arrow_schema = Arc::new(schema_to_arrow_schema(&schema).unwrap());
680-
// null 1
681-
// 2 null(struct)
682-
// 3 null(field)
698+
// null 1 null(struct)
699+
// 2 null(struct) null(sub_struct_col)
700+
// 3 null(field) null(sub_sub_col)
683701
let col0 = Arc::new(Int32Array::from(vec![None, Some(2), Some(3)])) as ArrayRef;
684-
let nulls = NullBuffer::from(vec![true, false, true]);
685-
let col1 = Arc::new(StructArray::new(
686-
if let DataType::Struct(fields) = arrow_schema.fields.get(1).unwrap().data_type() {
687-
fields.clone()
688-
} else {
689-
unreachable!()
690-
},
691-
vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), None]))],
692-
Some(nulls),
693-
));
694-
let columns = vec![col0, col1];
702+
let col1 = {
703+
let nulls = NullBuffer::from(vec![true, false, true]);
704+
Arc::new(StructArray::new(
705+
if let DataType::Struct(fields) = arrow_schema.fields.get(1).unwrap().data_type() {
706+
fields.clone()
707+
} else {
708+
unreachable!()
709+
},
710+
vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), None]))],
711+
Some(nulls),
712+
))
713+
};
714+
let col2 = {
715+
let inner_col = {
716+
let nulls = NullBuffer::from(vec![true, false, true]);
717+
Arc::new(StructArray::new(
718+
Fields::from(vec![Field::new("sub_sub_col", DataType::Int32, true)
719+
.with_metadata(HashMap::from([(
720+
PARQUET_FIELD_ID_META_KEY.to_string(),
721+
"5".to_string(),
722+
)]))]),
723+
vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), None]))],
724+
Some(nulls),
725+
))
726+
};
727+
let nulls = NullBuffer::from(vec![false, true, true]);
728+
Arc::new(StructArray::new(
729+
if let DataType::Struct(fields) = arrow_schema.fields.get(2).unwrap().data_type() {
730+
fields.clone()
731+
} else {
732+
unreachable!()
733+
},
734+
vec![inner_col],
735+
Some(nulls),
736+
))
737+
};
738+
let columns = vec![col0, col1, col2];
695739

696740
let to_write = RecordBatch::try_new(arrow_schema.clone(), columns).unwrap();
697-
let equality_ids = vec![0_i32, 2];
741+
let equality_ids = vec![0_i32, 2, 5];
698742
let equality_config =
699743
EqualityDeleteWriterConfig::new(equality_ids, Arc::new(schema), None).unwrap();
700744
let projector = equality_config.projector.clone();
@@ -705,6 +749,7 @@ mod test {
705749
RecordBatch::try_new(equality_config.projected_arrow_schema_ref().clone(), vec![
706750
Arc::new(Int32Array::from(vec![None, Some(2), Some(3)])) as ArrayRef,
707751
Arc::new(Int32Array::from(vec![Some(1), None, None])) as ArrayRef,
752+
Arc::new(Int32Array::from(vec![None, None, None])) as ArrayRef,
708753
])
709754
.unwrap();
710755
assert_eq!(to_write_projected, expect_batch);

0 commit comments

Comments
 (0)