Skip to content

Commit 18dd9f9

Browse files
committed
test(row-selection): add first few row selection tests
1 parent ccc0c07 commit 18dd9f9

File tree

1 file changed

+181
-2
lines changed

1 file changed

+181
-2
lines changed

crates/iceberg/src/expr/visitors/page_index_evaluator.rs

Lines changed: 181 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,7 @@ impl BoundPredicateVisitor for PageIndexEvaluator<'_> {
760760
/// returned: NYYYYYNNYYNYN
761761
///
762762
/// This can be removed from here once RowSelection::union is in parquet::arrow
763+
/// (Hopefully once https://github.com/apache/arrow-rs/pull/6308 gets merged)
763764
fn union_row_selections(left: &RowSelection, right: &RowSelection) -> RowSelection {
764765
let mut l_iter = left.iter().copied().peekable();
765766
let mut r_iter = right.iter().copied().peekable();
@@ -840,9 +841,24 @@ fn union_row_selections(left: &RowSelection, right: &RowSelection) -> RowSelecti
840841

841842
#[cfg(test)]
842843
mod tests {
843-
use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
844+
use std::collections::HashMap;
845+
use std::sync::Arc;
844846

845-
use crate::expr::visitors::page_index_evaluator::union_row_selections;
847+
use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
848+
use parquet::basic::{LogicalType as ParquetLogicalType, Type as ParquetPhysicalType};
849+
use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData};
850+
use parquet::file::page_index::index::{Index, NativeIndex};
851+
use parquet::file::statistics::Statistics;
852+
use parquet::format::{BoundaryOrder, PageLocation};
853+
use parquet::schema::types::{
854+
ColumnDescriptor, ColumnPath, SchemaDescriptor, Type as parquetSchemaType,
855+
};
856+
857+
use super::{union_row_selections, PageIndexEvaluator};
858+
// use rand::{thread_rng, Rng};
859+
use crate::expr::{Bind, Reference};
860+
use crate::spec::{Datum, NestedField, PrimitiveType, Schema, Type};
861+
use crate::Result;
846862

847863
#[test]
848864
fn test_union_row_selections() {
@@ -873,4 +889,167 @@ mod tests {
873889
&RowSelector::select(40)
874890
]);
875891
}
892+
893+
/// A row group built with zero rows (and zero values in both columns) must
/// produce an empty selection when evaluated against `col_float > 1.0`.
#[test]
fn eval_matches_no_rows_for_empty_row_group() -> Result<()> {
    // Fixtures: an empty row group, the page index, and the Iceberg schema
    // together with its field-id map.
    let empty_row_group = create_row_group_metadata(0, 0, None, 0, None)?;
    let (column_index, offset_index) = create_page_index()?;
    let (schema, field_id_map) = build_iceberg_schema_and_field_map()?;

    // Build the predicate first, then bind it to the schema.
    let unbound = Reference::new("col_float").greater_than(Datum::float(1.0));
    let filter = unbound.bind(Arc::clone(&schema), false)?;

    let result = PageIndexEvaluator::eval(
        &filter,
        &column_index,
        &offset_index,
        &empty_row_group,
        &field_id_map,
        schema.as_ref(),
    )?;

    // Nothing to select: the expected selection is empty.
    let expected = vec![];
    assert_eq!(result, expected);

    Ok(())
}
919+
920+
/// Evaluates an `is_null` filter on `col_float` and asserts that the
/// resulting selection is empty.
///
/// NOTE(review): the test name says "select all rows", yet the fixture is a
/// row group with zero rows and the expected selection is empty, so this
/// currently covers the same empty-row-group scenario as
/// `eval_matches_no_rows_for_empty_row_group` — confirm the intended fixture
/// and expectation.
#[test]
fn eval_is_null_none_null_select_all_rows() -> Result<()> {
    let empty_row_group = create_row_group_metadata(0, 0, None, 0, None)?;
    let (column_index, offset_index) = create_page_index()?;
    let (schema, field_id_map) = build_iceberg_schema_and_field_map()?;

    // Build the predicate first, then bind it to the schema.
    let unbound = Reference::new("col_float").is_null();
    let filter = unbound.bind(Arc::clone(&schema), false)?;

    let result = PageIndexEvaluator::eval(
        &filter,
        &column_index,
        &offset_index,
        &empty_row_group,
        &field_id_map,
        schema.as_ref(),
    )?;

    let expected = vec![];
    assert_eq!(result, expected);

    Ok(())
}
946+
947+
fn build_iceberg_schema_and_field_map() -> Result<(Arc<Schema>, HashMap<i32, usize>)> {
948+
let iceberg_schema = Schema::builder()
949+
.with_fields([
950+
Arc::new(NestedField::new(
951+
1,
952+
"col_float",
953+
Type::Primitive(PrimitiveType::Float),
954+
false,
955+
)),
956+
Arc::new(NestedField::new(
957+
2,
958+
"col_string",
959+
Type::Primitive(PrimitiveType::String),
960+
false,
961+
)),
962+
])
963+
.build()?;
964+
let iceberg_schema_ref = Arc::new(iceberg_schema);
965+
966+
let field_id_map = HashMap::from_iter([(1, 0), (2, 1)]);
967+
968+
Ok((iceberg_schema_ref, field_id_map))
969+
}
970+
971+
fn build_parquet_schema_descriptor() -> Result<Arc<SchemaDescriptor>> {
972+
let field_1 = Arc::new(
973+
parquetSchemaType::primitive_type_builder("col_float", ParquetPhysicalType::FLOAT)
974+
.with_id(Some(1))
975+
.build()?,
976+
);
977+
978+
let field_2 = Arc::new(
979+
parquetSchemaType::primitive_type_builder(
980+
"col_string",
981+
ParquetPhysicalType::BYTE_ARRAY,
982+
)
983+
.with_id(Some(2))
984+
.with_logical_type(Some(ParquetLogicalType::String))
985+
.build()?,
986+
);
987+
988+
let group_type = Arc::new(
989+
parquetSchemaType::group_type_builder("all")
990+
.with_id(Some(1000))
991+
.with_fields(vec![field_1, field_2])
992+
.build()?,
993+
);
994+
995+
let schema_descriptor = SchemaDescriptor::new(group_type);
996+
let schema_descriptor_arc = Arc::new(schema_descriptor);
997+
Ok(schema_descriptor_arc)
998+
}
999+
1000+
fn create_row_group_metadata(
1001+
num_rows: i64,
1002+
col_1_num_vals: i64,
1003+
col_1_stats: Option<Statistics>,
1004+
col_2_num_vals: i64,
1005+
col_2_stats: Option<Statistics>,
1006+
) -> Result<RowGroupMetaData> {
1007+
let schema_descriptor_arc = build_parquet_schema_descriptor()?;
1008+
1009+
let column_1_desc_ptr = Arc::new(ColumnDescriptor::new(
1010+
schema_descriptor_arc.column(0).self_type_ptr(),
1011+
1,
1012+
1,
1013+
ColumnPath::new(vec!["col_float".to_string()]),
1014+
));
1015+
1016+
let column_2_desc_ptr = Arc::new(ColumnDescriptor::new(
1017+
schema_descriptor_arc.column(1).self_type_ptr(),
1018+
1,
1019+
1,
1020+
ColumnPath::new(vec!["col_string".to_string()]),
1021+
));
1022+
1023+
let mut col_1_meta =
1024+
ColumnChunkMetaData::builder(column_1_desc_ptr).set_num_values(col_1_num_vals);
1025+
if let Some(stats1) = col_1_stats {
1026+
col_1_meta = col_1_meta.set_statistics(stats1)
1027+
}
1028+
1029+
let mut col_2_meta =
1030+
ColumnChunkMetaData::builder(column_2_desc_ptr).set_num_values(col_2_num_vals);
1031+
if let Some(stats2) = col_2_stats {
1032+
col_2_meta = col_2_meta.set_statistics(stats2)
1033+
}
1034+
1035+
let row_group_metadata = RowGroupMetaData::builder(schema_descriptor_arc)
1036+
.set_num_rows(num_rows)
1037+
.set_column_metadata(vec![
1038+
col_1_meta.build()?,
1039+
// .set_statistics(Statistics::float(None, None, None, 1, false))
1040+
col_2_meta.build()?,
1041+
])
1042+
.build();
1043+
1044+
Ok(row_group_metadata?)
1045+
}
1046+
1047+
fn create_page_index() -> Result<(Vec<Index>, Vec<Vec<PageLocation>>)> {
1048+
let idx = Index::FLOAT(NativeIndex::<f32> {
1049+
indexes: vec![],
1050+
boundary_order: BoundaryOrder(0), // UNORDERED
1051+
});
1052+
1053+
Ok((vec![], vec![]))
1054+
}
8761055
}

0 commit comments

Comments
 (0)