@@ -24,14 +24,14 @@ use ordered_float::OrderedFloat;
2424use parquet:: arrow:: arrow_reader:: { RowSelection , RowSelector } ;
2525use parquet:: file:: metadata:: RowGroupMetaData ;
2626use parquet:: file:: page_index:: index:: Index ;
27- use parquet:: format :: PageLocation ;
27+ use parquet:: file :: page_index :: offset_index :: OffsetIndexMetaData ;
2828
2929use crate :: expr:: visitors:: bound_predicate_visitor:: { visit, BoundPredicateVisitor } ;
3030use crate :: expr:: { BoundPredicate , BoundReference } ;
3131use crate :: spec:: { Datum , PrimitiveLiteral , PrimitiveType , Schema } ;
3232use crate :: { Error , ErrorKind , Result } ;
3333
34- type OffsetIndex = Vec < Vec < PageLocation > > ;
34+ type OffsetIndex = Vec < OffsetIndexMetaData > ;
3535
3636const IN_PREDICATE_LIMIT : usize = 200 ;
3737
@@ -206,13 +206,14 @@ impl<'a> PageIndexEvaluator<'a> {
206206 }
207207
208208 /// returns a list of row counts per page
209- fn calc_row_counts ( & self , offset_index : & [ PageLocation ] ) -> Vec < usize > {
209+ fn calc_row_counts ( & self , offset_index : & OffsetIndexMetaData ) -> Vec < usize > {
210210 let mut remaining_rows = self . row_group_metadata . num_rows ( ) as usize ;
211211 let mut row_counts = Vec :: with_capacity ( self . offset_index . len ( ) ) ;
212212
213- for ( idx, page_location) in offset_index. iter ( ) . enumerate ( ) {
214- let row_count = if idx < offset_index. len ( ) - 1 {
215- let row_count = ( offset_index[ idx + 1 ] . first_row_index
213+ let page_locations = offset_index. page_locations ( ) ;
214+ for ( idx, page_location) in page_locations. iter ( ) . enumerate ( ) {
215+ let row_count = if idx < page_locations. len ( ) - 1 {
216+ let row_count = ( page_locations[ idx + 1 ] . first_row_index
216217 - page_location. first_row_index ) as usize ;
217218 remaining_rows -= row_count;
218219 row_count
@@ -868,6 +869,7 @@ mod tests {
868869 use parquet:: data_type:: ByteArray ;
869870 use parquet:: file:: metadata:: { ColumnChunkMetaData , RowGroupMetaData } ;
870871 use parquet:: file:: page_index:: index:: { Index , NativeIndex , PageIndex } ;
872+ use parquet:: file:: page_index:: offset_index:: OffsetIndexMetaData ;
871873 use parquet:: file:: statistics:: Statistics ;
872874 use parquet:: format:: { BoundaryOrder , PageLocation } ;
873875 use parquet:: schema:: types:: {
@@ -1417,28 +1419,36 @@ mod tests {
14171419 Ok ( row_group_metadata?)
14181420 }
14191421
1420- fn create_page_index ( ) -> Result < ( Vec < Index > , Vec < Vec < PageLocation > > ) > {
1422+ fn create_page_index ( ) -> Result < ( Vec < Index > , Vec < OffsetIndexMetaData > ) > {
14211423 let idx_float = Index :: FLOAT ( NativeIndex :: < f32 > {
14221424 indexes : vec ! [
14231425 PageIndex {
14241426 min: None ,
14251427 max: None ,
14261428 null_count: Some ( 1024 ) ,
1429+ repetition_level_histogram: None ,
1430+ definition_level_histogram: None ,
14271431 } ,
14281432 PageIndex {
14291433 min: Some ( 0.0 ) ,
14301434 max: Some ( 10.0 ) ,
14311435 null_count: Some ( 0 ) ,
1436+ repetition_level_histogram: None ,
1437+ definition_level_histogram: None ,
14321438 } ,
14331439 PageIndex {
14341440 min: Some ( 10.0 ) ,
14351441 max: Some ( 20.0 ) ,
14361442 null_count: Some ( 1 ) ,
1443+ repetition_level_histogram: None ,
1444+ definition_level_histogram: None ,
14371445 } ,
14381446 PageIndex {
14391447 min: None ,
14401448 max: None ,
14411449 null_count: None ,
1450+ repetition_level_histogram: None ,
1451+ definition_level_histogram: None ,
14421452 } ,
14431453 ] ,
14441454 boundary_order : BoundaryOrder ( 0 ) , // UNORDERED
@@ -1450,26 +1460,36 @@ mod tests {
14501460 min: Some ( "AA" . into( ) ) ,
14511461 max: Some ( "DD" . into( ) ) ,
14521462 null_count: Some ( 0 ) ,
1463+ repetition_level_histogram: None ,
1464+ definition_level_histogram: None ,
14531465 } ,
14541466 PageIndex {
14551467 min: Some ( "DE" . into( ) ) ,
14561468 max: Some ( "DE" . into( ) ) ,
14571469 null_count: Some ( 0 ) ,
1470+ repetition_level_histogram: None ,
1471+ definition_level_histogram: None ,
14581472 } ,
14591473 PageIndex {
14601474 min: Some ( "DF" . into( ) ) ,
14611475 max: Some ( "UJ" . into( ) ) ,
14621476 null_count: Some ( 1 ) ,
1477+ repetition_level_histogram: None ,
1478+ definition_level_histogram: None ,
14631479 } ,
14641480 PageIndex {
14651481 min: None ,
14661482 max: None ,
14671483 null_count: Some ( 48 ) ,
1484+ repetition_level_histogram: None ,
1485+ definition_level_histogram: None ,
14681486 } ,
14691487 PageIndex {
14701488 min: None ,
14711489 max: None ,
14721490 null_count: None ,
1491+ repetition_level_histogram: None ,
1492+ definition_level_histogram: None ,
14731493 } ,
14741494 ] ,
14751495 boundary_order : BoundaryOrder ( 0 ) , // UNORDERED
@@ -1491,8 +1511,14 @@ mod tests {
14911511 ] ;
14921512
14931513 Ok ( ( vec ! [ idx_float, idx_string] , vec ! [
1494- page_locs_float,
1495- page_locs_string,
1514+ OffsetIndexMetaData {
1515+ page_locations: page_locs_float,
1516+ unencoded_byte_array_data_bytes: None ,
1517+ } ,
1518+ OffsetIndexMetaData {
1519+ page_locations: page_locs_string,
1520+ unencoded_byte_array_data_bytes: None ,
1521+ } ,
14961522 ] ) )
14971523 }
14981524}
0 commit comments