@@ -33,12 +33,12 @@ use arrow_string::like::starts_with;
3333use bytes:: Bytes ;
3434use fnv:: FnvHashSet ;
3535use futures:: future:: BoxFuture ;
36- use futures:: { FutureExt , StreamExt , TryFutureExt , TryStreamExt , try_join } ;
36+ use futures:: { try_join , FutureExt , StreamExt , TryFutureExt , TryStreamExt } ;
3737use parquet:: arrow:: arrow_reader:: {
3838 ArrowPredicateFn , ArrowReaderOptions , RowFilter , RowSelection , RowSelector ,
3939} ;
4040use parquet:: arrow:: async_reader:: AsyncFileReader ;
41- use parquet:: arrow:: { PARQUET_FIELD_ID_META_KEY , ParquetRecordBatchStreamBuilder , ProjectionMask } ;
41+ use parquet:: arrow:: { ParquetRecordBatchStreamBuilder , ProjectionMask , PARQUET_FIELD_ID_META_KEY } ;
4242use parquet:: file:: metadata:: { ParquetMetaData , ParquetMetaDataReader , RowGroupMetaData } ;
4343use parquet:: schema:: types:: { SchemaDescriptor , Type as ParquetType } ;
4444
@@ -47,13 +47,13 @@ use crate::arrow::record_batch_transformer::RecordBatchTransformer;
4747use crate :: arrow:: { arrow_schema_to_schema, get_arrow_datum} ;
4848use crate :: delete_vector:: DeleteVector ;
4949use crate :: error:: Result ;
50- use crate :: expr:: visitors:: bound_predicate_visitor:: { BoundPredicateVisitor , visit } ;
50+ use crate :: expr:: visitors:: bound_predicate_visitor:: { visit , BoundPredicateVisitor } ;
5151use crate :: expr:: visitors:: page_index_evaluator:: PageIndexEvaluator ;
5252use crate :: expr:: visitors:: row_group_metrics_evaluator:: RowGroupMetricsEvaluator ;
5353use crate :: expr:: { BoundPredicate , BoundReference } ;
5454use crate :: io:: { FileIO , FileMetadata , FileRead } ;
5555use crate :: scan:: { ArrowRecordBatchStream , FileScanTask , FileScanTaskStream } ;
56- use crate :: spec:: { Datum , NestedField , PrimitiveType , Schema , Type } ;
56+ use crate :: spec:: { DataContentType , Datum , NestedField , PrimitiveType , Schema , Type } ;
5757use crate :: utils:: available_parallelism;
5858use crate :: { Error , ErrorKind } ;
5959
@@ -312,13 +312,16 @@ impl ArrowReader {
312312
313313 // Build the batch stream and send all the RecordBatches that it generates
314314 // to the requester.
315- let record_batch_stream =
316- record_batch_stream_builder
317- . build ( ) ?
318- . map ( move |batch| match batch {
315+ let record_batch_stream = record_batch_stream_builder. build ( ) ?. map ( move |batch| {
316+ if matches ! ( task. data_file_content, DataContentType :: PositionDeletes ) {
317+ Ok ( batch?)
318+ } else {
319+ match batch {
319320 Ok ( batch) => record_batch_transformer. process_record_batch ( batch) ,
320321 Err ( err) => Err ( err. into ( ) ) ,
321- } ) ;
322+ }
323+ }
324+ } ) ;
322325
323326 Ok ( Box :: pin ( record_batch_stream) as ArrowRecordBatchStream )
324327 }
@@ -1443,15 +1446,15 @@ mod tests {
14431446 use roaring:: RoaringTreemap ;
14441447 use tempfile:: TempDir ;
14451448
1446- use crate :: ErrorKind ;
14471449 use crate :: arrow:: reader:: { CollectFieldIdVisitor , PARQUET_FIELD_ID_META_KEY } ;
14481450 use crate :: arrow:: { ArrowReader , ArrowReaderBuilder } ;
14491451 use crate :: delete_vector:: DeleteVector ;
14501452 use crate :: expr:: visitors:: bound_predicate_visitor:: visit;
14511453 use crate :: expr:: { Bind , Predicate , Reference } ;
14521454 use crate :: io:: FileIO ;
14531455 use crate :: scan:: { FileScanTask , FileScanTaskStream } ;
1454- use crate :: spec:: { DataFileFormat , Datum , NestedField , PrimitiveType , Schema , SchemaRef , Type } ;
1456+ use crate :: spec:: { DataContentType , DataFileFormat , Datum , NestedField , PrimitiveType , Schema , SchemaRef , Type } ;
1457+ use crate :: ErrorKind ;
14551458
14561459 fn table_schema_simple ( ) -> SchemaRef {
14571460 Arc :: new (
@@ -1740,11 +1743,14 @@ message schema {
17401743 length: 0 ,
17411744 record_count: None ,
17421745 data_file_path: format!( "{}/1.parquet" , table_location) ,
1746+ data_file_content: DataContentType :: Data ,
17431747 data_file_format: DataFileFormat :: Parquet ,
17441748 schema: schema. clone( ) ,
17451749 project_field_ids: vec![ 1 ] ,
17461750 predicate: Some ( predicate. bind( schema, true ) . unwrap( ) ) ,
17471751 deletes: vec![ ] ,
1752+ sequence_number: 0 ,
1753+ equality_ids: vec![ ] ,
17481754 } ) ]
17491755 . into_iter ( ) ,
17501756 ) ) as FileScanTaskStream ;
@@ -1774,19 +1780,25 @@ message schema {
17741780 let schema = Arc :: new (
17751781 Schema :: builder ( )
17761782 . with_schema_id ( 1 )
1777- . with_fields ( vec ! [
1778- NestedField :: optional( 1 , "a" , Type :: Primitive ( PrimitiveType :: String ) ) . into( ) ,
1779- ] )
1783+ . with_fields ( vec ! [ NestedField :: optional(
1784+ 1 ,
1785+ "a" ,
1786+ Type :: Primitive ( PrimitiveType :: String ) ,
1787+ )
1788+ . into( ) ] )
17801789 . build ( )
17811790 . unwrap ( ) ,
17821791 ) ;
17831792
1784- let arrow_schema = Arc :: new ( ArrowSchema :: new ( vec ! [
1785- Field :: new( "a" , col_a_type. clone( ) , true ) . with_metadata( HashMap :: from( [ (
1786- PARQUET_FIELD_ID_META_KEY . to_string( ) ,
1787- "1" . to_string( ) ,
1788- ) ] ) ) ,
1789- ] ) ) ;
1793+ let arrow_schema = Arc :: new ( ArrowSchema :: new ( vec ! [ Field :: new(
1794+ "a" ,
1795+ col_a_type. clone( ) ,
1796+ true ,
1797+ )
1798+ . with_metadata( HashMap :: from( [ (
1799+ PARQUET_FIELD_ID_META_KEY . to_string( ) ,
1800+ "1" . to_string( ) ,
1801+ ) ] ) ) ] ) ) ;
17901802
17911803 let tmp_dir = TempDir :: new ( ) . unwrap ( ) ;
17921804 let table_location = tmp_dir. path ( ) . to_str ( ) . unwrap ( ) . to_string ( ) ;
0 commit comments