@@ -40,7 +40,7 @@ use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMe
4040use parquet:: schema:: types:: { SchemaDescriptor , Type as ParquetType } ;
4141use roaring:: RoaringTreemap ;
4242
43- use crate :: arrow:: delete_file_manager:: DeleteFileManager ;
43+ use crate :: arrow:: delete_file_manager:: CachingDeleteFileManager ;
4444use crate :: arrow:: record_batch_transformer:: RecordBatchTransformer ;
4545use crate :: arrow:: { arrow_schema_to_schema, get_arrow_datum} ;
4646use crate :: error:: Result ;
@@ -106,7 +106,11 @@ impl ArrowReaderBuilder {
106106 pub fn build ( self ) -> ArrowReader {
107107 ArrowReader {
108108 batch_size : self . batch_size ,
109- file_io : self . file_io ,
109+ file_io : self . file_io . clone ( ) ,
110+ delete_file_manager : CachingDeleteFileManager :: new (
111+ self . file_io . clone ( ) ,
112+ self . concurrency_limit_data_files ,
113+ ) ,
110114 concurrency_limit_data_files : self . concurrency_limit_data_files ,
111115 row_group_filtering_enabled : self . row_group_filtering_enabled ,
112116 row_selection_enabled : self . row_selection_enabled ,
@@ -119,6 +123,7 @@ impl ArrowReaderBuilder {
119123pub struct ArrowReader {
120124 batch_size : Option < usize > ,
121125 file_io : FileIO ,
126+ delete_file_manager : CachingDeleteFileManager ,
122127
123128 /// the maximum number of data files that can be fetched at the same time
124129 concurrency_limit_data_files : usize ,
@@ -145,9 +150,9 @@ impl ArrowReader {
145150 task,
146151 batch_size,
147152 file_io,
153+ self . delete_file_manager . clone ( ) ,
148154 row_group_filtering_enabled,
149155 row_selection_enabled,
150- concurrency_limit_data_files,
151156 )
152157 } )
153158 . map_err ( |err| {
@@ -163,20 +168,16 @@ impl ArrowReader {
163168 task : FileScanTask ,
164169 batch_size : Option < usize > ,
165170 file_io : FileIO ,
171+ delete_file_manager : CachingDeleteFileManager ,
166172 row_group_filtering_enabled : bool ,
167173 row_selection_enabled : bool ,
168- concurrency_limit_data_files : usize ,
169174 ) -> Result < ArrowRecordBatchStream > {
170175 let should_load_page_index =
171176 ( row_selection_enabled && task. predicate . is_some ( ) ) || !task. deletes . is_empty ( ) ;
172177
173178 // concurrently retrieve delete files and create RecordBatchStreamBuilder
174- let ( delete_file_manager, mut record_batch_stream_builder) = try_join ! (
175- DeleteFileManager :: load_deletes(
176- task. deletes. clone( ) ,
177- file_io. clone( ) ,
178- concurrency_limit_data_files
179- ) ,
179+ let ( _, mut record_batch_stream_builder) = try_join ! (
180+ delete_file_manager. load_deletes( task. deletes. clone( ) ) ,
180181 Self :: create_parquet_record_batch_stream_builder(
181182 & task. data_file_path,
182183 file_io. clone( ) ,
0 commit comments