Skip to content

Commit e135b3a (parent: 3c643ec)

refactor: pass an Arc of the field_ids to the FileScanTaskContext rather than cloning

File tree

1 file changed: +10 additions, −7 deletions

crates/iceberg/src/arrow/reader.rs

Lines changed: 10 additions & 7 deletions
```diff
@@ -54,7 +54,7 @@ const CONCURRENCY_LIMIT_TASKS: usize = 10;

 /// Builder to create ArrowReader
 pub struct ArrowReaderBuilder {
     batch_size: Option<usize>,
-    field_ids: Vec<usize>,
+    field_ids: Arc<Vec<usize>>,
     file_io: FileIO,
     schema: SchemaRef,
     predicate: Option<BoundPredicate>,
@@ -65,7 +65,7 @@ impl ArrowReaderBuilder {
     pub fn new(file_io: FileIO, schema: SchemaRef) -> Self {
         ArrowReaderBuilder {
             batch_size: None,
-            field_ids: vec![],
+            field_ids: Arc::new(vec![]),
             file_io,
             schema,
             predicate: None,
@@ -81,7 +81,10 @@ impl ArrowReaderBuilder {

     /// Sets the desired column projection with a list of field ids.
     pub fn with_field_ids(mut self, field_ids: impl IntoIterator<Item = usize>) -> Self {
-        self.field_ids = field_ids.into_iter().collect();
+        let field_ids = field_ids.into_iter().collect();
+        let field_ids_arc = Arc::new(field_ids);
+        self.field_ids = field_ids_arc;
+
         self
     }

@@ -106,7 +109,7 @@ impl ArrowReaderBuilder {

 /// Reads data from Parquet files
 pub struct ArrowReader {
     batch_size: Option<usize>,
-    field_ids: Vec<usize>,
+    field_ids: Arc<Vec<usize>>,
     #[allow(dead_code)]
     schema: SchemaRef,
     file_io: FileIO,
@@ -207,7 +210,7 @@ struct FileScanTaskContext {
     file_io: FileIO,
     sender: Sender<Result<RecordBatch>>,
     batch_size: Option<usize>,
-    field_ids: Vec<usize>,
+    field_ids: Arc<Vec<usize>>,
     schema: SchemaRef,
     predicate: Option<BoundPredicate>,
 }
@@ -218,7 +221,7 @@ impl FileScanTaskContext {
         file_io: FileIO,
         sender: Sender<Result<RecordBatch>>,
         batch_size: Option<usize>,
-        field_ids: Vec<usize>,
+        field_ids: Arc<Vec<usize>>,
         schema: SchemaRef,
         predicate: Option<BoundPredicate>,
     ) -> Self {
@@ -294,7 +297,7 @@ impl FileScanTaskContext {
         }

         let mut indices = vec![];
-        for field_id in &self.field_ids {
+        for field_id in self.field_ids.as_ref() {
             if let Some(col_idx) = column_map.get(&(*field_id as i32)) {
                 indices.push(*col_idx);
             } else {
```

0 commit comments

Comments (0)