Commit 30010ca

fix
1 parent abc1c2a commit 30010ca

2 files changed: +32 -21 lines changed

crates/iceberg/src/arrow.rs

Lines changed: 17 additions & 19 deletions
@@ -32,7 +32,7 @@ use crate::spec::SchemaRef;
 /// Builder to create ArrowReader
 pub struct ArrowReaderBuilder {
     batch_size: Option<usize>,
-    column_names: Vec<String>,
+    columns: Vec<usize>,
     file_io: FileIO,
     schema: SchemaRef,
 }
@@ -42,7 +42,7 @@ impl ArrowReaderBuilder {
     pub fn new(file_io: FileIO, schema: SchemaRef) -> Self {
         ArrowReaderBuilder {
             batch_size: None,
-            column_names: vec![],
+            columns: vec![],
             file_io,
             schema,
         }
@@ -56,16 +56,16 @@ impl ArrowReaderBuilder {
     }
 
     /// Sets the desired column projection.
-    pub fn with_column_projection(mut self, column_names: Vec<String>) -> Self {
-        self.column_names = column_names;
+    pub fn with_column_projection(mut self, columns: Vec<usize>) -> Self {
+        self.columns = columns;
         self
     }
 
     /// Build the ArrowReader.
     pub fn build(self) -> ArrowReader {
         ArrowReader {
             batch_size: self.batch_size,
-            column_names: self.column_names,
+            columns: self.columns,
             schema: self.schema,
             file_io: self.file_io,
         }
@@ -75,7 +75,7 @@ impl ArrowReaderBuilder {
 /// Reads data from Parquet files
 pub struct ArrowReader {
     batch_size: Option<usize>,
-    column_names: Vec<String>,
+    columns: Vec<usize>,
     #[allow(dead_code)]
     schema: SchemaRef,
     file_io: FileIO,
@@ -121,23 +121,21 @@ impl ArrowReader {
         metadata: &Arc<ParquetMetaData>,
         parquet_schema: &ArrowSchemaRef,
     ) -> crate::Result<ProjectionMask> {
-        if self.column_names.is_empty() {
+        if self.columns.is_empty() {
             Ok(ProjectionMask::all())
         } else {
            let mut indices = vec![];
-            for column_name in &self.column_names {
-                match parquet_schema.index_of(column_name) {
-                    Ok(index) => indices.push(index),
-                    Err(_) => {
-                        return Err(Error::new(
-                            ErrorKind::DataInvalid,
-                            format!(
-                                "Column {} not found in table. Schema: {}",
-                                column_name, parquet_schema
-                            ),
-                        ));
-                    }
+            for col in &self.columns {
+                if *col > parquet_schema.fields().len() {
+                    return Err(Error::new(
+                        ErrorKind::DataInvalid,
+                        format!(
+                            "Column index {} out of range. Schema: {}",
+                            col, parquet_schema
+                        ),
+                    ));
                 }
+                indices.push(*col - 1);
             }
             Ok(ProjectionMask::roots(
                 metadata.file_metadata().schema_descr(),
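
As a reading aid for the new get_arrow_projection_mask body above, here is a minimal, self-contained sketch of the same bounds check and 1-based-to-0-based index conversion. The function name and the plain slice of field names are illustrative stand-ins for the crate's Arrow schema types, not part of the commit:

// Standalone sketch (not the crate's code): convert 1-based column indices
// into 0-based root indices, rejecting anything outside the schema.
fn to_root_indices(columns: &[usize], field_names: &[&str]) -> Result<Vec<usize>, String> {
    let mut indices = Vec::with_capacity(columns.len());
    for &col in columns {
        if col == 0 || col > field_names.len() {
            return Err(format!(
                "Column index {} out of range for {} fields",
                col,
                field_names.len()
            ));
        }
        indices.push(col - 1);
    }
    Ok(indices)
}

fn main() {
    let schema = ["id", "name", "ts"];
    // Project the first and third columns.
    assert_eq!(to_root_indices(&[1, 3], &schema), Ok(vec![0, 2]));
    // Index 4 is past the end of the three-field schema and is rejected.
    assert!(to_root_indices(&[4], &schema).is_err());
}

Note that, unlike the hunk above, this sketch also rejects index 0, which the committed check does not guard against before computing *col - 1.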

crates/iceberg/src/scan.rs

Lines changed: 15 additions & 2 deletions
@@ -183,8 +183,21 @@ impl TableScan {
         let mut arrow_reader_builder =
             ArrowReaderBuilder::new(self.file_io.clone(), self.schema.clone());
 
-        arrow_reader_builder =
-            arrow_reader_builder.with_column_projection(self.column_names.clone());
+        let mut field_ids = vec![];
+        for column_name in &self.column_names {
+            let field_id = self.schema.field_id_by_name(column_name).ok_or_else(|| {
+                Error::new(
+                    ErrorKind::DataInvalid,
+                    format!(
+                        "Column {} not found in table. Schema: {}",
+                        column_name, self.schema
+                    ),
+                )
+            })?;
+            field_ids.push(field_id as usize);
+        }
+
+        arrow_reader_builder = arrow_reader_builder.with_column_projection(field_ids);
 
         if let Some(batch_size) = self.batch_size {
             arrow_reader_builder = arrow_reader_builder.with_batch_size(batch_size);
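
For context, a hedged, self-contained sketch of the name-to-field-id mapping this hunk performs before handing the projection to ArrowReaderBuilder. The HashMap stands in for the table schema's field_id_by_name lookup, and build_projection is an illustrative helper, not part of the crate:

use std::collections::HashMap;

// Standalone sketch (not the crate's code): resolve selected column names to
// field ids, failing with a descriptive error on any unknown name.
fn build_projection(
    selected: &[&str],
    name_to_field_id: &HashMap<&str, i32>,
) -> Result<Vec<usize>, String> {
    let mut field_ids = Vec::with_capacity(selected.len());
    for name in selected {
        let id = name_to_field_id
            .get(name)
            .ok_or_else(|| format!("Column {} not found in table", name))?;
        field_ids.push(*id as usize);
    }
    Ok(field_ids)
}

fn main() {
    // Stand-in for the schema's name -> field id lookup.
    let schema: HashMap<&str, i32> = HashMap::from([("id", 1), ("name", 2), ("ts", 3)]);
    assert_eq!(build_projection(&["id", "ts"], &schema), Ok(vec![1, 3]));
    assert!(build_projection(&["missing"], &schema).is_err());
}

Taken together with the arrow.rs change, the scan now projects by field id rather than by Parquet column name, so an unknown column is reported while building the scan instead of inside the Arrow reader.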
