Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 31 additions & 20 deletions parquet/src/arrow/arrow_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ impl ArrowReaderOptions {
///
/// // Configure options with virtual columns
/// let options = ArrowReaderOptions::new()
/// .with_virtual_columns(vec![row_number_field]);
/// .with_virtual_columns(vec![row_number_field])?;
///
/// // Create a reader with the options
/// let mut reader = ParquetRecordBatchReaderBuilder::try_new_with_options(
Expand All @@ -622,19 +622,20 @@ impl ArrowReaderOptions {
/// # Ok(())
/// # }
/// ```
pub fn with_virtual_columns(self, virtual_columns: Vec<FieldRef>) -> Self {
pub fn with_virtual_columns(self, virtual_columns: Vec<FieldRef>) -> Result<Self> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The point of the PR is to change this API to avoid panic'ing

// Validate that all fields are virtual columns
for field in &virtual_columns {
assert!(
is_virtual_column(field),
"Field '{}' is not a virtual column. Virtual columns must have extension type names starting with 'arrow.virtual.'",
field.name()
);
if !is_virtual_column(field) {
return Err(ParquetError::General(format!(
"Field '{}' is not a virtual column. Virtual columns must have extension type names starting with 'arrow.virtual.'",
field.name()
)));
}
}
Self {
Ok(Self {
virtual_columns,
..self
}
})
}

/// Retrieve the currently set page index behavior.
Expand Down Expand Up @@ -5469,8 +5470,10 @@ pub(crate) mod tests {
Field::new("row_number", ArrowDataType::Int64, false).with_extension_type(RowNumber),
);

let options = ArrowReaderOptions::new().with_schema(Arc::new(Schema::new(supplied_fields)));
let options = options.with_virtual_columns(vec![row_number_field.clone()]);
let options = ArrowReaderOptions::new()
.with_schema(Arc::new(Schema::new(supplied_fields)))
.with_virtual_columns(vec![row_number_field.clone()])
.unwrap();
let mut arrow_reader = ParquetRecordBatchReaderBuilder::try_new_with_options(
file.try_clone().unwrap(),
options,
Expand Down Expand Up @@ -5515,8 +5518,9 @@ pub(crate) mod tests {
let row_number_field = Arc::new(
Field::new("row_number", ArrowDataType::Int64, false).with_extension_type(RowNumber),
);
let options =
ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field.clone()]);
let options = ArrowReaderOptions::new()
.with_virtual_columns(vec![row_number_field.clone()])
.unwrap();
let metadata = ArrowReaderMetadata::load(&file, options).unwrap();
let num_columns = metadata
.metadata
Expand Down Expand Up @@ -5568,7 +5572,7 @@ pub(crate) mod tests {
let buffer = Bytes::from(buffer);

let options =
ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field.clone()]);
ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field.clone()])?;

// read out with normal options
let arrow_reader =
Expand Down Expand Up @@ -5637,11 +5641,16 @@ pub(crate) mod tests {
}

#[test]
#[should_panic(expected = "is not a virtual column")]
fn test_with_virtual_columns_rejects_non_virtual_fields() {
// Try to pass a regular field (not a virtual column) to with_virtual_columns
let regular_field = Arc::new(Field::new("regular_column", ArrowDataType::Int64, false));
let _options = ArrowReaderOptions::new().with_virtual_columns(vec![regular_field]);
assert_eq!(
ArrowReaderOptions::new()
.with_virtual_columns(vec![regular_field])
.unwrap_err()
.to_string(),
"Parquet error: Field 'regular_column' is not a virtual column. Virtual columns must have extension type names starting with 'arrow.virtual.'"
);
}

#[test]
Expand All @@ -5654,8 +5663,9 @@ pub(crate) mod tests {
Field::new("row_number", ArrowDataType::Int64, false)
.with_extension_type(RowNumber),
);
let options =
ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field]);
let options = ArrowReaderOptions::new()
.with_virtual_columns(vec![row_number_field])
.unwrap();
let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options)
.unwrap()
.with_row_selection(selection)
Expand All @@ -5679,8 +5689,9 @@ pub(crate) mod tests {
Field::new("row_number", ArrowDataType::Int64, false)
.with_extension_type(RowNumber),
);
let options =
ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field]);
let options = ArrowReaderOptions::new()
.with_virtual_columns(vec![row_number_field])
.unwrap();
let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options)
.unwrap()
.with_row_selection(selection)
Expand Down
10 changes: 6 additions & 4 deletions parquet/src/arrow/async_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2202,8 +2202,9 @@ mod tests {
Field::new("row_number", DataType::Int64, false)
.with_extension_type(RowNumber),
);
let options =
ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field]);
let options = ArrowReaderOptions::new()
.with_virtual_columns(vec![row_number_field])
.unwrap();
let reader = ParquetRecordBatchStreamBuilder::new_with_options(file, options)
.await
.unwrap()
Expand Down Expand Up @@ -2232,8 +2233,9 @@ mod tests {
Field::new("row_number", DataType::Int64, false)
.with_extension_type(RowNumber),
);
let options =
ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field]);
let options = ArrowReaderOptions::new()
.with_virtual_columns(vec![row_number_field])
.unwrap();
let reader = ParquetRecordBatchStreamBuilder::new_with_options(file, options)
.await
.unwrap()
Expand Down
Loading