From 90a2814ce3dafa63b89347212a0ba1f1796d8549 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 18 Nov 2025 14:42:55 -0500 Subject: [PATCH 1/2] Make `ArrowReaderOptions::with_virtual_columns` error rather than panic on invalid input --- parquet/src/arrow/arrow_reader/mod.rs | 33 +++++++++++++++------------ parquet/src/arrow/async_reader/mod.rs | 10 ++++---- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 16e01a6ebe27..46ad71068416 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -605,7 +605,7 @@ impl ArrowReaderOptions { /// /// // Configure options with virtual columns /// let options = ArrowReaderOptions::new() - /// .with_virtual_columns(vec![row_number_field]); + /// .with_virtual_columns(vec![row_number_field])?; /// /// // Create a reader with the options /// let mut reader = ParquetRecordBatchReaderBuilder::try_new_with_options( @@ -622,20 +622,20 @@ impl ArrowReaderOptions { /// # Ok(()) /// # } /// ``` - pub fn with_virtual_columns(self, virtual_columns: Vec) -> Self { + pub fn with_virtual_columns(self, virtual_columns: Vec) -> Result { // Validate that all fields are virtual columns for field in &virtual_columns { if !is_virtual_column(field) { - panic!( + return Err(ParquetError::General(format!( "Field '{}' is not a virtual column. Virtual columns must have extension type names starting with 'arrow.virtual.'", field.name() - ); + ))); } } - Self { + Ok(Self { virtual_columns, ..self - } + }) } /// Retrieve the currently set page index behavior. @@ -5470,8 +5470,10 @@ pub(crate) mod tests { Field::new("row_number", ArrowDataType::Int64, false).with_extension_type(RowNumber), ); - let options = ArrowReaderOptions::new().with_schema(Arc::new(Schema::new(supplied_fields))); - let options = options.with_virtual_columns(vec![row_number_field.clone()]); + let options = ArrowReaderOptions::new() + .with_schema(Arc::new(Schema::new(supplied_fields))) + .with_virtual_columns(vec![row_number_field.clone()]) + .unwrap(); let mut arrow_reader = ParquetRecordBatchReaderBuilder::try_new_with_options( file.try_clone().unwrap(), options, @@ -5516,8 +5518,9 @@ pub(crate) mod tests { let row_number_field = Arc::new( Field::new("row_number", ArrowDataType::Int64, false).with_extension_type(RowNumber), ); - let options = - ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field.clone()]); + let options = ArrowReaderOptions::new() + .with_virtual_columns(vec![row_number_field.clone()]) + .unwrap(); let metadata = ArrowReaderMetadata::load(&file, options).unwrap(); let num_columns = metadata .metadata @@ -5564,8 +5567,9 @@ pub(crate) mod tests { Field::new("row_number", ArrowDataType::Int64, false) .with_extension_type(RowNumber), ); - let options = - ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field]); + let options = ArrowReaderOptions::new() + .with_virtual_columns(vec![row_number_field]) + .unwrap(); let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options) .unwrap() .with_row_selection(selection) @@ -5589,8 +5593,9 @@ pub(crate) mod tests { Field::new("row_number", ArrowDataType::Int64, false) .with_extension_type(RowNumber), ); - let options = - ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field]); + let options = ArrowReaderOptions::new() + .with_virtual_columns(vec![row_number_field]) + .unwrap(); let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options) .unwrap() .with_row_selection(selection) diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 35acc5fbf17b..7156caf80fcc 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -2202,8 +2202,9 @@ mod tests { Field::new("row_number", DataType::Int64, false) .with_extension_type(RowNumber), ); - let options = - ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field]); + let options = ArrowReaderOptions::new() + .with_virtual_columns(vec![row_number_field]) + .unwrap(); let reader = ParquetRecordBatchStreamBuilder::new_with_options(file, options) .await .unwrap() @@ -2232,8 +2233,9 @@ mod tests { Field::new("row_number", DataType::Int64, false) .with_extension_type(RowNumber), ); - let options = - ArrowReaderOptions::new().with_virtual_columns(vec![row_number_field]); + let options = ArrowReaderOptions::new() + .with_virtual_columns(vec![row_number_field]) + .unwrap(); let reader = ParquetRecordBatchStreamBuilder::new_with_options(file, options) .await .unwrap() From 27726552310d72e0e67f0a5bd63efce07b08fdb2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 18 Nov 2025 14:48:28 -0500 Subject: [PATCH 2/2] Update tests --- parquet/src/arrow/arrow_reader/mod.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 46ad71068416..4c4ebfa09045 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -5550,11 +5550,16 @@ pub(crate) mod tests { } #[test] - #[should_panic(expected = "is not a virtual column")] fn test_with_virtual_columns_rejects_non_virtual_fields() { // Try to pass a regular field (not a virtual column) to with_virtual_columns let regular_field = Arc::new(Field::new("regular_column", ArrowDataType::Int64, false)); - let _options = ArrowReaderOptions::new().with_virtual_columns(vec![regular_field]); + assert_eq!( + ArrowReaderOptions::new() + .with_virtual_columns(vec![regular_field]) + .unwrap_err() + .to_string(), + "Parquet error: Field 'regular_column' is not a virtual column. Virtual columns must have extension type names starting with 'arrow.virtual.'" + ); } #[test]