@@ -23,7 +23,9 @@ use arrow_schema::{ArrowError, DataType, Field, Fields};
2323use parquet_variant:: {
2424 BuilderSpecificState , ListBuilder , MetadataBuilder , ObjectBuilder , Variant , VariantBuilderExt ,
2525} ;
26- use parquet_variant:: { ParentState , ValueBuilder , WritableMetadataBuilder } ;
26+ use parquet_variant:: {
27+ ParentState , ReadOnlyMetadataBuilder , ValueBuilder , WritableMetadataBuilder ,
28+ } ;
2729use std:: sync:: Arc ;
2830
2931/// A builder for [`VariantArray`]
@@ -205,6 +207,154 @@ impl VariantBuilderExt for VariantArrayBuilder {
205207 }
206208}
207209
210+ /// A builder for creating only the value column of a [`VariantArray`]
211+ ///
212+ /// This builder is used when you have existing metadata and only need to build
213+ /// the value column. It's useful for scenarios like variant unshredding, data
214+ /// transformation, or filtering where you want to reuse existing metadata.
215+ ///
216+ /// The builder produces a [`BinaryViewArray`] that can be combined with existing
217+ /// metadata to create a complete [`VariantArray`].
218+ ///
219+ /// # Example:
220+ /// ```
221+ /// # use arrow::array::Array;
222+ /// # use parquet_variant::{Variant};
223+ /// # use parquet_variant_compute::VariantValueArrayBuilder;
224+ /// // Create a variant value builder for 10 rows
225+ /// let mut builder = VariantValueArrayBuilder::new(10);
226+ ///
227+ /// // Append some values with their corresponding metadata, which the
228+ /// // builder takes advantage of to avoid creating new metadata.
229+ /// builder.append_value(Variant::from(42));
230+ /// builder.append_null();
231+ /// builder.append_value(Variant::from("hello"));
232+ ///
233+ /// // Build the final value array
234+ /// let value_array = builder.build().unwrap();
235+ /// assert_eq!(value_array.len(), 3);
236+ /// ```
237+ #[ derive( Debug ) ]
238+ pub struct VariantValueArrayBuilder {
239+ value_builder : ValueBuilder ,
240+ value_offsets : Vec < usize > ,
241+ nulls : NullBufferBuilder ,
242+ }
243+
244+ impl VariantValueArrayBuilder {
245+ /// Create a new `VariantValueArrayBuilder` with the specified row capacity
246+ pub fn new ( row_capacity : usize ) -> Self {
247+ Self {
248+ value_builder : ValueBuilder :: new ( ) ,
249+ value_offsets : Vec :: with_capacity ( row_capacity) ,
250+ nulls : NullBufferBuilder :: new ( row_capacity) ,
251+ }
252+ }
253+
254+ /// Build the final value array
255+ ///
256+ /// Returns a [`BinaryViewArray`] containing the serialized variant values.
257+ /// This can be combined with existing metadata to create a complete [`VariantArray`].
258+ pub fn build ( mut self ) -> Result < BinaryViewArray , ArrowError > {
259+ let value_buffer = self . value_builder . into_inner ( ) ;
260+ let mut array = binary_view_array_from_buffers ( value_buffer, self . value_offsets ) ;
261+ if let Some ( nulls) = self . nulls . finish ( ) {
262+ let ( views, buffers, _) = array. into_parts ( ) ;
263+ array = BinaryViewArray :: try_new ( views, buffers, Some ( nulls) ) ?;
264+ }
265+ Ok ( array)
266+ }
267+
268+ /// Append a null row to the builder
269+ ///
270+ /// WARNING: It is only valid to call this method when building the `value` field of a shredded
271+ /// variant column (which is nullable). The `value` field of a binary (unshredded) variant
272+ /// column is non-nullable, and callers should instead invoke [`Self::append_value`] with
273+ /// `Variant::Null`, passing the appropriate metadata value.
274+ pub fn append_null ( & mut self ) {
275+ self . value_offsets . push ( self . value_builder . offset ( ) ) ;
276+ self . nulls . append_null ( ) ;
277+ }
278+
279+ /// Append a variant value with its corresponding metadata
280+ ///
281+ /// # Arguments
282+ /// * `value` - The variant value to append
283+ /// * `metadata` - The metadata dictionary for this variant (used for field name resolution)
284+ ///
285+ /// # Returns
286+ /// * `Ok(())` if the value was successfully appended
287+ /// * `Err(ArrowError)` if the variant contains field names not found in the metadata
288+ ///
289+ /// # Example
290+ /// ```
291+ /// # use parquet_variant::Variant;
292+ /// # use parquet_variant_compute::VariantValueArrayBuilder;
293+ /// let mut builder = VariantValueArrayBuilder::new(10);
294+ /// builder.append_value(Variant::from(42));
295+ /// ```
296+ pub fn append_value ( & mut self , value : Variant < ' _ , ' _ > ) {
297+ let mut metadata_builder = ReadOnlyMetadataBuilder :: new ( value. metadata ( ) . clone ( ) ) ;
298+ ValueBuilder :: append_variant_bytes ( self . parent_state ( & mut metadata_builder) , value) ;
299+ }
300+
301+ /// Creates a builder-specific parent state.
302+ ///
303+ /// For example, this can be useful for code that wants to copy a subset of fields from an
304+ /// object `value` as a new row of `value_array_builder`:
305+ ///
306+ /// ```no_run
307+ /// # use parquet_variant::{ObjectBuilder, ReadOnlyMetadataBuilder, Variant};
308+ /// # use parquet_variant_compute::VariantValueArrayBuilder;
309+ /// # let value = Variant::Null;
310+ /// # let mut value_array_builder = VariantValueArrayBuilder::new(0);
311+ /// # fn should_keep(field_name: &str) -> bool { todo!() };
312+ /// let Variant::Object(obj) = value else {
313+ /// panic!("Not a variant object");
314+ /// };
315+ /// let mut metadata_builder = ReadOnlyMetadataBuilder::new(obj.metadata.clone());
316+ /// let state = value_array_builder.parent_state(&mut metadata_builder);
317+ /// let mut object_builder = ObjectBuilder::new(state, false);
318+ /// for (field_name, field_value) in obj.iter() {
319+ /// if should_keep(field_name) {
320+ /// object_builder.insert_bytes(field_name, field_value);
321+ /// }
322+ /// }
323+ /// object_builder.finish(); // appends the filtered object
324+ /// ```
325+ pub fn parent_state < ' a > (
326+ & ' a mut self ,
327+ metadata_builder : & ' a mut dyn MetadataBuilder ,
328+ ) -> ParentState < ' a , ValueArrayBuilderState < ' a > > {
329+ let state = ValueArrayBuilderState {
330+ value_offsets : & mut self . value_offsets ,
331+ nulls : & mut self . nulls ,
332+ } ;
333+
334+ ParentState :: new ( & mut self . value_builder , metadata_builder, state)
335+ }
336+ }
337+
338+ /// Builder-specific state for array building that manages array-level offsets and nulls. See
339+ /// [`VariantBuilderExt`] for details.
340+ #[ derive( Debug ) ]
341+ pub struct ValueArrayBuilderState < ' a > {
342+ value_offsets : & ' a mut Vec < usize > ,
343+ nulls : & ' a mut NullBufferBuilder ,
344+ }
345+
346+ // All changes are pending until finalized
347+ impl BuilderSpecificState for ValueArrayBuilderState < ' _ > {
348+ fn finish (
349+ & mut self ,
350+ _metadata_builder : & mut dyn MetadataBuilder ,
351+ value_builder : & mut ValueBuilder ,
352+ ) {
353+ self . value_offsets . push ( value_builder. offset ( ) ) ;
354+ self . nulls . append_non_null ( ) ;
355+ }
356+ }
357+
208358fn binary_view_array_from_buffers ( buffer : Vec < u8 > , offsets : Vec < usize > ) -> BinaryViewArray {
209359 // All offsets are less than or equal to the buffer length, so we can safely cast all offsets
210360 // inside the loop below, as long as the buffer length fits in u32.
@@ -228,6 +378,7 @@ fn binary_view_array_from_buffers(buffer: Vec<u8>, offsets: Vec<usize>) -> Binar
228378mod test {
229379 use super :: * ;
230380 use arrow:: array:: Array ;
381+ use parquet_variant:: Variant ;
231382
232383 /// Test that both the metadata and value buffers are non nullable
233384 #[ test]
@@ -288,4 +439,92 @@ mod test {
288439 let list = variant. as_list ( ) . expect ( "variant to be a list" ) ;
289440 assert_eq ! ( list. len( ) , 2 ) ;
290441 }
442+
/// Appending two values with a null row in between must yield three rows, of
/// which only the middle one is null.
#[test]
fn test_variant_value_array_builder_basic() {
    let mut builder = VariantValueArrayBuilder::new(10);

    // Two real values separated by a null row
    builder.append_value(Variant::from(42i32));
    builder.append_null();
    builder.append_value(Variant::from("hello"));

    let value_array = builder.build().unwrap();
    assert_eq!(value_array.len(), 3);

    // Checking the length alone would not notice a lost or misplaced null, so
    // verify the validity of every row as well.
    assert_eq!(value_array.null_count(), 1);
    assert!(value_array.is_valid(0));
    assert!(value_array.is_null(1));
    assert!(value_array.is_valid(2));
}
455+
/// Rebuilds the value column of an object-valued variant array (verbatim copy,
/// field filtering, and nesting inside a list) while reusing the original
/// metadata column.
#[test]
fn test_variant_value_array_builder_with_objects() {
    // Source data: a three-row variant array in which every row is an object
    let mut source_builder = VariantArrayBuilder::new(3);
    source_builder
        .new_object()
        .with_field("name", "Alice")
        .with_field("age", 30i32)
        .finish();

    source_builder
        .new_object()
        .with_field("name", "Bob")
        .with_field("age", 42i32)
        .with_field("city", "Wonderland")
        .finish();

    source_builder
        .new_object()
        .with_field("name", "Charlie")
        .with_field("age", 1i32)
        .finish();

    let source = source_builder.build();

    // Rewrite the value column row by row.
    //
    // NOTE: The metadata column is reused as is, so rows cannot be reordered;
    // values may only be filtered or manipulated within their own row.
    let mut values = VariantValueArrayBuilder::new(3);

    // Row 0: copied over unchanged
    values.append_value(source.value(0));

    // Row 1: keep only some fields, which requires driving an object builder by hand
    let bob = source.value(1);
    let mut bob_metadata = ReadOnlyMetadataBuilder::new(bob.metadata().clone());
    let bob_state = values.parent_state(&mut bob_metadata);
    ObjectBuilder::new(bob_state, false)
        .with_field("name", bob.get_object_field("name").unwrap())
        .with_field("age", bob.get_object_field("age").unwrap())
        .finish();

    // Row 2: the original object, wrapped twice inside a list
    let charlie = source.value(2);
    let mut charlie_metadata = ReadOnlyMetadataBuilder::new(charlie.metadata().clone());
    let charlie_state = values.parent_state(&mut charlie_metadata);
    ListBuilder::new(charlie_state, false)
        .with_value(charlie.clone())
        .with_value(charlie.clone())
        .finish();

    // Pair the new value column with the original metadata column
    let metadata_column = source.metadata_field().clone();
    let value_column = values.build().unwrap();
    let rebuilt = VariantArray::from_parts(metadata_column, Some(value_column), None, None);

    assert_eq!(rebuilt.len(), 3);
    assert_eq!(source.value(0), rebuilt.value(0));

    assert_eq!(
        source.value(1).get_object_field("name"),
        rebuilt.value(1).get_object_field("name")
    );
    assert_eq!(
        source.value(1).get_object_field("age"),
        rebuilt.value(1).get_object_field("age")
    );

    assert_eq!(source.value(2), rebuilt.value(2).get_list_element(0).unwrap());
    assert_eq!(source.value(2), rebuilt.value(2).get_list_element(1).unwrap());
}
291530}
0 commit comments