@@ -821,6 +821,193 @@ get_parquet_stat_as_datum!(min);
821821
822822get_parquet_stat_as_datum ! ( max) ;
823823
824+ /// Utilities to deal with [arrow_array::builder] types in the Iceberg context.
825+ pub ( crate ) mod builder {
826+ use arrow_array:: builder:: * ;
827+ use arrow_array:: cast:: AsArray ;
828+ use arrow_array:: types:: * ;
829+ use arrow_array:: { ArrayRef , Datum as ArrowDatum } ;
830+ use arrow_schema:: { DataType , TimeUnit } ;
831+ use ordered_float:: OrderedFloat ;
832+
833+ use crate :: spec:: { Literal , PrimitiveLiteral } ;
834+ use crate :: { Error , ErrorKind } ;
835+
836+ /// A helper wrapping [ArrayBuilder] for building arrays without declaring the inner type at
837+ /// compile-time when types are determined dynamically (e.g. based on some column type).
838+ /// A [DataType] is given at construction time which is used to later downcast the inner array
839+ /// and provided values.
840+ pub ( crate ) struct AnyArrayBuilder {
841+ data_type : DataType ,
842+ inner : Box < dyn ArrayBuilder > ,
843+ }
844+
845+ impl AnyArrayBuilder {
846+ pub ( crate ) fn new ( data_type : & DataType ) -> Self {
847+ Self {
848+ data_type : data_type. clone ( ) ,
849+ inner : make_builder ( data_type, 0 ) ,
850+ }
851+ }
852+
853+ pub ( crate ) fn finish ( & mut self ) -> ArrayRef {
854+ self . inner . finish ( )
855+ }
856+
857+ /// Append an [[arrow_array::Datum]] value.
858+ pub ( crate ) fn append_datum ( & mut self , value : & dyn ArrowDatum ) -> crate :: Result < ( ) > {
859+ let ( array, is_scalar) = value. get ( ) ;
860+ assert ! ( is_scalar, "Can only append scalar datum" ) ;
861+
862+ match array. data_type ( ) {
863+ DataType :: Boolean => self
864+ . builder :: < BooleanBuilder > ( ) ?
865+ . append_value ( array. as_boolean ( ) . value ( 0 ) ) ,
866+ DataType :: Int32 => self
867+ . builder :: < Int32Builder > ( ) ?
868+ . append_value ( array. as_primitive :: < Int32Type > ( ) . value ( 0 ) ) ,
869+ DataType :: Int64 => self
870+ . builder :: < Int64Builder > ( ) ?
871+ . append_value ( array. as_primitive :: < Int64Type > ( ) . value ( 0 ) ) ,
872+ DataType :: Float32 => self
873+ . builder :: < Float32Builder > ( ) ?
874+ . append_value ( array. as_primitive :: < Float32Type > ( ) . value ( 0 ) ) ,
875+ DataType :: Float64 => self
876+ . builder :: < Float64Builder > ( ) ?
877+ . append_value ( array. as_primitive :: < Float64Type > ( ) . value ( 0 ) ) ,
878+ DataType :: Decimal128 ( _, _) => self
879+ . builder :: < Decimal128Builder > ( ) ?
880+ . append_value ( array. as_primitive :: < Decimal128Type > ( ) . value ( 0 ) ) ,
881+ DataType :: Date32 => self
882+ . builder :: < Date32Builder > ( ) ?
883+ . append_value ( array. as_primitive :: < Date32Type > ( ) . value ( 0 ) ) ,
884+ DataType :: Time64 ( TimeUnit :: Microsecond ) => self
885+ . builder :: < Time64MicrosecondBuilder > ( ) ?
886+ . append_value ( array. as_primitive :: < Time64MicrosecondType > ( ) . value ( 0 ) ) ,
887+ DataType :: Timestamp ( TimeUnit :: Microsecond , _) => self
888+ . builder :: < TimestampMicrosecondBuilder > ( ) ?
889+ . append_value ( array. as_primitive :: < TimestampMicrosecondType > ( ) . value ( 0 ) ) ,
890+ DataType :: Timestamp ( TimeUnit :: Nanosecond , _) => self
891+ . builder :: < TimestampNanosecondBuilder > ( ) ?
892+ . append_value ( array. as_primitive :: < TimestampNanosecondType > ( ) . value ( 0 ) ) ,
893+ DataType :: Utf8 => self
894+ . builder :: < StringBuilder > ( ) ?
895+ . append_value ( array. as_string :: < i32 > ( ) . value ( 0 ) ) ,
896+ DataType :: FixedSizeBinary ( _) => self
897+ . builder :: < BinaryBuilder > ( ) ?
898+ . append_value ( array. as_fixed_size_binary ( ) . value ( 0 ) ) ,
899+ DataType :: LargeBinary => self
900+ . builder :: < LargeBinaryBuilder > ( ) ?
901+ . append_value ( array. as_binary :: < i64 > ( ) . value ( 0 ) ) ,
902+ _ => {
903+ return Err ( Error :: new (
904+ ErrorKind :: FeatureUnsupported ,
905+ format ! ( "Cannot append data type: {:?}" , array. data_type( ) , ) ,
906+ ) ) ;
907+ }
908+ }
909+ Ok ( ( ) )
910+ }
911+
912+ /// Append a literal with the provided [DataType]. We're not solely relying on the literal to
913+ /// infer the type because [Literal] values do not specify the expected type of builder. E.g.,
914+ /// a [PrimitiveLiteral::Long] may go into an array builder for longs but also for timestamps.
915+ pub ( crate ) fn append_literal ( & mut self , value : & Literal ) -> crate :: Result < ( ) > {
916+ let Some ( primitive) = value. as_primitive_literal ( ) else {
917+ return Err ( Error :: new (
918+ ErrorKind :: FeatureUnsupported ,
919+ "Expected primitive type" ,
920+ ) ) ;
921+ } ;
922+
923+ match ( & self . data_type , primitive. clone ( ) ) {
924+ ( DataType :: Boolean , PrimitiveLiteral :: Boolean ( value) ) => {
925+ self . builder :: < BooleanBuilder > ( ) ?. append_value ( value)
926+ }
927+ ( DataType :: Int32 , PrimitiveLiteral :: Int ( value) ) => {
928+ self . builder :: < Int32Builder > ( ) ?. append_value ( value)
929+ }
930+ ( DataType :: Int64 , PrimitiveLiteral :: Long ( value) ) => {
931+ self . builder :: < Int64Builder > ( ) ?. append_value ( value)
932+ }
933+ ( DataType :: Float32 , PrimitiveLiteral :: Float ( OrderedFloat ( value) ) ) => {
934+ self . builder :: < Float32Builder > ( ) ?. append_value ( value)
935+ }
936+ ( DataType :: Float64 , PrimitiveLiteral :: Double ( OrderedFloat ( value) ) ) => {
937+ self . builder :: < Float64Builder > ( ) ?. append_value ( value)
938+ }
939+ ( DataType :: Utf8 , PrimitiveLiteral :: String ( value) ) => {
940+ self . builder :: < StringBuilder > ( ) ?. append_value ( value)
941+ }
942+ ( DataType :: FixedSizeBinary ( _) , PrimitiveLiteral :: Binary ( value) ) => self
943+ . builder :: < FixedSizeBinaryBuilder > ( ) ?
944+ . append_value ( value) ?,
945+ ( DataType :: LargeBinary , PrimitiveLiteral :: Binary ( value) ) => {
946+ self . builder :: < LargeBinaryBuilder > ( ) ?. append_value ( value)
947+ }
948+ ( _, _) => {
949+ return Err ( Error :: new (
950+ ErrorKind :: FeatureUnsupported ,
951+ format ! (
952+ "Builder of type {:?} does not accept literal {:?}" ,
953+ self . data_type, primitive
954+ ) ,
955+ ) ) ;
956+ }
957+ }
958+
959+ Ok ( ( ) )
960+ }
961+
962+ /// Append a null value for the provided [DataType].
963+ pub ( crate ) fn append_null ( & mut self ) -> crate :: Result < ( ) > {
964+ match self . data_type {
965+ DataType :: Boolean => self . builder :: < BooleanBuilder > ( ) ?. append_null ( ) ,
966+ DataType :: Int32 => self . builder :: < Int32Builder > ( ) ?. append_null ( ) ,
967+ DataType :: Int64 => self . builder :: < Int64Builder > ( ) ?. append_null ( ) ,
968+ DataType :: Float32 => self . builder :: < Float32Builder > ( ) ?. append_null ( ) ,
969+ DataType :: Float64 => self . builder :: < Float64Builder > ( ) ?. append_null ( ) ,
970+ DataType :: Decimal128 ( _, _) => self . builder :: < Decimal128Builder > ( ) ?. append_null ( ) ,
971+ DataType :: Date32 => self . builder :: < Date32Builder > ( ) ?. append_null ( ) ,
972+ DataType :: Time64 ( TimeUnit :: Microsecond ) => {
973+ self . builder :: < Time64MicrosecondBuilder > ( ) ?. append_null ( )
974+ }
975+ DataType :: Timestamp ( TimeUnit :: Microsecond , _) => {
976+ self . builder :: < TimestampMicrosecondBuilder > ( ) ?. append_null ( )
977+ }
978+ DataType :: Timestamp ( TimeUnit :: Nanosecond , _) => {
979+ self . builder :: < TimestampNanosecondBuilder > ( ) ?. append_null ( )
980+ }
981+ DataType :: Utf8 => self . builder :: < StringBuilder > ( ) ?. append_null ( ) ,
982+ DataType :: FixedSizeBinary ( _) => {
983+ self . builder :: < FixedSizeBinaryBuilder > ( ) ?. append_null ( )
984+ }
985+ DataType :: LargeBinary => self . builder :: < LargeBinaryBuilder > ( ) ?. append_null ( ) ,
986+ _ => {
987+ return Err ( Error :: new (
988+ ErrorKind :: FeatureUnsupported ,
989+ format ! (
990+ "Cannot append null values for data type: {:?}" ,
991+ self . data_type
992+ ) ,
993+ ) )
994+ }
995+ }
996+ Ok ( ( ) )
997+ }
998+
999+ /// Cast the `inner` builder to a specific type or return [Error].
1000+ fn builder < T : ArrayBuilder > ( & mut self ) -> crate :: Result < & mut T > {
1001+ self . inner . as_any_mut ( ) . downcast_mut :: < T > ( ) . ok_or_else ( || {
1002+ Error :: new (
1003+ ErrorKind :: Unexpected ,
1004+ "Failed to cast builder to expected type" ,
1005+ )
1006+ } )
1007+ }
1008+ }
1009+ }
1010+
8241011impl TryFrom < & ArrowSchema > for crate :: spec:: Schema {
8251012 type Error = Error ;
8261013
0 commit comments