@@ -189,18 +189,17 @@ fn visit_type<V: ArrowSchemaVisitor>(r#type: &DataType, visitor: &mut V) -> Resu
189189 p,
190190 DataType :: Boolean
191191 | DataType :: Utf8
192+ | DataType :: LargeUtf8
192193 | DataType :: Binary
194+ | DataType :: LargeBinary
193195 | DataType :: FixedSizeBinary ( _)
194196 ) =>
195197 {
196198 visitor. primitive ( p)
197199 }
198- DataType :: List ( element_field) => {
199- visitor. before_list_element ( element_field) ?;
200- let value = visit_type ( element_field. data_type ( ) , visitor) ?;
201- visitor. after_list_element ( element_field) ?;
202- visitor. list ( r#type, value)
203- }
200+ DataType :: List ( element_field) => visit_list ( r#type, element_field, visitor) ,
201+ DataType :: LargeList ( element_field) => visit_list ( r#type, element_field, visitor) ,
202+ DataType :: FixedSizeList ( element_field, _) => visit_list ( r#type, element_field, visitor) ,
204203 DataType :: Map ( field, _) => match field. data_type ( ) {
205204 DataType :: Struct ( fields) => {
206205 if fields. len ( ) != 2 {
@@ -242,6 +241,19 @@ fn visit_type<V: ArrowSchemaVisitor>(r#type: &DataType, visitor: &mut V) -> Resu
242241 }
243242}
244243
244+ /// Visit list types in post order.
245+ #[ allow( dead_code) ]
246+ fn visit_list < V : ArrowSchemaVisitor > (
247+ data_type : & DataType ,
248+ element_field : & Field ,
249+ visitor : & mut V ,
250+ ) -> Result < V :: T > {
251+ visitor. before_list_element ( element_field) ?;
252+ let value = visit_type ( element_field. data_type ( ) , visitor) ?;
253+ visitor. after_list_element ( element_field) ?;
254+ visitor. list ( data_type, value)
255+ }
256+
245257/// Visit struct type in post order.
246258#[ allow( dead_code) ]
247259fn visit_struct < V : ArrowSchemaVisitor > ( fields : & Fields , visitor : & mut V ) -> Result < V :: T > {
@@ -347,26 +359,30 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter {
347359 }
348360
349361 fn list ( & mut self , list : & DataType , value : Self :: T ) -> Result < Self :: T > {
350- match list {
351- DataType :: List ( element_field) => {
352- let id = get_field_id ( element_field) ?;
353- let doc = get_field_doc ( element_field) ;
354- let element_field = Arc :: new ( NestedField {
355- id,
356- doc,
357- name : "element" . to_string ( ) ,
358- required : !element_field. is_nullable ( ) ,
359- field_type : Box :: new ( value. clone ( ) ) ,
360- initial_default : None ,
361- write_default : None ,
362- } ) ;
363- Ok ( Type :: List ( ListType { element_field } ) )
362+ let element_field = match list {
363+ DataType :: List ( element_field) => element_field,
364+ DataType :: LargeList ( element_field) => element_field,
365+ DataType :: FixedSizeList ( element_field, _) => element_field,
366+ _ => {
367+ return Err ( Error :: new (
368+ ErrorKind :: DataInvalid ,
369+ "List type must have list data type" ,
370+ ) )
364371 }
365- _ => Err ( Error :: new (
366- ErrorKind :: DataInvalid ,
367- "List type must have list data type" ,
368- ) ) ,
369- }
372+ } ;
373+
374+ let id = get_field_id ( element_field) ?;
375+ let doc = get_field_doc ( element_field) ;
376+ let element_field = Arc :: new ( NestedField {
377+ id,
378+ doc,
379+ name : "element" . to_string ( ) ,
380+ required : !element_field. is_nullable ( ) ,
381+ field_type : Box :: new ( value. clone ( ) ) ,
382+ initial_default : None ,
383+ write_default : None ,
384+ } ) ;
385+ Ok ( Type :: List ( ListType { element_field } ) )
370386 }
371387
372388 fn map ( & mut self , map : & DataType , key_value : Self :: T , value : Self :: T ) -> Result < Self :: T > {
@@ -444,11 +460,11 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter {
444460 {
445461 Ok ( Type :: Primitive ( PrimitiveType :: Timestamptz ) )
446462 }
447- DataType :: Binary => Ok ( Type :: Primitive ( PrimitiveType :: Binary ) ) ,
463+ DataType :: Binary | DataType :: LargeBinary => Ok ( Type :: Primitive ( PrimitiveType :: Binary ) ) ,
448464 DataType :: FixedSizeBinary ( width) => {
449465 Ok ( Type :: Primitive ( PrimitiveType :: Fixed ( * width as u64 ) ) )
450466 }
451- DataType :: Utf8 => Ok ( Type :: Primitive ( PrimitiveType :: String ) ) ,
467+ DataType :: Utf8 | DataType :: LargeUtf8 => Ok ( Type :: Primitive ( PrimitiveType :: String ) ) ,
452468 _ => Err ( Error :: new (
453469 ErrorKind :: DataInvalid ,
454470 format ! ( "Unsupported Arrow data type: {p}" ) ,
@@ -526,6 +542,10 @@ mod tests {
526542 ARROW_FIELD_ID_KEY . to_string( ) ,
527543 "3" . to_string( ) ,
528544 ) ] ) ) ,
545+ Field :: new( "n" , DataType :: LargeUtf8 , false ) . with_metadata( HashMap :: from( [ (
546+ ARROW_FIELD_ID_KEY . to_string( ) ,
547+ "21" . to_string( ) ,
548+ ) ] ) ) ,
529549 Field :: new( "d" , DataType :: Timestamp ( TimeUnit :: Microsecond , None ) , true ) . with_metadata(
530550 HashMap :: from( [ ( ARROW_FIELD_ID_KEY . to_string( ) , "4" . to_string( ) ) ] ) ,
531551 ) ,
@@ -570,6 +590,10 @@ mod tests {
570590 ARROW_FIELD_ID_KEY . to_string( ) ,
571591 "13" . to_string( ) ,
572592 ) ] ) ) ,
593+ Field :: new( "o" , DataType :: LargeBinary , false ) . with_metadata( HashMap :: from( [ (
594+ ARROW_FIELD_ID_KEY . to_string( ) ,
595+ "22" . to_string( ) ,
596+ ) ] ) ) ,
573597 Field :: new( "m" , DataType :: FixedSizeBinary ( 10 ) , false ) . with_metadata( HashMap :: from( [ (
574598 ARROW_FIELD_ID_KEY . to_string( ) ,
575599 "11" . to_string( ) ,
@@ -588,6 +612,36 @@ mod tests {
588612 ARROW_FIELD_ID_KEY . to_string( ) ,
589613 "14" . to_string( ) ,
590614 ) ] ) ) ,
615+ Field :: new(
616+ "large_list" ,
617+ DataType :: LargeList ( Arc :: new(
618+ Field :: new( "element" , DataType :: Utf8 , false ) . with_metadata( HashMap :: from( [ (
619+ ARROW_FIELD_ID_KEY . to_string( ) ,
620+ "23" . to_string( ) ,
621+ ) ] ) ) ,
622+ ) ) ,
623+ true ,
624+ )
625+ . with_metadata( HashMap :: from( [ (
626+ ARROW_FIELD_ID_KEY . to_string( ) ,
627+ "24" . to_string( ) ,
628+ ) ] ) ) ,
629+ Field :: new(
630+ "fixed_list" ,
631+ DataType :: FixedSizeList (
632+ Arc :: new(
633+ Field :: new( "element" , DataType :: Binary , false ) . with_metadata(
634+ HashMap :: from( [ ( ARROW_FIELD_ID_KEY . to_string( ) , "26" . to_string( ) ) ] ) ,
635+ ) ,
636+ ) ,
637+ 10 ,
638+ ) ,
639+ true ,
640+ )
641+ . with_metadata( HashMap :: from( [ (
642+ ARROW_FIELD_ID_KEY . to_string( ) ,
643+ "25" . to_string( ) ,
644+ ) ] ) ) ,
591645 Field :: new( "map" , map, false ) . with_metadata( HashMap :: from( [ (
592646 ARROW_FIELD_ID_KEY . to_string( ) ,
593647 "16" . to_string( ) ,
@@ -622,6 +676,12 @@ mod tests {
622676 "required":true,
623677 "type":"string"
624678 },
679+ {
680+ "id":21,
681+ "name":"n",
682+ "required":true,
683+ "type":"string"
684+ },
625685 {
626686 "id":4,
627687 "name":"d",
@@ -676,6 +736,12 @@ mod tests {
676736 "required":true,
677737 "type":"binary"
678738 },
739+ {
740+ "id":22,
741+ "name":"o",
742+ "required":true,
743+ "type":"binary"
744+ },
679745 {
680746 "id":11,
681747 "name":"m",
@@ -693,6 +759,28 @@ mod tests {
693759 "element": "int"
694760 }
695761 },
762+ {
763+ "id":24,
764+ "name":"large_list",
765+ "required": false,
766+ "type": {
767+ "type": "list",
768+ "element-id": 23,
769+ "element-required": true,
770+ "element": "string"
771+ }
772+ },
773+ {
774+ "id":25,
775+ "name":"fixed_list",
776+ "required": false,
777+ "type": {
778+ "type": "list",
779+ "element-id": 26,
780+ "element-required": true,
781+ "element": "binary"
782+ }
783+ },
696784 {
697785 "id":16,
698786 "name":"map",
0 commit comments