1515// specific language governing permissions and limitations 
1616// under the License. 
1717
18- use  crate :: { buffer:: MutableBuffer ,  datatypes:: DataType ,  util:: bit_util} ; 
18+ use  crate :: { 
19+     buffer:: MutableBuffer , 
20+     datatypes:: DataType , 
21+     error:: { ArrowError ,  Result } , 
22+     util:: bit_util, 
23+ } ; 
1924
2025use  super :: { 
2126    data:: { into_buffers,  new_buffers} , 
@@ -166,6 +171,65 @@ impl<'a> std::fmt::Debug for MutableArrayData<'a> {
166171    } 
167172} 
168173
174+ /// Builds an extend that adds `offset` to the source primitive 
175+ /// Additionally validates that `max` fits into the 
176+ /// the underlying primitive returning None if not 
177+ fn  build_extend_dictionary ( 
178+     array :  & ArrayData , 
179+     offset :  usize , 
180+     max :  usize , 
181+ )  -> Option < Extend >  { 
182+     use  crate :: datatypes:: * ; 
183+     use  std:: convert:: TryInto ; 
184+ 
185+     match  array. data_type ( )  { 
186+         DataType :: Dictionary ( child_data_type,  _)  => match  child_data_type. as_ref ( )  { 
187+             DataType :: UInt8  => { 
188+                 let  _:  u8  = max. try_into ( ) . ok ( ) ?; 
189+                 let  offset:  u8  = offset. try_into ( ) . ok ( ) ?; 
190+                 Some ( primitive:: build_extend_with_offset ( array,  offset) ) 
191+             } 
192+             DataType :: UInt16  => { 
193+                 let  _:  u16  = max. try_into ( ) . ok ( ) ?; 
194+                 let  offset:  u16  = offset. try_into ( ) . ok ( ) ?; 
195+                 Some ( primitive:: build_extend_with_offset ( array,  offset) ) 
196+             } 
197+             DataType :: UInt32  => { 
198+                 let  _:  u32  = max. try_into ( ) . ok ( ) ?; 
199+                 let  offset:  u32  = offset. try_into ( ) . ok ( ) ?; 
200+                 Some ( primitive:: build_extend_with_offset ( array,  offset) ) 
201+             } 
202+             DataType :: UInt64  => { 
203+                 let  _:  u64  = max. try_into ( ) . ok ( ) ?; 
204+                 let  offset:  u64  = offset. try_into ( ) . ok ( ) ?; 
205+                 Some ( primitive:: build_extend_with_offset ( array,  offset) ) 
206+             } 
207+             DataType :: Int8  => { 
208+                 let  _:  i8  = max. try_into ( ) . ok ( ) ?; 
209+                 let  offset:  i8  = offset. try_into ( ) . ok ( ) ?; 
210+                 Some ( primitive:: build_extend_with_offset ( array,  offset) ) 
211+             } 
212+             DataType :: Int16  => { 
213+                 let  _:  i16  = max. try_into ( ) . ok ( ) ?; 
214+                 let  offset:  i16  = offset. try_into ( ) . ok ( ) ?; 
215+                 Some ( primitive:: build_extend_with_offset ( array,  offset) ) 
216+             } 
217+             DataType :: Int32  => { 
218+                 let  _:  i32  = max. try_into ( ) . ok ( ) ?; 
219+                 let  offset:  i32  = offset. try_into ( ) . ok ( ) ?; 
220+                 Some ( primitive:: build_extend_with_offset ( array,  offset) ) 
221+             } 
222+             DataType :: Int64  => { 
223+                 let  _:  i64  = max. try_into ( ) . ok ( ) ?; 
224+                 let  offset:  i64  = offset. try_into ( ) . ok ( ) ?; 
225+                 Some ( primitive:: build_extend_with_offset ( array,  offset) ) 
226+             } 
227+             _ => unreachable ! ( ) , 
228+         } , 
229+         _ => None , 
230+     } 
231+ } 
232+ 
169233fn  build_extend ( array :  & ArrayData )  -> Extend  { 
170234    use  crate :: datatypes:: * ; 
171235    match  array. data_type ( )  { 
@@ -199,17 +263,7 @@ fn build_extend(array: &ArrayData) -> Extend {
199263        } 
200264        DataType :: List ( _)  => list:: build_extend :: < i32 > ( array) , 
201265        DataType :: LargeList ( _)  => list:: build_extend :: < i64 > ( array) , 
202-         DataType :: Dictionary ( child_data_type,  _)  => match  child_data_type. as_ref ( )  { 
203-             DataType :: UInt8  => primitive:: build_extend :: < u8 > ( array) , 
204-             DataType :: UInt16  => primitive:: build_extend :: < u16 > ( array) , 
205-             DataType :: UInt32  => primitive:: build_extend :: < u32 > ( array) , 
206-             DataType :: UInt64  => primitive:: build_extend :: < u64 > ( array) , 
207-             DataType :: Int8  => primitive:: build_extend :: < i8 > ( array) , 
208-             DataType :: Int16  => primitive:: build_extend :: < i16 > ( array) , 
209-             DataType :: Int32  => primitive:: build_extend :: < i32 > ( array) , 
210-             DataType :: Int64  => primitive:: build_extend :: < i64 > ( array) , 
211-             _ => unreachable ! ( ) , 
212-         } , 
266+         DataType :: Dictionary ( _,  _)  => unreachable ! ( "should use build_extend_dictionary" ) , 
213267        DataType :: Struct ( _)  => structure:: build_extend ( array) , 
214268        DataType :: FixedSizeBinary ( _)  => fixed_binary:: build_extend ( array) , 
215269        DataType :: Float16  => unreachable ! ( ) , 
@@ -339,7 +393,29 @@ impl<'a> MutableArrayData<'a> {
339393        } ; 
340394
341395        let  dictionary = match  & data_type { 
342-             DataType :: Dictionary ( _,  _)  => Some ( arrays[ 0 ] . child_data ( ) [ 0 ] . clone ( ) ) , 
396+             DataType :: Dictionary ( _,  _)  => match  arrays. len ( )  { 
397+                 0  => unreachable ! ( ) , 
398+                 1  => Some ( arrays[ 0 ] . child_data ( ) [ 0 ] . clone ( ) ) , 
399+                 _ => { 
400+                     // Concat dictionaries together 
401+                     let  dictionaries:  Vec < _ >  =
402+                         arrays. iter ( ) . map ( |array| & array. child_data ( ) [ 0 ] ) . collect ( ) ; 
403+                     let  lengths:  Vec < _ >  = dictionaries
404+                         . iter ( ) 
405+                         . map ( |dictionary| dictionary. len ( ) ) 
406+                         . collect ( ) ; 
407+                     let  capacity = lengths. iter ( ) . sum ( ) ; 
408+ 
409+                     let  mut  mutable =
410+                         MutableArrayData :: new ( dictionaries,  false ,  capacity) ; 
411+ 
412+                     for  ( i,  len)  in  lengths. iter ( ) . enumerate ( )  { 
413+                         mutable. extend ( i,  0 ,  * len) 
414+                     } 
415+ 
416+                     Some ( mutable. freeze ( ) ) 
417+                 } 
418+             } , 
343419            _ => None , 
344420        } ; 
345421
@@ -353,7 +429,23 @@ impl<'a> MutableArrayData<'a> {
353429        let  null_bytes = bit_util:: ceil ( capacity,  8 ) ; 
354430        let  null_buffer = MutableBuffer :: from_len_zeroed ( null_bytes) ; 
355431
356-         let  extend_values = arrays. iter ( ) . map ( |array| build_extend ( array) ) . collect ( ) ; 
432+         let  extend_values = match  & data_type { 
433+             DataType :: Dictionary ( _,  _)  => { 
434+                 let  mut  next_offset = 0 ; 
435+                 let  extend_values:  Result < Vec < _ > >  = arrays
436+                     . iter ( ) 
437+                     . map ( |array| { 
438+                         let  offset = next_offset; 
439+                         next_offset += array. child_data ( ) [ 0 ] . len ( ) ; 
440+                         build_extend_dictionary ( array,  offset,  next_offset) 
441+                             . ok_or ( ArrowError :: DictionaryKeyOverflowError ) 
442+                     } ) 
443+                     . collect ( ) ; 
444+ 
445+                 extend_values. expect ( "MutableArrayData::new is infallible" ) 
446+             } 
447+             _ => arrays. iter ( ) . map ( |array| build_extend ( array) ) . collect ( ) , 
448+         } ; 
357449
358450        let  data = _MutableArrayData  { 
359451            data_type :  data_type. clone ( ) , 
0 commit comments