@@ -752,7 +752,7 @@ where
752752
753753 fn size ( & self ) -> usize {
754754 self . vals . capacity ( ) * size_of :: < T :: Native > ( )
755- + self . null_builder . capacity ( ) / 8 // capacity is in bits, so convert to bytes
755+ + self . null_builder . capacity ( ) / 8 // capacity is in bits, so convert to bytes
756756 + self . is_sets . capacity ( ) / 8
757757 + self . size_of_orderings
758758 + self . min_of_each_group_buf . 0 . capacity ( ) * size_of :: < usize > ( )
@@ -827,9 +827,14 @@ impl FirstValueAccumulator {
827827 }
828828
829829 // Updates state with the values in the given row.
830- fn update_with_new_row ( & mut self , row : & [ ScalarValue ] ) {
831- self . first = row[ 0 ] . clone ( ) ;
832- self . orderings = row[ 1 ..] . to_vec ( ) ;
830+ fn update_with_new_row ( & mut self , mut row : Vec < ScalarValue > ) {
831+ // Ensure any Array based scalars hold have a single value to reduce memory pressure
832+ row. iter_mut ( ) . for_each ( |s| {
833+ s. compact ( ) ;
834+ } ) ;
835+
836+ self . first = row. remove ( 0 ) ;
837+ self . orderings = row;
833838 self . is_set = true ;
834839 }
835840
@@ -888,7 +893,7 @@ impl Accumulator for FirstValueAccumulator {
888893 if !self . is_set {
889894 if let Some ( first_idx) = self . get_first_idx ( values) ? {
890895 let row = get_row_at_idx ( values, first_idx) ?;
891- self . update_with_new_row ( & row) ;
896+ self . update_with_new_row ( row) ;
892897 }
893898 } else if !self . requirement_satisfied {
894899 if let Some ( first_idx) = self . get_first_idx ( values) ? {
@@ -901,7 +906,7 @@ impl Accumulator for FirstValueAccumulator {
901906 ) ?
902907 . is_gt ( )
903908 {
904- self . update_with_new_row ( & row) ;
909+ self . update_with_new_row ( row) ;
905910 }
906911 }
907912 }
@@ -925,7 +930,7 @@ impl Accumulator for FirstValueAccumulator {
925930 let min = ( 0 ..filtered_states[ 0 ] . len ( ) ) . min_by ( |& a, & b| comparator. compare ( a, b) ) ;
926931
927932 if let Some ( first_idx) = min {
928- let first_row = get_row_at_idx ( & filtered_states, first_idx) ?;
933+ let mut first_row = get_row_at_idx ( & filtered_states, first_idx) ?;
929934 // When collecting orderings, we exclude the is_set flag from the state.
930935 let first_ordering = & first_row[ 1 ..is_set_idx] ;
931936 let sort_options = get_sort_options ( self . ordering_req . as_ref ( ) ) ;
@@ -936,7 +941,9 @@ impl Accumulator for FirstValueAccumulator {
936941 // Update with first value in the state. Note that we should exclude the
937942 // is_set flag from the state. Otherwise, we will end up with a state
938943 // containing two is_set flags.
939- self . update_with_new_row ( & first_row[ 0 ..is_set_idx] ) ;
944+ assert ! ( is_set_idx <= first_row. len( ) ) ;
945+ first_row. resize ( is_set_idx, ScalarValue :: Null ) ;
946+ self . update_with_new_row ( first_row) ;
940947 }
941948 }
942949 Ok ( ( ) )
@@ -1226,9 +1233,14 @@ impl LastValueAccumulator {
12261233 }
12271234
12281235 // Updates state with the values in the given row.
1229- fn update_with_new_row ( & mut self , row : & [ ScalarValue ] ) {
1230- self . last = row[ 0 ] . clone ( ) ;
1231- self . orderings = row[ 1 ..] . to_vec ( ) ;
1236+ fn update_with_new_row ( & mut self , mut row : Vec < ScalarValue > ) {
1237+ // Ensure any Array based scalars hold have a single value to reduce memory pressure
1238+ row. iter_mut ( ) . for_each ( |s| {
1239+ s. compact ( ) ;
1240+ } ) ;
1241+
1242+ self . last = row. remove ( 0 ) ;
1243+ self . orderings = row;
12321244 self . is_set = true ;
12331245 }
12341246
@@ -1289,7 +1301,7 @@ impl Accumulator for LastValueAccumulator {
12891301 if !self . is_set || self . requirement_satisfied {
12901302 if let Some ( last_idx) = self . get_last_idx ( values) ? {
12911303 let row = get_row_at_idx ( values, last_idx) ?;
1292- self . update_with_new_row ( & row) ;
1304+ self . update_with_new_row ( row) ;
12931305 }
12941306 } else if let Some ( last_idx) = self . get_last_idx ( values) ? {
12951307 let row = get_row_at_idx ( values, last_idx) ?;
@@ -1302,7 +1314,7 @@ impl Accumulator for LastValueAccumulator {
13021314 ) ?
13031315 . is_lt ( )
13041316 {
1305- self . update_with_new_row ( & row) ;
1317+ self . update_with_new_row ( row) ;
13061318 }
13071319 }
13081320
@@ -1326,7 +1338,7 @@ impl Accumulator for LastValueAccumulator {
13261338 let max = ( 0 ..filtered_states[ 0 ] . len ( ) ) . max_by ( |& a, & b| comparator. compare ( a, b) ) ;
13271339
13281340 if let Some ( last_idx) = max {
1329- let last_row = get_row_at_idx ( & filtered_states, last_idx) ?;
1341+ let mut last_row = get_row_at_idx ( & filtered_states, last_idx) ?;
13301342 // When collecting orderings, we exclude the is_set flag from the state.
13311343 let last_ordering = & last_row[ 1 ..is_set_idx] ;
13321344 let sort_options = get_sort_options ( self . ordering_req . as_ref ( ) ) ;
@@ -1339,7 +1351,9 @@ impl Accumulator for LastValueAccumulator {
13391351 // Update with last value in the state. Note that we should exclude the
13401352 // is_set flag from the state. Otherwise, we will end up with a state
13411353 // containing two is_set flags.
1342- self . update_with_new_row ( & last_row[ 0 ..is_set_idx] ) ;
1354+ assert ! ( is_set_idx <= last_row. len( ) ) ;
1355+ last_row. resize ( is_set_idx, ScalarValue :: Null ) ;
1356+ self . update_with_new_row ( last_row) ;
13431357 }
13441358 }
13451359 Ok ( ( ) )
@@ -1382,7 +1396,13 @@ fn convert_to_sort_cols(arrs: &[ArrayRef], sort_exprs: &LexOrdering) -> Vec<Sort
13821396
13831397#[ cfg( test) ]
13841398mod tests {
1385- use arrow:: { array:: Int64Array , compute:: SortOptions , datatypes:: Schema } ;
1399+ use std:: iter:: repeat_with;
1400+
1401+ use arrow:: {
1402+ array:: { Int64Array , ListArray } ,
1403+ compute:: SortOptions ,
1404+ datatypes:: Schema ,
1405+ } ;
13861406 use datafusion_physical_expr:: { expressions:: col, PhysicalSortExpr } ;
13871407
13881408 use super :: * ;
@@ -1772,4 +1792,60 @@ mod tests {
17721792
17731793 Ok ( ( ) )
17741794 }
1795+
/// Checks that the size reported by a `FirstValueAccumulator` after a single
/// `update_batch` call is the same for a 10,000-row list batch as for a
/// one-row list batch.
#[test]
fn test_first_list_acc_size() -> Result<()> {
    /// Feeds `values` to a fresh accumulator and returns its reported size.
    fn size_after_batch(values: &[ArrayRef]) -> Result<usize> {
        // Declare the element type that `from_iter_primitive::<Int32Type, _, _>`
        // produces for the batches below (nullable `Int32` list items), so the
        // accumulator's declared type agrees with the data it is fed.
        let mut first_accumulator = FirstValueAccumulator::try_new(
            &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
            &[],
            LexOrdering::default(),
            false,
        )?;

        first_accumulator.update_batch(values)?;

        Ok(first_accumulator.size())
    }

    // Same single-element list in every row; only the number of rows differs.
    let large_batch = ListArray::from_iter_primitive::<Int32Type, _, _>(
        repeat_with(|| Some(vec![Some(1)])).take(10000),
    );
    let small_batch =
        ListArray::from_iter_primitive::<Int32Type, _, _>([Some(vec![Some(1)])]);

    let large_size = size_after_batch(&[Arc::new(large_batch)])?;
    let small_size = size_after_batch(&[Arc::new(small_batch)])?;
    assert_eq!(large_size, small_size);

    Ok(())
}
1823+
/// Checks that the size reported by a `LastValueAccumulator` after a single
/// `update_batch` call is the same for a 10,000-row list batch as for a
/// one-row list batch.
#[test]
fn test_last_list_acc_size() -> Result<()> {
    /// Feeds `values` to a fresh accumulator and returns its reported size.
    fn size_after_batch(values: &[ArrayRef]) -> Result<usize> {
        // Declare the element type that `from_iter_primitive::<Int32Type, _, _>`
        // produces for the batches below (nullable `Int32` list items), so the
        // accumulator's declared type agrees with the data it is fed.
        let mut last_accumulator = LastValueAccumulator::try_new(
            &DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
            &[],
            LexOrdering::default(),
            false,
        )?;

        last_accumulator.update_batch(values)?;

        Ok(last_accumulator.size())
    }

    // Same single-element list in every row; only the number of rows differs.
    let large_batch = ListArray::from_iter_primitive::<Int32Type, _, _>(
        repeat_with(|| Some(vec![Some(1)])).take(10000),
    );
    let small_batch =
        ListArray::from_iter_primitive::<Int32Type, _, _>([Some(vec![Some(1)])]);

    let large_size = size_after_batch(&[Arc::new(large_batch)])?;
    let small_size = size_after_batch(&[Arc::new(small_batch)])?;
    assert_eq!(large_size, small_size);

    Ok(())
}
17751851}
0 commit comments