@@ -2914,7 +2914,7 @@ def test_groupby_with_timegrouper(self):
29142914 # TimeGrouper requires a sorted index
29152915 # also verifies that the resultant index has the correct name
29162916 import datetime as DT
2917- df = DataFrame ({
2917+ df_original = DataFrame ({
29182918 'Buyer' : 'Carl Carl Carl Carl Joe Carl' .split (),
29192919 'Quantity' : [18 ,3 ,5 ,1 ,9 ,3 ],
29202920 'Date' : [
@@ -2925,29 +2925,34 @@ def test_groupby_with_timegrouper(self):
29252925 DT .datetime (2013 ,12 ,2 ,12 ,0 ),
29262926 DT .datetime (2013 ,9 ,2 ,14 ,0 ),
29272927 ]})
2928- df = df .set_index (['Date' ])
2928+
2929+ # GH 6908 change target column's order
2930+ df_reordered = df_original .sort (columns = 'Quantity' )
29292931
2930- expected = DataFrame ({ 'Quantity' : np .nan },
2931- index = date_range ('20130901 13:00:00' ,'20131205 13:00:00' ,
2932- freq = '5D' ,name = 'Date' ,closed = 'left' ))
2933- expected .iloc [[0 ,6 ,18 ],0 ] = np .array ([24. ,6. ,9. ],dtype = 'float64' )
2932+ for df in [df_original , df_reordered ]:
2933+ df = df .set_index (['Date' ])
29342934
2935- result1 = df .resample ('5D' ,how = sum )
2936- assert_frame_equal (result1 , expected )
2935+ expected = DataFrame ({ 'Quantity' : np .nan },
2936+ index = date_range ('20130901 13:00:00' ,'20131205 13:00:00' ,
2937+ freq = '5D' ,name = 'Date' ,closed = 'left' ))
2938+ expected .iloc [[0 ,6 ,18 ],0 ] = np .array ([24. ,6. ,9. ],dtype = 'float64' )
29372939
2938- df_sorted = df .sort_index ()
2939- result2 = df_sorted .groupby (pd .TimeGrouper (freq = '5D' )).sum ()
2940- assert_frame_equal (result2 , expected )
2940+ result1 = df .resample ('5D' ,how = sum )
2941+ assert_frame_equal (result1 , expected )
29412942
2942- result3 = df .groupby (pd .TimeGrouper (freq = '5D' )).sum ()
2943- assert_frame_equal (result3 , expected )
2943+ df_sorted = df .sort_index ()
2944+ result2 = df_sorted .groupby (pd .TimeGrouper (freq = '5D' )).sum ()
2945+ assert_frame_equal (result2 , expected )
2946+
2947+ result3 = df .groupby (pd .TimeGrouper (freq = '5D' )).sum ()
2948+ assert_frame_equal (result3 , expected )
29442949
29452950 def test_groupby_with_timegrouper_methods (self ):
29462951 # GH 3881
29472952 # make sure API of timegrouper conforms
29482953
29492954 import datetime as DT
2950- df = pd .DataFrame ({
2955+ df_original = pd .DataFrame ({
29512956 'Branch' : 'A A A A A B' .split (),
29522957 'Buyer' : 'Carl Mark Carl Joe Joe Carl' .split (),
29532958 'Quantity' : [1 ,3 ,5 ,8 ,9 ,3 ],
@@ -2960,13 +2965,16 @@ def test_groupby_with_timegrouper_methods(self):
29602965 DT .datetime (2013 ,12 ,2 ,14 ,0 ),
29612966 ]})
29622967
2963- df = df .set_index ('Date' , drop = False )
2964- g = df .groupby (pd .TimeGrouper ('6M' ))
2965- self .assertTrue (g .group_keys )
2966- self .assertTrue (isinstance (g .grouper ,pd .core .groupby .BinGrouper ))
2967- groups = g .groups
2968- self .assertTrue (isinstance (groups ,dict ))
2969- self .assertTrue (len (groups ) == 3 )
2968+ df_sorted = df_original .sort (columns = 'Quantity' , ascending = False )
2969+
2970+ for df in [df_original , df_sorted ]:
2971+ df = df .set_index ('Date' , drop = False )
2972+ g = df .groupby (pd .TimeGrouper ('6M' ))
2973+ self .assertTrue (g .group_keys )
2974+ self .assertTrue (isinstance (g .grouper ,pd .core .groupby .BinGrouper ))
2975+ groups = g .groups
2976+ self .assertTrue (isinstance (groups ,dict ))
2977+ self .assertTrue (len (groups ) == 3 )
29702978
29712979 def test_timegrouper_with_reg_groups (self ):
29722980
@@ -2975,7 +2983,7 @@ def test_timegrouper_with_reg_groups(self):
29752983
29762984 import datetime as DT
29772985
2978- df = DataFrame ({
2986+ df_original = DataFrame ({
29792987 'Branch' : 'A A A A A A A B' .split (),
29802988 'Buyer' : 'Carl Mark Carl Carl Joe Joe Joe Carl' .split (),
29812989 'Quantity' : [1 ,3 ,5 ,1 ,8 ,1 ,9 ,3 ],
@@ -2990,32 +2998,34 @@ def test_timegrouper_with_reg_groups(self):
29902998 DT .datetime (2013 ,12 ,2 ,14 ,0 ),
29912999 ]}).set_index ('Date' )
29923000
2993- expected = DataFrame ({
2994- 'Buyer' : 'Carl Joe Mark' .split (),
2995- 'Quantity' : [10 ,18 ,3 ],
2996- 'Date' : [
2997- DT .datetime (2013 ,12 ,31 ,0 ,0 ),
2998- DT .datetime (2013 ,12 ,31 ,0 ,0 ),
2999- DT .datetime (2013 ,12 ,31 ,0 ,0 ),
3000- ]}).set_index (['Date' ,'Buyer' ])
3001-
3002- result = df .groupby ([pd .Grouper (freq = 'A' ),'Buyer' ]).sum ()
3003- assert_frame_equal (result ,expected )
3004-
3005- expected = DataFrame ({
3006- 'Buyer' : 'Carl Mark Carl Joe' .split (),
3007- 'Quantity' : [1 ,3 ,9 ,18 ],
3008- 'Date' : [
3009- DT .datetime (2013 ,1 ,1 ,0 ,0 ),
3010- DT .datetime (2013 ,1 ,1 ,0 ,0 ),
3011- DT .datetime (2013 ,7 ,1 ,0 ,0 ),
3012- DT .datetime (2013 ,7 ,1 ,0 ,0 ),
3013- ]}).set_index (['Date' ,'Buyer' ])
3014-
3015- result = df .groupby ([pd .Grouper (freq = '6MS' ),'Buyer' ]).sum ()
3016- assert_frame_equal (result ,expected )
3017-
3018- df = DataFrame ({
3001+ df_sorted = df_original .sort (columns = 'Quantity' , ascending = False )
3002+
3003+ for df in [df_original , df_sorted ]:
3004+ expected = DataFrame ({
3005+ 'Buyer' : 'Carl Joe Mark' .split (),
3006+ 'Quantity' : [10 ,18 ,3 ],
3007+ 'Date' : [
3008+ DT .datetime (2013 ,12 ,31 ,0 ,0 ),
3009+ DT .datetime (2013 ,12 ,31 ,0 ,0 ),
3010+ DT .datetime (2013 ,12 ,31 ,0 ,0 ),
3011+ ]}).set_index (['Date' ,'Buyer' ])
3012+
3013+ result = df .groupby ([pd .Grouper (freq = 'A' ),'Buyer' ]).sum ()
3014+ assert_frame_equal (result ,expected )
3015+
3016+ expected = DataFrame ({
3017+ 'Buyer' : 'Carl Mark Carl Joe' .split (),
3018+ 'Quantity' : [1 ,3 ,9 ,18 ],
3019+ 'Date' : [
3020+ DT .datetime (2013 ,1 ,1 ,0 ,0 ),
3021+ DT .datetime (2013 ,1 ,1 ,0 ,0 ),
3022+ DT .datetime (2013 ,7 ,1 ,0 ,0 ),
3023+ DT .datetime (2013 ,7 ,1 ,0 ,0 ),
3024+ ]}).set_index (['Date' ,'Buyer' ])
3025+ result = df .groupby ([pd .Grouper (freq = '6MS' ),'Buyer' ]).sum ()
3026+ assert_frame_equal (result ,expected )
3027+
3028+ df_original = DataFrame ({
30193029 'Branch' : 'A A A A A A A B' .split (),
30203030 'Buyer' : 'Carl Mark Carl Carl Joe Joe Joe Carl' .split (),
30213031 'Quantity' : [1 ,3 ,5 ,1 ,8 ,1 ,9 ,3 ],
@@ -3030,81 +3040,105 @@ def test_timegrouper_with_reg_groups(self):
30303040 DT .datetime (2013 ,10 ,2 ,14 ,0 ),
30313041 ]}).set_index ('Date' )
30323042
3033- expected = DataFrame ({
3034- 'Buyer' : 'Carl Joe Mark Carl Joe' .split (),
3035- 'Quantity' : [6 ,8 ,3 ,4 ,10 ],
3036- 'Date' : [
3037- DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3038- DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3039- DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3040- DT .datetime (2013 ,10 ,2 ,0 ,0 ),
3041- DT .datetime (2013 ,10 ,2 ,0 ,0 ),
3042- ]}).set_index (['Date' ,'Buyer' ])
3043-
3044- result = df .groupby ([pd .Grouper (freq = '1D' ),'Buyer' ]).sum ()
3045- assert_frame_equal (result ,expected )
3046-
3047- result = df .groupby ([pd .Grouper (freq = '1M' ),'Buyer' ]).sum ()
3048- expected = DataFrame ({
3049- 'Buyer' : 'Carl Joe Mark' .split (),
3050- 'Quantity' : [10 ,18 ,3 ],
3051- 'Date' : [
3052- DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3053- DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3054- DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3055- ]}).set_index (['Date' ,'Buyer' ])
3056- assert_frame_equal (result ,expected )
3057-
3058- # passing the name
3059- df = df .reset_index ()
3060- result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ),'Buyer' ]).sum ()
3061- assert_frame_equal (result ,expected )
3062-
3063- self .assertRaises (KeyError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,key = 'foo' ),'Buyer' ]).sum ())
3064-
3065- # passing the level
3066- df = df .set_index ('Date' )
3067- result = df .groupby ([pd .Grouper (freq = '1M' ,level = 'Date' ),'Buyer' ]).sum ()
3068- assert_frame_equal (result ,expected )
3069- result = df .groupby ([pd .Grouper (freq = '1M' ,level = 0 ),'Buyer' ]).sum ()
3070- assert_frame_equal (result ,expected )
3071-
3072- self .assertRaises (ValueError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,level = 'foo' ),'Buyer' ]).sum ())
3073-
3074- # multi names
3075- df = df .copy ()
3076- df ['Date' ] = df .index + pd .offsets .MonthEnd (2 )
3077- result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ),'Buyer' ]).sum ()
3078- expected = DataFrame ({
3079- 'Buyer' : 'Carl Joe Mark' .split (),
3080- 'Quantity' : [10 ,18 ,3 ],
3081- 'Date' : [
3082- DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3083- DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3084- DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3085- ]}).set_index (['Date' ,'Buyer' ])
3086- assert_frame_equal (result ,expected )
3087-
3088- # error as we have both a level and a name!
3089- self .assertRaises (ValueError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ,level = 'Date' ),'Buyer' ]).sum ())
3090-
3043+ df_sorted = df_original .sort (columns = 'Quantity' , ascending = False )
3044+ for df in [df_original , df_sorted ]:
3045+
3046+ expected = DataFrame ({
3047+ 'Buyer' : 'Carl Joe Mark Carl Joe' .split (),
3048+ 'Quantity' : [6 ,8 ,3 ,4 ,10 ],
3049+ 'Date' : [
3050+ DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3051+ DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3052+ DT .datetime (2013 ,10 ,1 ,0 ,0 ),
3053+ DT .datetime (2013 ,10 ,2 ,0 ,0 ),
3054+ DT .datetime (2013 ,10 ,2 ,0 ,0 ),
3055+ ]}).set_index (['Date' ,'Buyer' ])
3056+
3057+ result = df .groupby ([pd .Grouper (freq = '1D' ),'Buyer' ]).sum ()
3058+ assert_frame_equal (result ,expected )
3059+
3060+ result = df .groupby ([pd .Grouper (freq = '1M' ),'Buyer' ]).sum ()
3061+ expected = DataFrame ({
3062+ 'Buyer' : 'Carl Joe Mark' .split (),
3063+ 'Quantity' : [10 ,18 ,3 ],
3064+ 'Date' : [
3065+ DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3066+ DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3067+ DT .datetime (2013 ,10 ,31 ,0 ,0 ),
3068+ ]}).set_index (['Date' ,'Buyer' ])
3069+ assert_frame_equal (result ,expected )
3070+
3071+ # passing the name
3072+ df = df .reset_index ()
3073+ result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ),'Buyer' ]).sum ()
3074+ assert_frame_equal (result ,expected )
3075+
3076+ self .assertRaises (KeyError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,key = 'foo' ),'Buyer' ]).sum ())
3077+
3078+ # passing the level
3079+ df = df .set_index ('Date' )
3080+ result = df .groupby ([pd .Grouper (freq = '1M' ,level = 'Date' ),'Buyer' ]).sum ()
3081+ assert_frame_equal (result ,expected )
3082+ result = df .groupby ([pd .Grouper (freq = '1M' ,level = 0 ),'Buyer' ]).sum ()
3083+ assert_frame_equal (result ,expected )
3084+
3085+ self .assertRaises (ValueError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,level = 'foo' ),'Buyer' ]).sum ())
3086+
3087+ # multi names
3088+ df = df .copy ()
3089+ df ['Date' ] = df .index + pd .offsets .MonthEnd (2 )
3090+ result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ),'Buyer' ]).sum ()
3091+ expected = DataFrame ({
3092+ 'Buyer' : 'Carl Joe Mark' .split (),
3093+ 'Quantity' : [10 ,18 ,3 ],
3094+ 'Date' : [
3095+ DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3096+ DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3097+ DT .datetime (2013 ,11 ,30 ,0 ,0 ),
3098+ ]}).set_index (['Date' ,'Buyer' ])
3099+ assert_frame_equal (result ,expected )
3100+
3101+ # error as we have both a level and a name!
3102+ self .assertRaises (ValueError , lambda : df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' ,level = 'Date' ),'Buyer' ]).sum ())
3103+
3104+
3105+ # single groupers
3106+ expected = DataFrame ({ 'Quantity' : [31 ],
3107+ 'Date' : [DT .datetime (2013 ,10 ,31 ,0 ,0 )] }).set_index ('Date' )
3108+ result = df .groupby (pd .Grouper (freq = '1M' )).sum ()
3109+ assert_frame_equal (result , expected )
30913110
3092- # single groupers
3093- expected = DataFrame ({ 'Quantity' : [31 ],
3094- 'Date' : [DT .datetime (2013 ,10 ,31 ,0 ,0 )] }).set_index ('Date' )
3095- result = df .groupby (pd .Grouper (freq = '1M' )).sum ()
3096- assert_frame_equal (result , expected )
3111+ result = df .groupby ([pd .Grouper (freq = '1M' )]).sum ()
3112+ assert_frame_equal (result , expected )
30973113
3098- result = df .groupby ([pd .Grouper (freq = '1M' )]).sum ()
3099- assert_frame_equal (result , expected )
3114+ expected = DataFrame ({ 'Quantity' : [31 ],
3115+ 'Date' : [DT .datetime (2013 ,11 ,30 ,0 ,0 )] }).set_index ('Date' )
3116+ result = df .groupby (pd .Grouper (freq = '1M' ,key = 'Date' )).sum ()
3117+ assert_frame_equal (result , expected )
31003118
3101- expected = DataFrame ({ 'Quantity' : [31 ],
3102- 'Date' : [DT .datetime (2013 ,11 ,30 ,0 ,0 )] }).set_index ('Date' )
3103- result = df .groupby (pd .Grouper (freq = '1M' ,key = 'Date' )).sum ()
3104- assert_frame_equal (result , expected )
3119+ result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' )]).sum ()
3120+ assert_frame_equal (result , expected )
31053121
3106- result = df .groupby ([pd .Grouper (freq = '1M' ,key = 'Date' )]).sum ()
3107- assert_frame_equal (result , expected )
3122+ # GH 6764 multiple grouping with/without sort
3123+ df = DataFrame ({
3124+ 'date' : pd .to_datetime ([
3125+ '20121002' ,'20121007' ,'20130130' ,'20130202' ,'20130305' ,'20121002' ,
3126+ '20121207' ,'20130130' ,'20130202' ,'20130305' ,'20130202' ,'20130305' ]),
3127+ 'user_id' : [1 ,1 ,1 ,1 ,1 ,3 ,3 ,3 ,5 ,5 ,5 ,5 ],
3128+ 'whole_cost' : [1790 ,364 ,280 ,259 ,201 ,623 ,90 ,312 ,359 ,301 ,359 ,801 ],
3129+ 'cost1' : [12 ,15 ,10 ,24 ,39 ,1 ,0 ,90 ,45 ,34 ,1 ,12 ] }).set_index ('date' )
3130+
3131+ for freq in ['D' , 'M' , 'A' , 'Q-APR' ]:
3132+ expected = df .groupby ('user_id' )['whole_cost' ].resample (
3133+ freq , how = 'sum' ).dropna ().reorder_levels (
3134+ ['date' ,'user_id' ]).sortlevel ().astype ('int64' )
3135+ expected .name = 'whole_cost'
3136+
3137+ result1 = df .sort_index ().groupby ([pd .TimeGrouper (freq = freq ), 'user_id' ])['whole_cost' ].sum ()
3138+ assert_series_equal (result1 , expected )
3139+
3140+ result2 = df .groupby ([pd .TimeGrouper (freq = freq ), 'user_id' ])['whole_cost' ].sum ()
3141+ assert_series_equal (result2 , expected )
31083142
31093143 def test_cumcount (self ):
31103144 df = DataFrame ([['a' ], ['a' ], ['a' ], ['b' ], ['a' ]], columns = ['A' ])
0 commit comments