@@ -379,15 +379,24 @@ def time_groupby_dt_timegrouper_size(self):
379379#----------------------------------------------------------------------
380380# groupby with a variable value for ngroups
381381
382- class groupby_ngroups_10000 (object ):
382+ class groupby_ngroups_int_10000 (object ):
383383 goal_time = 0.2
384+ dtype = 'int'
385+ ngroups = 10000
384386
385387 def setup (self ):
386388 np .random .seed (1234 )
387- self .ngroups = 10000
388- self .size = (self .ngroups * 2 )
389- self .rng = np .arange (self .ngroups )
390- self .df = DataFrame (dict (timestamp = self .rng .take (np .random .randint (0 , self .ngroups , size = self .size )), value = np .random .randint (0 , self .size , size = self .size )))
389+ size = self .ngroups * 2
390+ rng = np .arange (self .ngroups )
391+ ts = rng .take (np .random .randint (0 , self .ngroups , size = size ))
392+ if self .dtype == 'int' :
393+ value = np .random .randint (0 , size , size = size )
394+ else :
395+ value = np .concatenate ([np .random .random (self .ngroups ) * 0.1 ,
396+ np .random .random (self .ngroups ) * 10.0 ])
397+
398+ self .df = DataFrame ({'timestamp' : ts ,
399+ 'value' : value })
391400
def time_all(self):
    """Time ``all`` over the ``timestamp`` column grouped by ``value``."""
    by_value = self.df.groupby('value')
    by_value['timestamp'].all()
@@ -482,109 +491,35 @@ def time_value_counts(self):
def time_var(self):
    """Time ``var`` over the ``timestamp`` column grouped by ``value``."""
    by_value = self.df.groupby('value')
    by_value['timestamp'].var()
484493
485-
486- class groupby_ngroups_100 (object ):
class groupby_ngroups_int_100(groupby_ngroups_int_10000):
    """Integer-keyed variant of the ngroups benchmarks with ``ngroups = 100``."""

    # Parameters read by the ``setup`` inherited from the base class.
    ngroups = 100
    dtype = 'int'

    goal_time = 0.2
488498
489- def setup (self ):
490- np .random .seed (1234 )
491- self .ngroups = 100
492- self .size = (self .ngroups * 2 )
493- self .rng = np .arange (self .ngroups )
494- self .df = DataFrame (dict (timestamp = self .rng .take (np .random .randint (0 , self .ngroups , size = self .size )), value = np .random .randint (0 , self .size , size = self .size )))
495-
496- def time_all (self ):
497- self .df .groupby ('value' )['timestamp' ].all ()
498-
499- def time_any (self ):
500- self .df .groupby ('value' )['timestamp' ].any ()
501-
502- def time_count (self ):
503- self .df .groupby ('value' )['timestamp' ].count ()
504-
505- def time_cumcount (self ):
506- self .df .groupby ('value' )['timestamp' ].cumcount ()
507-
508- def time_cummax (self ):
509- self .df .groupby ('value' )['timestamp' ].cummax ()
510-
511- def time_cummin (self ):
512- self .df .groupby ('value' )['timestamp' ].cummin ()
513-
514- def time_cumprod (self ):
515- self .df .groupby ('value' )['timestamp' ].cumprod ()
516-
517- def time_cumsum (self ):
518- self .df .groupby ('value' )['timestamp' ].cumsum ()
519-
520- def time_describe (self ):
521- self .df .groupby ('value' )['timestamp' ].describe ()
522-
523- def time_diff (self ):
524- self .df .groupby ('value' )['timestamp' ].diff ()
525-
526- def time_first (self ):
527- self .df .groupby ('value' )['timestamp' ].first ()
528-
529- def time_head (self ):
530- self .df .groupby ('value' )['timestamp' ].head ()
531-
532- def time_last (self ):
533- self .df .groupby ('value' )['timestamp' ].last ()
534-
535- def time_mad (self ):
536- self .df .groupby ('value' )['timestamp' ].mad ()
537-
538- def time_max (self ):
539- self .df .groupby ('value' )['timestamp' ].max ()
540-
541- def time_mean (self ):
542- self .df .groupby ('value' )['timestamp' ].mean ()
543-
544- def time_median (self ):
545- self .df .groupby ('value' )['timestamp' ].median ()
546-
547- def time_min (self ):
548- self .df .groupby ('value' )['timestamp' ].min ()
549-
550- def time_nunique (self ):
551- self .df .groupby ('value' )['timestamp' ].nunique ()
552-
553- def time_pct_change (self ):
554- self .df .groupby ('value' )['timestamp' ].pct_change ()
555-
556- def time_prod (self ):
557- self .df .groupby ('value' )['timestamp' ].prod ()
558-
559- def time_rank (self ):
560- self .df .groupby ('value' )['timestamp' ].rank ()
561-
562- def time_sem (self ):
563- self .df .groupby ('value' )['timestamp' ].sem ()
564-
565- def time_size (self ):
566- self .df .groupby ('value' )['timestamp' ].size ()
567-
568- def time_skew (self ):
569- self .df .groupby ('value' )['timestamp' ].skew ()
570-
571- def time_std (self ):
572- self .df .groupby ('value' )['timestamp' ].std ()
class groupby_ngroups_float_100(groupby_ngroups_int_10000):
    """Float-keyed variant of the ngroups benchmarks with ``ngroups = 100``."""

    # Parameters read by the ``setup`` inherited from the base class; any
    # dtype other than 'int' selects the float ``value`` column there.
    ngroups = 100
    dtype = 'float'

    goal_time = 0.2
573503
574- def time_sum (self ):
575- self .df .groupby ('value' )['timestamp' ].sum ()
class groupby_ngroups_float_10000(groupby_ngroups_int_10000):
    """Float-keyed variant of the ngroups benchmarks with ``ngroups = 10000``."""

    # Parameters read by the ``setup`` inherited from the base class; any
    # dtype other than 'int' selects the float ``value`` column there.
    ngroups = 10000
    dtype = 'float'

    goal_time = 0.2
576508
577- def time_tail (self ):
578- self .df .groupby ('value' )['timestamp' ].tail ()
579509
580- def time_unique (self ):
581- self .df .groupby ('value' )['timestamp' ].unique ()
class groupby_float32(object):
    """Benchmark a grouped sum where key and data are both float32 (GH 13335)."""

    goal_time = 0.2

    def setup(self):
        """Build ``self.df``: 200,000 rows, columns ``a`` and ``b`` sharing one float32 array."""
        # Seed the RNG so every run benchmarks identical data, as the
        # ngroups benchmark setup in this file does.
        np.random.seed(1234)
        small = (np.random.random(10000) * 0.1).astype(np.float32)
        large = (np.random.random(10000) * 10.0).astype(np.float32)
        # Each of the 20,000 drawn values is repeated 10 times.
        arr = np.repeat(np.concatenate((small, large)), 10)
        self.df = DataFrame(dict(a=arr, b=arr))

    def time_groupby_sum(self):
        """Time summing column ``b`` grouped by column ``a``."""
        # The column key must be exactly 'b'; a key with trailing
        # whitespace does not exist in the frame and raises KeyError.
        self.df.groupby(['a'])['b'].sum()
588523
589524
590525#----------------------------------------------------------------------
0 commit comments