@@ -42,9 +42,11 @@ class DataFrame(Picklable, Groupable):
4242 ----------
4343 data : dict
4444 Mapping of column name --> array or Series/TimeSeries objects
45- index : array-like
46- Specific index to use for the Frame, Series will be conformed to this
47- if you provide it.
45+ index : array-like, optional
46+ Specific index to use for the Frame, Series will be conformed
47+ to this if you provide it. If omitted, the index will be
48+ inferred from the input Series.
49+ columns : array-like, optional
4850
4951 Notes
5052 -----
@@ -56,12 +58,12 @@ class DataFrame(Picklable, Groupable):
5658 --------
5759 DataMatrix: more efficient version of DataFrame for most operations
5860
59- Example usage
60- -------------
61+ Example
62+ -------
6163 >>> d = {'col1' : ts1, 'col2' : ts2}
6264 >>> df = DataFrame(data=d, index=someIndex)
6365 """
64- def __init__ (self , data = None , index = None ):
66+ def __init__ (self , data = None , index = None , columns = None ):
6567 self ._series = {}
6668 if data is not None and len (data ) > 0 :
6769 if index is None :
@@ -75,7 +77,7 @@ def __init__(self, data=None, index=None):
7577
7678 for k , v in data .iteritems ():
7779 if isinstance (v , Series ):
78- # Forces homogoneity and copies data
80+ # Forces homogeneity and copies data
7981 self ._series [k ] = v .reindex (self .index )
8082 else :
8183 # Copies data and checks length
@@ -169,8 +171,8 @@ def fromDict(cls, inputDict=None, castFloat=True, **kwds):
169171
170172 def toDict (self ):
171173 """
172- Simpler pseudo-inverse operation of dictToDataFrame , NaN values will be
173- included in the resulting dict-tree.
174+ Simpler pseudo-inverse operation of DataFrame.fromDict. NaN
175+ values will be included in the resulting dict-tree.
174176
175177 Return
176178 ------
@@ -316,9 +318,9 @@ def __setitem__(self, key, value):
316318
317319 def __delitem__ (self , key ):
318320 """
319- Delete column from DataFrame (only deletes the reference)
321+ Delete column from DataFrame
320322 """
321- self ._series . pop ( key , None )
323+ del self ._series [ key ]
322324
323325 def pop (self , item ):
324326 """
@@ -611,16 +613,16 @@ def append(self, otherFrame):
611613
612614 def asfreq (self , freq , fillMethod = None ):
613615 """
614- Convert all TimeSeries inside to specified frequency using DateOffset
615- objects. Optionally provide fill method to pad/backfill/interpolate
616- missing values.
616+ Convert all TimeSeries inside to specified frequency using
617+ DateOffset objects. Optionally provide fill method to pad or
618+ backfill missing values.
617619
618620 Parameters
619621 ----------
620622 offset : DateOffset object, or string in {'WEEKDAY', 'EOM'}
621623 DateOffset object or subclass (e.g. monthEnd)
622624
623- fillMethod : {'backfill', 'pad', 'interpolate', None}
625+ fillMethod : {'backfill', 'pad', None}
624626 Method to use for filling holes in new inde
625627 """
626628 if isinstance (freq , datetools .DateOffset ):
@@ -886,38 +888,53 @@ def pivot(self, index=None, columns=None, values=None):
886888
887889 return _slow_pivot (self [index ], self [columns ], self [values ])
888890
889- def reindex (self , newIndex , fillMethod = None ):
891+ def reindex (self , index = None , columns = None , fillMethod = None ):
890892 """
891893 Reindex data inside, optionally filling according to some rule.
892894
893895 Parameters
894896 ----------
895- newIndex : array-like
897+ index : array-like, optional
896898 preferably an Index object (to avoid duplicating data)
897- fillMethod : {'backfill', 'pad', 'interpolate', None}
898- Method to use for filling holes in reindexed DataFrame
899+ columns : array-like, optional
900+ fillMethod : {'backfill', 'pad', None}
901+ Method for filling holes created along the new index (not applied to columns)
902+
903+ Returns
904+ -------
905+ y : same type as calling instance
899906 """
900- if self .index .equals (newIndex ):
907+ fillMethod = fillMethod .upper () if fillMethod else ''
908+
909+ if fillMethod not in ['BACKFILL' , 'PAD' , '' ]:
910+ raise Exception ("Don't recognize fillMethod: %s" % fillMethod )
911+
912+ frame = self
913+
914+ if index is not None :
915+ frame = frame ._reindex_index (index , fillMethod )
916+
917+ if columns is not None :
918+ frame = frame ._reindex_columns (columns )
919+
920+ return frame
921+
922+ def _reindex_index (self , index , method ):
923+ if self .index .equals (index ):
901924 return self .copy ()
902925
903- if len (newIndex ) == 0 :
926+ if len (index ) == 0 :
904927 return DataFrame (index = NULL_INDEX )
905928
906- if not isinstance (newIndex , Index ):
907- newIndex = Index (newIndex )
929+ if not isinstance (index , Index ):
930+ index = Index (index )
908931
909932 if len (self .index ) == 0 :
910- return DataFrame (index = newIndex )
933+ return DataFrame (index = index )
911934
912- oldMap = self .index .indexMap
913- newMap = newIndex .indexMap
914-
915- fillMethod = fillMethod .upper () if fillMethod else ''
916- if fillMethod not in ['BACKFILL' , 'PAD' , '' ]:
917- raise Exception ("Don't recognize fillMethod: %s" % fillMethod )
918-
919- fillVec , mask = tseries .getFillVec (self .index , newIndex , oldMap ,
920- newMap , fillMethod )
935+ fillVec , mask = tseries .getFillVec (self .index , index ,
936+ self .index .indexMap ,
937+ index .indexMap , method )
921938
922939 # Maybe this is a bit much? Wish I had unit tests...
923940 typeHierarchy = [
@@ -938,14 +955,26 @@ def reindex(self, newIndex, fillMethod=None):
938955 newSeries = {}
939956 for col , series in self .iteritems ():
940957 series = series .view (np .ndarray )
941- for type , dest in typeHierarchy :
942- if issubclass (series .dtype .type , type ):
958+ for klass , dest in typeHierarchy :
959+ if issubclass (series .dtype .type , klass ):
943960 new = series .take (fillVec ).astype (dest )
944961 new [- mask ] = missingValue [dest ]
945962 newSeries [col ] = new
946963 break
947964
948- return DataFrame (newSeries , index = newIndex )
965+ return DataFrame (newSeries , index = index )
966+
967+ def _reindex_columns (self , columns ):
968+ if len (columns ) == 0 :
969+ return DataFrame (index = self .index )
970+
971+ newFrame = self .filterItems (columns )
972+
973+ for col in columns :
974+ if col not in newFrame :
975+ newFrame [col ] = NaN
976+
977+ return newFrame
949978
950979 @property
951980 def T (self ):
@@ -1000,7 +1029,7 @@ def shift(self, periods, offset=None, timeRule=None):
10001029 for col , series in self .iteritems ()])
10011030 return DataFrame (data = newValues , index = newIndex )
10021031
1003- def apply (self , func ):
1032+ def apply (self , func , axis = 0 ):
10041033 """
10051034 Applies func to columns (Series) of this DataFrame and returns either
10061035 a DataFrame (if the function produces another series) or a Series
@@ -1011,6 +1040,7 @@ def apply(self, func):
10111040 ----------
10121041 func : function
10131042 Function to apply to each column
1043+ axis : {0, 1}
10141044
10151045 Example
10161046 -------
@@ -1019,30 +1049,28 @@ def apply(self, func):
10191049
10201050 Note
10211051 ----
1022- Do NOT use functions that might toy with the index.
1052+ Functions altering the index are not supported (yet)
10231053 """
10241054 if not len (self .cols ()):
10251055 return self
10261056
1027- results = {}
1028- for col , series in self .iteritems ():
1029- result = func (series )
1030- results [col ] = result
1057+ if axis == 0 :
1058+ target = self
1059+ elif axis == 1 :
1060+ target = self .T
1061+
1062+ results = dict ([(k , func (target [k ])) for k in target .columns ])
10311063
10321064 if hasattr (results .values ()[0 ], '__iter__' ):
10331065 return DataFrame (data = results , index = self .index )
10341066 else :
1035- keyArray = np .asarray (sorted (set (results .keys ())), dtype = object )
1036- newIndex = Index (keyArray )
1037-
1038- arr = np .array ([results [idx ] for idx in newIndex ])
1039- return Series (arr , index = newIndex )
1067+ return Series .fromDict (results )
10401068
10411069 def tapply (self , func ):
10421070 """
10431071 Apply func to the transposed DataFrame, results as per apply
10441072 """
1045- return self .T . apply (func )
1073+ return self .apply (func , axis = 1 )
10461074
10471075 def applymap (self , func ):
10481076 """
@@ -1323,8 +1351,8 @@ def plot(self, kind='line', **kwds):
13231351 Plot the DataFrame's series with the index on the x-axis using
13241352 matplotlib / pylab.
13251353
1326- Params
1327- ------
1354+ Parameters
1355+ ----------
13281356 kind : {'line', 'bar', 'hist'}
13291357 Default: line for TimeSeries, hist for Series
13301358
@@ -1414,10 +1442,7 @@ def sum(self, axis=0, asarray=False):
14141442 theCount = self .count (axis )
14151443 theSum [theCount == 0 ] = NaN
14161444 except Exception :
1417- if axis == 0 :
1418- theSum = self .apply (np .sum )
1419- else :
1420- theSum = self .tapply (np .sum )
1445+ theSum = self .apply (np .sum , axis = axis )
14211446
14221447 if asarray :
14231448 return theSum
@@ -1428,6 +1453,27 @@ def sum(self, axis=0, asarray=False):
14281453 else :
14291454 raise Exception ('Must have 0<= axis <= 1' )
14301455
1456+ def cumsum (self , axis = 0 ):
1457+ """
1458+ Return cumulative sum over requested axis as DataFrame
1459+
1460+ Parameters
1461+ ----------
1462+ axis : {0, 1}
1463+ 0 to accumulate down each column, 1 to accumulate along each row
1464+
1465+ Returns
1466+ -------
1467+ y : DataFrame
1468+ """
1469+ def get_cumsum (y ):
1470+ y = np .array (y )
1471+ if not issubclass (y .dtype .type , np .int_ ):
1472+ y [np .isnan (y )] = 0
1473+ return y .cumsum ()
1474+
1475+ return self .apply (get_cumsum , axis = axis )
1476+
14311477 def product (self , axis = 0 , asarray = False ):
14321478 """
14331479 Return array or Series of products over requested axis.
@@ -1664,22 +1710,6 @@ def skew(self, axis=0, asarray=False):
16641710 else :
16651711 raise Exception ('Must have 0<= axis <= 1' )
16661712
1667- def _withColumns (self , newCols ):
1668- """
1669- Utility method, force values matrix to have particular columns
1670- Can make this as cute as we like
1671- """
1672- if len (newCols ) == 0 :
1673- return DataFrame (index = self .index )
1674-
1675- newFrame = self .filterItems (newCols )
1676-
1677- for col in newCols :
1678- if col not in newFrame :
1679- newFrame [col ] = NaN
1680-
1681- return newFrame
1682-
16831713def _pfixed (s , space , nanRep = None ):
16841714 if isinstance (s , float ):
16851715 fstring = '%-' + str (space - 4 ) + 'g'
0 commit comments