@@ -394,9 +394,14 @@ def _combineFrame(self, other, func):
394394
395395 if self .index .equals (other .index ):
396396 newIndex = self .index
397+
398+ this = self
397399 else :
398400 newIndex = self .index + other .index
399401
402+ this = self .reindex (newIndex )
403+ other = other .reindex (newIndex )
404+
400405 if not self and not other :
401406 return DataFrame (index = newIndex )
402407
@@ -406,10 +411,9 @@ def _combineFrame(self, other, func):
406411 if not self :
407412 return other * NaN
408413
409- for col , series in self .iteritems ():
414+ for col , series in this .iteritems ():
410415 if col in other :
411- newSeries = func (series , other [col ])
412- newColumns [col ] = newSeries .reindex (newIndex )
416+ newColumns [col ] = func (series , other [col ])
413417 else :
414418 newColumns [col ] = series .fromValue (np .NaN , index = newIndex )
415419
@@ -432,23 +436,32 @@ def _combineSeries(self, other, func):
432436
433437 if self .index .equals (other .index ):
434438 newIndex = self .index
439+
440+ this = self
435441 else :
436442 newIndex = self .index + other .index
437443
438- other = other .reindex (newIndex )
439- for col , series in self .iteritems ():
440- newColumns [col ] = func (series .reindex (newIndex ), other )
444+ this = self .reindex (newIndex )
445+ other = other .reindex (newIndex )
446+
447+ for col , series in this .iteritems ():
448+ newColumns [col ] = func (series , other )
449+
450+ result = DataFrame (newColumns , index = newIndex )
441451
442452 else :
443- for col , series in self .iteritems ():
444- if col in other .index :
445- newColumns [col ] = func (series , other [col ])
446- else :
447- cls = series .__class__
448- newColumns [col ] = cls (np .repeat (NaN , len (self .index )),
449- index = self .index )
453+ union = other .index .union (self .cols ())
454+ intersection = other .index .intersection (self .cols ())
450455
451- return DataFrame (data = newColumns , index = newIndex )
456+ for col in intersection :
457+ newColumns [col ] = func (self [col ], other [col ])
458+
459+ result = DataFrame (newColumns , index = self .index )
460+
461+ for col in (x for x in union if x not in intersection ):
462+ result [col ] = NaN
463+
464+ return result
452465
453466 def _combineFunc (self , other , func ):
454467 """
@@ -1013,21 +1026,35 @@ def shift(self, periods, offset=None, timeRule=None):
10131026 if timeRule is not None and offset is None :
10141027 offset = datetools .getOffset (timeRule )
10151028
1029+ N = len (self )
1030+
10161031 if offset is None :
1032+ newIndex = self .index
1033+
1034+ indexer = np .zeros (N , dtype = int )
10171035 if periods > 0 :
1018- newIndex = self .index [periods :]
1019- newValues = dict ([(col , np .asarray (series )[:- periods ])
1020- for col , series in self .iteritems ()])
1036+ indexer [periods :] = np .arange (N - periods )
1037+ def do_shift (series ):
1038+ values = np .asarray (series ).take (indexer )
1039+ values [:periods ] = NaN
1040+ return values
1041+
10211042 else :
1022- newIndex = self .index [:periods ]
1023- newValues = dict ([(col , np .asarray (series )[- periods :])
1024- for col , series in self .iteritems ()])
1043+ indexer [:periods ] = np .arange (- periods , N )
1044+ def do_shift (series ):
1045+ values = np .asarray (series ).take (indexer )
1046+ values [periods :] = NaN
1047+ return values
1048+
1049+ newValues = dict ([(col , do_shift (series ))
1050+ for col , series in self .iteritems ()])
10251051 else :
10261052 offset = periods * offset
10271053 newIndex = Index ([idx + offset for idx in self .index ])
10281054 newValues = dict ([(col , np .asarray (series ))
10291055 for col , series in self .iteritems ()])
1030- return DataFrame (data = newValues , index = newIndex )
1056+
1057+ return DataFrame (data = newValues , index = newIndex )
10311058
10321059 def apply (self , func , axis = 0 ):
10331060 """
@@ -1094,6 +1121,23 @@ def tgroupby(self, keyfunc, applyfunc):
10941121 """
10951122 return self .T .groupby (keyfunc ).aggregate (applyfunc ).T
10961123
def filter(self, items=None, like=None, regex=None):
    """
    Restrict the frame's columns using one selection criterion.

    The criteria are examined in the order ``items``, ``like``, ``regex``
    and the first one that is not None is applied; the others are ignored.

    Parameters
    ----------
    items : iterable of column labels, optional
        Keep the listed columns that are present in this frame. Labels
        that are not in the frame are silently dropped. An empty iterable
        yields a frame with no columns.
    like : string, optional
        Keep columns where ``like in col`` is true.
    regex : string, optional
        Keep columns where ``re.search(regex, col)`` finds a match.

    Returns
    -------
    DataFrame (``items``) or a copy of this object (``like`` / ``regex``)
    holding only the selected columns.

    Raises
    ------
    TypeError
        If none of ``items``, ``like`` or ``regex`` is supplied.

    Notes
    -----
    ``like`` and ``regex`` assume column labels are strings -- TODO confirm
    against callers that use non-string labels.
    """
    # Compare against None rather than truthiness so that an empty ``items``
    # list (or an empty ``like`` string) is honoured instead of falling
    # through and returning None.
    if items is not None:
        data = dict((col, self[col]) for col in items if col in self)
        return DataFrame(data=data, index=self.index)

    if like is not None:
        def keep(col):
            return like in col
    elif regex is not None:
        import re
        matcher = re.compile(regex)

        def keep(col):
            return matcher.search(col) is not None
    else:
        raise TypeError('Must pass one of items, like, or regex')

    result = self.copy()
    # Snapshot the keys first: columns are removed while walking them.
    for col in list(result._series.keys()):
        if not keep(col):
            result._series.pop(col)
    return result
1140+
10971141 def filterItems (self , items ):
10981142 """
10991143 Restrict frame's columns to input set of items.
@@ -1107,8 +1151,23 @@ def filterItems(self, items):
11071151 -------
11081152 DataFrame with filtered columns
11091153 """
1110- data = dict ([(r , self [r ]) for r in items if r in self ])
1111- return DataFrame (data = data , index = self .index )
1154+ return self .filter (items = items )
1155+
def filterLike(self, arg):
    """
    Filter to columns partially matching the input argument.

    Thin convenience wrapper: the work is delegated to
    ``self.filter(like=arg)``.

    Parameter
    ---------
    arg : string
        Passed through unchanged as the ``like`` criterion.

    Return
    ------
    Whatever ``self.filter(like=arg)`` returns
    """
    matching = self.filter(like=arg)
    return matching
11121171
11131172 def sortUp (self , column = None ):
11141173 """
@@ -1137,27 +1196,6 @@ def sortDown(self, column=None):
11371196 newIndex = self .index [idx .astype (int )]
11381197 return self .reindex (newIndex )
11391198
1140- def filterLike (self , arg ):
1141- """
1142- Filter to columns partially matching the import argument.
1143-
1144- Keep columns where "arg in col == True"
1145-
1146- Parameter
1147- ---------
1148- arg : string
1149-
1150- Return
1151- ------
1152- DataFrame with matching columns
1153- """
1154- mycopy = self .copy ()
1155- for col in mycopy ._series .keys ():
1156- series = mycopy ._series .pop (col )
1157- if arg in col :
1158- mycopy ._series [col ] = series
1159- return mycopy
1160-
11611199 def combineFirst (self , otherFrame ):
11621200 """
11631201 Combine two DataFrame / DataMatrix objects and default to value
@@ -1204,7 +1242,10 @@ def combineFirst(self, otherFrame):
12041242 if col not in self :
12051243 result [col ] = series
12061244
1207- return DataFrame (result , index = unionIndex )
1245+ return DataFrame (result , index = unionIndex )
1246+
def combine(self, func, fill_value=np.NaN):
    """
    Placeholder for a generic combine operation; not implemented yet.

    Both ``func`` and ``fill_value`` are accepted but currently unused,
    and the method always returns None.
    """
    return None
12081249
12091250 def combineAdd (self , otherFrame ):
12101251 """
@@ -1613,7 +1654,9 @@ def mad(self, axis=0, asarray=False):
16131654 demeaned = self - self .mean (axis = axis )
16141655 else :
16151656 demeaned = (self .T - self .mean (axis = axis )).T
1657+
16161658 y = np .array (demeaned .values , subok = True )
1659+
16171660 if not issubclass (y .dtype .type , np .int_ ):
16181661 y [np .isnan (y )] = 0
16191662
0 commit comments