Skip to content

Commit 0c1139c

Browse files
committed
optimized combineFrame, added DataFrame.filter function
git-svn-id: http://pandas.googlecode.com/svn/trunk@99 d5231056-7de3-11de-ac95-d976489f1ece
1 parent 623d5c1 commit 0c1139c

File tree

1 file changed

+88
-45
lines changed

1 file changed

+88
-45
lines changed

pandas/core/frame.py

Lines changed: 88 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -394,9 +394,14 @@ def _combineFrame(self, other, func):
394394

395395
if self.index.equals(other.index):
396396
newIndex = self.index
397+
398+
this = self
397399
else:
398400
newIndex = self.index + other.index
399401

402+
this = self.reindex(newIndex)
403+
other = other.reindex(newIndex)
404+
400405
if not self and not other:
401406
return DataFrame(index=newIndex)
402407

@@ -406,10 +411,9 @@ def _combineFrame(self, other, func):
406411
if not self:
407412
return other * NaN
408413

409-
for col, series in self.iteritems():
414+
for col, series in this.iteritems():
410415
if col in other:
411-
newSeries = func(series, other[col])
412-
newColumns[col] = newSeries.reindex(newIndex)
416+
newColumns[col] = func(series, other[col])
413417
else:
414418
newColumns[col] = series.fromValue(np.NaN, index=newIndex)
415419

@@ -432,23 +436,32 @@ def _combineSeries(self, other, func):
432436

433437
if self.index.equals(other.index):
434438
newIndex = self.index
439+
440+
this = self
435441
else:
436442
newIndex = self.index + other.index
437443

438-
other = other.reindex(newIndex)
439-
for col, series in self.iteritems():
440-
newColumns[col] = func(series.reindex(newIndex), other)
444+
this = self.reindex(newIndex)
445+
other = other.reindex(newIndex)
446+
447+
for col, series in this.iteritems():
448+
newColumns[col] = func(series, other)
449+
450+
result = DataFrame(newColumns, index=newIndex)
441451

442452
else:
443-
for col, series in self.iteritems():
444-
if col in other.index:
445-
newColumns[col] = func(series, other[col])
446-
else:
447-
cls = series.__class__
448-
newColumns[col] = cls(np.repeat(NaN, len(self.index)),
449-
index=self.index)
453+
union = other.index.union(self.cols())
454+
intersection = other.index.intersection(self.cols())
450455

451-
return DataFrame(data=newColumns, index=newIndex)
456+
for col in intersection:
457+
newColumns[col] = func(self[col], other[col])
458+
459+
result = DataFrame(newColumns, index=self.index)
460+
461+
for col in (x for x in union if x not in intersection):
462+
result[col] = NaN
463+
464+
return result
452465

453466
def _combineFunc(self, other, func):
454467
"""
@@ -1013,21 +1026,35 @@ def shift(self, periods, offset=None, timeRule=None):
10131026
if timeRule is not None and offset is None:
10141027
offset = datetools.getOffset(timeRule)
10151028

1029+
N = len(self)
1030+
10161031
if offset is None:
1032+
newIndex = self.index
1033+
1034+
indexer = np.zeros(N, dtype=int)
10171035
if periods > 0:
1018-
newIndex = self.index[periods:]
1019-
newValues = dict([(col, np.asarray(series)[:-periods])
1020-
for col, series in self.iteritems()])
1036+
indexer[periods:] = np.arange(N - periods)
1037+
def do_shift(series):
1038+
values = np.asarray(series).take(indexer)
1039+
values[:periods] = NaN
1040+
return values
1041+
10211042
else:
1022-
newIndex = self.index[:periods]
1023-
newValues = dict([(col, np.asarray(series)[-periods:])
1024-
for col, series in self.iteritems()])
1043+
indexer[:periods] = np.arange(-periods, N)
1044+
def do_shift(series):
1045+
values = np.asarray(series).take(indexer)
1046+
values[periods:] = NaN
1047+
return values
1048+
1049+
newValues = dict([(col, do_shift(series))
1050+
for col, series in self.iteritems()])
10251051
else:
10261052
offset = periods * offset
10271053
newIndex = Index([idx + offset for idx in self.index])
10281054
newValues = dict([(col, np.asarray(series))
10291055
for col, series in self.iteritems()])
1030-
return DataFrame(data = newValues, index= newIndex)
1056+
1057+
return DataFrame(data=newValues, index=newIndex)
10311058

10321059
def apply(self, func, axis=0):
10331060
"""
@@ -1094,6 +1121,23 @@ def tgroupby(self, keyfunc, applyfunc):
10941121
"""
10951122
return self.T.groupby(keyfunc).aggregate(applyfunc).T
10961123

1124+
def filter(self, items=None, like=None, regex=None):
1125+
"""
1126+
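TODO: document fully. Keeps the columns named in `items` or, if `items` is not given, the columns whose name contains `like`; `regex` is accepted but not implemented yet.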
1127+
"""
1128+
if items:
1129+
data = dict([(r, self[r]) for r in items if r in self])
1130+
return DataFrame(data=data, index=self.index)
1131+
elif like:
1132+
mycopy = self.copy()
1133+
for col in mycopy._series.keys():
1134+
series = mycopy._series.pop(col)
1135+
if like in col:
1136+
mycopy._series[col] = series
1137+
return mycopy
1138+
elif regex:
1139+
pass
1140+
10971141
def filterItems(self, items):
10981142
"""
10991143
Restrict frame's columns to input set of items.
@@ -1107,8 +1151,23 @@ def filterItems(self, items):
11071151
-------
11081152
DataFrame with filtered columns
11091153
"""
1110-
data = dict([(r, self[r]) for r in items if r in self])
1111-
return DataFrame(data=data, index=self.index)
1154+
return self.filter(items=items)
1155+
1156+
def filterLike(self, arg):
1157+
"""
1158+
Filter to columns partially matching the input argument.
1159+
1160+
Keep columns where "arg in col" is True
1161+
1162+
Parameter
1163+
---------
1164+
arg : string
1165+
1166+
Return
1167+
------
1168+
DataFrame with matching columns
1169+
"""
1170+
return self.filter(like=arg)
11121171

11131172
def sortUp(self, column=None):
11141173
"""
@@ -1137,27 +1196,6 @@ def sortDown(self, column=None):
11371196
newIndex = self.index[idx.astype(int)]
11381197
return self.reindex(newIndex)
11391198

1140-
def filterLike(self, arg):
1141-
"""
1142-
Filter to columns partially matching the import argument.
1143-
1144-
Keep columns where "arg in col == True"
1145-
1146-
Parameter
1147-
---------
1148-
arg : string
1149-
1150-
Return
1151-
------
1152-
DataFrame with matching columns
1153-
"""
1154-
mycopy = self.copy()
1155-
for col in mycopy._series.keys():
1156-
series = mycopy._series.pop(col)
1157-
if arg in col:
1158-
mycopy._series[col] = series
1159-
return mycopy
1160-
11611199
def combineFirst(self, otherFrame):
11621200
"""
11631201
Combine two DataFrame / DataMatrix objects and default to value
@@ -1204,7 +1242,10 @@ def combineFirst(self, otherFrame):
12041242
if col not in self:
12051243
result[col] = series
12061244

1207-
return DataFrame(result, index = unionIndex)
1245+
return DataFrame(result, index=unionIndex)
1246+
1247+
def combine(self, func, fill_value=np.NaN):
1248+
pass
12081249

12091250
def combineAdd(self, otherFrame):
12101251
"""
@@ -1613,7 +1654,9 @@ def mad(self, axis=0, asarray=False):
16131654
demeaned = self-self.mean(axis=axis)
16141655
else:
16151656
demeaned = (self.T-self.mean(axis=axis)).T
1657+
16161658
y = np.array(demeaned.values, subok=True)
1659+
16171660
if not issubclass(y.dtype.type, np.int_):
16181661
y[np.isnan(y)] = 0
16191662

0 commit comments

Comments
 (0)