Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ Bug Fixes
- Fixed missing arg validation in get_options_data (:issue:`6105`)
- Bug in assignment with duplicate columns in a frame where the locations
are a slice (e.g. next to each other) (:issue:`6120`)
- Bug in propagating _ref_locs during construction of a DataFrame with dups
index/columns (:issue:`6121`)

pandas 0.13.0
-------------
Expand Down
54 changes: 37 additions & 17 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,25 @@ def ref_locs(self):
self._ref_locs = indexer
return self._ref_locs

def take_ref_locs(self, indexer):
    """
    Return the ref_locs of the items selected by ``indexer``, shifted
    down by the number of non-selected positions that precede each one.

    The block's own ``_ref_locs`` is left untouched.

    Parameters
    ----------
    indexer : slice or array-like
        anything usable to index a 1-d numpy array of length
        ``len(self._ref_locs)``

    Returns
    -------
    ndarray of the shifted ref_locs, or None if ref_locs is None

    see GH6059
    """

    ref_locs = self._ref_locs
    if ref_locs is None:
        return None

    # flag every position that is NOT taken with 1 (taken positions get 0);
    # the running total at a taken position is then the count of dropped
    # positions in front of it, i.e. how far that entry must be shifted
    dropped = np.ones(len(ref_locs), dtype=bool)
    dropped[indexer] = False
    shift = dropped.astype(int).cumsum()[indexer]

    # ref_locs[indexer] is a *view* when indexer is a slice, so an in-place
    # subtraction on it would silently modify self._ref_locs; work on a copy
    new_locs = ref_locs[indexer].copy()
    new_locs -= shift
    return new_locs

def reset_ref_locs(self):
""" reset the block ref_locs """
self._ref_locs = np.empty(len(self.items), dtype='int64')
Expand Down Expand Up @@ -866,13 +885,20 @@ def func(x):
ndim=self.ndim, klass=self.__class__, fastpath=True)]
return self._maybe_downcast(blocks, downcast)

def take(self, indexer, ref_items, axis=1):
def take(self, indexer, ref_items, new_axis, axis=1):
if axis < 1:
raise AssertionError('axis must be at least 1, got %d' % axis)
new_values = com.take_nd(self.values, indexer, axis=axis,
allow_fill=False)

# need to preserve the ref_locs and just shift them
# GH6121
ref_locs = None
if not new_axis.is_unique:
ref_locs = self._ref_locs

return [make_block(new_values, self.items, ref_items, ndim=self.ndim,
klass=self.__class__, fastpath=True)]
klass=self.__class__, placement=ref_locs, fastpath=True)]

def get_values(self, dtype=None):
return self.values
Expand Down Expand Up @@ -1820,7 +1846,7 @@ def shift(self, indexer, periods, axis=0):
new_values[periods:] = fill_value
return [self.make_block(new_values)]

def take(self, indexer, ref_items, axis=1):
def take(self, indexer, ref_items, new_axis, axis=1):
""" going to take our items
along the long dimension"""
if axis < 1:
Expand Down Expand Up @@ -2601,18 +2627,7 @@ def get_slice(self, slobj, axis=0, raise_on_error=False):
if len(self.blocks) == 1:

blk = self.blocks[0]

# see GH 6059
ref_locs = blk._ref_locs
if ref_locs is not None:

# need to preserve the ref_locs and just shift them
indexer = np.ones(len(ref_locs),dtype=bool)
indexer[slobj] = False
indexer = indexer.astype(int).cumsum()[slobj]
ref_locs = ref_locs[slobj]
ref_locs -= indexer

ref_locs = blk.take_ref_locs(slobj)
newb = make_block(blk._slice(slobj), new_items, new_items,
klass=blk.__class__, fastpath=True,
placement=ref_locs)
Expand Down Expand Up @@ -3371,6 +3386,7 @@ def take(self, indexer, new_index=None, axis=1, verify=True):
if axis < 1:
raise AssertionError('axis must be at least 1, got %d' % axis)

self._consolidate_inplace()
if isinstance(indexer, list):
indexer = np.array(indexer)

Expand All @@ -3388,8 +3404,12 @@ def take(self, indexer, new_index=None, axis=1, verify=True):
new_index = self.axes[axis].take(indexer)

new_axes[axis] = new_index
return self.apply('take', axes=new_axes, indexer=indexer,
ref_items=new_axes[0], axis=axis)
return self.apply('take',
axes=new_axes,
indexer=indexer,
ref_items=new_axes[0],
new_axis=new_axes[axis],
axis=axis)

def merge(self, other, lsuffix=None, rsuffix=None):
if not self._is_indexed_like(other):
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3303,6 +3303,21 @@ def check(result, expected=None):
result = dfbool[['one', 'three', 'one']]
check(result,expected)

# multi-axis dups
# GH 6121
df = DataFrame(np.arange(25.).reshape(5,5),
index=['a', 'b', 'c', 'd', 'e'],
columns=['A', 'B', 'C', 'D', 'E'])
z = df[['A', 'C', 'A']].copy()
expected = z.ix[['a', 'c', 'a']]

df = DataFrame(np.arange(25.).reshape(5,5),
index=['a', 'b', 'c', 'd', 'e'],
columns=['A', 'B', 'C', 'D', 'E'])
z = df[['A', 'C', 'A']]
result = z.ix[['a', 'c', 'a']]
check(result,expected)

def test_insert_benchmark(self):
# from the vb_suite/frame_methods/frame_insert_columns
N = 10
Expand Down